Code: Select all
import os
import re
from pathlib import Path
import pathlib
import glob
import gspread
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
import pandas as pd
import numpy as np
## authorization
gauth = GoogleAuth()
gauth.LocalWebserverAuth()
drive = GoogleDrive(gauth)

# Column order of the exported table; matches the keys of every picture record
# built below, so it no longer has to be recovered from the data itself.
PIC_COLUMNS = ['title', 'id', 'row_idx', 'rear', 'pic_idx', 'url_str']


def _format_row_title(row_title):
    """Return the short key used for a row folder.

    A two-word title maps to its last word and a three-word title maps to its
    second word followed by 'R'. Any other title is returned unchanged.
    """
    words = row_title.split()
    # Decide on the word count exactly once. Re-testing the length after the
    # first branch would measure the resulting string instead of the word
    # list and mangle two-word titles whose last word has three characters.
    if len(words) == 2:
        return words[-1]
    if len(words) == 3:
        return f'{words[1]}R'
    return row_title


def _parse_picture_title(pic_title):
    """Split a dotted picture name into (formatted_title, row_idx, rear, pic_idx).

    Names with four dot-separated parts are treated as front pictures
    (``rear`` is False); names with five parts carry an extra rear marker,
    which is returned as ``rear``. Returns None for any other shape.
    """
    parts = pic_title.split('.')
    if len(parts) == 4:
        _, row_idx, pic_idx, _ = parts
        rear = False
        side = ''
    elif len(parts) == 5:
        _, row_idx, rear, pic_idx, _ = parts
        side = 'R'
    else:
        return None
    # Pad single-character indices to two characters.
    if len(pic_idx) == 1:
        pic_idx = f'0{pic_idx}'
    return f'{row_idx}{side}_{pic_idx}', row_idx, rear, pic_idx


# list containing all folders in drive
folder_list = drive.ListFile({'q': "mimeType = 'application/vnd.google-apps.folder'"}).GetList()
# find PICTURES id
PICTURES = [file for file in folder_list if file['title'] == 'PICTURES']
# Raises IndexError when no folder titled 'PICTURES' exists.
PICTURES = PICTURES[0]
# NOTE(review): the folder looked up above is not used; the id below is
# hard-coded. Presumably it should be PICTURES['id'] -- confirm before changing.
pics_id = '1dM4X2VCSBEB-yjqnAPBkmLoXdrMwBF9j'
rows = drive.ListFile({'q': f"'{pics_id}' in parents"}).GetList()

# Maps formatted row title -> {formatted picture title -> picture record}.
row_data = {}
for row in rows:
    row_id = row['id']  # avoid shadowing the builtin ``id``
    row_title_formatted = _format_row_title(row['title'])
    pictures = drive.ListFile({'q': f"'{row_id}' in parents"}).GetList()
    pics_dict = {}
    for p in pictures:
        parsed = _parse_picture_title(p['title'])
        if parsed is None:
            # Skip names that do not match either pattern instead of silently
            # reusing the values parsed from the previous picture.
            print(f"skipping unrecognised picture title: {p['title']}")
            continue
        formatted_title, row_idx, rear, pic_idx = parsed
        format_url = f"https://drive.google.com/uc?export=view&id={p['id']}"
        p_dict = {
            'title': formatted_title,
            'id': p['id'],
            'row_idx': row_idx,
            'rear': rear,
            'pic_idx': pic_idx,
            'url_str': f'=IMAGE("{format_url}", 4, 300, 400)',
        }
        print(p_dict['url_str'])
        pics_dict[formatted_title] = p_dict
    row_data[row_title_formatted] = pics_dict

# One frame per row folder; an empty folder yields an empty frame with the
# same columns rather than failing on an undefined column list.
dfs = [
    pd.DataFrame.from_dict(pics, orient='index', dtype=str, columns=PIC_COLUMNS)
    for pics in row_data.values()
]
# pd.concat raises ValueError on an empty list, so fall back to an empty table.
if dfs:
    combined_dfs = pd.concat(dfs, ignore_index=True)
else:
    combined_dfs = pd.DataFrame(columns=PIC_COLUMNS)
# NOTE(review): every 'url_str' value starts with '='. The Excel writer
# presumably stores such strings as formulas rather than text, in which case
# a later pd.read_excel finds no cached value for those cells -- confirm
# against the writer engine in use.
combined_dfs.to_excel('pic_data.xlsx')
print(row_data)
print(dfs)
print()

Wenn ich die Tabelle jedoch anschließend erneut lade mit:
Code: Select all
# Reload the exported workbook, requesting str for every column and
# disabling pandas' default NaN markers.
read_options = {'dtype': str, 'keep_default_na': False}
df = pd.read_excel('pic_data.xlsx', **read_options)

Es scheint, dass die Spalte „url_str“ weiterhin als Float gespeichert wird, obwohl ich sie als Zeichenfolge festgelegt habe. Hat jemand eine Idee, wo ich den Fehler mache?
Mobile version