Wie kann ich mit Selenium und Python Links aus den Ergebnissen der Google-Bildersuche scrapen? – Python

Python-Programme
Anonymous
 Wie kann ich mit Selenium und Python Links aus den Ergebnissen der Google-Bildersuche scrapen?

Post by Anonymous »

Ich arbeite an einem Projekt und muss dafür die Links aus den Ergebnissen der Google-Bildersuche abrufen.

Code: Select all

from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
from tqdm import tqdm
from modules.download_file import download_file
from modules.lsDir import lsDir
import time
from colorama import Fore, Style, init
import os
import base64
from urllib.parse import urlparse

def google_images(driver, name, intAmount):
    """Collect thumbnails and result links from a Google Images search.

    Removes every entry that ``lsDir`` reports for ``gg_downloads``, opens
    the Google image search for ``name``, scrolls the page ``intAmount``
    times, gathers the ``src`` of the thumbnail ``<img>`` elements and the
    ``href`` of the result links, prints the links, and finally stores
    every second thumbnail in ``gg_downloads``.

    Args:
        driver: Selenium WebDriver used for navigation and element lookup.
        name: Search term. It is URL-encoded before being placed in the
            query string, so spaces, ``&`` or ``#`` cannot break the URL.
        intAmount: Number of scroll steps (one second pause after each).

    Returns:
        None. The gathered links are only printed.
    """
    from urllib.parse import quote_plus

    # Make sure the target directory exists before listing/writing into it.
    os.makedirs("gg_downloads", exist_ok=True)
    for x in lsDir("gg_downloads"):
        os.remove(os.path.join("gg_downloads", x))

    print("[*] Opening Google...")
    driver.get(
        f"https://www.google.com/search?sclient=img&udm=2&q={quote_plus(name)}"
    )
    time.sleep(3)

    print("[*] Scrolling down...")
    for _ in tqdm(range(intAmount)):
        ActionChains(driver).scroll_by_amount(0, 10000).perform()
        time.sleep(1)

    print("[*] Gathering profile photos...")
    image_elements = driver.find_elements(By.CSS_SELECTOR, "div.eA0Zlc img")
    images = [
        profile.get_attribute("src")
        for profile in tqdm(image_elements, desc="Gathering Photos")
    ]
    # Only every second matched <img> is kept.
    images = images[::2]

    print("[*] Gathering links...")
    # NOTE(review): this absolute XPath depends on one exact page layout.
    # If the live DOM differs it presumably matches nothing, or matches
    # elements without an href (get_attribute then returns None) - verify
    # against the rendered page and prefer a relative selector.
    links = driver.find_elements(By.XPATH, "/html/body/div[3]/div/div[14]/div/div[2]/div[2]/div/div/div/div/div[1]/div/div/div/div[2]/h3/a")
    image_links = [
        link.get_attribute("href")
        for link in tqdm(links, desc="Gathering Links")
    ]
    print(image_links)

    print("[*] Downloading profile photos...")
    for i, image_url in tqdm(enumerate(images), desc="Downloading Photos", total=len(images)):
        if not image_url:
            # An <img> without a src yields None; report it explicitly
            # instead of relying on an AttributeError from startswith().
            print(f"Error downloading image {i}: missing src attribute")
            continue
        try:
            if image_url.startswith('data:'):
                # Inline data URL: "data:<mime>;base64,<payload>".
                header, encoded = image_url.split(",", 1)
                content_type = header.split(":")[1].split(";")[0]
                file_extension = content_type.split("/")[-1]

                # Decode the base64 payload and write it out as-is.
                image_data = base64.b64decode(encoded)
                with open(f"gg_downloads/{i}.{file_extension}", "wb") as f:
                    f.write(image_data)
            else:
                # Regular URL: derive the extension from the URL path.
                parsed_url = urlparse(image_url)
                file_extension = os.path.splitext(parsed_url.path)[1]
                if not file_extension:
                    file_extension = '.jpg'  # Default to .jpg if no extension found

                download_file(image_url, f"gg_downloads/{i}{file_extension}")
        except Exception as e:
            # Best effort: one failing image must not abort the whole run.
            print(f"Error downloading image {i}: {str(e)}")

if __name__ == "__main__":
    # Script entry point: start Firefox, run the scraper once, and always
    # shut the browser down afterwards.
    # Make sure you use pip to install stuff.
    from selenium import webdriver
    from selenium.webdriver.firefox.options import Options
    from selenium.webdriver.firefox.service import Service
    from webdriver_manager.firefox import GeckoDriverManager
    #...
    # NOTE(review): `settings` is not defined in the visible code; it
    # presumably comes from the part elided by "#..." above - confirm.
    options = Options()
    if settings.headless:
        options.add_argument('-headless')
    service = Service(GeckoDriverManager().install())
    driver = webdriver.Firefox(options=options, service=service)
    try:
        google_images(driver, "John Smith", 10)
    finally:
        # Close the browser session even if scraping raises, so no Firefox
        # or geckodriver process is left running.
        driver.quit()
Beim Ausgeben von image_links enthält das Array nur None-Werte. Ich habe den VS-Code-Debugger und auch CSS-Selektoren ausprobiert, aber es funktioniert immer noch nicht. Ich verwende Firefox als Treiber, und hier ist der Code, mit dem der Treiber initialisiert wird: </p>
# Imports
from selenium import webdriver
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from webdriver_manager.firefox import GeckoDriverManager
#...
# Firefox options; headless mode is switched on only when settings ask for it.
options = Options()
if settings.headless:
    options.add_argument('-headless')
# Let webdriver_manager install geckodriver and hand the result to Service.
installed_driver = GeckoDriverManager().install()
service = Service(installed_driver)
driver = webdriver.Firefox(service=service, options=options)
</code>
Über Hilfe würde ich mich sehr freuen. Danke!

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post