Problem description: I tried to scrape a website with Selenium, but I was not successful because I was not able to detect the web elements. I need to collect product information from 6 to 10 different websites daily. This is the website: https://thriveil.com/casey-rec-menu/?dtche%5Bpath%5D=products (one thing worth checking is sketched after the code snippet below).
Code snippet:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.edge.options import Options
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from bs4 import BeautifulSoup
import pandas as pd
import os  # To check whether the output CSV already exists
import time  # To manage delays
# Set up Selenium WebDriver (using Microsoft Edge)
edge_options = Options()
# Initialize WebDriver
service = Service("C:\\Users\\iyush\\Documents\\VS Code\\Selenium\\msedgedriver.exe")
driver = webdriver.Edge(service=service, options=edge_options)
# URL of the website to scrape
url = "https://thriveil.com/casey-rec-menu/?dtche%5Bpath%5D=products"
driver.get(url)
# WebDriver Wait
wait = WebDriverWait(driver, 30)
# Handle ad close button
try:
    close_button = wait.until(EC.element_to_be_clickable((By.CLASS_NAME, "terpli-close")))
    close_button.click()
except TimeoutException:
    print("Ad close button not found. Continuing...")
# Handle cookie consent
try:
    accept_button = wait.until(EC.element_to_be_clickable((By.ID, "wt-cli-accept-all-btn")))
    accept_button.click()
except TimeoutException:
    print("Cookie consent button not found. Continuing...")
# Initialize storage
product_list = []
current_page = 1
# Lists to store scraped data
names = []
brand_names = []
brand_links = []
strains = []
potencys = []
prices = []
effects = []
while True:  # Loop through pages
    products = []
    # Get all product elements
    product_elements = driver.find_elements(By.CLASS_NAME, "full-card_Wrapper-sc-11z5u35-0")
    for product in product_elements:
        try:
            product_url = product.find_element(By.TAG_NAME, "a").get_attribute("href")
            products.append(product_url)
        except Exception as e:
            print(f"Error extracting product link: {e}")
            continue
    # Open each product in a new tab, scrape details, then close tab
    for product_url in products:
        driver.execute_script(f"window.open('{product_url}', '_blank');")
        driver.switch_to.window(driver.window_handles[-1])  # Switch to new tab
        try:
            wait.until(EC.presence_of_element_located((By.TAG_NAME, "h1")))  # Ensure page loads
            product_soup = BeautifulSoup(driver.page_source, 'html.parser')
            # Scrape the product data
            name = driver.find_element(By.CSS_SELECTOR, "h1[data-testid='product-name']")
            names.append(name.text.strip())
            brand_element = driver.find_element(By.CSS_SELECTOR, "a[href]")
            brand = driver.find_element(By.CSS_SELECTOR, ".typography__Brand-sc-1q7gvs8-2.fyoohd")
            brand_names.append(brand.text.strip())
            brand_links.append(brand_element.get_attribute('href'))
            strain = driver.find_element(By.CSS_SELECTOR, "span[data-testid='info-chip']")
            strains.append(strain.text.strip())
            potencies = driver.find_elements(By.CSS_SELECTOR, "span.info-chip__InfoChipText-sc-11n9ujc-0")
            # Extract the chip text and remove anything before ':'
            potency_values = [p.text.split(":")[-1].strip() for p in potencies]
            # Keep only the second chip value (assumed to hold the potency)
            potency_text = potency_values[1]
            potencys.append(potency_text)
            price = driver.find_element(By.CSS_SELECTOR, "div.price__PriceText-sc-diymzm-2")
            prices.append(price.text.strip())
            effect_elements = driver.find_elements(By.CSS_SELECTOR, "span.effect-tile__Text-sc-1as4rkm-1")
            effects.append(", ".join([e.text.strip() for e in effect_elements]))
            # Collect this product's values in a single dict (one row per product)
            product_data = {
                "name": names[-1],
                "brand_name": brand_names[-1],
                "brand_link": brand_links[-1],
                "strain": strains[-1],
                "potency": potencys[-1],
                "price": prices[-1],
                "effects": effects[-1],
            }
            product_list.append(product_data)
        except Exception as e:
            print(f"Error scraping product details: {e}")
        driver.close()
        driver.switch_to.window(driver.window_handles[0])  # Switch back to main tab
    print(f"Page {current_page} scraped successfully.")
    # Click the next page button if available
    try:
        next_button = driver.find_element(By.CSS_SELECTOR, 'button[aria-label*="next page"]')
        next_button.click()
        current_page += 1
        time.sleep(5)  # Allow the next page to load
    except NoSuchElementException:
        print("No more pages found. Exiting loop.")
        break
# Check if data was scraped
if product_list:
    # Build a DataFrame with one row per product
    df = pd.DataFrame(product_list)
    # Save DataFrame to CSV (append mode, writing the header only if the file does not exist yet)
    df.to_csv("thriveil_products.csv", mode='a', header=not os.path.exists("thriveil_products.csv"), index=False)
    print("Scraping completed. Data saved to 'thriveil_products.csv'.")
else:
    print("No data to save.")
# Save DataFrame to Excel
#df = pd.DataFrame(product_list)
#df.to_excel("thriveil_products.xlsx", index=False)
#print("Scraping completed. Data saved to 'thriveil_products.xlsx'.")
driver.quit()
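A note on the original problem of not being able to detect the web elements: third-party menu widgets like this one are often rendered inside an iframe, and locators run against the top-level document cannot see anything inside a frame until the driver switches into it. Whether that applies to this page is an assumption, not something confirmed here; the sketch below only shows how one might check. find_product_cards is a hypothetical helper that reuses the card class name from the script above.

# Hedged diagnostic sketch (assumption: the product grid may live inside an iframe).
from selenium.webdriver.common.by import By

CARD_CLASS = "full-card_Wrapper-sc-11z5u35-0"  # same class name as in the script above

def find_product_cards(driver):
    """Return product card elements, switching into an iframe if that is where they live."""
    # First try the top-level document.
    cards = driver.find_elements(By.CLASS_NAME, CARD_CLASS)
    if cards:
        return cards
    # Otherwise look inside each iframe on the page.
    for frame in driver.find_elements(By.TAG_NAME, "iframe"):
        driver.switch_to.frame(frame)
        cards = driver.find_elements(By.CLASS_NAME, CARD_CLASS)
        if cards:
            return cards  # The driver stays switched into this frame for further lookups.
        driver.switch_to.default_content()
    return []

If the cards do turn out to be inside a frame, the rest of the script would need to run its locators after switching into that frame, and switch back with driver.switch_to.default_content() before handling pagination.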
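Separately, the fixed time.sleep(5) after clicking the next-page button can be replaced by an explicit wait, in the same spirit as the WebDriverWait calls already used above. A minimal sketch, assuming the page replaces the card elements when it paginates and using the same card class name; go_to_next_page is a hypothetical helper, not part of the original script.

# Wait for the product grid to refresh instead of sleeping a fixed 5 seconds.
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

CARD_CLASS = "full-card_Wrapper-sc-11z5u35-0"

def go_to_next_page(driver, timeout=30):
    """Click the 'next page' button and block until the product grid has re-rendered."""
    wait = WebDriverWait(driver, timeout)
    old_cards = driver.find_elements(By.CLASS_NAME, CARD_CLASS)
    next_button = driver.find_element(By.CSS_SELECTOR, 'button[aria-label*="next page"]')
    next_button.click()
    if old_cards:
        # The first card of the previous page should be detached from the DOM...
        wait.until(EC.staleness_of(old_cards[0]))
    # ...and at least one card of the new page should be present.
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, CARD_CLASS)))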