Ich versuche, Bewertungen von Glassdoor zu scrapen. Die Textbewertungen (Pros/Cons) kann ich bereits auslesen, aber ich habe Probleme, die Empfehlung (Ja/Nein) zu erfassen. Hier ist der Screenshot von dem, woran ich arbeite: Das „Ja“-Icon hat im d-Attribut den Wert 8.835, das „Nein“-Icon 18.299. Der restliche Code funktioniert – nur Teil #3 (das Scrapen der Empfehlung) nicht.
print(svg_elements)
</code>
Es wird eine leere Liste angezeigt. Unten ist mein aktueller Code; meine ID und mein Passwort habe ich entfernt. Vielen Dank im Voraus für Ihre Hilfe.
import csv
import time
from seleniumbase import SB
from selenium.webdriver.common.by import By
def scrape_stackoverflow_cloudflare_and_save_csv(csv_filename="cloudflare_questions.csv"):
    """
    Scrape Glassdoor review pages 1-5 for one employer (pros, cons, and the
    Yes/No recommendation icon) and save the collected rows to a CSV file.

    Args:
        csv_filename: Path of the output CSV file (overwritten if it exists).

    Side effects:
        Opens a Chrome session via SeleniumBase, performs a login on page 2,
        and writes ``csv_filename``. Prints progress/diagnostics to stdout.
    """
    try:
        with SB(uc=True) as sb:
            base_url = "https://www.glassdoor.com"
            start_url = "https://www.glassdoor.com/Reviews/Amazon-Reviews-E6036.htm"
            all_pros = []
            all_cons = []
            all_recommendations = []
            # Loop through pages 1 to 5.
            for page_num in range(1, 6):
                print(f"Scraping page {page_num}...")
                if page_num == 1:
                    # First page goes through the UC reconnect flow to get
                    # past Cloudflare's bot check.
                    sb.uc_open_with_reconnect(start_url, 6)
                else:
                    next_page_link = f"/Reviews/Amazon-Reviews-E6036_P{page_num}.htm"
                    sb.open(base_url + next_page_link)
                if page_num == 2:
                    # Glassdoor shows a login wall after the first page;
                    # fill in credentials once here.
                    email_input = sb.find_element('input[data-test="emailInput-input"]')
                    email_input.send_keys("my id")
                    sb.sleep(2)  # Wait for the email to be entered
                    continue_button = sb.find_element('button[data-test="email-form-button"]')
                    continue_button.click()
                    sb.sleep(2)  # Wait for the next page to load
                    password_input = sb.find_element('input[data-test="passwordInput-input"]')
                    password_input.send_keys("my password")
                    sb.sleep(2)  # Wait for the password to be entered
                    sign_in_button = sb.find_element('button[data-role-variant="primary"][type="submit"]')
                    sign_in_button.click()
                    sb.sleep(2)  # Wait for the sign-in process to complete
                sb.uc_gui_click_captcha()
                sb.sleep(4)  # Wait for the page to load
                # 1. Scrape PROS
                pros_elements = sb.find_elements('span[data-test="review-text-PROS"]')
                pros_texts = [elem.text.strip() for elem in pros_elements if elem.text.strip()]
                # 2. Scrape CONS
                cons_elements = sb.find_elements('span[data-test="review-text-CONS"]')
                cons_texts = [elem.text.strip() for elem in cons_elements if elem.text.strip()]
                # 3. Scrape Recommendations (Yes/No)
                # BUG FIX: the original XPath ('//div[...]/svg/path') always
                # returned an empty list because SVG elements belong to the SVG
                # XML namespace, which a plain 'svg' step in browser XPath does
                # not match (it would need '*[local-name()="svg"]'). A CSS
                # selector — SeleniumBase's default locator type — matches SVG
                # tags without any namespace handling.
                svg_elements = sb.find_elements('div[id*="empReview"] svg path')
                recommendations = []
                for svg in svg_elements:
                    d_attribute = svg.get_attribute('d')
                    if d_attribute:
                        if '8.835 17.64' in d_attribute:  # Unique part of the "Yes" SVG
                            recommendations.append('Yes')
                        elif '18.299 5.327' in d_attribute:  # Unique part of the "No" SVG
                            recommendations.append('No')
                        # NOTE(review): paths matching neither pattern (other
                        # icons inside a review card) are skipped, so a review
                        # without a recognizable icon can shift rows relative
                        # to pros/cons in the final zip — TODO confirm one
                        # matching path per review on the live page.
                # Collect data from this page.
                all_pros.extend(pros_texts)
                all_cons.extend(cons_texts)
                all_recommendations.extend(recommendations)
                # Debug: print collected counts for this page.
                print(f"Page {page_num} - Pros: {len(pros_texts)}, Cons: {len(cons_texts)}, Recommendations: {len(recommendations)}")
            # Save all collected data to CSV. zip() truncates to the shortest
            # list, so rows stay aligned only if all three lists match length.
            print("Saving data to CSV...")
            with open(csv_filename, mode="w", newline="", encoding="utf-8") as f:
                writer = csv.writer(f)
                writer.writerow(["pros_text", "cons_text", "recommendation"])
                for pros, cons, rec in zip(all_pros, all_cons, all_recommendations):
                    writer.writerow([pros, cons, rec])
            print("Scraping completed successfully!")
    except Exception as e:
        # Top-level boundary: report and fall through rather than crash the caller.
        print(f"An error occurred: {e}")
    finally:
        print("Exiting function (finally block).")
# Example usage: run the scraper with the default output filename.
if __name__ == "__main__":
    scrape_stackoverflow_cloudflare_and_save_csv()
Ich versuche, Bewertungen aus dem Glassdoor abzukratzen. Ich könnte Textbewertungen verschrotten, aber ich habe Probleme, die Empfehlung zu kratzen (ja/nein). Hier ist der Screenshot von dem, woran ich arbeite. Auf Empfehlung überprüft, hat ihr D -Attribut 8,835 und nein 18,299. Ich habe kein [url=viewtopic.php?t=26065]Problem[/url] mit dem anderen Teil, aber nur für # 3. Scrape -Empfehlungspart[code]//*[@id="empReview_##"]/div[2]/div[2]/div[1]/svg/path[/code]
10 Bewertungen werden von jeder Seite gesammelt, für Empfehlungen jedoch 0. [code]print(svg_elements)[/code] Es wird eine leere Liste angezeigt. Unten ist mein aktueller Code; meine ID und mein Passwort habe ich entfernt. Vielen Dank im Voraus für Ihre Hilfe. import csv import time from seleniumbase import SB from selenium.webdriver.common.by import By
def scrape_stackoverflow_cloudflare_and_save_csv(csv_filename="cloudflare_questions.csv"): """ Scrapes text from Glassdoor reviews pages (pros, cons, recommendations) for pages 1 to 5, then saves data to a CSV file. """ try: with SB(uc=True) as sb: base_url = "https://www.glassdoor.com" start_url = "https://www.glassdoor.com/Reviews/Amazon-Reviews-E6036.htm"
# Loop through pages 1 to 5 for page_num in range(1, 6): print(f"Scraping page {page_num}...") if page_num == 1: sb.uc_open_with_reconnect(start_url, 6) else: next_page_link = f"/Reviews/Amazon-Reviews-E6036_P{page_num}.htm" sb.open(base_url + next_page_link) if page_num == 2: email_input = sb.find_element('input[data-test="emailInput-input"]') email_input.send_keys("my id") sb.sleep(2) # Wait for the email to be entered continue_button = sb.find_element('button[data-test="email-form-button"]') continue_button.click() sb.sleep(2) # Wait for the next page to load
password_input = sb.find_element('input[data-test="passwordInput-input"]') password_input.send_keys("my password") sb.sleep(2) # Wait for the password to be entered sign_in_button = sb.find_element('button[data-role-variant="primary"][type="submit"]') sign_in_button.click() sb.sleep(2) # Wait for the sign-in process to complete sb.uc_gui_click_captcha() sb.sleep(4) # Wait for the page to load
# 1. Scrape PROS pros_elements = sb.find_elements('span[data-test="review-text-PROS"]') pros_texts = [elem.text.strip() for elem in pros_elements if elem.text.strip()]
# 2. Scrape CONS cons_elements = sb.find_elements('span[data-test="review-text-CONS"]') cons_texts = [elem.text.strip() for elem in cons_elements if elem.text.strip()]
# 3. Scrape Recommendations (Yes/No) svg_elements = sb.find_elements(By.XPATH, '//div[contains(@id, "empReview")]/div[2]/div[2]/div[1]/svg/path') recommendations = [] for svg in svg_elements: d_attribute = svg.get_attribute('d') if d_attribute: if '8.835 17.64' in d_attribute: # Unique part of the "Yes" SVG recommendations.append('Yes') elif '18.299 5.327' in d_attribute: # Unique part of the "No" SVG recommendations.append('No')
# Collect data from this page all_pros.extend(pros_texts) all_cons.extend(cons_texts) all_recommendations.extend(recommendations)
# Debug: Print collected data for this page print(f"Page {page_num} - Pros: {len(pros_texts)}, Cons: {len(cons_texts)}, Recommendations: {len(recommendations)}")
# Save all collected data to CSV print("Saving data to CSV...") with open(csv_filename, mode="w", newline="", encoding="utf-8") as f: writer = csv.writer(f) writer.writerow(["pros_text", "cons_text", "recommendation"]) # Add "recommendation" here
for pros, cons, rec in zip(all_pros, all_cons, all_recommendations): writer.writerow([pros, cons, rec])
print("Scraping completed successfully!")
except Exception as e: print(f"An error occurred: {e}") finally: print("Exiting function (finally block).")
# Example usage: if __name__ == "__main__": scrape_stackoverflow_cloudflare_and_save_csv() [/code]
Problembeschreibung: Ich habe versucht, eine Website mit Selen zu kratzen, aber ich war nicht erfolgreich, weil ich nicht in der Lage war, die Webelemente zu erkennen. Ich muss täglich...
Ich verwende Python und Selenium , um Daten von einer Website zu kratzen. Die Seite enthält eine interaktive Karte von Provinzen. In diesem Abschnitt gibt es eine Tabelle mit Informationen wie:...
Also verwende ich Selenium, um regelmäßig etwas über eine Website zu buchen. Es ist geplant, die Reservierung am Dienstag für Mittwoch acht Tage später vorzunehmen.
Ich habe das Programm zur...