Programmiererforum

by **Anonymous** » 26 Aug 2025, 09:10

Ich versuche, Bewertungen aus dem Glassdoor abzukratzen. Ich könnte Textbewertungen verschrotten, aber ich habe Probleme, die Empfehlung zu kratzen (ja/nein). Hier ist der Screenshot von dem, woran ich arbeite. Auf Empfehlung überprüft, hat ihr D -Attribut 8,835 und nein 18,299. Ich habe kein Problem mit dem anderen Teil, aber nur für # 3. Scrape -Empfehlungspart

Code: Select all

//*[@id="empReview_##"]/div[2]/div[2]/div[1]/svg/path

10 Bewertungen werden von jeder Seite gesammelt, 0 für Empfehlungen in
jedoch 0

Code: Select all

print(svg_elements)
< /code>
Es wird eine leere Liste angezeigt. Unten ist mein aktueller Code, der meine ID und mein Passwort entfernen. Vielen Dank im Voraus für Ihre Hilfe. < /P>
import csv
import time
from seleniumbase import SB
from selenium.webdriver.common.by import By

def scrape_stackoverflow_cloudflare_and_save_csv(csv_filename="cloudflare_questions.csv"):
"""
Scrapes text from Glassdoor reviews pages (pros, cons, recommendations) for pages 1 to 5, then saves data to a CSV file.
"""
try:
with SB(uc=True) as sb:
base_url = "https://www.glassdoor.com"
start_url = "https://www.glassdoor.com/Reviews/Amazon-Reviews-E6036.htm"

all_pros = []
all_cons = []
all_recommendations = []

# Loop through pages 1 to 5
for page_num in range(1, 6):
print(f"Scraping page {page_num}...")
if page_num == 1:
sb.uc_open_with_reconnect(start_url, 6)
else:
next_page_link = f"/Reviews/Amazon-Reviews-E6036_P{page_num}.htm"
sb.open(base_url + next_page_link)
if page_num == 2:
email_input = sb.find_element('input[data-test="emailInput-input"]')
email_input.send_keys("my id")
sb.sleep(2)  # Wait for the email to be entered
continue_button = sb.find_element('button[data-test="email-form-button"]')
continue_button.click()
sb.sleep(2)  # Wait for the next page to load

password_input = sb.find_element('input[data-test="passwordInput-input"]')
password_input.send_keys("my password")
sb.sleep(2)  # Wait for the password to be entered
sign_in_button = sb.find_element('button[data-role-variant="primary"][type="submit"]')
sign_in_button.click()
sb.sleep(2)  # Wait for the sign-in process to complete
sb.uc_gui_click_captcha()
sb.sleep(4)  # Wait for the page to load

# 1. Scrape PROS
pros_elements = sb.find_elements('span[data-test="review-text-PROS"]')
pros_texts = [elem.text.strip() for elem in pros_elements if elem.text.strip()]

# 2. Scrape CONS
cons_elements = sb.find_elements('span[data-test="review-text-CONS"]')
cons_texts = [elem.text.strip() for elem in cons_elements if elem.text.strip()]

# 3.  Scrape Recommendations (Yes/No)
svg_elements = sb.find_elements(By.XPATH, '//div[contains(@id, "empReview")]/div[2]/div[2]/div[1]/svg/path')
recommendations = []
for svg in svg_elements:
d_attribute = svg.get_attribute('d')
if d_attribute:
if '8.835 17.64' in d_attribute:  # Unique part of the "Yes" SVG
recommendations.append('Yes')
elif '18.299 5.327' in d_attribute:  # Unique part of the "No" SVG
recommendations.append('No')

# Collect data from this page
all_pros.extend(pros_texts)
all_cons.extend(cons_texts)
all_recommendations.extend(recommendations)

# Debug: Print collected data for this page
print(f"Page {page_num} - Pros: {len(pros_texts)}, Cons: {len(cons_texts)}, Recommendations: {len(recommendations)}")

# Save all collected data to CSV
print("Saving data to CSV...")
with open(csv_filename, mode="w", newline="", encoding="utf-8") as f:
writer = csv.writer(f)
writer.writerow(["pros_text", "cons_text", "recommendation"])  # Add "recommendation" here

for pros, cons, rec in zip(all_pros, all_cons, all_recommendations):
writer.writerow([pros, cons, rec])

print("Scraping completed successfully!")

except Exception as e:
print(f"An error occurred: {e}")
finally:
print("Exiting function (finally block).")

# Example usage:
if __name__ == "__main__":
scrape_stackoverflow_cloudflare_and_save_csv()

Programmiererforum

XPath kann nicht mit Selenium kratzen

Post a reply

Expand view Topic review: XPath kann nicht mit Selenium kratzen