from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from undetected_chromedriver import Chrome
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from collections.abc import Iterator
import pandas as pd
# Constants
URL = "https://www.zoopla.co.uk/house-prices/north-east-england/?new_homes=include&q=North+East&orig_q=england&identifier=england&view_type=list&search_source=home&recent_search=true"
TIMEOUT = 5
# Helper function to extract text from a WebElement
def etext(e: WebElement) -> str:
    if e:
        if t := e.text.strip():
            return t
        if (p := e.get_property("textContent")) and isinstance(p, str):
            return p.strip()
    return ""
# Click the WebElement
def click(driver: WebDriver, e: WebElement) -> None:
    ActionChains(driver).click(e).perform()
# Get all WebElements that match the given CSS selector
def get_all(driver: WebDriver, css: str) -> Iterator[WebElement]:
    wait = WebDriverWait(driver, TIMEOUT)
    sel = (By.CSS_SELECTOR, css)
    try:
        yield from wait.until(EC.presence_of_all_elements_located(sel))
    except TimeoutException:
        pass
# Look for the Next button and click it
def click_next(driver: WebDriver) -> None:
    for a in get_all(driver, "a[aria-live=polite] > div > div:nth-child(2)"):
        if etext(a) == "Next":
            click(driver, a)
            break
# Handle cookie consent popup
def click_through(driver: WebDriver) -> None:
try:
wait = WebDriverWait(driver, TIMEOUT)
shadow_root = driver.find_element(By.ID, "usercentrics-root").shadow_root
button = wait.until(EC.element_to_be_clickable(
(By.CSS_SELECTOR, "button[data-testid=uc-deny-all-button]")
))
click(driver, button)
except Exception:
pass # Ignore if cookies popup is not present
# Scrape data from each page
def scrape_page(driver: WebDriver) -> list[dict]:
    result = []
    for house in get_all(driver, "div[data-testid=result-item]"):
        try:
            address = etext(house.find_element(By.CSS_SELECTOR, "h2"))
            number_of_rooms = etext(house.find_element(By.CSS_SELECTOR, "._1pbf8i51 div:nth-child(2) p"))
            result.append({"Address": address, "Number of rooms": number_of_rooms})
        except NoSuchElementException:
            continue  # Skip listings with missing elements
    return result
# Main script execution
if __name__ == "__main__":
    with Chrome() as driver:
        driver.get(URL)
        click_through(driver)  # Handle cookies
        all_results = []
        prev_url = ""
        npages = 0
        while prev_url != driver.current_url:  # Stop if the URL no longer changes (e.g. Cloudflare intervention)
            prev_url = driver.current_url
            all_results.extend(scrape_page(driver))
            click_next(driver)
            npages += 1
        # Convert results to DataFrame
        df = pd.DataFrame(all_results)
        print(df)  # Display results
        print(f"Processed {npages} pages")
But when you click through to a listing you can see the EPC rating, and I would like to scrape that as well:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
import pandas as pd # Ensure you import pandas
# Initialize WebDriver
driver = webdriver.Chrome()
# Open listing URL
listing_url = "https://www.zoopla.co.uk/property/uprn/4510108329/"
driver.get(listing_url)
# Wait for the main content to load (adjust time as needed)
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, "_1vhryas1"))
)
# Initialize result list to store data
result = []
# Find all house elements
houses = driver.find_elements(By.CLASS_NAME, "_1vhryas1")
# Extract the EPC rating from each element
for house in houses:
    try:
        item = {
            "EPC rating": house.find_element(By.XPATH, '//*[@id="main-content"]/div[1]/div/div/div[1]/div/div[2]/div[4]/div/div[3]/div').text
            # "Flood risk": house.find_element(By.CSS_SELECTOR, '#main-content > div.z3kgis2 > div > div > div._1365ry20 > div > div._10b7mfk0 > dialog:nth-child(5) > div > div._1eyq7or1d._1eyq7or1f._1eyq7or1q > div > div._10m149w1._10m149w2 > div._10m149w3 > h3 > div > div').text
        }
        result.append(item)  # Append to the result list
    except Exception as e:
        print(f"Error extracting EPC rating: {e}")
# Store the result into a dataframe after the loop
df = pd.DataFrame(result)
# Show the result
print(df)
# Close the driver
driver.quit()
If you run this code separately, you will notice …
I would like the result to return the address, number of rooms, and EPC rating for every property in tabular form.
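Here is a minimal sketch of how the two scripts could be combined, reusing the helpers and constants from the first script (etext, get_all, click_next, click_through, TIMEOUT, URL). It assumes each result card contains an anchor linking to the listing page, and it reuses the brittle EPC XPath from the second script, so both selectors may need checking against the live page:

# Variant of scrape_page that also collects each listing's URL
def scrape_page_with_links(driver: WebDriver) -> list[dict]:
    result = []
    for house in get_all(driver, "div[data-testid=result-item]"):
        try:
            result.append({
                "Address": etext(house.find_element(By.CSS_SELECTOR, "h2")),
                "Number of rooms": etext(house.find_element(By.CSS_SELECTOR, "._1pbf8i51 div:nth-child(2) p")),
                # Assumption: the first anchor in the card links to the listing page
                "URL": house.find_element(By.CSS_SELECTOR, "a").get_attribute("href"),
            })
        except NoSuchElementException:
            continue  # Skip listings with missing elements
    return result

# Visit one listing page and return its EPC rating, or "" on timeout
def scrape_epc(driver: WebDriver, url: str) -> str:
    driver.get(url)
    try:
        # Brittle XPath copied from the single-listing script above
        e = WebDriverWait(driver, TIMEOUT).until(EC.presence_of_element_located(
            (By.XPATH, '//*[@id="main-content"]/div[1]/div/div/div[1]/div/div[2]/div[4]/div/div[3]/div')
        ))
        return etext(e)
    except TimeoutException:
        return ""

if __name__ == "__main__":
    with Chrome() as driver:
        driver.get(URL)
        click_through(driver)  # Handle cookies
        all_results = []
        prev_url = ""
        while prev_url != driver.current_url:  # Stop if the URL no longer changes
            prev_url = driver.current_url
            all_results.extend(scrape_page_with_links(driver))
            click_next(driver)
        # Second pass: one page load per listing to fetch its EPC rating
        for row in all_results:
            row["EPC rating"] = scrape_epc(driver, row.pop("URL"))
        print(pd.DataFrame(all_results))

The second pass costs one page load per property, so on large result sets it may be worth limiting the number of pages scraped first.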