I have scraped some information from a main URL that shows the address and the number of rooms, looping over several pages:

[code]
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from undetected_chromedriver import Chrome
from selenium.webdriver.remote.webelement import WebElement
from selenium.webdriver.remote.webdriver import WebDriver
from selenium.common.exceptions import TimeoutException, NoSuchElementException
from selenium.webdriver.common.action_chains import ActionChains
from collections.abc import Iterator
import pandas as pd
# Constants
URL = "https://www.zoopla.co.uk/house-prices/north-east-england/?new_homes=include&q=North+East&orig_q=england&identifier=england&view_type=list&search_source=home&recent_search=true"
TIMEOUT = 5
# Helper function to extract text from a WebElement, falling back to the
# textContent property when the rendered text is empty
def etext(e: WebElement) -> str:
    if e:
        if t := e.text.strip():
            return t
        if (p := e.get_property("textContent")) and isinstance(p, str):
            return p.strip()
    return ""
# Click the WebElement using ActionChains
def click(driver: WebDriver, e: WebElement) -> None:
    ActionChains(driver).click(e).perform()
# Get all WebElements that match the given CSS selector,
# yielding nothing if the wait times out
def get_all(driver: WebDriver, css: str) -> Iterator[WebElement]:
    wait = WebDriverWait(driver, TIMEOUT)
    sel = (By.CSS_SELECTOR, css)
    try:
        yield from wait.until(EC.presence_of_all_elements_located(sel))
    except TimeoutException:
        pass
# Look for the "Next" pagination button and click it
def click_next(driver: WebDriver) -> None:
    for a in get_all(driver, "a[aria-live=polite] > div > div:nth-child(2)"):
        if etext(a) == "Next":
            click(driver, a)
            break
# Handle the cookie consent popup, which lives inside a shadow DOM
def click_through(driver: WebDriver) -> None:
    try:
        wait = WebDriverWait(driver, TIMEOUT)
        shadow_root = driver.find_element(By.ID, "usercentrics-root").shadow_root
        # The consent button sits inside the shadow root, so search from
        # there rather than from the main document
        button = wait.until(lambda _: shadow_root.find_element(
            By.CSS_SELECTOR, "button[data-testid=uc-deny-all-button]"
        ))
        click(driver, button)
    except Exception:
        pass  # Ignore if the cookie popup is not present
# Scrape the address and number of rooms from each result on the page
def scrape_page(driver: WebDriver) -> list[dict]:
    result = []
    for house in get_all(driver, "div[data-testid=result-item]"):
        try:
            address = etext(house.find_element(By.CSS_SELECTOR, "h2"))
            rooms = etext(house.find_element(By.CSS_SELECTOR, "._1pbf8i51 div:nth-child(2) p"))
            result.append({"Address": address, "Number of rooms": rooms})
        except NoSuchElementException:
            continue  # Skip listings with missing elements
    return result
# Main script execution
if __name__ == "__main__":
    with Chrome() as driver:
        driver.get(URL)
        click_through(driver)  # Handle cookies
        all_results = []
        prev_url = ""
        npages = 0
        # Stop when the URL no longer changes (no Next button left,
        # or a Cloudflare interstitial intervened)
        while prev_url != driver.current_url:
            prev_url = driver.current_url
            all_results.extend(scrape_page(driver))
            click_next(driver)
            npages += 1
        # Convert results to DataFrame
        df = pd.DataFrame(all_results)
        print(df)  # Display results
        print(f"Processed {npages} pages")
[/code]
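To connect this to the per-listing step below, scrape_page would also need each listing's URL. A minimal sketch under that assumption; the bare anchor selector is a guess and may need adjusting to the actual result-card markup:

[code]
# Hypothetical variant of scrape_page that also records the listing URL;
# the anchor selector is an assumption, not taken from the page.
def scrape_page_with_urls(driver: WebDriver) -> list[dict]:
    result = []
    for house in get_all(driver, "div[data-testid=result-item]"):
        try:
            address = etext(house.find_element(By.CSS_SELECTOR, "h2"))
            rooms = etext(house.find_element(By.CSS_SELECTOR, "._1pbf8i51 div:nth-child(2) p"))
            url = house.find_element(By.CSS_SELECTOR, "a").get_attribute("href")
            result.append({"Address": address, "Number of rooms": rooms, "URL": url})
        except NoSuchElementException:
            continue  # Skip listings with missing elements
    return result
[/code]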
But if you click on a listing, you find the EPC rating, and I want to scrape that as well:

[code]
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium import webdriver
import pandas as pd
# Initialize WebDriver
driver = webdriver.Chrome()
# Open listing URL
listing_url = "https://www.zoopla.co.uk/property/uprn/4510108329/"
driver.get(listing_url)
# Wait for the main content to load (adjust time as needed)
WebDriverWait(driver, 10).until(
    EC.presence_of_element_located((By.CLASS_NAME, "_1vhryas1"))
)
# Initialize result list to store data
result = []
# Find all house elements
houses = driver.find_elements(By.CLASS_NAME, "_1vhryas1")
# Extract the EPC rating from each house element
for house in houses:
    try:
        item = {
            "EPC rating": house.find_element(By.XPATH, '//*[@id="main-content"]/div[1]/div/div/div[1]/div/div[2]/div[4]/div/div[3]/div').text
            # "Flood risk": house.find_element(By.CSS_SELECTOR, '#main-content > div.z3kgis2 > div > div > div._1365ry20 > div > div._10b7mfk0 > dialog:nth-child(5) > div > div._1eyq7or1d._1eyq7or1f._1eyq7or1q > div > div._10m149w1._10m149w2 > div._10m149w3 > h3 > div > div').text
        }
        result.append(item)  # Append to the result list
    except Exception as e:
        print(f"Error extracting EPC rating: {e}")
# Store the result into a dataframe after the loop
df = pd.DataFrame(result)
# Show the result
print(df)
# Close the driver
driver.quit()
[/code]
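As an aside, the absolute XPath above is brittle and will break on any layout change. A text-based lookup may be more stable; a sketch, assuming the rating is rendered in an element whose text contains "EPC":

[code]
# Hypothetical fallback: locate any element whose text mentions "EPC";
# the exact markup is an assumption and may need adjusting.
epc = driver.find_element(By.XPATH, "//*[contains(text(), 'EPC')]").text
[/code]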
If you run the code separately, you will notice …
I would like the result to return all addresses, numbers of rooms and EPC ratings for all properties in tabular form.
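A minimal sketch of how the two parts could be combined, reusing the helpers from the first script and the scrape_page_with_urls variant sketched above; the EPC lookup reuses the absolute XPath from the second script and is just as fragile:

[code]
# Sketch: collect addresses, rooms and listing URLs across all pages,
# then visit each listing page for its EPC rating and merge the results.
EPC_XPATH = '//*[@id="main-content"]/div[1]/div/div/div[1]/div/div[2]/div[4]/div/div[3]/div'

def get_epc(driver: WebDriver, url: str) -> str:
    driver.get(url)
    try:
        wait = WebDriverWait(driver, TIMEOUT)
        return wait.until(
            EC.presence_of_element_located((By.XPATH, EPC_XPATH))
        ).text
    except TimeoutException:
        return ""  # Leave the rating blank if it never appears

if __name__ == "__main__":
    with Chrome() as driver:
        driver.get(URL)
        click_through(driver)  # Handle cookies
        all_results = []
        prev_url = ""
        while prev_url != driver.current_url:
            prev_url = driver.current_url
            all_results.extend(scrape_page_with_urls(driver))
            click_next(driver)
        # Second pass: visit each listing page for its EPC rating
        for row in all_results:
            row["EPC rating"] = get_epc(driver, row.pop("URL"))
        print(pd.DataFrame(all_results))
[/code]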