Web-Scraping, um die erste Auflistung mit Python zu erfassen – Python

Python-Programme
Anonymous
Web-Scraping, um die erste Auflistung mit Python zu erfassen

Post by Anonymous »

Ich versuche, mit Python die geschätzten Verkäufe (Est. Sales) auszulesen.
Konkret möchte ich den geschätzten Umsatz der ersten Produktauflistung per Python abrufen.
Das habe ich derzeit geschrieben, aber es liefert keine Ergebnisse. Ich bin mir nicht sicher, wie der Code die erste Auflistung finden soll, da es mehrere gibt.

Code: Select all

def fetch_sas_est_sales(driver, sas_url):
    """Fetch estimated monthly sales from SAS for the first product listing.

    Loads ``sas_url`` in ``driver``, parses the rendered page source with
    BeautifulSoup and reads the "<number>/mo" text of the estimated-sales
    element inside the first product listing.

    Args:
        driver: Browser driver exposing ``get(url)`` and ``page_source``.
        sas_url: URL of the SAS results page to load.

    Returns:
        The estimated monthly sales as an ``int``, or the string ``"N/A"``
        when the listing, the sales element or a "<number>/mo" value cannot
        be found, or when any error occurs while fetching/parsing.
    """
    try:
        driver.get(sas_url)
        time.sleep(3)  # Allow page to load
        soup = BeautifulSoup(driver.page_source, "html.parser")

        # select_one returns only the first match, i.e. the first listing.
        first_product = soup.select_one("li[style='cursor: pointer;']")
        if first_product:
            est_sales_tag = first_product.select_one(
                ".panel-body.qi-estimated-sales-pnl.criteria-info "
                ".productList-estimated-sales"
            )
            if est_sales_tag:
                # Accept thousands separators ("1,234/mo"); the previous
                # pattern (\d+)/mo silently captured only "234" there.
                est_sales_match = re.search(
                    r"(\d[\d,]*)\s*/\s*mo", est_sales_tag.text
                )
                if est_sales_match:
                    return int(est_sales_match.group(1).replace(",", ""))

        return "N/A"

    except Exception as e:
        # Best-effort boundary: report the problem and fall back to "N/A".
        print(f"⚠️ Error fetching SAS estimated sales: {e}")
        return "N/A"
Für diese Funktion kommt im Discord-Server nur „N/A“ an. Die Seite erfordert eine Anmeldung, daher verwende ich Selenium, um mich beim Ausführen des Skripts einzuloggen. Außerdem lasse ich mir einen Ausschnitt des Seitenquelltexts ausgeben, um zu sehen, ob überhaupt etwas erkannt wird.

Code: Select all

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException
import time

# CSS selectors for the estimated-sales panel and the value span inside it.
_SALES_PANEL_CSS = "div.panel-body.qi-estimated-sales-pnl.criteria-info"
_SALES_SPAN_CSS = "span.productList-estimated-sales.pseudolink"


def _scroll_to_bottom(driver, times=3, pause=2):
    """Scroll to the page bottom ``times`` times, sleeping ``pause`` s each."""
    for _ in range(times):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)


def _first_estimated_sales(driver, timeout=10, after_refresh=False):
    """Return the first non-empty estimated-sales text, or None if none has text.

    Waits up to ``timeout`` seconds for at least one sales panel to be
    present; the resulting ``TimeoutException`` is left for the caller.
    ``after_refresh`` only changes the wording of the progress messages.
    """
    panels = WebDriverWait(driver, timeout).until(
        EC.presence_of_all_elements_located((By.CSS_SELECTOR, _SALES_PANEL_CSS))
    )
    suffix = " after refresh" if after_refresh else ""
    print(f"🔍 Found {len(panels)} estimated sales panels{suffix}.")

    for number, panel in enumerate(panels, start=1):
        try:
            span = panel.find_element(By.CSS_SELECTOR, _SALES_SPAN_CSS)
        except NoSuchElementException:
            if after_refresh:
                print(f"⚠️ Estimated sales span still not found in panel {number} after refresh.")
            else:
                print(f"⚠️ Estimated sales span not found in panel {number}.")
            continue

        estimated_sales = span.text.strip()
        if estimated_sales:
            print(f"✅ Extracted estimated sales from panel {number}{suffix}: {estimated_sales}")
            return estimated_sales

    return None


def fetch_sas_est_sales(driver, sas_url):
    """Fetch the estimated-sales text from the SAS page's sales panels.

    Loads ``sas_url``, waits for ``<body>``, scrolls to trigger lazy-loaded
    content, then returns the stripped text of the first estimated-sales
    span that has text. If no sales panel shows up in time, the page is
    refreshed and the lookup is attempted once more.

    Args:
        driver: Selenium WebDriver instance.
        sas_url: URL of the SAS page to load.

    Returns:
        The span text as a string (not converted to a number), or ``"N/A"``
        when no value is found or any error occurs.
    """
    try:
        driver.get(sas_url)

        # Wait for page to load
        WebDriverWait(driver, 30).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        print("✅ Page loaded. Now scrolling...")

        # Scroll multiple times to trigger lazy-loading elements
        _scroll_to_bottom(driver)

        # Print part of the body content after scrolling for debugging
        page_source = driver.find_element(By.TAG_NAME, "body").get_attribute("innerHTML")
        print("\n🔍 DEBUG: FULL PAGE BODY AFTER SCROLLING (First 2000 chars):\n" + page_source[:2000])

        try:
            estimated_sales = _first_estimated_sales(driver)
        except TimeoutException:
            # Only a missing panel triggers the retry; panels that are
            # present but have no text fall straight through to "N/A".
            print("⚠️ No estimated sales panels found.  Retrying after refreshing page...")
            driver.refresh()
            time.sleep(5)

            # Scroll again after refresh
            _scroll_to_bottom(driver)

            # Final attempt to find sales panels
            try:
                estimated_sales = _first_estimated_sales(driver, after_refresh=True)
            except TimeoutException:
                print("⚠️ Still could not find the estimated sales panel after refresh. Returning 'N/A'.")
                estimated_sales = None

        return estimated_sales if estimated_sales is not None else "N/A"

    except Exception as e:
        # Best-effort boundary: report the problem and fall back to "N/A".
        print(f"⚠️ Error fetching SAS data: {e}")
        return "N/A"
Beim Ausführen werden der Anfang und das Ende des Seitenquelltexts erfolgreich ausgegeben, danach erscheint jedoch die ⚠️-Meldung, dass auf dieser Seite kein Panel mit geschätzten Verkäufen gefunden wurde.
Dies ist das Element, das ich auslesen möchte:



Est. Sales

10/mo



Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post