Warum weichen die Standortdaten ab, nachdem sie aus Google Maps gescrapt wurden?

Post a reply

Smilies
:) :( :oops: :chelo: :roll: :wink: :muza: :sorry: :angel: :read: *x) :clever:
View more smilies

BBCode is ON
[img] is ON
[flash] is OFF
[url] is ON
Smilies are ON

Topic review
   

Expand view Topic review: Warum weichen die Standortdaten ab, nachdem sie aus Google Maps gescrapt wurden?

by Anonymous » 26 Aug 2025, 09:01

Dieser Web-Scraper liest Geschäftsdaten aus Google Maps aus und speichert sie in einer Excel-Datei. In den Excel-Tabellen weichen Breitengrad und Längengrad jedoch immer von den tatsächlichen Werten ab, die Google Maps anzeigt. Code:

Code: Select all

def _inner_text_or_na(page, selector: str) -> str:
    """Return the inner text of the element matching *selector*, or "N/A" on failure."""
    try:
        return page.locator(selector).inner_text()
    except Exception:
        # Best-effort field: a missing element (Playwright raises once its
        # wait times out) must not abort the whole listing.
        return "N/A"


def run_scraper(search_term: str, total_results: int = 100) -> list[dict]:
    """Search Google Maps for *search_term* and collect details of the result cards.

    Opens a headless Chromium page, submits the search, scrolls until at least
    ``total_results`` cards are present (or the card count has stopped growing
    for 20 consecutive scrolls), then clicks each card and reads name, address,
    phone number, website and coordinates into a ``Business`` that is appended
    to ``business_list.business_list``.

    Args:
        search_term: Text typed into the Google Maps search box.
        total_results: Maximum number of result cards to process.

    NOTE(review): the visible snippet ends after ``browser.close()`` without a
    ``return`` statement even though the signature is annotated
    ``-> list[dict]``; confirm how ``business_list`` is meant to be converted
    and returned.
    """
    business_list = BusinessList()
    card_selector = '//div[contains(@class, "Nv2PK")]'

    with sync_playwright() as p:
        browser = p.chromium.launch(headless=True)
        try:
            page = browser.new_page()
            page.goto("https://www.google.com/maps", timeout=60000)

            print(f"Searching for: {search_term}")
            search_input = page.locator('//input[@id="searchboxinput"]')
            search_input.fill(search_term)
            page.keyboard.press("Enter")

            # Wait for initial results
            page.wait_for_selector(
                '//a[contains(@href, "https://www.google.com/maps/place")]',
                timeout=20000,
            )

            scroll_pause_time = 1.5
            prev_count = 0
            max_attempts = 20
            attempts = 0

            # Scroll loop until we get at least `total_results` or exhaust attempts
            while True:
                page.mouse.wheel(0, 5000)
                time.sleep(scroll_pause_time)

                count = page.locator(card_selector).count()

                if count >= total_results or attempts >= max_attempts:
                    break

                # `attempts` counts consecutive scrolls that loaded nothing new.
                if count == prev_count:
                    attempts += 1
                else:
                    attempts = 0
                prev_count = count

            print(f"Total business cards loaded: {count}")

            listings = page.locator(card_selector).all()[:total_results]

            for i, listing in enumerate(listings, start=1):
                try:
                    previous_url = page.url
                    listing.scroll_into_view_if_needed()
                    listing.click()
                    time.sleep(4)

                    # The coordinates are taken from page.url, so make sure the
                    # URL has actually switched to the clicked place; otherwise
                    # the previous listing's coordinates would be recorded.
                    try:
                        page.wait_for_url(
                            lambda url: url != previous_url and "/maps/place/" in url,
                            timeout=10000,
                        )
                    except Exception:
                        print(
                            f"Warning: URL did not update for listing {i}; "
                            "coordinates may be stale"
                        )

                    business = Business()
                    business.name = _inner_text_or_na(
                        page, '//h1[contains(@class, "lfPIob")]'
                    )
                    business.address = _inner_text_or_na(
                        page,
                        '//button[@data-item-id="address"]//div[contains(@class, "fontBodyMedium")]',
                    )
                    business.phone_number = _inner_text_or_na(
                        page,
                        '//button[contains(@data-item-id, "phone:tel:")]//div[contains(@class, "fontBodyMedium")]',
                    )

                    try:
                        business.website = (
                            page.locator('//a[@data-item-id="authority"]').get_attribute("href")
                            or "N/A"
                        )
                    except Exception:
                        business.website = "N/A"

                    # NOTE(review): extract_coordinates_from_url is defined
                    # elsewhere. If it parses the "@lat,lng" URL segment, that
                    # is presumably the map viewport centre rather than the
                    # place's own position — confirm against the helper.
                    business.latitude, business.longitude = extract_coordinates_from_url(page.url)

                    business_list.business_list.append(business)

                    print(f"Scraped {i}: {business.name}, {business.address}, {business.phone_number}, {business.website}")

                except Exception as e:
                    # One failing listing must not stop the remaining ones.
                    print(f"Error scraping listing {i}: {e}")
        finally:
            # Close the browser even when navigation or scraping raised.
            browser.close()

Top