Probleme beim Identifizieren des Art-Deco-Schaltflächenelements „Connect“ in Linkedin für Scraping via Selenium
Posted: 03 Jan 2025, 06:12
Code: Select all
def send_linkedin_requests(speakers):
"""Send LinkedIn connection requests to scraped speakers."""
driver = None
try:
print("\nStarting LinkedIn connection process...")
driver = create_chrome_driver()
driver.get("https://www.linkedin.com/login")
wait = WebDriverWait(driver, 20)
wait.until(EC.presence_of_element_located((By.ID, "username"))).send_keys(LINKEDIN_EMAIL)
driver.find_element(By.ID, "password").send_keys(LINKEDIN_PASSWORD)
driver.find_element(By.XPATH, "//button[@type='submit']").click()
time.sleep(5)
for speaker in speakers:
try:
speaker_name = normalize_name(speaker["name"])
print(f"\nSearching for {speaker['name']}...")
names = speaker_name.split()
if len(names) < 2:
print(f"Skipping {speaker['name']} - insufficient name information")
continue
first_name, last_name = names[:2]
search_query = f"https://www.linkedin.com/search/results/people/?keywords={first_name}%20{last_name}"
driver.get(search_query)
time.sleep(5)
# Wait for and print number of results if shown
try:
results_count = driver.find_element(By.CSS_SELECTOR, ".search-results-container h2").text
print(f"LinkedIn shows: {results_count}")
except:
pass
try:
# Try multiple selectors to find search results
selectors = [
"div.search-results-container ul.reusable-search__entity-result-list",
"div.search-results-container div.mb3",
".search-results-container li.reusable-search__result-container",
".entity-result__item"
]
search_results = []
for selector in selectors:
try:
results = driver.find_elements(By.CSS_SELECTOR, selector)
if results:
search_results = results
print(f"Found {len(results)} results using selector: {selector}")
break
except:
continue
if not search_results:
print("No search results found using any selector")
continue
print(f"Processing {len(search_results)} results...")
matches_found = 0
for result in search_results[:5]:
try:
# Try multiple selectors for the name
name_selectors = [
".entity-result__title-text span[aria-hidden='true']",
".entity-result__title-text",
"span.actor-name",
".app-aware-link span"
]
profile_name = None
for selector in name_selectors:
try:
name_element = result.find_element(By.CSS_SELECTOR, selector)
profile_name = normalize_name(name_element.text.strip())
if profile_name:
break
except:
continue
if not profile_name:
print("Could not find name in result, skipping...")
continue
print(f"\nFound profile: {profile_name}")
print(f"Looking for: {speaker_name}")
# Check for name match
if first_name in profile_name and last_name in profile_name:
print("Name match found!")
# Look for connect button
connect_button = None
button_selectors = [
"button.artdeco-button--secondary",
"button.artdeco-button[aria-label*='Connect']",
"button.artdeco-button[aria-label*='Invite']"
]
for selector in button_selectors:
try:
buttons = result.find_elements(By.CSS_SELECTOR, selector)
for button in buttons:
if 'connect' in button.text.lower():
connect_button = button
break
except:
continue
if connect_button:
print("Found Connect button")
if input(f"Send connection request? (yes/no): ").strip().lower() == "yes":
driver.execute_script("arguments[0].click();", connect_button)
time.sleep(2)
note = (
f"{first_name.title()}, hope our paths cross soon! At Kintsugi, we're developing novel voice biomarker AI to screen "
"clinical depression and anxiety from 20 seconds of free-form speech. We were recently featured in Forbes AI 50 and Fierce 15.\n\nWarmly,\nGrace"
)
print(f"\nDraft note:\n{note}")
if input("Confirm sending note? (yes/no): ").strip().lower() == "yes":
try:
add_note_button = wait.until(EC.element_to_be_clickable((
By.XPATH, "//button[contains(text(), 'Add a note')]"
)))
driver.execute_script("arguments[0].click();", add_note_button)
time.sleep(1)
textarea = wait.until(EC.presence_of_element_located((
By.XPATH, "//textarea"
)))
textarea.send_keys(note)
send_button = wait.until(EC.element_to_be_clickable((
By.XPATH, "//button[contains(text(), 'Send')]"
)))
driver.execute_script("arguments[0].click();", send_button)
print(f"Connection request sent!")
time.sleep(3)
except Exception as e:
print(f"Error sending connection request: {e}")
else:
print("No Connect button found - may already be connected or have pending request")
matches_found += 1
if matches_found >= 3:
break
else:
print("Name does not match, skipping...")
except Exception as e:
print(f"Error processing result: {e}")
continue
except Exception as e:
print(f"Error processing search results: {e}")
except Exception as e:
print(f"Error processing {speaker['name']}: {e}")
continue
except Exception as e:
print(f"Error in LinkedIn connection process: {e}")
finally:
if driver:
driver.quit()
Ich habe mehrere Selektoren zum Suchen von Namen und die Schaltfläche „Verbinden“ hinzugefügt , besseres Debugging und bessere Fehlerbehandlung; Allerdings kann ich die Sprecher immer noch nicht mit meiner Liste zur Nachverfolgung identifizieren.
Irgendwelche Gedanken dazu, wie man die Erfassung der Übereinstimmung und der Connect-Sequenz verbessern kann? Danke!