Probleme beim Identifizieren des Art-Deco-Schaltflächenelements „Connect“ in LinkedIn für Scraping via Selenium – Python

Python-Programme
Guest
 Probleme beim Identifizieren des Art-Deco-Schaltflächenelements „Connect“ in Linkedin für Scraping via Selenium

Post by Guest »

Code: Select all

def _find_search_results(driver):
    """Return the people-search result elements on the current page.

    Returns an empty list when none of the known selectors matches.
    """
    # Item-level selectors are tried first. The list/container selectors
    # match the wrapper around *all* hits, so when they win the whole list is
    # treated as a single "result" and only the first profile inside it is
    # ever inspected. They are kept only as a last resort.
    # NOTE(review): LinkedIn class names change frequently - verify these
    # selectors against the live markup.
    selectors = [
        ".search-results-container li.reusable-search__result-container",
        ".entity-result__item",
        "div.search-results-container ul.reusable-search__entity-result-list",
        "div.search-results-container div.mb3",
    ]

    for selector in selectors:
        try:
            results = driver.find_elements(By.CSS_SELECTOR, selector)
        except Exception:
            # A failing selector must not prevent trying the next one.
            continue
        if results:
            print(f"Found {len(results)} results using selector: {selector}")
            return results
    return []


def _extract_profile_name(result):
    """Return the normalized profile name found inside *result*, or None."""
    name_selectors = [
        ".entity-result__title-text span[aria-hidden='true']",
        ".entity-result__title-text",
        "span.actor-name",
        ".app-aware-link span",
    ]

    for selector in name_selectors:
        try:
            name_element = result.find_element(By.CSS_SELECTOR, selector)
            profile_name = normalize_name(name_element.text.strip())
        except Exception:
            # Selector absent in this result layout - try the next one.
            continue
        if profile_name:
            return profile_name
    return None


def _find_connect_button(result):
    """Return the first button inside *result* whose text contains 'connect'.

    Returns None when no such button exists.
    """
    button_selectors = [
        "button.artdeco-button--secondary",
        "button.artdeco-button[aria-label*='Connect']",
        "button.artdeco-button[aria-label*='Invite']",
    ]

    for selector in button_selectors:
        try:
            for button in result.find_elements(By.CSS_SELECTOR, selector):
                if "connect" in button.text.lower():
                    # Return immediately so that later selectors cannot
                    # overwrite the button that was already found.
                    return button
        except Exception:
            continue
    return None


def _offer_connection(driver, wait, result, first_name):
    """Interactively send a connection request (with a note) for *result*.

    Asks on stdin before clicking "Connect" and again before sending the
    note. Errors while filling in the note dialog are printed, not raised.
    """
    connect_button = _find_connect_button(result)
    if connect_button is None:
        print("No Connect button found - may already be connected or have pending request")
        return

    print("Found Connect button")
    if input("Send connection request? (yes/no): ").strip().lower() != "yes":
        return

    # JavaScript click: works even when the button is covered by an overlay.
    driver.execute_script("arguments[0].click();", connect_button)
    time.sleep(2)

    note = (
        f"{first_name.title()}, hope our paths cross soon! At Kintsugi, we're developing novel voice biomarker AI to screen "
        "clinical depression and anxiety from 20 seconds of free-form speech.  We were recently featured in Forbes AI 50 and Fierce 15.\n\nWarmly,\nGrace"
    )
    print(f"\nDraft note:\n{note}")

    if input("Confirm sending note? (yes/no): ").strip().lower() != "yes":
        # NOTE(review): the invitation dialog opened by the click above is
        # left open in this case - confirm whether it should be dismissed.
        return

    try:
        # contains(text(), ...) only looks at the button's first direct text
        # node, so it misses labels wrapped in a nested <span>. Comparing the
        # whole string value as well covers both layouts.
        add_note_button = wait.until(EC.element_to_be_clickable((
            By.XPATH,
            "//button[contains(text(), 'Add a note') or normalize-space(.)='Add a note']",
        )))
        driver.execute_script("arguments[0].click();", add_note_button)
        time.sleep(1)

        textarea = wait.until(EC.presence_of_element_located((
            By.XPATH, "//textarea"
        )))
        textarea.send_keys(note)

        send_button = wait.until(EC.element_to_be_clickable((
            By.XPATH,
            "//button[contains(text(), 'Send') or normalize-space(.)='Send']",
        )))
        driver.execute_script("arguments[0].click();", send_button)
        print("Connection request sent!")
        time.sleep(3)
    except Exception as e:
        print(f"Error sending connection request: {e}")


def _process_speaker(driver, wait, speaker):
    """Search LinkedIn for one speaker and offer to connect with matches.

    Inspects at most the first five search results and stops after three
    name matches. Speakers with fewer than two name parts are skipped.
    """
    # Local import: the file's top-level import block is not visible here.
    from urllib.parse import quote

    speaker_name = normalize_name(speaker["name"])
    print(f"\nSearching for {speaker['name']}...")

    names = speaker_name.split()
    if len(names) < 2:
        print(f"Skipping {speaker['name']} - insufficient name information")
        return

    first_name, last_name = names[:2]
    # Percent-encode the keywords so that names containing '&', '#', '+' or
    # non-ASCII characters do not corrupt the query string.
    keywords = quote(f"{first_name} {last_name}", safe="")
    search_query = f"https://www.linkedin.com/search/results/people/?keywords={keywords}"
    driver.get(search_query)
    time.sleep(5)

    # Best-effort: print the result count headline if the page shows one.
    try:
        results_count = driver.find_element(By.CSS_SELECTOR, ".search-results-container h2").text
        print(f"LinkedIn shows: {results_count}")
    except Exception:
        pass

    try:
        search_results = _find_search_results(driver)
        if not search_results:
            print("No search results found using any selector")
            return

        print(f"Processing {len(search_results)} results...")
        matches_found = 0

        for result in search_results[:5]:
            try:
                profile_name = _extract_profile_name(result)
                if not profile_name:
                    print("Could not find name in result, skipping...")
                    continue

                print(f"\nFound profile: {profile_name}")
                print(f"Looking for:  {speaker_name}")

                # Substring match of both name parts against the profile name.
                if first_name not in profile_name or last_name not in profile_name:
                    print("Name does not match, skipping...")
                    continue

                print("Name match found!")
                _offer_connection(driver, wait, result, first_name)

                matches_found += 1
                if matches_found >= 3:
                    break
            except Exception as e:
                print(f"Error processing result: {e}")
                continue
    except Exception as e:
        print(f"Error processing search results: {e}")


def send_linkedin_requests(speakers):
    """Send LinkedIn connection requests to scraped speakers.

    Logs in with LINKEDIN_EMAIL / LINKEDIN_PASSWORD, runs a people search for
    each speaker and, after interactive confirmation on stdin, sends a
    connection request with a note to matching profiles.

    Args:
        speakers: iterable of mappings that each provide a "name" entry.

    Returns:
        None. All errors are caught and printed; the browser is always
        closed on exit.
    """
    driver = None
    try:
        print("\nStarting LinkedIn connection process...")
        driver = create_chrome_driver()
        driver.get("https://www.linkedin.com/login")

        wait = WebDriverWait(driver, 20)
        wait.until(EC.presence_of_element_located((By.ID, "username"))).send_keys(LINKEDIN_EMAIL)
        driver.find_element(By.ID, "password").send_keys(LINKEDIN_PASSWORD)
        driver.find_element(By.XPATH, "//button[@type='submit']").click()
        time.sleep(5)

        for speaker in speakers:
            try:
                _process_speaker(driver, wait, speaker)
            except Exception as e:
                # One failing speaker must not abort the rest of the list.
                print(f"Error processing {speaker['name']}: {e}")
                continue

    except Exception as e:
        print(f"Error in LinkedIn connection process: {e}")

    finally:
        if driver is not None:
            driver.quit()
Die obige Funktion wird verwendet, um mithilfe der Personensuche und der Schaltflächenauswahl „Verbinden“ eine Verbindung zu einer Liste von Rednern auf LinkedIn herzustellen. Allerdings werden Personen, obwohl sie in den Suchergebnissen angezeigt werden, nicht identifiziert und es wird „Keine ersten Ergebnisse“ zurückgegeben.
Ich habe mehrere Selektoren zum Suchen von Namen und der Schaltfläche „Verbinden“ sowie besseres Debugging und bessere Fehlerbehandlung hinzugefügt; allerdings kann ich die Sprecher aus meiner Liste immer noch nicht zur Nachverfolgung identifizieren.
Irgendwelche Gedanken dazu, wie man die Erfassung der Übereinstimmung und der Connect-Sequenz verbessern kann? Danke!

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post