Ich versuche, zu Forschungszwecken URLs von einer TikTok-Benutzerseite abzurufen. Mit headless=False funktioniert das gut, aber sobald ich headless=True setze, wird eine Captcha-Überprüfung ausgelöst. Irgendwelche Ideen, wie man das beheben kann? Ich verwende Playwright:
import time
import random
from playwright.sync_api import sync_playwright
def fetch_tiktok_video_urls(profile_url, max_videos, *, headless=False,
                            max_stale_rounds=5):
    """Collect up to ``max_videos`` distinct video links from a profile page.

    Opens ``profile_url`` in a persistent Chromium context (profile stored in
    ``./user_data``), scrolls the page, and gathers the ``href`` of every
    anchor whose URL contains ``/video/``.

    Args:
        profile_url: URL of the profile page to open.
        max_videos: Maximum number of links to return.
        headless: Forwarded to ``launch_persistent_context``. Defaults to
            ``False``, which is what this function previously hard-coded.
        max_stale_rounds: Number of consecutive scroll rounds that may yield
            no new link before the function gives up. Without this bound the
            loop would spin forever on a profile that exposes fewer than
            ``max_videos`` links.

    Returns:
        A list of at most ``max_videos`` unique links, in discovery order.
        Any exception is printed and whatever was collected so far is
        returned (best-effort behaviour).
    """
    video_urls = []
    seen = set()  # O(1) duplicate check; the list keeps discovery order
    try:
        with sync_playwright() as p:
            context = p.chromium.launch_persistent_context(
                user_data_dir="./user_data", headless=headless
            )
            try:
                page = context.new_page()
                page.goto(profile_url)
                page.wait_for_load_state('load')
                simulate_human_browsing(page)

                stale_rounds = 0
                while (len(video_urls) < max_videos
                       and stale_rounds < max_stale_rounds):
                    page.mouse.wheel(0, random.randint(200, 500))
                    simulate_human_browsing(page)

                    links = page.locator("a[href*='/video/']").evaluate_all(
                        "elements => elements.map(e => e.href)"
                    )
                    found_new = False
                    for link in links:
                        if link in seen:
                            continue
                        seen.add(link)
                        video_urls.append(link)
                        found_new = True
                        if len(video_urls) >= max_videos:
                            break

                    # Count only *consecutive* empty rounds so that a slow
                    # lazy-load does not end the crawl prematurely.
                    stale_rounds = 0 if found_new else stale_rounds + 1
            finally:
                # Close the context on error paths too.
                context.close()
    except Exception as e:
        print(f"Error: {e}")
    return video_urls[:max_videos]
def simulate_human_browsing(page):
    """Issue randomized wheel scrolls, then randomized pointer moves, on ``page``.

    Each action is followed by a randomized pause. ``page`` must expose
    ``mouse.wheel`` and ``mouse.move``; nothing is returned.
    """
    # Phase 1: between 3 and 8 downward wheel scrolls of 200-500 units each.
    remaining_scrolls = random.randint(3, 8)
    while remaining_scrolls > 0:
        delta_y = random.randint(200, 500)
        page.mouse.wheel(0, delta_y)
        time.sleep(random.uniform(1.5, 3.5))
        remaining_scrolls -= 1

    # Phase 2: between 2 and 5 pointer moves to random points in an
    # 800x600 area, each interpolated over 5-15 intermediate steps.
    move_count = random.randint(2, 5)
    for _ in range(move_count):
        target_x = random.randint(0, 800)
        target_y = random.randint(0, 600)
        step_count = random.randint(5, 15)
        page.mouse.move(target_x, target_y, steps=step_count)
        time.sleep(random.uniform(0.5, 1.5))
if __name__ == "__main__":
    # Script entry point: fetch up to 30 video links from a sample profile
    # and print the resulting list.
    tiktok_url = "https://www.tiktok.com/@nail_videos2024"
    urls = fetch_tiktok_video_urls(profile_url=tiktok_url, max_videos=30)
    print("Fetched URLs:", urls)
Ich habe versucht, wie folgt einen User-Agent zu setzen:
# Attempted variant: headless launch with extra Chromium command-line flags,
# one of which overrides the User-Agent string.
chromium_flags = [
    "--no-sandbox",
    '--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
]
browser = p.chromium.launch_persistent_context(
    user_data_dir="./user_data",
    headless=True,
    args=chromium_flags,
)
CAPTCHA-Probleme im Headless-Modus beim Abrufen von TikTok-URLs ⇐ Python
-
- Similar Topics
- Replies
- Views
- Last post