Wie kann ich Multiprocessing für Web Scraping in Selenium Python verwenden, indem ich eine Funktion in einem anderen Tre… – Python

Python-Programme
Guest
 Wie kann ich Multiprocessing für Web Scraping in Selenium Python verwenden, indem ich eine Funktion in einem anderen Tre

Post by Guest »

Ich muss beim Web-Scraping eine Sequenz ausführen, die jedoch viel Zeit in Anspruch nimmt. Anstatt die Funktion jeweils einzeln aufzurufen, möchte ich sie an verschiedene Prozesse übergeben. Wenn ich das tue, muss ich mich allerdings mit Konflikten zwischen mehreren Treibern auseinandersetzen. Wie kann ich das überwinden?
Ich möchte, dass mehrere Prozesse ihre Aufgaben konfliktfrei jeweils in einem eigenen Treiber ausführen, in dem die vorherigen Schritte bereits geladen sind.
<strong>Hier ist der Code:</strong>

Code: Select all

#importing important libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from test_sheet import connection_sheet

# Browser setup. These statements run at import time, so importing this module
# starts a Firefox session.
# NOTE(review): `Options` is imported from selenium.webdriver.chrome.options,
# but the driver created below is Firefox and `options` is not passed to it
# anywhere in the visible code, so this object currently has no effect.
options = Options()
# Currently disabled option settings:
# options.add_experimental_option("detach", True)
# options.add_argument('headless')
# Single module-level WebDriver instance that the functions below all use.
driver = webdriver.Firefox()

# Record describing the device configuration currently being walked. Every
# field starts out as the placeholder '-'; the scraping functions overwrite
# individual fields as answers are chosen.
data = dict.fromkeys(
    (
        'Device',
        'Processor',
        'Memory Capacity',
        'Storage Capacity',
        'Condition',
        'Battery Health',
        'Include Charger',
        'Fully Functional',
        'Price',
    ),
    '-',
)

def find_and_fetch():
# Inspect the page currently shown in the module-level `driver`, try the
# answer options it offers and call this function again for the page each
# answer leads to. Chosen answers are written into the module-level `data`
# dict, and fetch_info() is called when the final offer text is found.
#
# NOTE(review): the indentation of this function was lost, so the nesting of
# the loops, try/except blocks and `return` statements referred to in the
# comments below is inferred from statement order -- confirm against the
# author's original before relying on it.
global data
try:
# Use JavaScript to hide the banner
driver.execute_script("""
var banner = document.querySelector('a.message-banner');
if (banner) {
banner.style.display = 'none';
}""")
except:
# NOTE(review): this runs only when execute_script raised, and the bare
# `except` also catches KeyboardInterrupt/SystemExit.
print("banner disabled")

time.sleep(1)
# The text of the <h5 class="ng-binding"> element decides which of the
# branches below handles the page. The element is looked up twice: once for
# the truthiness test and once to read the text.
# NOTE(review): per the Selenium documentation find_element raises
# NoSuchElementException when nothing matches, so the `else` near the end of
# this function is presumably reached only when the text is empty -- confirm.
if driver.find_element(By.XPATH,"//h5[@class='ng-binding']").text:
text = driver.find_element(By.XPATH,"//h5[@class='ng-binding']").text
# find processor elements and do forloop for it and call this function recursively
# NOTE(review): this test is case-sensitive with a capital "P", while the
# other branches test lower-case phrases.
if 'Processor' in text:
processor = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
# NOTE(review): an XPath beginning with "//" is evaluated from the document
# root even when the call is made on `elem`; ".//" would restrict it to
# descendants of `elem`. The same expression is used in every branch below.
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
# Collect the aria-label strings first; find_and_click() later locates each
# option again by that label.
for i in elems:
processor.append(i.get_attribute('aria-label'))
for prces in processor:
time.sleep(1)
data['Processor']=prces
print("=> processor : ",prces)
find_and_click(prces)
find_and_fetch()
# break
except Exception as e :
# print("Error While passing to processor selection!")
print(e)
# Put the placeholder back before leaving this branch.
# NOTE(review): unlike the branches below, no small_back_button() call
# appears in this branch.
data['Processor']='-'
return

# find memory capacity elements and do forloop on it and call this function recursively
if "memory capacity" in text:
capas = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
for i in elems:
capas.append(i.get_attribute('aria-label'))
for capa in capas:
time.sleep(1)
data['Memory Capacity']=capa
print("=> memory capacity : ",capa)
find_and_click(capa)
find_and_fetch()
# break
# Go back one page via the small back button.
small_back_button()
except :
print("Error While passing to memory capacity!")
data['Memory Capacity']='-'
return

# find storage capacity elements and do forloop on it and call this function recursively
if "storage capacity" in text:
storage = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
for i in elems:
storage.append(i.get_attribute('aria-label'))
for store in storage:
time.sleep(1)
# start_time/end_time measure the wall-clock time of the click plus the
# recursive call for one storage option.
start_time = time.time()
data['Storage Capacity']=store
print("=>  storage capacity : ",store)
find_and_click(store)
find_and_fetch()
# break
end_time = time.time()
time_taken = end_time-start_time
print(f"The code block took {time_taken:.4f} seconds to execute.")
# NOTE(review): driver.quit() followed by `break` ends the browser session
# after a storage option has been timed; this looks like temporary
# measurement code -- confirm. The small_back_button() call that follows
# would presumably then fail against the closed session.
driver.quit()
break
small_back_button()

except :
print("Error While passing to storage selection!")
data['Storage Capacity']='-'
return

# find condition elements and do forloop on it and call this function recursively
if "condition" in text:
conditions = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
for i in elems:
conditions.append(i.get_attribute('aria-label'))
for condition in conditions:
time.sleep(1)
data['Condition']=condition
print("=> condition : ",condition)
find_and_click(condition)
# This is the only branch that calls next_button() after selecting an answer.
next_button()
find_and_fetch()
# break
small_back_button()
except :
print("Error While passing to condition selection!")
data['Condition']='-'
return

# find battery health elements and do forloop on it and call this function recursively
if "battery health" in text:
health = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
for i in elems:
health.append(i.get_attribute('aria-label'))
for hlth in health:
time.sleep(1)
data['Battery Health']=hlth
print("=> bettery health : ",hlth)
find_and_click(hlth)
find_and_fetch()
# `hlth` is not read again after this assignment, so rebinding it to None
# does not change what the loop does.
hlth = None
time.sleep(1)
small_back_button()
except:
print("Error While passing to battery health selection!")
data["Battery Health"]='-'
return

# find including charger elements and do forloop on it and call this function recursively
if "charger" in text:
charger = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
for i in elems:
charger.append(i.get_attribute('aria-label'))
for crgr in charger:
time.sleep(1)
data['Include Charger'] = crgr
print("=> include charger : ",crgr)
find_and_click(crgr)
find_and_fetch()
time.sleep(2)
small_back_button()
except :
print("Error While passing to including charger selection!")
data['Include Charger']='-'
return

# find fully functional elements and call this function recursively
if "fully functional"  in text:
functional = []
try:
elem = driver.find_element(By.XPATH,"//div[@class='answers']")
elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
for i in elems:
functional.append(i.get_attribute('aria-label'))
time.sleep(1)
# Only the first collected answer is selected here; the remaining options
# are not tried. functional[0] raises IndexError when no option was found.
data['Fully Functional']=functional[0]
find_and_click(functional[0])
find_and_fetch()
small_back_button()

except :
print("Error While passing to fully functional selection!")
data['Fully Functional']='-'
return
# find final price page and call fetch details function and return all details
time.sleep(1)
# The <h3 class="your-offer"> text identifies the final offer page.
if driver.find_element(By.XPATH, "//h3[@class='your-offer']").text:
text = driver.find_element(By.XPATH, "//h3[@class='your-offer']").text
if "Your device is valued at" in text:
print('in the final page')
# Record the offer via fetch_info(), then leave the page with the large
# back button.
fetch_info()
time.sleep(1)
large_back_button()
return
return

# if nothing is found on page
else:
try:
small_back_button()
# Best effort: any failure of small_back_button() is silently ignored here.
except:pass
return

def _xpath_literal(value):
    """Return *value* quoted as an XPath 1.0 string literal."""
    value = str(value)
    if "'" not in value:
        return f"'{value}'"
    if '"' not in value:
        return f'"{value}"'
    # XPath 1.0 has no escape character, so a value containing both quote
    # kinds has to be assembled from single-quoted pieces with concat().
    pieces = value.split("'")
    return "concat(" + ", \"'\", ".join(f"'{piece}'" for piece in pieces) + ")"


def find_and_click(elem):
    """Click the ``<div>`` whose ``aria-label`` attribute equals *elem*.

    The label is quoted as a proper XPath string literal, so labels that
    contain an apostrophe or a double quote still produce a valid XPath
    expression instead of a broken one.

    Args:
        elem: The aria-label text of the answer to click.

    Returns:
        None. This is a best-effort action: if the element does not become
        clickable within 5 seconds, or the click fails, a message is printed
        and the error is not propagated.
    """
    # Fixed pause before the lookup; presumably gives the page time to
    # re-render after the previous action -- confirm.
    time.sleep(1)
    try:
        locator = (By.XPATH, f"//div[@aria-label={_xpath_literal(elem)}]")
        target = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(locator)
        )
        driver.execute_script("arguments[0].scrollIntoView(true);", target)
        ActionChains(driver).move_to_element(target).click().perform()
    except Exception:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still stop the scraper.
        print("could not find any clickable element")
    return

def next_button():
    """Click the button whose class is ``button success right``.

    Pauses one second, waits up to 5 seconds for the button to become
    clickable, scrolls it into view and clicks it through an action chain.
    An exception raised by the wait or the click is not caught here.
    """
    time.sleep(1)
    locator = (By.XPATH, "//button[@class='button success right']")
    button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(locator))
    driver.execute_script("arguments[0].scrollIntoView(true);", button)
    ActionChains(driver).move_to_element(button).click().perform()

def small_back_button():
    """Click the button whose class is ``button secondary left``.

    Pauses one second, waits up to 5 seconds for the button to become
    clickable, scrolls it into view and clicks it through an action chain.
    An exception raised by the wait or the click is not caught here.
    """
    time.sleep(1)
    locator = (By.XPATH, "//button[@class='button secondary left']")
    button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(locator))
    # Bring the button into the viewport before moving the pointer onto it.
    driver.execute_script("arguments[0].scrollIntoView(true);", button)
    ActionChains(driver).move_to_element(button).click().perform()

def large_back_button():
    """Click the button whose class is ``button secondary large left no-margin``.

    Pauses one second, waits up to 5 seconds for the button to become
    clickable, scrolls it into view and clicks it through an action chain.
    An exception raised by the wait or the click is not caught here.
    """
    time.sleep(1)
    locator = (
        By.XPATH,
        "//button[@class='button secondary large left no-margin']",
    )
    button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(locator))
    # Bring the button into the viewport before moving the pointer onto it.
    driver.execute_script("arguments[0].scrollIntoView(true);", button)
    ActionChains(driver).move_to_element(button).click().perform()

def fetch_info():
    """Read the final offer block and hand the completed record to the sheet.

    Takes the text of the ``pricing-form-final-offer`` element and splits it
    into lines. The fourth line is stored as ``data['Price']``. The first
    line, cut at its first ',' and then at its first ':', is stored as
    ``data['Device']``. The record is printed and passed to
    ``connection_sheet``.

    Raises:
        IndexError: if the offer text has fewer than four lines.
    """
    global data
    time.sleep(1)
    offer_text = driver.find_element(
        By.XPATH, "//div[@class='pricing-form-final-offer']"
    ).text
    offer_lines = offer_text.split('\n')
    price = offer_lines[3]
    device_name = offer_lines[0].split(',')[0].split(':')[0]
    data['Device'] = device_name
    data['Price'] = price
    print(data)
    # connection_sheet comes from the project's test_sheet module; presumably
    # it writes the record to the given spreadsheet tab -- confirm.
    connection_sheet(
        spreadsheet_id='1Ze7Uam6GhNGYPXvXYF3TLZPydkZQ6u5l4rmdc7CxLOU',
        data=data,
        user_sheet_name='MacInfo',
    )

def load_page(url):
    """Open *url* in the module-level driver and start the recursive scrape.

    Args:
        url: Address of the first question page.
    """
    driver.get(url)
    # Fixed two-second pause after navigation before the first page is inspected.
    time.sleep(2)
    find_and_fetch()


# Script entry point: this call is a module-level statement, so it runs
# whenever the file is executed or imported.
load_page(url='https://www.itsworthmore.com/sell/macbook-pro-m1/macbook-pro-16-m4')
Ich möchte also die Speicheroptionen auf verschiedene Treiber aufteilen, die jeweils ihr eigenes Scraping durchführen, ohne dass es zu Konflikten untereinander kommt.

Quick Reply

Change Text Case: 
   
  • Similar Topics
    Replies
    Views
    Last post