Ich muss beim Web-Scraping eine Abfolge von Schritten ausführen, die jedoch viel Zeit in Anspruch nimmt. Anstatt die Funktion jeweils einzeln nacheinander aufzurufen, möchte ich die Aufrufe auf mehrere Prozesse verteilen. Dabei muss ich mich allerdings mit Konflikten zwischen mehreren Treibern auseinandersetzen. Wie kann ich das lösen?
Ich möchte, dass mehrere Prozesse ihre Aufgaben konfliktfrei jeweils in einem eigenen Treiber ausführen, in dem die vorherigen Schritte bereits geladen sind.
<strong>Hier ist der Code:</strong>
#importing important libraries
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.action_chains import ActionChains
import time
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from test_sheet import connection_sheet
# Browser options object.
# NOTE(review): `Options` is imported from the Chrome package, but the driver
# below is Firefox and is created without passing `options`, so this object is
# currently unused.
options = Options()
# options.add_experimental_option("detach", True)
# options.add_argument('headless')
# Single module-level browser session; every function in this file drives it.
driver = webdriver.Firefox()
# Record for the configuration currently being explored. '-' marks a field
# that has no value yet. Mutated in place by find_and_fetch() and fetch_info().
data = {
    'Device':'-',
    'Processor':'-',
    'Memory Capacity':'-',
    'Storage Capacity':'-',
    'Condition':'-',
    'Battery Health':'-',
    'Include Charger':'-',
    'Fully Functional':'-',
    'Price':'-'
}
def find_and_fetch():
    """Explore the quote wizard depth-first, starting from the page now shown.

    Reads the question heading (``h5`` with class ``ng-binding``) and matches
    it against the known questions.  For a matching question it collects the
    ``aria-label`` of every answer option, then clicks each option in turn and
    calls itself to handle the page that follows.  When the final-offer
    heading is found instead, ``fetch_info()`` records the result and the
    large back button is clicked.

    The selections made so far are kept in the module-level ``data`` dict.

    NOTE(review): the indentation of this function was reconstructed from a
    flattened paste.  The exact nesting should be confirmed against the
    author's original file -- in particular whether each
    ``data[...] = '-'`` / ``return`` pair sits inside or after its ``except``
    clause, whether the trailing sleeps belong to the loops, and which ``if``
    the final ``else`` belongs to.
    """
    global data
    try:
        # Use JavaScript to hide the banner
        driver.execute_script("""
var banner = document.querySelector('a.message-banner');
if (banner) {
banner.style.display = 'none';
}""")
    except:
        print("banner disabled")
    time.sleep(1)
    # NOTE(review): the heading is looked up twice.  find_element presumably
    # raises when the element is missing instead of returning something falsy,
    # so the `else` at the bottom would only cover an empty heading -- confirm
    # against the Selenium documentation.
    if driver.find_element(By.XPATH,"//h5[@class='ng-binding']").text:
        text = driver.find_element(By.XPATH,"//h5[@class='ng-binding']").text
        # Processor question: collect every option label, then click each one
        # and recurse into the page that follows.
        if 'Processor' in text:
            processor = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                # NOTE(review): this XPath starts with "//", so it presumably
                # searches the whole document rather than only inside `elem`
                # (the same pattern is used in every branch below) -- confirm.
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                # Labels are copied out first; the elements themselves are not
                # reused after the page has been navigated.
                for i in elems:
                    processor.append(i.get_attribute('aria-label'))
                for prces in processor:
                    time.sleep(1)
                    data['Processor']=prces
                    print("=> processor : ",prces)
                    find_and_click(prces)
                    find_and_fetch()
                    # break
            except Exception as e :
                # print("Error While passing to processor selection!")
                print(e)
            # Unlike the branches below, no back button is clicked here.
            data['Processor']='-'
            return
        # Memory capacity question: same pattern, then step back one page.
        if "memory capacity" in text:
            capas = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                for i in elems:
                    capas.append(i.get_attribute('aria-label'))
                for capa in capas:
                    time.sleep(1)
                    data['Memory Capacity']=capa
                    print("=> memory capacity : ",capa)
                    find_and_click(capa)
                    find_and_fetch()
                    # break
                small_back_button()
            except :
                print("Error While passing to memory capacity!")
            data['Memory Capacity']='-'
            return
        # Storage capacity question.
        if "storage capacity" in text:
            storage = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                for i in elems:
                    storage.append(i.get_attribute('aria-label'))
                for store in storage:
                    time.sleep(1)
                    start_time = time.time()
                    data['Storage Capacity']=store
                    print("=> storage capacity : ",store)
                    find_and_click(store)
                    find_and_fetch()
                    # break
                    end_time = time.time()
                    time_taken = end_time-start_time
                    print(f"The code block took {time_taken:.4f} seconds to execute.")
                    # NOTE(review): the browser is closed and the loop left
                    # after the FIRST storage option has been timed; this looks
                    # like a temporary measurement aid.  Any later use of
                    # `driver` (including the back button below) happens after
                    # quit() -- confirm this is intended.
                    driver.quit()
                    break
                small_back_button()
            except :
                print("Error While passing to storage selection!")
            data['Storage Capacity']='-'
            return
        # Condition question: here the "next" button is clicked after each
        # selection before recursing.
        if "condition" in text:
            conditions = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                for i in elems:
                    conditions.append(i.get_attribute('aria-label'))
                for condition in conditions:
                    time.sleep(1)
                    data['Condition']=condition
                    print("=> condition : ",condition)
                    find_and_click(condition)
                    next_button()
                    find_and_fetch()
                    # break
                small_back_button()
            except :
                print("Error While passing to condition selection!")
            data['Condition']='-'
            return
        # Battery health question.
        if "battery health" in text:
            health = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                for i in elems:
                    health.append(i.get_attribute('aria-label'))
                for hlth in health:
                    time.sleep(1)
                    data['Battery Health']=hlth
                    print("=> bettery health : ",hlth)
                    find_and_click(hlth)
                    find_and_fetch()
                    # Rebinding the loop variable has no effect on the loop.
                    hlth = None
                time.sleep(1)
                small_back_button()
            except:
                print("Error While passing to battery health selection!")
            data["Battery Health"]='-'
            return
        # Charger question.
        if "charger" in text:
            charger = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                for i in elems:
                    charger.append(i.get_attribute('aria-label'))
                for crgr in charger:
                    time.sleep(1)
                    data['Include Charger'] = crgr
                    print("=> include charger : ",crgr)
                    find_and_click(crgr)
                    find_and_fetch()
                time.sleep(2)
                small_back_button()
            except :
                print("Error While passing to including charger selection!")
            data['Include Charger']='-'
            return
        # Fully functional question: only the FIRST option is followed, there
        # is no loop over the alternatives here.
        if "fully functional" in text:
            functional = []
            try:
                elem = driver.find_element(By.XPATH,"//div[@class='answers']")
                elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']")
                for i in elems:
                    functional.append(i.get_attribute('aria-label'))
                time.sleep(1)
                data['Fully Functional']=functional[0]
                find_and_click(functional[0])
                find_and_fetch()
                small_back_button()
            except :
                print("Error While passing to fully functional selection!")
            data['Fully Functional']='-'
            return
        # No known question matched: check for the final price page, record
        # its details and go back.
        time.sleep(1)
        if driver.find_element(By.XPATH, "//h3[@class='your-offer']").text:
            text = driver.find_element(By.XPATH, "//h3[@class='your-offer']").text
            if "Your device is valued at" in text:
                print('in the final page')
                fetch_info()
                time.sleep(1)
                large_back_button()
                return
            return
    # if nothing is found on page
    else:
        # Best effort: try to step back, ignoring any failure to do so.
        try:
            small_back_button()
        except:pass
        return
def _xpath_literal(value):
    """Return *value* as an XPath 1.0 string literal.

    XPath 1.0 has no escape character, so a value containing both quote
    characters is expressed as a ``concat(...)`` of single-quoted pieces
    joined by a double-quoted apostrophe.
    """
    # str() mirrors what f-string interpolation did with non-string labels
    # (e.g. None when the attribute is absent).
    value = str(value)
    if "'" not in value:
        return f"'{value}'"
    if '"' not in value:
        return f'"{value}"'
    pieces = (f"'{piece}'" for piece in value.split("'"))
    return "concat(" + ", \"'\", ".join(pieces) + ")"


def find_and_click(elem):
    """Click the answer option whose ``aria-label`` equals *elem*.

    Waits up to 5 seconds for the matching ``div`` to become clickable,
    scrolls it into view and clicks it through an action chain.  Selenium
    failures (timeout, stale element, ...) are reported on stdout and
    otherwise ignored, so the caller can carry on.

    Args:
        elem: The ``aria-label`` text of the option to click.
    """
    # Local import: keeps this block self-contained without touching the
    # file's top-level import section.
    from selenium.common.exceptions import WebDriverException

    time.sleep(1)
    # Quote the label properly so that labels containing ' or " still yield a
    # valid XPath expression.
    locator = (By.XPATH, f"//div[@aria-label={_xpath_literal(elem)}]")
    try:
        target = WebDriverWait(driver, 5).until(
            EC.element_to_be_clickable(locator)
        )
        driver.execute_script("arguments[0].scrollIntoView(true);", target)
        ActionChains(driver).move_to_element(target).click().perform()
    except WebDriverException:
        # Deliberately best effort, but limited to Selenium errors so that
        # KeyboardInterrupt / SystemExit and programming errors still surface.
        print("could not find any clickable element")
    return
def next_button():
    """Click the button with class ``button success right``.

    Pauses one second, waits up to 5 seconds for the button to become
    clickable, scrolls it into view and clicks it through an action chain.
    """
    time.sleep(1)
    locator = (By.XPATH, "//button[@class='button success right']")
    forward = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(locator))
    # Bring the button into the viewport before moving the pointer onto it.
    driver.execute_script("arguments[0].scrollIntoView(true);", forward)
    ActionChains(driver).move_to_element(forward).click().perform()
def small_back_button():
    """Click the button with class ``button secondary left``.

    Pauses one second, waits up to 5 seconds for the button to become
    clickable, scrolls it into view and clicks it through an action chain.
    """
    time.sleep(1)
    locator = (By.XPATH, "//button[@class='button secondary left']")
    button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(locator))
    # Scroll first so the pointer move below targets a visible element.
    driver.execute_script("arguments[0].scrollIntoView(true);", button)
    ActionChains(driver).move_to_element(button).click().perform()
def large_back_button():
    """Click the button with class ``button secondary large left no-margin``.

    Pauses one second, waits up to 5 seconds for the button to become
    clickable, scrolls it into view and clicks it through an action chain.
    """
    time.sleep(1)
    locator = (
        By.XPATH,
        "//button[@class='button secondary large left no-margin']",
    )
    button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable(locator))
    # Scroll first so the pointer move below targets a visible element.
    driver.execute_script("arguments[0].scrollIntoView(true);", button)
    ActionChains(driver).move_to_element(button).click().perform()
def fetch_info():
    """Read the final-offer box, store device and price, and push the record.

    The box text is split into lines.  The price is taken from the fourth
    line; the device name is the part of the first line before the first
    ``,`` and before the first ``:``.  The updated module-level ``data`` dict
    is printed and passed to ``connection_sheet``.
    """
    global data
    time.sleep(1)
    offer_box = driver.find_element(By.XPATH ,"//div[@class='pricing-form-final-offer']")
    offer_lines = offer_box.text.split('\n')
    # Read the price before touching `data`, so a short offer text fails
    # before anything has been modified.
    offer_price = offer_lines[3]
    headline = offer_lines[0].partition(',')[0]
    data['Device'] = headline.partition(':')[0]
    data['Price'] = offer_price
    print(data)
    connection_sheet(
        spreadsheet_id='1Ze7Uam6GhNGYPXvXYF3TLZPydkZQ6u5l4rmdc7CxLOU',
        data=data,
        user_sheet_name='MacInfo',
    )
def load_page(url):
    """Open *url* in the shared browser and start the crawl from that page."""
    driver.get(url)
    # Fixed pause before the first lookup; presumably gives the page time to
    # render.
    time.sleep(2)
    find_and_fetch()
# Script entry point: runs at import time and starts the crawl for one product page.
load_page(url='https://www.itsworthmore.com/sell/macbook-pro-m1/macbook-pro-16-m4')
Ich möchte also die Speichervarianten (Storage Capacity) auf mehrere Treiber aufteilen, die jeweils ihr eigenes Scraping durchführen, ohne miteinander in Konflikt zu geraten.
Ich muss beim Web-Scraping eine Abfolge von Schritten ausführen, die jedoch viel Zeit in Anspruch nimmt. Anstatt die Funktion jeweils einzeln nacheinander aufzurufen, möchte ich die Aufrufe auf mehrere Prozesse verteilen. Dabei muss ich mich allerdings mit Konflikten zwischen mehreren Treibern auseinandersetzen. Wie kann ich das lösen? Ich möchte, dass mehrere Prozesse ihre Aufgaben konfliktfrei jeweils in einem eigenen Treiber ausführen, in dem die vorherigen Schritte bereits geladen sind. <strong>Hier ist der Code:</strong> [code]#importing important libraries from selenium import webdriver from selenium.webdriver.common.by import By from selenium.webdriver.common.action_chains import ActionChains import time from selenium.webdriver.support.ui import WebDriverWait from selenium.webdriver.support import expected_conditions as EC from selenium.webdriver.chrome.options import Options from test_sheet import connection_sheet
def find_and_fetch(): global data try: # Use JavaScript to hide the banner driver.execute_script(""" var banner = document.querySelector('a.message-banner'); if (banner) { banner.style.display = 'none'; }""") except: print("banner disabled")
time.sleep(1) if driver.find_element(By.XPATH,"//h5[@class='ng-binding']").text: text = driver.find_element(By.XPATH,"//h5[@class='ng-binding']").text # find processor elements and do forloop for it and call this function recursively if 'Processor' in text: processor = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: processor.append(i.get_attribute('aria-label')) for prces in processor: time.sleep(1) data['Processor']=prces print("=> processor : ",prces) find_and_click(prces) find_and_fetch() # break except Exception as e : # print("Error While passing to processor selection!") print(e) data['Processor']='-' return
# find memory capacity elements and do forloop on it and call this function recursively if "memory capacity" in text: capas = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: capas.append(i.get_attribute('aria-label')) for capa in capas: time.sleep(1) data['Memory Capacity']=capa print("=> memory capacity : ",capa) find_and_click(capa) find_and_fetch() # break small_back_button() except : print("Error While passing to memory capacity!") data['Memory Capacity']='-' return
# find storage capacity elements and do forloop on it and call this function recursively if "storage capacity" in text: storage = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: storage.append(i.get_attribute('aria-label')) for store in storage: time.sleep(1) start_time = time.time() data['Storage Capacity']=store print("=> storage capacity : ",store) find_and_click(store) find_and_fetch() # break end_time = time.time() time_taken = end_time-start_time print(f"The code block took {time_taken:.4f} seconds to execute.") driver.quit() break small_back_button()
except : print("Error While passing to storage selection!") data['Storage Capacity']='-' return
# find condition elemetns and do forloop on it and call this function recursively if "condition" in text: conditions = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: conditions.append(i.get_attribute('aria-label')) for condition in conditions: time.sleep(1) data['Condition']=condition print("=> condition : ",condition) find_and_click(condition) next_button() find_and_fetch() # break small_back_button() except : print("Error While passing to condition selection!") data['Condition']='-' return
# find battery health elements and do forloop on it and call this function recursively if "battery health" in text: health = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: health.append(i.get_attribute('aria-label')) for hlth in health: time.sleep(1) data['Battery Health']=hlth print("=> bettery health : ",hlth) find_and_click(hlth) find_and_fetch() hlth = None time.sleep(1) small_back_button() except: print("Error While passing to battery health selection!") data["Battery Health"]='-' return
# find including charger elements and do forloop on it and call this function recursively if "charger" in text: charger = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: charger.append(i.get_attribute('aria-label')) for crgr in charger: time.sleep(1) data['Include Charger'] = crgr print("=> include charger : ",crgr) find_and_click(crgr) find_and_fetch() time.sleep(2) small_back_button() except : print("Error While passing to including charger selection!") data['Include Charger']='-' return
# find fully functional elements and do forloop on it and call this function recursively if "fully functional" in text: functional = [] try: elem = driver.find_element(By.XPATH,"//div[@class='answers']") elems = elem.find_elements(By.XPATH, "//div[@ng-keydown='selectAnswer($index, $event)']") for i in elems: functional.append(i.get_attribute('aria-label')) time.sleep(1) data['Fully Functional']=functional[0] find_and_click(functional[0]) find_and_fetch() small_back_button()
except : print("Error While passing to fully functional selection!") data['Fully Functional']='-' return # find final price page and call fetch details function and return all details time.sleep(1) if driver.find_element(By.XPATH, "//h3[@class='your-offer']").text: text = driver.find_element(By.XPATH, "//h3[@class='your-offer']").text if "Your device is valued at" in text: print('in the final page') fetch_info() time.sleep(1) large_back_button() return return
# if nothing is found on page else: try: small_back_button() except:pass return
def small_back_button(): time.sleep(1) back = WebDriverWait(driver, 5).until( EC.element_to_be_clickable( (By.XPATH, "//button[@class='button secondary left']") ) ) # using this page will be scrolled to the element driver.execute_script("arguments[0].scrollIntoView(true);", back) # action chain to perform moving to the element and click it action = ActionChains(driver) action.move_to_element(back).click().perform() return
def large_back_button(): time.sleep(1) back = WebDriverWait(driver, 5).until( EC.element_to_be_clickable( ( By.XPATH, "//button[@class='button secondary large left no-margin']", ) ) ) # using this page will be scrolled to the element driver.execute_script("arguments[0].scrollIntoView(true);", back) # action chain to perform moving to the element and click it action = ActionChains(driver) action.move_to_element(back).click().perform() return
def fetch_info(): global data time.sleep(1) elem = driver.find_element(By.XPATH ,"//div[@class='pricing-form-final-offer']").text text = elem.split('\n') price = text[3] text = text[0].split(',') device_name = text[0].split(':') data['Device']=device_name[0] data['Price']=price print(data) connection_sheet(spreadsheet_id='1Ze7Uam6GhNGYPXvXYF3TLZPydkZQ6u5l4rmdc7CxLOU',data=data,user_sheet_name='MacInfo') return
load_page(url='https://www.itsworthmore.com/sell/macbook-pro-m1/macbook-pro-16-m4') [/code] Ich möchte also die Speichervarianten (Storage Capacity) auf mehrere Treiber aufteilen, die jeweils ihr eigenes Scraping durchführen, ohne miteinander in Konflikt zu geraten.
Ich muss beim Web-Scraping eine Sequenz ausführen, die jedoch viel Zeit in Anspruch nimmt. Anstatt die einzelne Funktion einzeln aufzurufen, möchte ich sie als verschiedene Prozesse übergeben. Wenn...
Wie erreiche ich, dass jedes Bild, das ich per Web-Scraping heruntergeladen habe, in einem Ordner gespeichert wird? Ich benutze derzeit Google Colab, da ich nur übe. Ich möchte sie in meinem...
Ich arbeite an einem Web-Scraping-Projekt mit ASP.NET C# und muss CAPTCHA-Bilder automatisch verarbeiten. Konkret muss ich Text aus CAPTCHA-Bildern extrahieren und ihn ohne manuellen Eingriff in das...
Problembeschreibung:
Ich versuche, den Firmennamen und die Telefonnummer des Verkäufers von Amazon-Produktseiten mit Selenium und BeautifulSoup zu extrahieren. Mein Code navigiert zum...