Selenium script - stuck - could someone take a look?
Veek M
veekm at foo.com
Sat May 29 05:40:35 EDT 2021
Script: http://paste.debian.net/1199271/
It mostly works, but line 78 is supposed to extract
<span class="price-unit">100 pieces / lot</span>. No matter what I
try, it fails and I don't know why. It's a simple div.classname
match.
Could someone take a look and figure it out? I'm stuck.
--------------------------------------------------------
import re, sys, time
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import StaleElementReferenceException
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
# Start page loaded before every search.
url = 'https://www.aliexpress.com'

# DesiredCapabilities.FIREFOX is a dict shared at class level; work on a copy
# so that setting pageLoadStrategy does not leak into other users of the
# template.
caps = DesiredCapabilities.FIREFOX.copy()
caps["pageLoadStrategy"] = 'eager'

# Exceptions that WebDriverWait should swallow and retry on while polling.
ignored_exceptions = (NoSuchElementException, StaleElementReferenceException)

# HTML report; ali_search() appends the table rows and closes the file.
fh = open('/tmp/log.html', 'w')
fh.write('<!doctype html> <title>parts</title><body>\n<table>\n')
def convert(m, rate=72.4):
    """Return the matched decimal amount multiplied by *rate*, as a string.

    Intended as a ``re.sub`` replacement callback (which passes only the
    match object, so *rate* falls back to its default).

    m    -- regex match whose full text parses as a float; surrounding
            whitespace is tolerated by ``float``.
    rate -- multiplier applied to the amount. The default 72.4 is the
            constant the script has always used (presumably a currency
            exchange rate -- confirm before relying on it).

    The product is rounded to 3 decimal places before being stringified.
    """
    amount = float(m.group())
    return str(round(amount * rate, 3))
import re
def process_fields(txt):
    """Strip price decorations from *txt* and convert every decimal amount.

    txt -- text of a scraped price/shipping cell.

    When *txt* contains a '$', the tokens '+', '$', 'US' and 'Shipping:' are
    removed and each ``digits.digits`` number (together with any whitespace
    directly in front of it) is replaced by ``convert``'s result.

    When *txt* contains no '$' there is nothing to convert, so the text is
    returned unchanged instead of falling through with no result.
    """
    if '$' not in txt:
        return txt

    # Same tokens, same order as before: remove the decorations first so only
    # the bare numbers are left for the regex.
    for token in ('+', '$', 'US', 'Shipping:'):
        txt = txt.replace(token, '')

    return str(re.sub(r'(\s*[0-9]+\.[0-9]+)', convert, txt))
def _field_text(item, param):
    """Return the text of the first descendant of *item* matching *param*.

    item  -- WebElement to search below.
    param -- one or more space-separated CSS class names; the element must
             carry all of them.

    A class-name locator accepts only a single class, so a compound value
    such as 'item-price-row packaging-sale' is turned into the CSS selector
    '.item-price-row.packaging-sale' instead.

    Raises NoSuchElementException when nothing matches.
    """
    selector = '.' + '.'.join(param.split())
    matches = item.find_elements(By.CSS_SELECTOR, selector)
    if not matches:
        raise NoSuchElementException('no element matches ' + selector)
    return matches[0].text


def ali_search(url, txt):
    """Search AliExpress for *txt* and append one table row per result to fh.

    url -- page to open; its title must contain 'AliExpress'.
    txt -- search text, typed into the search box one character per second.

    Uses the module-level ``driver``, ``ignored_exceptions`` and ``fh``.
    Each row gets four cells: current price, lot/packaging row, shipping
    value and store name. Price and shipping text go through
    ``process_fields``; the other two are written verbatim. If a cell cannot
    be read, the exception text is written in its place so one bad cell does
    not abort the whole report. The table footer is written and ``fh`` is
    closed before returning.

    Raises AssertionError if the page title check fails; a wait that times
    out propagates the exception raised by ``WebDriverWait.until``.
    """
    driver.get(url)
    assert 'AliExpress' in driver.title

    # Wait for the search box to exist. Failures propagate from here rather
    # than being masked by follow-up code touching an unassigned element.
    WebDriverWait(driver, 3600, ignored_exceptions=ignored_exceptions).until(
        EC.presence_of_element_located(
            (By.XPATH, '//div[@class="search-key-box"]')))
    print('search')

    search_box = driver.find_element(By.ID, 'search-key')
    if 'input' in search_box.tag_name:
        print('success')

    # Type slowly, one character per second, then submit.
    for ch in txt:
        time.sleep(1)
        search_box.send_keys(ch)
    search_box.send_keys(Keys.RETURN)

    element = WebDriverWait(
        driver, 3600, ignored_exceptions=ignored_exceptions).until(
            EC.presence_of_element_located(
                (By.XPATH, '//div[@class="product-container"]')))
    print('product-container')

    # Page through the results from the top (presumably so that lazily
    # rendered items get loaded -- confirm against the live page).
    body = driver.find_element(By.XPATH, '//body')
    body.send_keys(Keys.HOME)
    for _ in range(1, 10):
        print('send END')
        time.sleep(1)
        body.send_keys(Keys.PAGE_DOWN)
    time.sleep(1)

    # NOTE: an XPath starting with '//' searches the whole document even when
    # issued from an element, exactly as the original call did.
    items = element.find_elements(
        By.XPATH, '//li[@class="list-item packaging_sale"]')

    fields = ['price-current', 'item-price-row packaging-sale',
              'shipping-value', 'store-name']
    for item in items:
        fh.write('<tr>')
        for param in fields:
            try:
                if 'sale' in param and 'store' not in param:
                    print(param)
                cell = _field_text(item, param)
                if 'store' not in param and 'sale' not in param:
                    # Only price-like cells carry amounts to convert.
                    cell = process_fields(cell)
            except Exception as e:
                # Deliberate best-effort: record the failure in the cell.
                cell = str(e)
            fh.write('<td>' + cell + '</td>')
        fh.write('</tr>\n')

    fh.write('\n</table></body>')
    fh.close()
def part_lookup():
    """Run one AliExpress search for the text given on the command line.

    Expects exactly one argument (``sys.argv[1]``, the search text). With any
    other argument count a usage line is written to stderr and the function
    returns without starting a browser.

    Otherwise Firefox is started with the module-level ``caps`` and bound to
    the global ``driver`` (which ``ali_search`` reads), the search is run,
    and the browser is kept open for an hour before the ``with`` block
    closes it.
    """
    global driver

    if len(sys.argv) != 2:
        prog = sys.argv[0] if sys.argv else 'script'
        sys.stderr.write('usage: %s SEARCH_TEXT\n' % prog)
        return

    with webdriver.Firefox(executable_path=r'/mnt/sdb1/root/geckodriver',
                           firefox_binary='/mnt/sdb1/firefox/firefox-bin',
                           capabilities=caps) as driver:
        ali_search(url, sys.argv[1])
        # Leave the browser window up so the results can be inspected.
        time.sleep(3600)
# Script entry point: the lookup runs as soon as this file is executed.
part_lookup()
More information about the Python-list
mailing list