diff --git a/crawler.py b/crawler.py index 404949e..f71ebc9 100644 --- a/crawler.py +++ b/crawler.py @@ -934,10 +934,19 @@ def login_loadsavecookie(): username = url.split("/")[-1] return driver, username -def zhihu(): - # #crawl articles links +def downloaddriver(): + url = "https://msedgedriver.azureedge.net/116.0.1938.62/edgedriver_win64.zip" if not os.path.exists(driverpath): - response = requests.get("https://msedgedriver.azureedge.net/114.0.1823.67/edgedriver_win64.zip") + ret = requests.get("https://developer.microsoft.com/en-us/microsoft-edge/tools/webdriver/") + ret = BeautifulSoup(ret.content, 'html.parser') + ddl = ret.find_all('a', class_='driver-download__link') + for k in ddl: + key = k.attrs.keys() + href = k.attrs['href'] + if 'href' in key and "win64" in href: + url = href + break + response = requests.get(url) if response.status_code==200: with open(os.path.join(abspath, 'msedgedriver/edgedriver.zip'), 'wb') as obj: obj.write(response.content) @@ -958,7 +967,15 @@ def zhihu(): if kk < 0: break - driver, username = login_loadsavecookie() +def zhihu(): + # #crawl articles links + try: + downloaddriver() + driver, username = login_loadsavecookie() + except Exception as e: + os.remove(os.path.join(abspath, 'msedgedriver', "msedgedriver.exe")) + downloaddriver() + driver, username = login_loadsavecookie() # #crawl think links if crawl_think: