使用 selenium 抓取电影数据,磁力链,迅雷下载地址
使用 selenium 抓取某电影网站的电影数据、磁力链和迅雷下载地址,并将结果生成 JSON 文件保存。
# Scrape a single film page with Selenium and save the result as JSON.
#
# The script opens `website` in Chrome, reads the film title, poster URL,
# description blocks, magnet links and Thunder (Xunlei) links, prints each of
# them, and writes everything to "<film title>.json" in the working directory.
import json
import re

from selenium import webdriver
# Not used by the code below; the import is retained so nothing that relies on
# it being available in this module breaks.
from selenium.webdriver.common.action_chains import ActionChains

website = 'https://www.piaohua.com/html/kehuan/2018/0509/33652.html'

# Raw string: the Windows path contains backslashes that must not be read as
# escape sequences.
CHROMEDRIVER_PATH = r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe'

FILM_TITLE = ''
FILM_PICTURE = ''
FILM_DESC = []
FILM_MAGNETS = []
FILM_TUNDERS = []
FILM_JSON = {}

mydriver = webdriver.Chrome(CHROMEDRIVER_PATH)
try:
    mydriver.get(website)
    mydriver.implicitly_wait(5)

    # Title: the <h3> inside the element with id "show".
    title = mydriver.find_element_by_id("show")
    title = title.find_element_by_css_selector("h3")
    print(title.text)
    FILM_TITLE = title.text

    desc = mydriver.find_element_by_id("showinfo")

    # Poster image URL: first <img> inside the "showinfo" element.
    picture = desc.find_element_by_css_selector('img')
    FILM_PICTURE = picture.get_attribute('src')
    print(FILM_PICTURE)

    # Plot description: text of every <div> inside the "showinfo" element.
    desclist = desc.find_elements_by_css_selector("div")
    for d in desclist:
        print(d.text)
        FILM_DESC.append(d.text)

    # Download addresses (magnet): anchors inside the matching table cells.
    magnet_link = mydriver.find_elements_by_css_selector(
        'td[style="word-break: break-all; line-height: 18px"]')
    for l in magnet_link:
        magnet_link1 = l.find_element_by_css_selector("a")
        linktext = magnet_link1.get_attribute('href')
        # get_attribute returns None when the anchor has no href; skip those
        # instead of failing on the prefix check.
        if linktext and linktext.startswith('magnet'):
            print(linktext)
            FILM_MAGNETS.append(linktext)

    # Download addresses (Thunder): the page's own ThunderNetwork_SetHref
    # script is invoked on each anchor before its href is read.
    thunder_link = mydriver.find_elements_by_css_selector(
        'a[onclick="return OnDownloadClick_Simple(this,2)"]')
    for l in thunder_link:
        mydriver.execute_script('ThunderNetwork_SetHref(arguments[0])', l)
        text = l.get_attribute('href')
        print(text)
        FILM_TUNDERS.append(text)
finally:
    # Always end the browser session, even when a lookup above raises;
    # quit() closes every window, so a separate close() is unnecessary.
    mydriver.quit()

FILM_JSON['title'] = FILM_TITLE
FILM_JSON['picture'] = FILM_PICTURE
FILM_JSON['desc'] = FILM_DESC
FILM_JSON['magnets'] = FILM_MAGNETS
FILM_JSON['thunders'] = FILM_TUNDERS

# Replace characters that are not allowed in Windows file names so a title
# containing e.g. '/' or ':' cannot make open() fail; other titles produce
# the same file name as before.
filename = re.sub(r'[\\/:*?"<>|]', '_', FILM_TITLE) + '.json'
with open(filename, 'w', encoding='utf-8') as file:
    json.dump(FILM_JSON, file, ensure_ascii=False)
发布: 2018/6/19 分类: 开发之路 阅读: 次 评论: 0次