本篇內(nèi)容主要講解“Selenium爬蟲(chóng)登錄生成Cookie的方法”,感興趣的朋友不妨來(lái)看看。本文介紹的方法操作簡(jiǎn)單快捷,實(shí)用性強(qiáng)。下面就讓小編來(lái)帶大家學(xué)習(xí)“Selenium爬蟲(chóng)登錄生成Cookie的方法”吧!
成都創(chuàng)新互聯(lián)服務(wù)項(xiàng)目包括鄂州網(wǎng)站建設(shè)、鄂州網(wǎng)站制作、鄂州網(wǎng)頁(yè)制作以及鄂州網(wǎng)絡(luò)營(yíng)銷(xiāo)策劃等。多年來(lái),我們專(zhuān)注于互聯(lián)網(wǎng)行業(yè),利用自身積累的技術(shù)優(yōu)勢(shì)、行業(yè)經(jīng)驗(yàn)、深度合作伙伴關(guān)系等,向廣大中小型企業(yè)、政府機(jī)構(gòu)等提供互聯(lián)網(wǎng)行業(yè)的解決方案,鄂州網(wǎng)站推廣取得了明顯的社會(huì)效益與經(jīng)濟(jì)效益。目前,我們服務(wù)的客戶(hù)以成都為中心已經(jīng)輻射到鄂州省份的部分城市,未來(lái)相信會(huì)繼續(xù)擴(kuò)大服務(wù)區(qū)域并繼續(xù)獲得客戶(hù)的支持與信任!
Selenium爬蟲(chóng)登錄生成Cookie
網(wǎng)站登錄,本質(zhì)上就是登錄完成之后服務(wù)器給客戶(hù)端一個(gè)憑據(jù),這個(gè)憑據(jù)可以存在cookie里,也可以存在別的地方,客戶(hù)端下次請(qǐng)求網(wǎng)頁(yè)的時(shí)候帶上這個(gè)憑據(jù),服務(wù)端就會(huì)認(rèn)為這個(gè)客戶(hù)端是登錄過(guò)的。所以對(duì)于爬蟲(chóng)來(lái)說(shuō),生成cookies是代理使用成為了一個(gè)必須要做的事情。
示例
安裝chrome和對(duì)應(yīng)driver
下載chrome對(duì)應(yīng)版本的chrome deriver
下載對(duì)應(yīng)版本 driver
https://chromedriver.chromium.org/downloads
例如:
生成cookies
我們使用chrome driver來(lái)進(jìn)行登錄和cookie的生成
import os
import time
import zipfile
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class GenCookies(object):
USER_AGENT = open('useragents.txt').readlines()
# 16yun 代理配置
PROXY_HOST = 't.16yun.cn' # proxy or host 億牛云代理www.16yun.cn
PROXY_PORT = 31111 # port
PROXY_USER = 'USERNAME' # username
PROXY_PASS = 'PASSWORD' # password
@classmethod
def get_chromedriver(cls, use_proxy=False, user_agent=None):
manifest_json = """
{
"version": "1.0.0",
"manifest_version": 2,
"name": "Chrome Proxy",
"permissions": [
"proxy",
"tabs",
"unlimitedStorage",
"storage",
"
"webRequest",
"webRequestBlocking"
],
"background": {
"scripts": ["background.js"]
},
"minimum_chrome_version":"22.0.0"
}
"""
background_js = """
var config = {
mode: "fixed_servers",
rules: {
singleProxy: {
scheme: "http",
host: "%s",
port: parseInt(%s)
},
bypassList: ["localhost"]
}
};
chrome.proxy.settings.set({value: config, scope: "regular"}, function() {});
function callbackFn(details) {
return {
authCredentials: {
username: "%s",
password: "%s"
}
};
}
chrome.webRequest.onAuthRequired.addListener(
callbackFn,
{urls: ["
['blocking']
);
""" % (cls.PROXY_HOST, cls.PROXY_PORT, cls.PROXY_USER, cls.PROXY_PASS)
path = os.path.dirname(os.path.abspath(__file__))
chrome_options = webdriver.ChromeOptions()
if use_proxy:
pluginfile = 'proxy_auth_plugin.zip'
with zipfile.ZipFile(pluginfile, 'w') as zp:
zp.writestr("manifest.json", manifest_json)
zp.writestr("background.js", background_js)
chrome_options.add_extension(pluginfile)
if user_agent:
chrome_options.add_argument('--user-agent=%s' % user_agent)
driver = webdriver.Chrome(
os.path.join(path, 'chromedriver'),
chrome_options=chrome_options)
return driver
def __init__(self, username, password):
self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/'
self.browser = self.get_chromedriver(use_proxy=True, user_agent=self.USER_AGENT)
self.wait = WebDriverWait(self.browser, 20)
self.username = username
self.password = password
def open(self):
"""
打開(kāi)網(wǎng)頁(yè)輸入用戶(hù)名密碼并點(diǎn)擊
:return: None
"""
self.browser.delete_all_cookies()
self.browser.get(self.url)
username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName')))
password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword')))
submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction')))
username.send_keys(self.username)
password.send_keys(self.password)
time.sleep(1)
submit.click()
def password_error(self):
"""
判斷是否密碼錯(cuò)誤
:return:
"""
try:
return WebDriverWait(self.browser, 5).until(
EC.text_to_be_present_in_element((By.ID, 'errorMsg'), '用戶(hù)名或密碼錯(cuò)誤'))
except TimeoutException:
return False
def get_cookies(self):
"""
獲取Cookies
:return:
"""
return self.browser.get_cookies()
def main(self):
"""
入口
:return:
"""
self.open()
if self.password_error():
return {
'status': 2,
'content': '用戶(hù)名或密碼錯(cuò)誤'
}
# 如果不需要驗(yàn)證碼直接登錄成功
cookies = self.get_cookies()
return {
'status': 1,
'content': cookies
}
if __name__ == '__main__':
result = GenCookies(
username='180000000',
password='16yun',
).main()
print(result)
import osimport timeimport zipfilefrom selenium import webdriverfrom selenium.common.exceptions import TimeoutExceptionfrom selenium.webdriver.common.by import Byfrom selenium.webdriver.support import expected_conditions as ECfrom selenium.webdriver.support.ui import WebDriverWaitclass GenCookies(object): USER_AGENT = open('useragents.txt').readlines() # 16yun 代理配置 PROXY_HOST = 't.16yun.cn' # proxy or host 億牛云代理www.16yun.cn PROXY_PORT = 31111 # port PROXY_USER = 'USERNAME' # username PROXY_PASS = 'PASSWORD' # password @classmethod def get_chromedriver(cls, use_proxy=False, user_agent=None): manifest_json = """ { "version": "1.0.0", "manifest_version": 2, "name": "Chrome Proxy", "permissions": [ "proxy", "tabs", "unlimitedStorage", "storage", "", "webRequest", "webRequestBlocking" ], "background": { "scripts": ["background.js"] }, "minimum_chrome_version":"22.0.0" } """ background_js = """ var config = { mode: "fixed_servers", rules: { singleProxy: { scheme: "http", host: "%s", port: parseInt(%s) }, bypassList: ["localhost"] } }; chrome.proxy.settings.set({value: config, scope: "regular"}, function() {}); function callbackFn(details) { return { authCredentials: { username: "%s", password: "%s" } }; } chrome.webRequest.onAuthRequired.addListener( callbackFn, {urls: [" "]}, ['blocking'] ); """ % (cls.PROXY_HOST, cls.PROXY_PORT, cls.PROXY_USER, cls.PROXY_PASS) path = os.path.dirname(os.path.abspath(__file__)) chrome_options = webdriver.ChromeOptions() if use_proxy: pluginfile = 'proxy_auth_plugin.zip' with zipfile.ZipFile(pluginfile, 'w') as zp: zp.writestr("manifest.json", manifest_json) zp.writestr("background.js", background_js) chrome_options.add_extension(pluginfile) if user_agent: chrome_options.add_argument('--user-agent=%s' % user_agent) driver = webdriver.Chrome( os.path.join(path, 'chromedriver'), chrome_options=chrome_options) return driver def __init__(self, username, password): self.url = 'https://passport.weibo.cn/signin/login?entry=mweibo&r=https://m.weibo.cn/' self.browser = self.get_chromedriver(use_proxy=True, user_agent=self.USER_AGENT) self.wait = WebDriverWait(self.browser, 20) self.username = username self.password = password def open(self): """ 打開(kāi)網(wǎng)頁(yè)輸入用戶(hù)名密碼并點(diǎn)擊 :return: None """ self.browser.delete_all_cookies() self.browser.get(self.url) username = self.wait.until(EC.presence_of_element_located((By.ID, 'loginName'))) password = self.wait.until(EC.presence_of_element_located((By.ID, 'loginPassword'))) submit = self.wait.until(EC.element_to_be_clickable((By.ID, 'loginAction'))) username.send_keys(self.username) password.send_keys(self.password) time.sleep(1) submit.click() def password_error(self): """ 判斷是否密碼錯(cuò)誤 :return: """ try: return WebDriverWait(self.browser, 5).until( EC.text_to_be_present_in_element((By.ID, 'errorMsg'), '用戶(hù)名或密碼錯(cuò)誤')) except TimeoutException: return False def get_cookies(self): """ 獲取Cookies :return: """ return self.browser.get_cookies() def main(self): """ 入口 :return: """ self.open() if self.password_error(): return { 'status': 2, 'content': '用戶(hù)名或密碼錯(cuò)誤' } # 如果不需要驗(yàn)證碼直接登錄成功 cookies = self.get_cookies() return { 'status': 1, 'content': cookies }if __name__ == '__main__': result = GenCookies( username='180000000', password='16yun', ).main() print(result)
useragents.txt文件里面是隨機(jī)的ua
Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.2.16) Gecko/20110319 Firefox/40Mozilla/5.0 (Windows NT 6.2; rv:39.0) Gecko/20100101 Firefox/39.0Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0; QQBrowser/8.3.4769.400)Mozilla/5.0 (Windows NT 6.1; rv:39.0) Gecko/20100101 Firefox/39.0
到此,相信大家對(duì)“Selenium爬蟲(chóng)登錄生成Cookie的方法”有了更深的了解,不妨來(lái)實(shí)際操作一番吧!這里是創(chuàng)新互聯(lián)網(wǎng)站,更多相關(guān)內(nèi)容可以進(jìn)入相關(guān)頻道進(jìn)行查詢(xún),關(guān)注我們,繼續(xù)學(xué)習(xí)!