|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
7 W2 V8 ~8 r, N8 O% X5 [. h) Y. w+ r' C
- 4 ]+ B) L* h' C* h0 i( y* z: I
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
1 M, f( b. M2 G% h
# Baidu AIP OCR credentials (APP ID / API key / secret key) -- fill in your own.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used by checkcaptcha() to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")

#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete contents of the file as a byte string.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # Binary mode: the bytes are handed to the OCR client unchanged.
    with open(filePath, 'rb') as fp:
        return fp.read()
" e# ]* F9 k: _. ^) J/ |, _ - $ H k5 K3 J }) h* \
# Start Chrome with the options configured above.  The driver path is
# expanded explicitly: a leading "~" is only meaningful to a shell, so passing
# '~/chromedriver' verbatim makes the lookup fail.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
# NOTE(review): checkcaptcha() rescales its screenshot to 1269x727, so this
# window size and that resize presumably have to be kept in sync -- confirm.
driver.set_window_size(1280, 727)

# Log in through the form on the landing page.
driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed wait for the login form to load
# find_element(by, value) is used instead of the find_element_by_* helpers,
# which were removed in Selenium 4.3; this form works on older releases too.
driver.find_element("id", "username").send_keys('11111')  # user
driver.find_element("id", "password").send_keys('111111')  # password
driver.find_element("xpath", "//input[@value='Login']").click()

# Open the page that the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
def checkRefresh(driver):
    """Click the element with id "loadmore" if it is present.

    The original author labels this step "Try Again".  After a successful
    click the window is set back to 1280x727, the size used at start-up.

    Args:
        driver: The Selenium WebDriver controlling the browser.

    Returns:
        None.  The function is best-effort and never raises.
    """
    try:
        driver.find_element("id", "loadmore").click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately ignored: a missing or unclickable button just means
        # there is nothing to do, and the main loop must keep running.
        pass
1 H' v( y* w, {3 E( n7 A8 m# r) p
# Characters allowed in a captcha expression once the OCR text is normalised:
# digits, the four basic operators, parentheses, dots and spaces.
_CAPTCHA_EXPRESSION = re.compile(r'[0-9+\-*/(). ]{1,32}\Z')


def _solve_captcha_text(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    Args:
        text: OCR output, e.g. u"3 x 4".  The multiplication sign (U+00D7) and
            the letters "x"/"X" are read as "*"; the division sign (U+00F7)
            is read as "/".

    Returns:
        The value of the expression.  A float holding a whole number is
        returned as an int so that it is typed as "2" rather than "2.0".

    Raises:
        ValueError: If the text contains anything but plain arithmetic.
        SyntaxError, ZeroDivisionError, TypeError: If the arithmetic itself
            is malformed.
    """
    expression = re.sub(r'[\xd7xX]', '*', text)
    expression = re.sub(r'\xf7', '/', expression).strip()
    # SECURITY: the text comes from OCR of an image served by a remote site,
    # so it is untrusted.  Only short, purely arithmetic input reaches eval();
    # "**" is rejected so a hostile image cannot request a huge power.
    if '**' in expression or not _CAPTCHA_EXPRESSION.match(expression):
        raise ValueError('unexpected captcha text: %r' % (text,))
    answer = eval(expression, {'__builtins__': {}}, {})
    if isinstance(answer, float) and answer.is_integer():
        answer = int(answer)
    return answer


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if there is one.

    The page is screenshotted, the captcha image is cropped out and sent to
    the Baidu OCR ``client``, the recognised expression is evaluated and the
    result is submitted through the "answer" / "submit" form fields.

    Args:
        driver: The Selenium WebDriver controlling the browser.

    Returns:
        1 if an answer was submitted, 0 if no captcha was found or any step
        failed.  The function never raises.
    """
    try:
        # Raises when the page has no element with id "captcha".
        driver.find_element("id", "captcha")
    except Exception:
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used with headless mode enabled; with headless mode disabled
        # the original author used (1269, 610) instead.
        # NOTE(review): presumably this maps screenshot pixels onto the
        # coordinates reported by element.location -- confirm.
        # Image.LANCZOS is the same filter as the former Image.ANTIALIAS
        # alias, which was removed in Pillow 10.
        scaled = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the predicate ['#captcha'] is a non-empty string and
        # therefore always true, so this matches the image under *any* div;
        # [@id='captcha'] may have been intended.  Left unchanged.
        element = driver.find_element(
            "xpath", "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        scaled.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(
            get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)

        yzm = _solve_captcha_text(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))
        driver.find_element("name", 'answer').send_keys(str(yzm))
        driver.find_element("name", 'submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # A captcha was present but could not be solved; report the reason
        # instead of failing silently, then let the caller carry on.
        print('captcha error: %r' % (e,))
        return 0
/ ?# _% S8 h: p1 c7 D+ p
def followbutton(driver):
    """Click the page's follow button and report the points shown beforehand.

    Works on the first browser window, clicks the ``a.followbutton`` link and
    then inspects the second window.  If the word "This" occurs within the
    first 100 characters of that window's whitespace-stripped source, the
    source is printed and the first window is reloaded.

    Args:
        driver: The Selenium WebDriver controlling the browser.

    Returns:
        The text of the "currentpoints" element read before the click, or 0
        if any step failed.  The function never raises.
    """
    try:
        driver.switch_to.window(driver.window_handles[0])
        points = driver.find_element("id", "currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()
        driver.switch_to.window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        marker = VideoSource.find("This")
        # NOTE(review): presumably an error page starting with "This ..." --
        # confirm.  Indentation was lost in the source this was recovered
        # from, so the extent of this branch is a reconstruction.
        if 0 <= marker < 100:
            print(VideoSource)
            driver.switch_to.window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Any failure (missing button, second window not opened, ...) is
        # reported to the caller as 0.
        return 0
, p5 \6 p- L! p, A4 w- C3 t$ n
# Main loop: up to 5000 rounds of captcha check, follow click and points check.
# The try/finally guarantees the browser is shut down even when the loop is
# interrupted (Ctrl-C) or the recovery reload in the handler itself fails.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)  # fixed wait before re-reading the points
            driver.switch_to.window(driver.window_handles[0])
            tmpp = driver.find_element("id", "currentpoints").text
            print("points: " + str(tmpp))
            # Unchanged points: reload the page and try again.
            # NOTE(review): indentation was lost in the source this was
            # recovered from; the 5 s sleep is assumed to belong to this
            # branch -- confirm.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Recover by reloading the work page and continuing.
            driver.get("https://youlikehits.com/youtubenew2.php")
            print('error: ' + str(e))
finally:
    driver.quit()
8 H' h9 x( K7 X1 b. f7 q
复制代码
& Y4 A. C# z6 m8 ?) O* |
+ ^% @/ Z, ~) h0 C* b4 I5 @" g! y+ V( F% Y9 \
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
, J4 T+ x# e0 n( R/ V2 b
|
评分
-
查看全部评分
|