本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
from __future__ import print_function

import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Credentials for the AipOcr client (APP_ID / API key / secret key).
# Replace the 'xxx' placeholders with your own values.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the text of the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server flag below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, private window, tolerate bad certificates.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win (presumably flags meant for Windows hosts -- confirm before enabling)
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Return the raw bytes stored in the file at ``filePath``.

    The original comment describes this as "read image"; the function itself
    reads any file in binary mode.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content as a byte string.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome with the options configured above.  "~" is shell syntax and is
# not expanded when the path is handed over as a plain string, so resolve it
# to the user's home directory explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Open the site, wait for the page, then fill in and submit the login form.
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element_by_id("username").send_keys('11111') #user
driver.find_element_by_id("password").send_keys('111111') #password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Go to the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
#Try Again
def checkRefresh(driver):
    """Click the page's 'loadmore' element if there is one.

    After a successful click the window is resized back to 1280x727.
    This is a best-effort helper: when the element is missing or the click
    fails, the error is deliberately ignored so the caller's loop continues.

    Args:
        driver: WebDriver-like object positioned on the page to check.

    Returns:
        None.
    """
    try:
        load_more = driver.find_element_by_id('loadmore')
        load_more.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # No 'loadmore' element (or it could not be clicked): nothing to do.
        pass
def _solve_captcha_expression(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    Args:
        text: The OCR text, e.g. ``u'3\xd74'`` or ``'8x2'``.

    Returns:
        The numeric result (an int whenever the value is a whole number).

    Raises:
        ValueError: If the text contains anything besides digits, arithmetic
            operators, parentheses, dots and whitespace.
    """
    # OCR may return the multiplication sign, 'x' or 'X' for "times" and the
    # division sign for "divided by"; map them to Python operators.
    expression = re.sub(r'[\xd7xX]', '*', text)
    expression = re.sub(r'\xf7', '/', expression)

    # SECURITY NOTE(review): the text comes from a remote image via OCR, i.e.
    # it is untrusted.  It is still evaluated with eval(); the whitelist and
    # the empty builtins below only limit what can reach it.  Consider
    # replacing eval() with a small arithmetic parser.
    if not re.match(r'^[0-9+\-*/().\s]+$', expression):
        raise ValueError('unexpected captcha text: %r' % (text,))
    result = eval(expression, {'__builtins__': {}}, {})

    # Under Python 3 "8/2" yields 4.0; the answer field expects "4".
    if isinstance(result, float) and result.is_integer():
        result = int(result)
    return result


def checkcaptcha(driver):
    """Solve and submit the arithmetic image captcha on the current page.

    The page is screenshotted, the captcha image is cropped out, sent to the
    OCR client, and the recognised expression is evaluated and typed into the
    'answer' field.

    Args:
        driver: WebDriver-like object positioned on the page to check.

    Returns:
        1 if a captcha was found, solved and submitted; 0 if the page has no
        'captcha' element or any step failed.
    """
    try:
        # Presence check only: raises when the page shows no captcha.
        driver.find_element_by_id('captcha')
    except Exception:
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used with headless mode enabled; the original author used
        # (1269, 610) when headless mode was turned off.  Presumably this
        # maps screenshot pixels onto the element coordinates -- confirm.
        # LANCZOS is the filter formerly exposed as Image.ANTIALIAS.
        page = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the predicate ['#captcha'] is a plain string and is
        # therefore always true, so this matches the image under any <div>.
        element = driver.find_element_by_xpath(
            "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        page.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(
            get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)

        yzm = _solve_captcha_expression(
            ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))

        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # Keep the best-effort contract (return 0), but make the reason
        # visible instead of swallowing it silently.
        print('captcha error: %r' % (e,))
        return 0
def followbutton(driver):
    """Click the page's follow button and report the points shown beforehand.

    Reads the 'currentpoints' text in the first window, clicks the
    ``a.followbutton`` link, then inspects the second window.  If the text
    "This" occurs within the first 100 characters of that window's
    whitespace-stripped source, the page source is printed and the first
    window is reloaded.

    Args:
        driver: WebDriver-like object with at least two windows after the
            click.

    Returns:
        The 'currentpoints' text read before clicking, or 0 if any step
        raised an exception.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        # Drop all whitespace so the position check below is layout-agnostic.
        VideoSource = ''.join(driver.page_source.split())
        marker_at = VideoSource.find("This")
        # Presumably an error page starting with "This ..." -- confirm.
        # NOTE(review): the original indentation was lost; the five
        # statements below are assumed to belong to this branch.
        if 0 <= marker_at < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Best effort: any failure is reported to the caller as 0.
        return 0
# Main loop: up to 5000 rounds of "solve captcha, refresh list, follow,
# wait, compare points".  driver.quit() sits in a finally clause so the
# browser is closed even if the loop is interrupted or the error handler
# itself raises.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            # Presumably the time needed before points are credited --
            # confirm.
            time.sleep(65)
            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Unchanged balance: reload the page before the next round.
            # NOTE(review): followbutton() returns 0 on failure, which never
            # equals the text read here, so no reload happens in that case.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Recover by reloading the work page, then report the error.
            driver.get("https://youlikehits.com/youtubenew2.php")
            print('error: ' + str(e))
finally:
    driver.quit()
复制代码

因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。