|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# OCR credentials: replace the placeholders with your own APP ID,
# API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable it together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')
# Win (presumably Windows-only logging switches; left disabled as in the original)
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content, read in binary mode.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome with chrome_options and log in to the site.
# The driver path goes through expanduser because "~" is not expanded
# when the chromedriver executable is spawned.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
driver.set_window_size(1280, 727)
driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed pause before the login form is filled in
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
def checkRefresh(driver):
    """Click the ``loadmore`` element when it exists ("Try Again").

    After a successful click the window is set back to 1280x727.

    Args:
        driver: The Selenium WebDriver controlling the browser.

    Returns:
        None. Every exception is swallowed on purpose: this is a
        best-effort step and the caller carries on whether or not the
        element could be found and clicked.
    """
    try:
        refresh_button = driver.find_element_by_id('loadmore')
        refresh_button.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort only, e.g. the lookup raises when there is no such element.
        pass
def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha on the current page.

    The page is screenshotted, the captcha image is cropped out of the
    screenshot, the crop is sent to the OCR ``client`` and the recognised
    arithmetic expression is evaluated; the result is typed into the
    ``answer`` field and the ``submit`` element is clicked.

    Args:
        driver: The Selenium WebDriver controlling the browser.

    Returns:
        1 after an answer has been submitted, 0 if any step raised
        (including the lookup of the ``captcha`` element).
    """
    try:
        # Presence check only: the lookup raises when the page has no
        # element with id "captcha", which ends up in "return 0" below.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used with headless mode enabled; the original noted
        # (1269, 610) for running with headless mode disabled. Presumably the
        # resize makes screenshot pixels line up with element coordinates.
        # NOTE(review): Image.ANTIALIAS was removed in Pillow 10; use
        # Image.LANCZOS if the Pillow dependency is upgraded.
        resized = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the ['#captcha'] predicate is a non-empty string
        # literal, which XPath treats as always true, so this matches an
        # <img> under any <div>. Left unchanged because the page markup
        # cannot be confirmed from here.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        # Normalise the multiplication sign (and the letters x / X that OCR
        # may return for it) and the division sign to Python operators.
        expression = ocr_result['words_result'][0]['words']
        expression = re.sub(r'[\xd7xX]', '*', expression)
        expression = re.sub(r'\xf7', '/', expression)

        # SECURITY: the text comes from OCR of a remote image and is passed
        # to eval(). Only plain arithmetic characters are let through;
        # anything else is rejected and reported as a failure (return 0).
        if not re.match(r'^[0-9+\-*/(). ]+$', expression):
            raise ValueError('unexpected captcha text: %r' % (expression,))
        yzm = eval(expression)

        print('验证码: ' + str(yzm))
        # send_keys expects text; a non-integer result would otherwise fail.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        return 0
def followbutton(driver):
    """Click the follow button and return the point total read beforehand.

    The text of the ``currentpoints`` element is read in the first window,
    the first ``a[class=followbutton]`` link is clicked, and the second
    window is inspected. If the whitespace-stripped page source contains
    "This" within its first 100 characters (presumably an error page --
    confirm), the source is printed and the first window is reloaded.

    Args:
        driver: The Selenium WebDriver controlling the browser.

    Returns:
        The ``currentpoints`` text captured before the click, or 0 if any
        step raised.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()
        driver.switch_to_window(driver.window_handles[1])

        page_text = ''.join(driver.page_source.split())
        marker_at = page_text.find("This")
        if 0 <= marker_at < 100:
            # NOTE(review): the original indentation was lost; the extent of
            # this branch (through the one-second sleep) is inferred.
            print(page_text)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        return 0
# Main loop: up to 5000 rounds of captcha check, optional "load more" click
# and follow-button click, then compare the point total before and after.
# The try/finally guarantees the browser is closed even when the loop is
# interrupted or the recovery reload in the handler itself fails.
try:
    for _ in range(5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            # Presumably the time needed before the points are credited -- confirm.
            time.sleep(65)
            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            if points == tmpp:
                # Total unchanged since before the click: reload the list.
                # NOTE(review): the original indentation was lost; placing
                # the five-second sleep inside this branch is inferred.
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the error is visible even if the reload fails.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码 ! q6 I; i( V& U" M- O
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
|
评分
-
查看全部评分
|