|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
from selenium.webdriver.common.by import By
- - F. q" w: B) Z9 |* H6 g/ h
- """ 你的 APPID AK SK """
, L& n o0 L5 s. ]1 Q* W - . x; e$ D/ {" H2 d3 I$ p9 b
# Baidu AIP OCR credentials (APP ID, API key, secret key). Replace the
# 'xxx' placeholders with your own values.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client; checkcaptcha() calls client.basicGeneral() on the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

# Chrome launch options: headless, GPU disabled, incognito, certificate
# errors ignored.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Options the author labelled "Win" (presumably for running on Windows);
# left disabled.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
9 L% W f( F4 ^: Q. _3 R
" J6 _, Z! W5 a6 a- """ 读取图片 """% c+ L, h- ]+ h
- 8 d& r. O3 O. e: t6 m* P H8 p" X7 t
def get_file_content(filePath):
    """Return the raw bytes of the file at *filePath*.

    The file is opened in binary mode and closed again before returning.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
9 z0 l z" k4 ~- `4 `6 v$ Y
# Start Chrome through the chromedriver binary in the home directory.
# The '~' is expanded explicitly: the path is not run through a shell, so
# a literal '~/chromedriver' would not be found.
# NOTE(review): the positional driver path and the chrome_options= keyword
# are the Selenium 3 calling convention; newer Selenium releases expect
# service=/options= instead. Confirm against the installed version.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)

# Same window size that checkRefresh() restores after clicking.
driver.set_window_size(1280, 727)

# Open the site and give the login form a few seconds to load.
driver.get("https://youlikehits.com/")
time.sleep(5)

# Fill in the account details and submit the login form.
driver.find_element(By.ID, "username").send_keys('11111')  # user
driver.find_element(By.ID, "password").send_keys('111111')  # password
driver.find_element(By.XPATH, "//input[@value='Login']").click()

# Go to the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
* @3 ?' I9 X3 x% S/ @' b; X - 8 m" T( I( I+ g- Y
- #Try Again
( P/ ~8 F9 V2 V* t- [4 o! D
def checkRefresh(driver):
    """Click the element with id 'loadmore' if the current page has one.

    After a successful click the window is set back to 1280x727. This is a
    best-effort step: any failure (typically the element not being on the
    page) is ignored and the function simply returns.

    Args:
        driver: Selenium WebDriver positioned on the page to check.
    """
    try:
        driver.find_element(By.ID, 'loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately swallowed: a missing or unclickable 'loadmore' element
        # just means there is nothing to refresh.
        pass
def _solve_captcha_text(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    The multiplication sign, the division sign and the letters x/X are
    mapped to the Python operators * and / before evaluation.

    Raises:
        ValueError: if the text contains anything other than digits,
            whitespace, parentheses, dots and the operators + - * /.
    """
    expr = re.sub(r'\xd7', r'*', text)
    expr = re.sub(r'\xf7', r'/', expr)
    expr = re.sub(r'x', r'*', expr)
    expr = re.sub(r'X', r'*', expr)

    # SECURITY: the text comes from OCR of a remote page and is passed to
    # eval(). Only plain arithmetic is let through ('**' is rejected so a
    # huge power cannot hang the script) and builtins are disabled.
    if '**' in expr or not re.match(r'^[0-9\s+\-*/().]+$', expr):
        raise ValueError('unexpected captcha text: %r' % (text,))
    value = eval(expr, {'__builtins__': {}}, {})

    # Under true division an exact quotient is a float such as 3.0; the
    # answer field should receive "3".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return value


def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha if the page shows one.

    The page is screenshotted, the captcha image is cropped out using the
    element's location and size, the crop is sent to the Baidu OCR client,
    and the recognised expression is evaluated and typed into the 'answer'
    field before 'submit' is clicked.

    Args:
        driver: Selenium WebDriver positioned on the page to check.

    Returns:
        1 if a captcha was found and an answer was submitted, 0 otherwise
        (no captcha on the page, or any step of solving it failed).
    """
    try:
        # Presence check only: raises when the page has no captcha.
        driver.find_element(By.ID, 'captcha')
    except Exception:
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')

        # The screenshot is resized before cropping with the element's page
        # coordinates. Headless mode uses (1269, 727); per the author's note
        # non-headless mode needs (1269, 610) instead.
        # Image.LANCZOS is the filter formerly also exposed as ANTIALIAS.
        page = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the ['#captcha'] predicate is a string literal, which
        # XPath treats as always true, so this matches the image under any
        # div. Kept unchanged.
        element = driver.find_element(
            By.XPATH, "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        page.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        yzm = _solve_captcha_text(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))

        driver.find_element(By.NAME, 'answer').send_keys(str(yzm))
        driver.find_element(By.NAME, 'submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # Report why solving failed (OCR misread, unexpected response, ...)
        # but keep the 0 return so the caller's loop carries on.
        print('captcha error: %r' % (e,))
        return 0
; h, w: s; C1 A+ }+ i
def followbutton(driver):
    """Click the page's follow button and inspect the window it opens.

    Reads the 'currentpoints' text in the first window, clicks the first
    a[class=followbutton] link, then switches to the second window. If the
    whitespace-stripped page source there contains "This" within its first
    100 characters, the source is printed and the first window is reloaded.
    NOTE(review): "This" presumably marks an error page such as an
    unavailable video; confirm against the site.

    Args:
        driver: Selenium WebDriver logged in to the site.

    Returns:
        The 'currentpoints' text read before clicking, or 0 if any step
        failed (for example no follow button or no second window).
    """
    try:
        driver.switch_to.window(driver.window_handles[0])
        points = driver.find_element(By.ID, "currentpoints").text

        # querySelector returns null when there is no button; the resulting
        # error on .click() is handled by the except below.
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to.window(driver.window_handles[1])
        page_text = ''.join(driver.page_source.split())
        marker_at = page_text.find("This")
        if 0 <= marker_at < 100:
            print(page_text)
            driver.switch_to.window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Best effort: the caller treats 0 as "nothing was followed".
        return 0


# Main loop: up to 5000 rounds of captcha check, refresh check and follow.
# The finally clause makes sure the browser is shut down even when the loop
# is interrupted or an error escapes the per-round handler.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)

            # Long pause before re-reading the points counter (presumably
            # the time the site needs to credit the view).
            time.sleep(65)
            driver.switch_to.window(driver.window_handles[0])
            tmpp = driver.find_element(By.ID, "currentpoints").text
            print("points: " + str(tmpp))

            # Points unchanged: reload the page to get a fresh entry.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码
+ x) o! E' g# L1 r, d! B" `; R. {; q! u2 Z: T% P- V+ s
8 f7 R# g% `7 q9 p% h
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
; A0 Q2 `2 e% _% U |
评分
-
查看全部评分
|