|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
# Script setup: imports, Baidu OCR client and headless-Chrome options.

import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Your Baidu AI credentials: APPID, API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Optional switches the original author left disabled (the first two were
# labelled "Win" in the original script).
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content as a byte string.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
import os  # imported here so this step stands on its own

# Start Chrome and log in.
# "~" is a shell convention; expand it explicitly so the chromedriver path is
# a real filesystem path when it is handed over as an executable path.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

driver.get("https://youlikehits.com/")
time.sleep(5)  # fixed wait before looking up the login form
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
def checkRefresh(driver):
    """Click the element with id ``loadmore`` if it can be found.

    The original author labelled this step "Try Again". After a successful
    click the window is resized back to 1280x727.

    Args:
        driver: WebDriver instance controlling the browser.

    Returns:
        None. Any failure (for example the element not being on the page) is
        ignored on purpose: this is a best-effort step.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately ignored; see the docstring.
        pass
def _captcha_expression(ocr_text):
    """Convert OCR'd captcha text into an arithmetic expression, or None if unsafe."""
    # The OCR may return the multiplication sign, "x" or "X" for "*",
    # and the division sign for "/".
    expression = re.sub(r'[\xd7xX]', '*', ocr_text)
    expression = re.sub(r'\xf7', '/', expression)
    # Only digits, whitespace, arithmetic operators, parentheses and dots may
    # reach eval(); anything else is rejected.
    if not re.match(r'^[0-9\s+\-*/().]+$', expression):
        return None
    return expression


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if there is one.

    Takes a screenshot, crops it to the captcha image, sends the crop to the
    OCR client, evaluates the recognised expression and submits the result
    through the ``answer`` / ``submit`` form fields.

    Args:
        driver: WebDriver instance controlling the browser.

    Returns:
        1 when an answer was submitted, 0 when anything failed along the way
        (including the captcha element not being present).
    """
    try:
        # Raises when there is no captcha, which ends up in "return 0" below.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used with headless mode enabled. With headless mode disabled
        # the original author resized to (1269, 610) instead.
        # Image.LANCZOS is the same filter as the former Image.ANTIALIAS name.
        resized = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the predicate ['#captcha'] is a constant string, so it
        # does not restrict the <div>; possibly [@id='captcha'] was intended.
        # Left unchanged - confirm against the page markup.
        element = driver.find_element_by_xpath(
            "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']

        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')
        image = get_file_content('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        expression = _captcha_expression(ocr_result['words_result'][0]['words'])
        if expression is None:
            return 0
        # SECURITY: eval() runs text recognised from a remote image. The
        # whitelist in _captcha_expression limits it to plain arithmetic, but
        # a dedicated arithmetic parser would be the safer replacement.
        yzm = eval(expression)
        print('验证码: ' + str(yzm))

        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        return 0
def followbutton(driver):
    """Click the page's follow button and return the points shown beforehand.

    Reads the text of the ``currentpoints`` element in the first window,
    clicks the ``a.followbutton`` link and switches to the second window.
    If the whitespace-stripped source of that window contains "This" within
    its first 100 characters, the source is printed and the first window is
    reloaded.

    Args:
        driver: WebDriver instance controlling the browser.

    Returns:
        The ``currentpoints`` text read before clicking, or 0 if any step
        raised an exception.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        # NOTE(review): the nesting below was reconstructed from a paste that
        # lost its indentation - confirm that the whole refresh sequence
        # belongs to this branch.
        if 0 <= VideoSource.find("This") < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        return 0
# Main loop: up to 5000 rounds of "solve captcha, click follow, wait, compare
# points". The browser is always shut down afterwards, even if the loop is
# interrupted or an error escapes the handler.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Points unchanged after the wait: reload the page.
            # NOTE(review): nesting reconstructed from a paste that lost its
            # indentation - confirm time.sleep(5) belongs to this branch.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            driver.get("https://youlikehits.com/youtubenew2.php")
            print('error: ' + str(e))
finally:
    driver.quit()
复制代码

因为使用 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
|
评分
-
查看全部评分
|