|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
' k: y p+ g) }! m7 l y+ A# w9 P y/ f% V: x! M+ x
1 b! R9 v+ ]$ O& e* ?- / d5 p% A% C( G6 ^
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Baidu AIP OCR credentials (APP ID / API key / secret key).
# Replace the 'xxx' placeholders with your own values.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used by checkcaptcha() to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, private window, ignore TLS certificate errors.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')
# Win
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    filePath: path of the file to read.
    Returns the complete file content, read in binary mode.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
; N3 k) ^ P6 m8 v' z - & ~/ A/ n+ d# ]$ |
# Start Chrome with the options configured above.
# '~' is only expanded by a shell, so resolve it explicitly; otherwise the
# literal path '~/chromedriver' is looked up and the driver is not found.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
# Fixed window size: checkcaptcha() crops the screenshot using element
# coordinates, so the viewport must stay predictable.
driver.set_window_size(1280, 727)

# Log in (replace the placeholder credentials with your own).
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element_by_id("username").send_keys('11111') #user
driver.find_element_by_id("password").send_keys('111111') #password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the YouTube task page that the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
- $ t$ x% d# Z# x- f( h
- #Try Again# o6 v) z* o b9 \5 b- Z
def checkRefresh(driver):
    """Click the 'loadmore' element if the page shows one, then reset the window size.

    driver: the Selenium WebDriver to operate on.
    Returns None. This is a deliberate best-effort step: when the element is
    missing (or the click fails) the error is ignored and nothing happens.
    """
    try:
        refresh_button = driver.find_element_by_id('loadmore')
        refresh_button.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # No 'loadmore' element on the page is the normal case; nothing to do.
        pass
- & p1 J4 D2 a/ v- i4 ^
def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha if the page shows one.

    The page is screenshotted, the captcha image is cropped out, the crop is
    sent to the Baidu OCR client and the recognised expression is evaluated.

    driver: the Selenium WebDriver to operate on.
    Returns 1 after an answer was submitted, 0 if any step failed (including
    the case where no element with id 'captcha' exists).
    """
    try:
        # Raises when no captcha is present; handled below by returning 0.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        im = Image.open('/tmp/screenshot.png')
        # Without headless mode use this size instead:
        #resized = im.resize((1269, 610),Image.ANTIALIAS)
        # With headless mode:
        resized = im.resize((1269, 727), Image.ANTIALIAS)

        # Bounding box of the captcha image in page coordinates.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = element.location['x'] + element.size['width']
        bottom = element.location['y'] + element.size['height']

        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')
        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        # Map the recognised multiplication/division glyphs to Python operators.
        yzm = re.sub(r'\xd7', r'*', ocr_result['words_result'][0]['words'])
        yzm = re.sub(r'\xf7', r'/', yzm)
        yzm = re.sub(r'x', r'*', yzm)
        yzm = re.sub(r'X', r'*', yzm)

        # SECURITY: the expression comes from OCR of a remote image, i.e. it is
        # untrusted input. Only evaluate it when it consists purely of digits,
        # whitespace and arithmetic characters, and evaluate without builtins.
        if not re.match(r'^[0-9\s+\-*/().]+$', yzm):
            raise ValueError('unexpected captcha text: %r' % (yzm,))
        yzm = eval(yzm, {'__builtins__': {}}, {})

        print('验证码: ' + str(yzm))
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # Any failure (no captcha, OCR returned nothing usable, ...) means
        # "nothing submitted".
        return 0
" j1 v8 c: D4 g8 Q! K' E
def followbutton(driver):
    """Click the first 'followbutton' link and inspect the window it opens.

    driver: the Selenium WebDriver to operate on.
    Returns the text of the 'currentpoints' element as read before the click,
    or 0 if any step raised.

    NOTE(review): the original indentation was lost; the statements placed
    inside the `if` below are the most plausible reading -- confirm.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()
        driver.switch_to_window(driver.window_handles[1])

        # Page source with all whitespace removed.
        VideoSource = ''.join(driver.page_source.split())
        # "This" within the first 100 characters: dump the page and reload
        # the task list in the main window.
        marker_pos = VideoSource.find("This")
        if 0 <= marker_pos < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        return 0
# Main loop: up to 5000 rounds of "solve captcha if shown, load more, follow,
# wait, compare points". driver.quit() sits in `finally` so the browser is
# shut down even when the loop is left by an uncaught error or Ctrl-C.
try:
    for i in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Points unchanged since before the click: reload the task page.
            # NOTE(review): original indentation was lost; the sleep is
            # assumed to belong to this branch -- confirm.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first, so the cause is visible even if the reload fails.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
- C0 w( H9 K# Y9 t) ^- |8 D
复制代码
) Y, A3 N1 A4 h& h& A- x# V) B/ L; U. H7 l
因为使用了 headless 模式,可以挂在 VPS 上跑。百度 OCR 识别验证码的正确率还是蛮高的。
: y2 z8 f' P/ r8 @8 ?2 z# R) A
7 q0 ?& h' h: _ |
评分
-
查看全部评分
|