|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Baidu AIP OCR credentials (APP ID, API key, secret key).
# Replace the 'xxx' placeholders with your own values.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used by checkcaptcha() to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Optional flags, listed under "Win" in the original post.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete content of the file, read in binary mode.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
- * ~# L2 ?4 a9 V' L) ?
# The driver path is handed to the operating system as-is, and the OS does
# not expand '~', so resolve the home directory explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Open the site and log in with the placeholder credentials below.
driver.get("https://youlikehits.com/")
time.sleep(5)  # wait before looking up the login form fields
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Go to the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
+ Z8 _1 ~) ~& q7 k
1 i& s' H, `& H3 Q8 K' q/ p: \$ {- #Try Again
! h' o1 p: _9 t6 ?, i7 n" {. ^ - ! Q" @! i+ F1 F4 S1 t' _, |" O
def checkRefresh(driver):
    """Click the element with id 'loadmore' if the page has one.

    After a successful click the window size is set to 1280x727 again.

    Args:
        driver: Selenium WebDriver whose current page is inspected.

    Returns:
        None. Failures are ignored on purpose (see below).
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately best-effort: when the element is missing or cannot be
        # clicked there is nothing to do, and the caller carries on.
        pass
2 }1 s4 B0 F$ |" ^0 H) @8 p - 3 a3 [6 O, s: X: m) T5 Q/ G1 H
def _solve_captcha_expression(text):
    """Evaluate the arithmetic expression recognised in a captcha image.

    Args:
        text: OCR output, e.g. u'3 x 4' or u'8 \xf7 2'.

    Returns:
        The numeric value of the expression; whole-number floats are
        returned as int.

    Raises:
        ValueError: If the text contains anything other than digits,
            whitespace, parentheses, '.', and the operators + - * /.
    """
    # Map the multiplication sign (U+00D7), 'x' and 'X' to '*', and the
    # division sign (U+00F7) to '/'.
    expression = re.sub(r'[\xd7xX]', r'*', text)
    expression = re.sub(r'\xf7', r'/', expression)

    # SECURITY: the text comes from an external OCR service reading a
    # third-party image, so it is untrusted. Only plain arithmetic may reach
    # eval(); '**' is rejected because huge powers could hang the process.
    if not re.match(r'^[\d\s+\-*/().]+$', expression) or '**' in expression:
        raise ValueError('unexpected captcha text: %r' % (text,))
    value = eval(expression, {'__builtins__': {}}, {})

    # Python 3 division yields floats; keep whole results as integers so the
    # typed answer reads '4' rather than '4.0'.
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return value


def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha on the current page, if any.

    Takes a screenshot, crops the captcha image out of it, sends the crop to
    the Baidu OCR client, evaluates the recognised expression and submits the
    result through the page's 'answer' / 'submit' form fields.

    Args:
        driver: Selenium WebDriver whose current page is inspected.

    Returns:
        1 if an answer was submitted, 0 if the page has no element with id
        'captcha' or if solving failed.
    """
    try:
        # Presence check only: raises when the page shows no captcha.
        driver.find_element_by_id('captcha')
    except Exception:
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')

        # Size used with headless mode enabled. The original post notes
        # (1269, 610) as the size to use when headless mode is disabled.
        # Image.LANCZOS is the same filter as the older Image.ANTIALIAS name.
        page = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the ['#captcha'] predicate is a constant string, so
        # it does not restrict which div matches; presumably
        # [@id='captcha'] was intended -- confirm against the page first.
        element = driver.find_element_by_xpath(
            "//div['#captcha']//img[@src='captchayt.php']")
        location = element.location
        size = element.size
        left = location['x']
        top = location['y']
        right = left + size['width']
        bottom = top + size['height']

        page.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')
        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        answer = _solve_captcha_expression(
            ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(answer))

        # Type the answer as text; send_keys expects characters.
        driver.find_element_by_name('answer').send_keys(str(answer))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # A captcha was present but could not be solved; report why instead
        # of failing silently, and let the caller continue.
        print('captcha error: ' + str(e))
        return 0
+ D+ g7 e8 P6 K. } A9 I - # i, S. n# y* r' P
def followbutton(driver):
    """Click the follow button and inspect the window that opens.

    Reads the 'currentpoints' text in the first window, clicks the
    a.followbutton link, then switches to the second window. If the word
    "This" occurs within the first 100 characters of that window's
    whitespace-stripped page source, the source is printed and the first
    window is reloaded.

    Args:
        driver: Selenium WebDriver with the task page in its first window.

    Returns:
        The 'currentpoints' text read before clicking, or 0 if any step
        failed (for example no follow button, or no second window).
    """
    try:
        driver.switch_to.window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to.window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        # presumably an error page such as "This video is unavailable" --
        # TODO confirm which messages the site actually produces.
        if 0 <= VideoSource.find("This") < 100:
            print(VideoSource)
            driver.switch_to.window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception as e:
        # Report the reason instead of failing silently; the caller treats
        # 0 as "no points value available".
        print('followbutton error: ' + str(e))
        return 0
" d: \) k2 k0 d& C - , t0 V" B: ?. V! }* E7 F# t) T
# Main loop: up to 5000 rounds of "solve captcha, click follow, wait, check
# points". The outer try/finally makes sure the browser is shut down even
# when the loop is interrupted or the error handler itself fails.
try:
    for _ in range(5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)

            # presumably the time the opened video must stay open before
            # points are credited -- TODO confirm.
            time.sleep(65)

            driver.switch_to.window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            # Points unchanged since before the click: reload the page.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Print first: the reload below may itself raise, and the
            # original error should not be lost when it does.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码

因为是 headless 模式,可以挂在 VPS 上跑。百度 OCR 识别验证码的正确率还是蛮高的。
|
评分
-
查看全部评分
|