|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Your Baidu AIP credentials: APP ID, API key (AK) and secret key (SK).
# The 'xxx' values are placeholders and must be replaced before running.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

# Chrome start-up flags: headless, no GPU, incognito, ignore TLS errors.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Windows only: uncomment to silence Chrome's console logging.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")

# Uncomment when Chrome must run without its sandbox.
#chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image (or any file) and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content as a byte string.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    # Binary mode so the image bytes are returned untouched.
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome with the options configured above. The driver binary is
# launched without a shell, so '~' must be expanded explicitly; passing the
# literal '~/chromedriver' would not resolve to the home directory.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
# Fixed window size: the captcha crop in checkcaptcha() works on a
# screenshot of this window.
driver.set_window_size(1280, 727)

# Log in through the form on the landing page.
driver.get("https://youlikehits.com/")
time.sleep(5)  # give the page time to load before touching the form
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page on which the rest of the script operates.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
# Try Again
def checkRefresh(driver):
    """Click the page's ``loadmore`` element if it exists.

    After a successful click the window is resized back to 1280x727.

    Args:
        driver: Selenium WebDriver whose current page is inspected.

    Returns:
        None. Every failure (for example the element not being present) is
        ignored on purpose, so calling this is always safe.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberate best-effort: no 'loadmore' element simply means there
        # is nothing to refresh.
        pass
def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha on the current page, if any.

    The page is screenshotted, the captcha image is cropped out of the
    screenshot and sent to the OCR ``client``; the recognised arithmetic
    expression is evaluated and the result is typed into the ``answer``
    field and submitted.

    Args:
        driver: Selenium WebDriver positioned on the page to check.

    Returns:
        1 when an answer was submitted, 0 on any failure (including the
        page not showing a captcha at all).
    """
    try:
        # Raises when there is no captcha element, which ends up in the
        # ``return 0`` branch below.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')

        # Headless mode disabled:
        #resized = screenshot.resize((1269, 610), Image.ANTIALIAS)

        # Headless mode enabled:
        resized = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a non-empty string
        # literal, which XPath treats as always true, so this matches the
        # image under any <div>. [@id='captcha'] was probably intended;
        # left unchanged because the page markup is not visible here.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']

        # Crop the captcha out of the screenshot and run OCR on it.
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')
        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        # Map the multiplication / division glyphs OCR may return (U+00D7,
        # U+00F7, 'x', 'X') onto Python operators.
        yzm = re.sub(r'\xd7', r'*', ocr_result['words_result'][0]['words'])
        yzm = re.sub(r'\xf7', r'/', yzm)
        yzm = re.sub(r'x', r'*', yzm)
        yzm = re.sub(r'X', r'*', yzm)

        # SECURITY NOTE: eval() runs text recognised from a remote image,
        # i.e. untrusted input. Kept to preserve behaviour; consider a
        # restricted arithmetic parser instead.
        yzm = eval(yzm)
        print('验证码: ' + str(yzm))

        # Send the answer as text so a non-integer result (e.g. from a
        # division) is typed instead of raising inside send_keys.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # No captcha on the page, OCR failure, unparsable expression, ...:
        # report "nothing solved" and let the caller carry on.
        return 0
def followbutton(driver):
    """Click the first follow button and return the points shown before it.

    Works on the first browser window, clicks the ``a.followbutton`` link
    and then inspects the second window. If the text "This" occurs within
    the first 100 characters of that window's whitespace-stripped page
    source, the source is printed and the first window is reloaded.

    Args:
        driver: Selenium WebDriver that is logged in on the video page.

    Returns:
        The text of the ``currentpoints`` element read before clicking, or
        0 when any step fails.
    """
    # NOTE(review): block nesting was reconstructed from a listing whose
    # indentation was lost; confirm the extent of the ``if`` body.
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        # The click is expected to have opened a second window.
        driver.switch_to_window(driver.window_handles[1])
        page_text = ''.join(driver.page_source.split())
        marker_pos = page_text.find("This")
        if 0 <= marker_pos < 100:
            print(page_text)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Best-effort: missing elements or a missing second window are
        # reported as 0 and do not abort the run.
        return 0
# Main loop: up to 5000 rounds of "solve captcha -> refresh list -> follow".
# NOTE(review): block nesting was reconstructed from a listing whose
# indentation was lost; confirm that the final sleep belongs to the ``if``.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)  # wait before re-reading the points counter

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            # Unchanged points: reload the page and try again.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            driver.get("https://youlikehits.com/youtubenew2.php")
            print('error: ' + str(e))
finally:
    # Always shut the browser down, including on Ctrl-C or when the
    # recovery reload above itself fails.
    driver.quit()
复制代码
因为使用了 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
|
评分
-
查看全部评分
|