|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
4 J) S" J9 a# C* X
5 D# w# d6 o- q, }% n: j9 t3 s: m0 X3 u
- ) L( b" [4 U, v4 Y& A, F
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
! z0 d5 d! U. Y/ T
# Baidu AIP OCR credentials (APPID / API key / secret key) -- fill in your own.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server argument below.
# PROXY = "127.0.0.1:8118"

# Chrome runs headless so the script can be left running on a server.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
# chrome_options.add_argument('--no-sandbox')
- """ 读取图片 """* Q' ^# M8 S$ v o7 x% d
- , w: \8 @7 L h( b) C g' u2 d
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content as a byte string.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    # Binary mode: the content is image data, not text.
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome with the options configured above. The driver path is expanded
# explicitly: '~' is a shell convention and is presumably not expanded when
# the chromedriver executable is launched directly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Log in from the home page; the sleep gives the login form time to load.
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page the rest of the script works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
# Try Again
def checkRefresh(driver):
    """Click the ``loadmore`` element if the current page has one.

    After a successful click the window is set back to 1280x727, the same
    size the script uses everywhere else.

    Args:
        driver: Selenium WebDriver showing the task page.

    Returns:
        None. Any failure (typically the element not being present) is
        ignored on purpose.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort: a missing button just means there is nothing to reload.
        pass
def _solve_captcha_expression(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    Args:
        text: Expression as returned by OCR, e.g. ``3x4`` or ``8\xf72``.

    Returns:
        The numeric result; a whole-number float is returned as an int.

    Raises:
        ValueError: If the text contains anything other than digits,
            ``+ - * / ( ) .`` and spaces once the operators are normalised.
    """
    # OCR reports multiplication as U+00D7 or as the letter x/X and division
    # as U+00F7; map them to Python operators.
    expr = re.sub(r'[xX\xd7]', '*', text)
    expr = re.sub(r'\xf7', '/', expr).strip()
    # SECURITY: this text comes from a remote image via OCR and is passed to
    # eval(). Only plain arithmetic is let through, '**' is rejected, and
    # builtins are removed from the evaluation namespace.
    if '**' in expr or not re.match(r'^[0-9+\-*/(). ]+$', expr):
        raise ValueError('unexpected captcha text: %r' % (text,))
    value = eval(expr, {'__builtins__': {}}, {})
    # True division yields e.g. 4.0; the answer field should receive "4".
    if isinstance(value, float) and value.is_integer():
        value = int(value)
    return value


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if one is shown.

    When an element with id ``captcha`` exists, the page is screenshotted,
    the captcha image is cropped out and sent to the OCR ``client``, the
    recognised expression is evaluated, and the answer is submitted.

    Args:
        driver: Selenium WebDriver showing the task page.

    Returns:
        1 if an answer was submitted; 0 if no captcha element was found or
        solving failed (a failure is printed before returning).
    """
    try:
        driver.find_element_by_id('captcha')
    except Exception:
        # No captcha element: nothing to solve.
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # (1269, 727) is the size for headless mode; with headless disabled
        # the size to use is (1269, 610).
        resized = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): "div['#captcha']" is a non-empty string predicate, so
        # it matches every div; "@id='captcha'" may have been intended --
        # confirm against the page before changing it.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)
        yzm = _solve_captcha_expression(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))

        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # Same return value as before, but the failure is reported.
        print('captcha error: %r' % (e,))
        return 0
def followbutton(driver):
    """Click the first ``followbutton`` link and return the points shown before it.

    The click is made in the first window; the second window is then
    inspected, and if its whitespace-stripped source contains "This" within
    the first 100 characters, the first window is reloaded.

    Args:
        driver: Selenium WebDriver, logged in and showing the task page.

    Returns:
        The text of the ``currentpoints`` element read before clicking, or 0
        if any step failed (the failure is printed).
    """
    try:
        driver.switch_to.window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to.window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        marker_at = VideoSource.find("This")
        if 0 <= marker_at < 100:
            print(VideoSource)
            driver.switch_to.window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception as e:
        print('followbutton error: %r' % (e,))
        return 0


# Main loop: up to 5000 rounds of captcha check -> reload check -> follow.
try:
    for _ in range(5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            time.sleep(65)

            driver.switch_to.window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Unchanged points after the wait: reload the task page.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            print('error: ' + str(e))
            try:
                driver.get("https://youlikehits.com/youtubenew2.php")
            except Exception as reload_error:
                # A failed reload must not end the whole run.
                print('error: ' + str(reload_error))
finally:
    # Close the browser even if the loop is interrupted.
    driver.quit()
复制代码
( G. R2 N1 Y, b1 N3 h
! l/ \% n- h; C& J! R; j$ ]6 D+ g, k9 j, s
因为是 headless 模式，可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
- E; d4 \) ~, r% r/ t' I7 z
- X" U8 e8 U% H* q8 |; u |
评分
-
查看全部评分
|