|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
5 B4 i K$ w# Y5 G/ C' y
) t' ?3 g/ ]) ]/ C7 l4 I- 5 _% I! B7 |' k8 R& J
- # -*- coding: utf-8 -*-
# s" G( U) T' u& ?; Y3 g1 f/ r
# F b) v- R! M- ^/ Q- Q- from aip import AipOcr
2 ]7 c1 I8 U5 }# _6 k - ; q/ P1 n1 D1 [8 `
- from selenium import webdriver( {) i6 ?/ d; |1 V
- 5 h& O. g, q' L1 Y
- import time( r) M% ^; S: I& ?6 [) `3 ^$ x
8 U+ C P9 T4 u/ P' n- import random
, \4 A; d' C2 ~ - D. n- A: y+ ?- J. b
- import sys,re
" @( w/ {3 W8 S- c4 n& h3 ]; G: z - ; R- s: `5 X0 h. u. \9 O
- from PIL import Image, ImageDraw,ImageFont2 o% Z+ Q$ H- Q" T9 t" s6 D* K% Y; W
# Baidu AIP OCR credentials: your APP ID, API key (AK) and secret key (SK).
# The 'xxx' values are placeholders and must be replaced before running.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'  # the posted code had lost the opening quote on this literal

# OCR client; client.basicGeneral() is what reads the captcha image later on.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# Optional proxy; enable together with the --proxy-server argument below.
#PROXY = "127.0.0.1:8118"

# Chrome is run headless so the script can be left running on a server
# without a display.
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")

#chrome_options.add_argument('--no-sandbox')
1 i- `) g0 U" D8 h" k; g - - S$ }: [/ `% Y8 l
- """ 读取图片 """4 |2 }: I8 | [6 t
def get_file_content(filePath):
    """Read an image file and return its raw contents.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file contents as a byte string (the file is opened in
        binary mode, so no decoding takes place).

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
- - y$ V' q% y: y |+ ^
import os  # needed only to expand '~' in the chromedriver path below

# Start Chrome through the chromedriver binary in the user's home directory.
# '~' is only expanded by a shell, and the driver binary is launched without
# one, so the path has to be resolved explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'), chrome_options=chrome_options)
# checkRefresh() re-applies this same window size after it clicks.
driver.set_window_size(1280, 727)

# Log in with the account credentials (the values below are placeholders).
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page that the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
- E. P0 t% L F' H- #Try Again* Q/ \( g! L" Q5 @- ~
- 9 u3 L7 R$ X9 {9 a. L
def checkRefresh(driver):
    """Click the element with id 'loadmore' if there is one.

    After a successful click the window is resized to 1280x727.

    Args:
        driver: Selenium WebDriver for the current page.

    Returns:
        None. This is a best-effort step: any exception raised while looking
        up or clicking the element (for example because it is not on the
        page) is deliberately ignored.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Nothing to click (or the click failed); the caller simply carries on.
        pass
. e7 u0 \$ }4 J, V
def _solve_captcha_expression(text):
    """Evaluate the arithmetic expression recognised in the captcha image.

    The multiplication sign (U+00D7), the division sign (U+00F7) and the
    letters x/X are mapped to Python operators before evaluation.

    Raises:
        ValueError: If the text contains anything other than digits,
            whitespace, parentheses and + - * /, or contains '**'.
    """
    expr = re.sub(r'\xd7', r'*', text)
    expr = re.sub(r'\xf7', r'/', expr)
    expr = re.sub(r'[xX]', r'*', expr)
    # SECURITY: this text originates from a remote page (via a remote OCR
    # service) and is passed to eval(). Only plain arithmetic characters are
    # let through, '**' is rejected so a hostile captcha cannot request an
    # enormous power, and eval() gets no builtins.
    if not re.match(r'^[0-9\s()+\-*/]+$', expr) or '**' in expr:
        raise ValueError('unexpected captcha text: %r' % (text,))
    return eval(expr, {'__builtins__': {}}, {})


def checkcaptcha(driver):
    """Solve and submit the arithmetic captcha if the page shows one.

    The page is screenshotted, the captcha image is cropped out and sent to
    the Baidu OCR client, the recognised expression is evaluated, and the
    result is typed into the 'answer' field and submitted.

    Args:
        driver: Selenium WebDriver for the current page.

    Returns:
        1 if a captcha was found and an answer was submitted, 0 otherwise.
        Every failure (no captcha on the page, OCR returned nothing usable,
        unexpected text, ...) is reported as 0 rather than raised.
    """
    try:
        # Raises when there is no captcha, which ends the attempt with 0.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Size used when not running headless:
        #page = screenshot.resize((1269, 610), Image.ANTIALIAS)
        # Size used when running headless:
        page = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a constant non-empty
        # string, which XPath treats as true, so this matches the image under
        # any div. It probably meant [@id='captcha']; confirm against the
        # page before changing it.
        element = driver.find_element_by_xpath("//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        page.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)

        # Only the first recognised text line is used.
        yzm = _solve_captcha_expression(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))

        driver.find_element_by_name('answer').send_keys(yzm)
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # Deliberately broad: "no captcha present" also arrives here as an
        # exception from the first lookup, and the caller only needs 0/1.
        return 0
* y& X# @3 L; l. E7 R - 0 h( _" S$ L% a2 y. t( S% p) `
def followbutton(driver):
    """Click the page's follow button and report the points shown beforehand.

    Works on the first browser window: reads the text of the 'currentpoints'
    element, clicks the first ``a[class=followbutton]`` link, then switches
    to the second window and inspects its page source. If the
    whitespace-stripped source contains "This" within its first 100
    characters (presumably an error notice, to be confirmed), the source is
    printed and the first window is sent back to the listing page.

    Args:
        driver: Selenium WebDriver with the listing page in its first window.

    Returns:
        The 'currentpoints' text read before clicking, or 0 if any step
        raised (for example no follow button or no second window).
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script('return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        VideoSource = ''.join(driver.page_source.split())
        # NOTE(review): the posted code had lost its indentation; the five
        # statements below are assumed to form the if-branch, with the return
        # after it.
        if 0 <= VideoSource.find("This") < 100:
            print(VideoSource)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Any failure is reported as 0 so the main loop can keep going.
        return 0
0 b# @ U% v; l$ C/ ^6 Z - 4 I$ }% [: z' H" [ }4 s& m: K
# Main loop: up to 5000 rounds of "solve captcha if any, click load-more if
# any, click a follow button, wait, then compare the points counter".
# The outer try/finally guarantees the browser is shut down even when the
# recovery navigation in the except-branch itself fails.
try:
    for i in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            # Presumably the time needed for the opened video to be credited
            # -- confirm.
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))

            # Points unchanged since before the click: reload the listing.
            # NOTE(review): the posted code had lost its indentation; the
            # sleep is assumed to belong to this branch.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first, so the message is not lost if the reload below
            # raises as well.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
* U0 q. e# G' A+ b9 Y9 @
复制代码
' \. v3 y: q2 `0 e5 D0 V/ j; f( A9 x) C+ b" P4 V
+ B% R3 }( |1 \8 I7 ^# P
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
1 Q9 s S0 r- c$ b! X
, u' V) z6 W5 a8 v" V |
评分
-
查看全部评分
|