|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
# -*- coding: utf-8 -*-
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
# Baidu AipOcr credentials: replace the placeholders with your own
# APP ID, API key (AK) and secret key (SK).
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# Shared OCR client; checkcaptcha() sends the cropped captcha image through it.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)
# Optional proxy; enable it together with the --proxy-server argument below.
# PROXY = "127.0.0.1:8118"

# Chrome launch options. Headless mode is what allows the script to run on a
# machine without a display (e.g. a VPS).
chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Optional switches left disabled by the author (the first two were
# labelled "Win"):
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
# chrome_options.add_argument('--no-sandbox')
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete contents of the file as a byte string.

    Raises:
        IOError/OSError: If the file cannot be opened or read.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome through chromedriver. Python does not expand "~" in paths on
# its own, so the home-relative path is expanded explicitly.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Log in on the landing page; the pause gives the page time to load before
# the form fields are looked up.
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page that the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
def checkRefresh(driver):
    """Click the element with id "loadmore" if the current page has one.

    The author labelled this step "Try Again"; presumably "loadmore" is the
    page's retry/reload control (not verifiable from this script alone).
    After a successful click the window size is reset to 1280x727.

    Args:
        driver: Selenium WebDriver showing the page to check.

    Returns:
        None. Failures are ignored.
    """
    try:
        driver.find_element_by_id('loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberately best effort: a missing element (or any other driver
        # error) just means there is nothing to click on this pass.
        pass
def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if one is shown.

    Takes a screenshot, crops it to the captcha image, sends the crop to the
    module-level OCR ``client``, evaluates the recognised expression and
    submits the result through the page's "answer" / "submit" form fields.

    Args:
        driver: Selenium WebDriver showing the page to check.

    Returns:
        1 if an answer was submitted, 0 if any step failed (including the
        page having no element with id "captcha").
    """
    try:
        # Presence check only: raises, and so returns 0, when the page has no
        # captcha element.
        driver.find_element_by_id('captcha')
        print(driver.get_window_size())
        time.sleep(2)

        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Rescale the screenshot so that element coordinates line up with
        # image pixels. (1269, 727) is the size the author used for headless
        # mode; their note gives (1269, 610) for non-headless runs.
        scaled = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a constant string and
        # therefore always true, so it does not restrict which div matches.
        # Confirm whether [@id='captcha'] was intended.
        element = driver.find_element_by_xpath(
            "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        scaled.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(result)

        # Turn the recognised text into a Python expression: the
        # multiplication sign (U+00D7) and the letters x/X become "*", the
        # division sign (U+00F7) becomes "/".
        yzm = re.sub(r'\xd7', r'*', result['words_result'][0]['words'])
        yzm = re.sub(r'\xf7', r'/', yzm)
        yzm = re.sub(r'x', r'*', yzm)
        yzm = re.sub(r'X', r'*', yzm)
        # SECURITY NOTE: eval() executes whatever text the OCR returned for
        # an image served by a remote site. Consider replacing it with a
        # restricted arithmetic parser.
        yzm = eval(yzm)
        print('验证码: ' + str(yzm))

        # Send the answer as text so that non-integer results are accepted
        # by send_keys as well.
        driver.find_element_by_name('answer').send_keys(str(yzm))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception:
        # No captcha present, or some step failed: report "not solved".
        return 0
def followbutton(driver):
    """Click the page's follow button and report the points seen beforehand.

    Reads the "currentpoints" text in the first window, clicks the first
    ``a.followbutton`` link and switches to the second window. If the text
    "This" occurs within the first 100 characters of that window's
    whitespace-stripped source, the page source is printed and the first
    window is reloaded.

    Args:
        driver: Selenium WebDriver that is logged in and on the task page.

    Returns:
        The text of the "currentpoints" element as read before the click,
        or 0 if any step failed.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()

        # The click is expected to open a second window; inspect its content.
        driver.switch_to_window(driver.window_handles[1])
        page_text = ''.join(driver.page_source.split())
        marker = page_text.find("This")
        # NOTE(review): presumably a page starting with "This ..." is an
        # error/unavailable notice - confirm against the site.
        if 0 <= marker < 100:
            print(page_text)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception:
        # Any driver failure (e.g. no second window opened) yields 0.
        return 0
# Main loop: up to 5000 rounds of "solve captcha if shown, click follow,
# wait, compare the point balance".
try:
    for _ in range(5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            # NOTE(review): presumably the time the site needs before it
            # credits the points - confirm.
            time.sleep(65)

            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Balance unchanged since before the click: reload the task page.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the message is not lost if the reload fails too.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    # Always shut the browser down, even if the recovery reload above raises.
    driver.quit()
复制代码 : V% L* g: x0 ]7 z# G) g: ]' W
* ?2 W/ O$ M6 t3 T1 s1 j) K; {
_8 E& Q2 f8 C- J, I1 g
因为使用 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
. t9 K2 V& z3 p( w3 N
9 c' B6 z/ Z& k+ v3 D/ G+ u8 O |
评分
-
查看全部评分
|