|
|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
1 `. l7 L5 z" u0 c" E2 ]' w2 I' R
9 ]" P9 e' ~) K3 j1 E" L2 u+ K! W7 J' [8 w0 D# @: y8 N3 l
" F' s% O7 @$ y( I- # -*- coding: utf-8 -*-; \* y+ v K5 i# F
import ast
import operator
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver
from selenium.webdriver.common.by import By
# Baidu OCR credentials: your APPID / AK / SK.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server flag below.
#PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
#chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Win
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
#chrome_options.add_argument('--no-sandbox')

# Read an image file.
def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    Args:
        filePath: Path of the file to read.

    Returns:
        The complete file content, read in binary mode.
    """
    with open(filePath, 'rb') as fp:
        return fp.read()
# Start Chrome. The driver path is expanded explicitly because '~' is only
# expanded by a shell, and the chromedriver binary is not launched through one.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Log in through the form on the landing page.
driver.get("https://youlikehits.com/")
time.sleep(5)
driver.find_element(By.ID, "username").send_keys('11111')  # user
driver.find_element(By.ID, "password").send_keys('111111')  # password
driver.find_element(By.XPATH, "//input[@value='Login']").click()

# Open the page the functions below operate on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)
# Try Again
def checkRefresh(driver):
    """Click the element with id ``loadmore`` if the page has one.

    After a successful click the window is resized to 1280x727 again.
    Any failure (typically: the element is not on the page) is ignored on
    purpose, so this is a best-effort step that never raises.

    Args:
        driver: Selenium WebDriver showing the page.

    Returns:
        None.
    """
    try:
        driver.find_element(By.ID, 'loadmore').click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Deliberate best effort: no 'loadmore' element means nothing to do.
        pass
# Binary operators a captcha expression is allowed to contain.
_CAPTCHA_OPERATORS = {
    ast.Add: operator.add,
    ast.Sub: operator.sub,
    ast.Mult: operator.mul,
    ast.Div: operator.truediv,
}


def _normalize_captcha_text(text):
    """Turn OCR output into a plain arithmetic expression.

    The multiplication sign (U+00D7) and the letters ``x``/``X`` become
    ``*``; the division sign (U+00F7) becomes ``/``.
    """
    text = re.sub(r'[\xd7xX]', '*', text)
    return re.sub(r'\xf7', '/', text)


def _evaluate_captcha_node(node):
    """Evaluate one AST node made only of numbers, ``+ - * /`` and signs.

    Raises:
        ValueError: If the node is anything else (names, calls, ...).
    """
    if isinstance(node, ast.BinOp):
        apply_operator = _CAPTCHA_OPERATORS.get(type(node.op))
        if apply_operator is not None:
            return apply_operator(_evaluate_captcha_node(node.left),
                                  _evaluate_captcha_node(node.right))
    elif isinstance(node, ast.UnaryOp):
        if isinstance(node.op, ast.USub):
            return -_evaluate_captcha_node(node.operand)
        if isinstance(node.op, ast.UAdd):
            return +_evaluate_captcha_node(node.operand)
    else:
        # Numeric literals are ast.Constant on newer interpreters and
        # ast.Num on older ones; a missing class yields (), which never matches.
        if isinstance(node, getattr(ast, 'Constant', ())):
            value = node.value
        elif isinstance(node, getattr(ast, 'Num', ())):
            value = node.n
        else:
            value = None
        if isinstance(value, (int, float)) and not isinstance(value, bool):
            return value
    raise ValueError('unsupported captcha expression')


def _solve_captcha_expression(text):
    """Return the numeric answer for the OCR'd captcha text.

    Division is true division; an integral result is returned as ``int`` so
    that e.g. ``8/2`` is typed as ``4`` rather than ``4.0``.
    """
    expression = _normalize_captcha_text(text).strip()
    answer = _evaluate_captcha_node(ast.parse(expression, mode='eval').body)
    if isinstance(answer, float) and answer.is_integer():
        answer = int(answer)
    return answer


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if there is one.

    The page is screenshotted, the captcha image is cropped out and sent to
    the Baidu OCR ``client``, the recognised expression is evaluated and the
    result is submitted through the ``answer`` / ``submit`` form fields.

    Args:
        driver: Selenium WebDriver showing the page.

    Returns:
        1 if an answer was submitted, 0 if no captcha element was found or
        any step of solving it failed. Never raises.
    """
    try:
        # Presence probe only: without a 'captcha' element there is no work.
        driver.find_element(By.ID, 'captcha')
    except Exception:
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')

        # Size used with headless mode enabled; with headless disabled the
        # original author resized to (1269, 610) instead.
        # Image.LANCZOS is the same filter as the removed Image.ANTIALIAS.
        resized = screenshot.resize((1269, 727), Image.LANCZOS)

        # NOTE(review): the predicate ['#captcha'] is a non-empty string and
        # therefore always true, so this matches the image under ANY div.
        # Probably meant [@id='captcha'] - confirm against the page markup.
        element = driver.find_element(
            By.XPATH, "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        image = get_file_content('/tmp/screenshot1.png')
        ocr_result = client.basicGeneral(image)
        print(ocr_result)

        # SECURITY: the OCR text originates from a remote page, so it is
        # evaluated with a restricted arithmetic evaluator, not eval().
        yzm = _solve_captcha_expression(ocr_result['words_result'][0]['words'])
        print('验证码: ' + str(yzm))

        driver.find_element(By.NAME, 'answer').send_keys(str(yzm))
        driver.find_element(By.NAME, 'submit').click()
        time.sleep(3)
        return 1
    except Exception as e:
        # Still best effort, but report why solving failed.
        print('captcha error: ' + str(e))
        return 0
def followbutton(driver):
    """Click the page's follow button and inspect the window it opens.

    Reads the ``currentpoints`` text in the first window, clicks the first
    ``a[class=followbutton]`` link, then switches to the second window. If
    the whitespace-stripped page source of that window contains ``"This"``
    within its first 100 characters (presumably an error/notice page -
    confirm), the source is printed and the first window is reloaded.

    Args:
        driver: Selenium WebDriver; window 0 must show the listing page.

    Returns:
        The ``currentpoints`` text read before clicking, or 0 if any step
        failed. Never raises.
    """
    try:
        driver.switch_to.window(driver.window_handles[0])
        points = driver.find_element(By.ID, "currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()
        driver.switch_to.window(driver.window_handles[1])

        video_source = ''.join(driver.page_source.split())
        if 0 <= video_source.find("This") < 100:
            print(video_source)
            driver.switch_to.window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception as e:
        # Best effort: report the failure and signal it with 0.
        print('followbutton error: ' + str(e))
        return 0
# Main loop: up to 5000 rounds of "solve captcha, follow, wait, check points".
# The finally clause guarantees the browser is closed even when the loop is
# interrupted (Ctrl-C) or the recovery reload itself fails.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)

            # Wait before reading the points counter again.
            time.sleep(65)
            driver.switch_to.window(driver.window_handles[0])
            tmpp = driver.find_element(By.ID, "currentpoints").text
            print("points: " + str(tmpp))

            # Unchanged points: reload the listing page.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the message is not lost if the reload fails.
            print('error: ' + str(e))
            driver.get("https://youlikehits.com/youtubenew2.php")
finally:
    driver.quit()
复制代码
( o1 \! N: b e9 E" U4 x* ^# k
2 `0 K: ^3 w6 ~9 e: o( O. P* C4 _% S2 R$ M& Y8 W) d
因为是 headless 模式,可以挂在 VPS 上跑。百度识别验证码的正确率还是蛮高的。
5 C( Z h) d4 E& L5 y7 D
|
评分
-
查看全部评分
|