|
本帖最后由 xiaoxue 于 2021-3-10 09:21 编辑
8 y" K# F+ _( x1 J
. P6 _2 l1 E+ E& y
7 M, r9 s/ }! c" m- + U. q' a" R! \8 E+ }
# -*- coding: utf-8 -*-
import ast
import operator
import os
import random
import re
import sys
import time

from aip import AipOcr
from PIL import Image, ImageDraw, ImageFont
from selenium import webdriver

# Your Baidu AIP credentials: APP_ID, API_KEY (AK) and SECRET_KEY (SK).
# The 'xxx' values are placeholders and must be replaced before running.
APP_ID = 'xxx'
API_KEY = 'xxx'
SECRET_KEY = 'xxx'

# OCR client used by checkcaptcha() to read the captcha image.
client = AipOcr(APP_ID, API_KEY, SECRET_KEY)

# Optional HTTP proxy; enable together with the --proxy-server switch below.
# PROXY = "127.0.0.1:8118"

chrome_options = webdriver.ChromeOptions()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# chrome_options.add_argument('--proxy-server=%s' % PROXY)
chrome_options.add_argument("--incognito")
chrome_options.add_argument('--ignore-certificate-errors')

# Optional switches the original author kept for Windows ("Win") runs.
# chrome_options.add_argument("--log-level=3")
# chrome_options.add_argument("--disable-logging")
# chrome_options.add_argument('--no-sandbox')

def get_file_content(filePath):
    """Read an image file and return its raw bytes.

    :param filePath: path of the file to read.
    :returns: the complete file content as a byte string.
    :raises IOError/OSError: if the file cannot be opened or read.
    """
    # Binary mode: the bytes are handed to the OCR client unchanged.
    with open(filePath, 'rb') as fp:
        return fp.read()

# Start Chrome. The driver binary is launched without a shell, so "~" must be
# expanded explicitly or the path is looked up literally and not found.
driver = webdriver.Chrome(os.path.expanduser('~/chromedriver'),
                          chrome_options=chrome_options)
driver.set_window_size(1280, 727)

# Log in on the landing page; the credentials below are placeholders.
driver.get("https://youlikehits.com/")
time.sleep(5)  # give the login form time to load
driver.find_element_by_id("username").send_keys('11111')  # user
driver.find_element_by_id("password").send_keys('111111')  # password
driver.find_element_by_xpath("//input[@value='Login']").click()

# Open the page that the main loop works on.
driver.get("https://youlikehits.com/youtubenew2.php")
time.sleep(1)

def checkRefresh(driver):
    """Click the "Try Again" control (element id ``loadmore``) if it exists.

    After a successful click the window is resized back to 1280x727, the
    size the rest of the script sets at start-up.

    :param driver: Selenium WebDriver positioned on the task page.
    :returns: None. Any failure is ignored.
    """
    try:
        refresh_button = driver.find_element_by_id('loadmore')
        refresh_button.click()
        driver.set_window_size(1280, 727)
    except Exception:
        # Best effort by design: a missing button is the normal case and
        # must not interrupt the main loop.
        pass

def _normalize_captcha_text(text):
    """Rewrite OCR'd operator glyphs into Python arithmetic operators.

    The multiplication sign (U+00D7) and the letters ``x``/``X`` become
    ``*``; the division sign (U+00F7) becomes ``/``.
    """
    text = re.sub(r'[\xd7xX]', '*', text)
    return re.sub(r'\xf7', '/', text)


def _evaluate_arithmetic(expression):
    """Evaluate a plain arithmetic expression without calling ``eval``.

    Only numeric literals, unary ``+``/``-`` and the binary operators
    ``+ - * /`` are accepted. An exact integer division returns an int;
    an inexact one returns a float.

    :raises ValueError: if the expression contains anything else.
    :raises SyntaxError: if the text is not a valid expression.
    :raises ZeroDivisionError: on division by zero.
    """
    def divide(left, right):
        if isinstance(left, int) and isinstance(right, int) and left % right == 0:
            return left // right
        return left / float(right)

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: divide,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    # Python 3.8+ parses literals as ast.Constant; older versions use ast.Num.
    constant_type = getattr(ast, 'Constant', None)

    def visit(node):
        if isinstance(node, ast.Expression):
            return visit(node.body)
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](visit(node.left), visit(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](visit(node.operand))
        if constant_type is not None:
            if isinstance(node, constant_type) and type(node.value) in (int, float):
                return node.value
        elif isinstance(node, ast.Num):
            return node.n
        raise ValueError('unsupported captcha expression: %r' % (expression,))

    # Strip first: leading whitespace is an indentation error for ast.parse.
    return visit(ast.parse(expression.strip(), mode='eval'))


def checkcaptcha(driver):
    """Solve the arithmetic captcha on the current page, if one is shown.

    The page is screenshotted, the captcha image is cropped out and sent to
    the OCR client, the recognised expression is evaluated and the result is
    submitted through the ``answer`` / ``submit`` form fields.

    :param driver: Selenium WebDriver positioned on the task page.
    :returns: 1 if an answer was submitted, 0 if no captcha element was
        found or any step of solving it failed.
    """
    try:
        driver.find_element_by_id('captcha')
    except Exception:
        # No captcha on the page (the usual case): nothing to solve.
        return 0

    try:
        print(driver.get_window_size())
        time.sleep(2)
        driver.save_screenshot('/tmp/screenshot.png')
        screenshot = Image.open('/tmp/screenshot.png')
        # Headless mode: resize to 1269x727 so element coordinates line up
        # with screenshot pixels. With headless disabled the original author
        # used (1269, 610) instead.
        resized = screenshot.resize((1269, 727), Image.ANTIALIAS)

        # NOTE(review): the predicate ['#captcha'] is a constant string and
        # is therefore always true, so this matches the image under any
        # <div>; probably meant [@id='captcha'] -- confirm against the page.
        element = driver.find_element_by_xpath(
            "//div['#captcha']//img[@src='captchayt.php']")
        left = element.location['x']
        top = element.location['y']
        right = left + element.size['width']
        bottom = top + element.size['height']
        resized.crop((left, top, right, bottom)).save('/tmp/screenshot1.png')

        ocr_result = client.basicGeneral(get_file_content('/tmp/screenshot1.png'))
        print(ocr_result)

        # The OCR text comes from an external service, so it is evaluated by
        # a restricted arithmetic evaluator rather than eval().
        expression = _normalize_captcha_text(ocr_result['words_result'][0]['words'])
        answer = _evaluate_arithmetic(expression)
        print('验证码: ' + str(answer))

        driver.find_element_by_name('answer').send_keys(str(answer))
        driver.find_element_by_name('submit').click()
        time.sleep(3)
        return 1
    except Exception as error:
        # Keep the best-effort contract (return 0) but report the failure.
        print('captcha error: ' + str(error))
        return 0

def followbutton(driver):
    """Click the follow button and report the points shown beforehand.

    Reads the ``currentpoints`` text in the first window, clicks the
    ``a.followbutton`` link, then inspects the second window. If the word
    "This" occurs within the first 100 characters of that window's
    whitespace-stripped page source, the source is printed and the task page
    is reloaded in the first window.

    :param driver: Selenium WebDriver with the task page in window 0.
    :returns: the ``currentpoints`` text read before clicking, or 0 if any
        step failed.
    """
    try:
        driver.switch_to_window(driver.window_handles[0])
        points = driver.find_element_by_id("currentpoints").text
        driver.execute_script(
            'return document.querySelector("a[class=followbutton]")').click()

        driver.switch_to_window(driver.window_handles[1])
        video_source = ''.join(driver.page_source.split())
        marker_at = video_source.find("This")
        # NOTE(review): presumably an early "This ..." is an error page for
        # an unavailable video -- confirm against the site.
        if 0 <= marker_at < 100:
            print(video_source)
            driver.switch_to_window(driver.window_handles[0])
            print("Refresh...")
            driver.get("https://youlikehits.com/youtubenew2.php")
            time.sleep(1)
        return points
    except Exception as error:
        # Keep the best-effort contract (return 0) but report the failure.
        print('follow error: ' + str(error))
        return 0

# Main loop: up to 5000 rounds of solve-captcha / follow / check points.
# The try/finally ensures the browser is shut down even if an error escapes.
try:
    for _ in range(0, 5000):
        try:
            checkcaptcha(driver)
            time.sleep(1)
            checkRefresh(driver)
            points = followbutton(driver)
            # NOTE(review): presumably the time the site needs before it
            # credits the points -- confirm.
            time.sleep(65)
            driver.switch_to_window(driver.window_handles[0])
            tmpp = driver.find_element_by_id("currentpoints").text
            print("points: " + str(tmpp))
            # Points unchanged after the wait: reload and try again.
            if points == tmpp:
                print("Refresh...")
                driver.get("https://youlikehits.com/youtubenew2.php")
                time.sleep(5)
        except Exception as e:
            # Report first so the error is not lost if the reload fails too.
            print('error: ' + str(e))
            try:
                driver.get("https://youlikehits.com/youtubenew2.php")
            except Exception as reload_error:
                print('error: ' + str(reload_error))
finally:
    driver.quit()

- s' L+ C. L* @: G' R, X1 j
复制代码

因为使用了 headless 模式,可以挂在 VPS 上运行。百度 OCR 识别验证码的正确率还是蛮高的。

|
评分
-
查看全部评分
|