|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个gethtml,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the page's character encoding (for example "gb2312"). Pass null or ""
/// to use the charset declared in the page's own &lt;meta&gt; tag, if any.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or detected, the bytes are decoded as UTF-8.
/// When false in that situation, the text decoded with <see cref="Encoding.Default"/> is returned.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if the download fails or the
/// charset name is not a recognised encoding.
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable; release it as soon as the download has finished.
        using (WebClient client = new WebClient())
        {
            // Some pages still cannot be downloaded as-is (they may require a cookie, for
            // example). Handle those case by case, e.g. client.Headers.Add("Cookie", cookie);
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Authenticate with the default credentials. If the server requires an explicit
            // user name and password, assign a NetworkCredential here instead.
            client.Credentials = CredentialCache.DefaultCredentials;

            pageBytes = client.DownloadData(url);
        }

        // First pass with the system default encoding: enough to search for the
        // ASCII-only charset declaration in the markup.
        string pageText = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding name after "charset=", whether it is quoted with
            // double quotes, single quotes, or not at all, and stop before any following
            // attributes so they cannot leak into the name.
            Match metaMatch = Regex.Match(
                pageText,
                "<meta[^>]*?charset\\s*=\\s*[\"']?\\s*([A-Za-z0-9_.:-]+)",
                RegexOptions.IgnoreCase);
            charSet = metaMatch.Success ? metaMatch.Groups[1].Value : "";
        }

        // Callers may pass the name with surrounding quotes; strip them.
        charSet = charSet.Replace("\"", "").Trim();

        if (charSet.Length == 0 && UseUTF8CharSet)
        {
            charSet = "utf-8";
        }

        if (charSet.Length == 0)
        {
            return pageText;
        }

        // Throws ArgumentException for an unknown name; the catch below maps that to "error".
        Encoding pageEncoding = Encoding.GetEncoding(charSet);

        // Re-decoding with the same code page would give the same string, so skip it.
        if (pageEncoding.CodePage == Encoding.Default.CodePage)
        {
            return pageText;
        }

        return pageEncoding.GetString(pageBytes);
    }
    catch (Exception)
    {
        // Contract: every failure is reported to the caller as the string "error".
        return "error";
    }
}
复制代码
5 I4 r) b( v' y5 I; _* E! i' w* y
|
|