|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes it into a string.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the page's character encoding. When null or empty, the encoding is
/// looked up in the page's own <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or detected, "utf-8" is used;
/// otherwise the text stays decoded with <see cref="Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if the download or
/// the decoding fails for any reason (this method does not throw).
/// </returns>
/// <remarks>
/// Some pages cannot be fetched this way (they may need cookies, credentials, etc.).
/// Handle those case by case, e.g. <c>myWebClient.Headers.Add("Cookie", cookie)</c>
/// or <c>myWebClient.Credentials = new NetworkCredential(user, password)</c>,
/// typically through an additional overload.
/// </remarks>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    string strWebData = "error";
    try
    {
        byte[] myDataBuffer;

        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient myWebClient = new WebClient())
        {
            // Present a browser-like User-Agent.
            myWebClient.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Authenticate with the credentials of the current security context.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            // Download the raw bytes so they can be re-decoded once the charset is known.
            myDataBuffer = myWebClient.DownloadData(url);
        }

        // First pass: decode with the system default encoding. Good enough to read
        // the ASCII-only <meta> charset declaration, and used as the final result
        // when no other charset applies.
        strWebData = Encoding.Default.GetString(myDataBuffer);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding name itself (letters, digits, '_', '.', ':', '-'),
            // with or without surrounding quotes, so that attributes following the
            // charset inside the same tag are not swallowed into the name.
            Match charSetMatch = Regex.Match(
                strWebData,
                "<meta[^<>]*?charset\\s*=\\s*[\"']?\\s*([\\w.:-]+)",
                RegexOptions.IgnoreCase);
            charSet = charSetMatch.Success ? charSetMatch.Groups[1].Value : "";
        }
        else
        {
            // Tolerate a caller-supplied name that still carries quotes.
            charSet = charSet.Replace("\"", "");
        }

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Throws for an unknown encoding name; the catch below turns that into "error".
            Encoding targetEncoding = Encoding.GetEncoding(charSet);
            if (!targetEncoding.Equals(Encoding.Default))
            {
                strWebData = targetEncoding.GetString(myDataBuffer);
            }
        }
    }
    catch (Exception)
    {
        // Contract: every failure (network, bad URL, unknown charset) yields "error".
        strWebData = "error";
    }

    return strWebData;
}
复制代码
|
|