|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding used by the target page. When null or empty, the encoding
/// is detected from the page's <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no encoding was supplied or detected, the page is decoded as UTF-8;
/// when false, the <see cref="Encoding.Default"/> decoding is kept in that case.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" when the download or the
/// decoding fails for any reason (including an unknown encoding name).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    string strWebData = "error";
    try
    {
        byte[] myDataBuffer;

        // WebClient is IDisposable; release it as soon as the bytes are downloaded.
        using (WebClient myWebClient = new WebClient())
        {
            // Some pages cannot be fetched without extra request headers (cookies,
            // for example). Add them here when a specific site needs it, e.g.
            // myWebClient.Headers.Add("Cookie", cookie);
            myWebClient.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request. Replace with a
            // NetworkCredential when the server requires a user name and password.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            myDataBuffer = myWebClient.DownloadData(url);
        }

        // First pass: decode with the system default encoding. This is enough to
        // read the ASCII <meta> declaration even if the real encoding differs.
        strWebData = Encoding.Default.GetString(myDataBuffer);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding token itself, so that attributes following
            // the charset (or surrounding quotes) never leak into the name.
            Match charSetMatch = Regex.Match(
                strWebData,
                "<meta[^<>]*?charset\\s*=\\s*[\"']?\\s*([\\w.:-]+)",
                RegexOptions.IgnoreCase);
            charSet = charSetMatch.Groups[1].Value; // empty string when nothing matched
        }
        else
        {
            // Tolerate callers that pass the name still wrapped in quotes.
            charSet = charSet.Replace("\"", "").Trim();
        }

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Second pass: re-decode the raw bytes only when the page encoding
            // actually differs from the one used for the first pass.
            Encoding pageEncoding = Encoding.GetEncoding(charSet);
            if (!pageEncoding.Equals(Encoding.Default))
            {
                strWebData = pageEncoding.GetString(myDataBuffer);
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort contract: every failure is reported to the
        // caller through the "error" sentinel instead of an exception.
        strWebData = "error";
    }

    return strWebData;
}
复制代码
|
|