|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
2 X' m; l' S( _! I3 s g$ F3 l
缺少一个 getHtml 方法,可以用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding used by the target page (for example "utf-8" or "gb2312").
/// Pass <c>null</c> or an empty string to have the encoding detected from the page's
/// <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When <c>true</c> and no encoding was supplied or detected, the page is decoded as UTF-8;
/// when <c>false</c> it is left decoded with <see cref="Encoding.Default"/>.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if the download or the decoding
/// failed for any reason (callers rely on this sentinel, so no exception escapes).
/// </returns>
/// <remarks>
/// Some pages cannot be fetched without extra request state such as cookies, e.g.
/// <c>client.Headers.Add("Cookie", cookie)</c>; add an overload for those cases if needed.
/// If the server requires a user name and password, assign a <c>NetworkCredential</c>
/// to <c>client.Credentials</c> instead of the default credentials.
/// </remarks>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    const string errorResult = "error";

    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable; release it as soon as the bytes are downloaded.
        using (WebClient client = new WebClient())
        {
            // Present a desktop-browser user agent; some sites refuse unknown clients.
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request against the remote resource.
            client.Credentials = CredentialCache.DefaultCredentials;

            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the system default encoding. This is good enough to
        // read the ASCII-only <meta> charset declaration even if the body is mis-decoded.
        string pageText = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding token itself. Optional quotes around the value are
            // skipped and the match stops at the first character that cannot be part of an
            // encoding name, so trailing attributes in the same tag are never included.
            Match declaration = Regex.Match(
                pageText,
                @"<meta[^>]*?charset\s*=\s*[""']?\s*([\w.:-]+)",
                RegexOptions.IgnoreCase);

            charSet = declaration.Success ? declaration.Groups[1].Value : string.Empty;
        }
        else
        {
            // Tolerate callers that pass the value with its surrounding quotes.
            charSet = charSet.Replace("\"", "").Trim();
        }

        if (charSet.Length == 0 && UseUTF8CharSet)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // An unknown encoding name throws here and is reported as "error" below.
            Encoding pageEncoding = Encoding.GetEncoding(charSet);

            // Second pass only when the real encoding differs from the one already used.
            if (pageEncoding.CodePage != Encoding.Default.CodePage)
            {
                pageText = pageEncoding.GetString(pageBytes);
            }
        }

        return pageText;
    }
    catch (Exception)
    {
        // Deliberately broad: the method's contract is to return the "error" sentinel
        // instead of throwing, whatever went wrong (network, URI, unsupported encoding).
        return errorResult;
    }
}
复制代码
5 ]9 w. \+ @9 M0 E \3 s7 y" S
, I1 k# P" o8 S3 m# U |
|