|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes it to text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the page's character encoding. When null or empty, the encoding is
/// taken from the charset declared in the page's meta tag, if one is found.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or detected, "utf-8" is used.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if anything fails
/// (download failure, unknown charset name, etc.).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    string strWebData = "error";
    try
    {
        // WebClient is IDisposable; the using block releases it on every path.
        using (WebClient myWebClient = new WebClient())
        {
            // Some pages cannot be fetched as-is (they may need a cookie, other
            // headers, explicit credentials, ...). Handle those case by case,
            // e.g. myWebClient.Headers.Add("Cookie", cookie) or by assigning a
            // NetworkCredential(user, password) to Credentials in an overload.
            myWebClient.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request to the Internet resource.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            // Download the raw bytes and decode them with the system default
            // encoding first, so the meta tag can be searched for a charset.
            byte[] myDataBuffer = myWebClient.DownloadData(url);
            strWebData = Encoding.Default.GetString(myDataBuffer);

            // Look for the charset declared in the page's meta tag.
            Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
            string webCharSet = charSetMatch.Groups[2].Value;

            // A caller-supplied charset wins; otherwise use the detected one.
            if (string.IsNullOrEmpty(charSet))
            {
                charSet = webCharSet;
            }

            // The captured value may still carry quote characters from the attribute.
            if (charSet.Length > 0)
            {
                charSet = charSet.Replace("\"", "");
            }

            if (UseUTF8CharSet && string.IsNullOrEmpty(charSet))
            {
                charSet = "utf-8";
            }

            if (!string.IsNullOrEmpty(charSet))
            {
                // Resolve the encoding once. An unrecognized name makes
                // GetEncoding throw, which the catch below reports as "error".
                Encoding targetEncoding = Encoding.GetEncoding(charSet);

                // Re-decode only when the target differs from the encoding
                // already used for the first pass.
                if (!targetEncoding.Equals(Encoding.Default))
                {
                    strWebData = targetEncoding.GetString(myDataBuffer);
                }
            }
        }
    }
    catch (Exception)
    {
        // Contract: every failure is reported to the caller as "error".
        strWebData = "error";
    }

    return strWebData;
}
复制代码
|
|