|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
" c" X5 J! P+ L8 h- U
缺少一个gethtml,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the page's character encoding. When null or empty, the encoding is
/// taken from the page's own &lt;meta ... charset=...&gt; declaration, if any.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no encoding was supplied or detected, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" when the download or
/// the decoding fails (for example an unreachable host or an unknown encoding name).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient client = new WebClient())
        {
            // Some pages cannot be fetched without extra request state (cookies,
            // a specific encoding, ...). Handle those case by case, e.g.
            //     client.Headers.Add("Cookie", cookie);
            // and add overloads as needed.
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Authenticate with the credentials of the current security context.
            // If the server requires an explicit user name / password, assign a
            // NetworkCredential(user, password) here instead.
            client.Credentials = CredentialCache.DefaultCredentials;

            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the system default encoding. The markup of the
        // <meta> tag is ASCII, so this is good enough to sniff the declared charset.
        string pageText = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the charset token itself, so that attributes following
            // it inside the same tag are not swallowed into the encoding name.
            Match declared = Regex.Match(
                pageText,
                "<meta[^>]*?charset\\s*=\\s*[\"']?([\\w.:-]+)",
                RegexOptions.IgnoreCase);
            charSet = declared.Success ? declared.Groups[1].Value : "";
        }
        else
        {
            // Tolerate callers passing a quoted or padded encoding name.
            charSet = charSet.Replace("\"", "").Trim();
        }

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Second pass: decode the raw bytes with the resolved encoding.
            // GetEncoding throws for an unknown name; that is reported as "error" below.
            pageText = Encoding.GetEncoding(charSet).GetString(pageBytes);
        }

        return pageText;
    }
    catch (Exception)
    {
        // Deliberate best-effort contract: every failure is reported to the
        // caller through the "error" sentinel instead of an exception.
        return "error";
    }
}
复制代码
|
|