|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes the bytes to text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the page's character encoding. When null or empty, the encoding is
/// detected from the page's <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no encoding was supplied or detected, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if any step throws
/// (download failure, unknown encoding name, ...).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    string strWebData = "error";
    try
    {
        byte[] myDataBuffer;

        // WebClient is IDisposable; release it as soon as the download is done.
        using (WebClient myWebClient = new WebClient())
        {
            // Some pages cannot be fetched as-is (they need a cookie, etc.).
            // Handle those case by case, e.g. by adding a header here:
            //     myWebClient.Headers.Add("Cookie", cookie);
            myWebClient.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request. If the server needs an
            // explicit user name / password, assign a NetworkCredential instead.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            myDataBuffer = myWebClient.DownloadData(url);
        }

        // First pass: decode with the system default encoding. This is good enough
        // to read the ASCII-only <meta charset=...> declaration.
        strWebData = Encoding.Default.GetString(myDataBuffer);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding token, so quoted, unquoted and
            // "content='text/html; charset=...'" forms all work and trailing
            // attributes are not swallowed into the name.
            Match charSetMatch = Regex.Match(
                strWebData,
                "<meta[^<>]*?charset\\s*=\\s*[\"']?\\s*([A-Za-z0-9_.:\\-]+)",
                RegexOptions.IgnoreCase);
            charSet = charSetMatch.Groups[1].Value; // empty string when there is no match
        }

        // Callers may pass a name that still carries quotes; strip them.
        charSet = charSet.Replace("\"", "").Trim();

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Throws for an unknown encoding name; the catch below maps that to "error".
            Encoding pageEncoding = Encoding.GetEncoding(charSet);
            if (pageEncoding != Encoding.Default)
            {
                // Second pass: re-decode the raw bytes with the page's real encoding.
                strWebData = pageEncoding.GetString(myDataBuffer);
            }
        }
    }
    catch (Exception)
    {
        // Deliberate best-effort contract: callers receive the "error" sentinel
        // instead of an exception.
        strWebData = "error";
    }

    return strWebData;
}
复制代码
|
|