|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
! g4 f. p: N+ z& K- _/ y
缺少一个gethtml,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding used by the target page. When null or empty, the encoding is
/// detected from the charset declaration in the page's &lt;meta&gt; tag.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or detected, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if the download or decoding fails.
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable; release it as soon as the bytes are downloaded.
        using (WebClient client = new WebClient())
        {
            // Some pages cannot be fetched without extra request state (cookies, etc.).
            // Handle those case by case, e.g. client.Headers.Add("Cookie", cookie);
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request. If the server requires a
            // user name and password, assign a NetworkCredential here instead.
            client.Credentials = CredentialCache.DefaultCredentials;

            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the system default encoding so the markup can be
        // searched for a charset declaration.
        string html = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding token itself (letters, digits, '_' and '-'),
            // skipping an optional opening quote. A greedy "everything up to the last
            // quote" capture would swallow any attributes that follow the charset.
            Match charSetMatch = Regex.Match(
                html,
                "<meta[^>]*?charset\\s*=\\s*[\"']?([\\w\\-]+)",
                RegexOptions.IgnoreCase);
            charSet = charSetMatch.Success ? charSetMatch.Groups[1].Value : "";
        }
        else
        {
            // Tolerate callers that pass the name wrapped in double quotes.
            charSet = charSet.Replace("\"", "");
        }

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Throws for an unknown encoding name; the catch below turns that into "error".
            Encoding pageEncoding = Encoding.GetEncoding(charSet);

            // Re-decode only when the page encoding differs from the first-pass encoding.
            if (!pageEncoding.Equals(Encoding.Default))
            {
                html = pageEncoding.GetString(pageBytes);
            }
        }

        return html;
    }
    catch (Exception)
    {
        // Deliberate best-effort contract: every failure is reported as the "error" sentinel.
        return "error";
    }
}
复制代码
$ {5 S" E1 a# [# \ j, Q: d |
|