|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding used by the target page. When null or empty, the encoding name is
/// looked up in the page's own <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no encoding name was supplied or detected, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" when the download or the decoding
/// fails (including when the resolved encoding name is not recognised by
/// <see cref="Encoding.GetEncoding(string)"/>).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable: release it as soon as the bytes are in memory,
        // even if the download throws.
        using (WebClient client = new WebClient())
        {
            // Note: some pages cannot be downloaded as-is, for various reasons (a cookie is
            // required, encoding problems, ...). Handle those case by case, for example by
            // adding a cookie header (which may call for additional overloads of this method):
            //   client.Headers.Add("Cookie", cookie);
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");
            // Alternative user agent:
            //   client.Headers.Add("User-agent", "Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)");

            // Network credentials used to authenticate the request.
            client.Credentials = CredentialCache.DefaultCredentials;
            // If the server requires a user name and password:
            //   NetworkCredential mycred = new NetworkCredential(struser, strpassword);
            //   client.Credentials = mycred;

            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the default encoding so the markup can be searched
        // for a charset declaration.
        string pageText = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the charset token itself (quoted or unquoted), so attributes that
            // follow it inside the same <meta> tag do not end up in the encoding name.
            Match declared = Regex.Match(
                pageText,
                "<meta[^<]*charset\\s*=\\s*[\"']?([\\w.:-]+)",
                RegexOptions.IgnoreCase);
            charSet = declared.Success ? declared.Groups[1].Value : "";
        }

        // Strip stray double quotes (and surrounding whitespace) from the encoding name.
        charSet = charSet.Replace("\"", "").Trim();

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Throws for an unknown encoding name; the catch below maps that to "error".
            Encoding pageEncoding = Encoding.GetEncoding(charSet);

            // Re-decode only when the result could differ from the first pass.
            if (!pageEncoding.Equals(Encoding.Default))
            {
                pageText = pageEncoding.GetString(pageBytes);
            }
        }

        return pageText;
    }
    catch (Exception)
    {
        // Callers rely on the "error" sentinel instead of exceptions, so every failure
        // (network, invalid URL, unknown encoding) is reported this way.
        return "error";
    }
}
复制代码
! s) G. t" r9 K1 m* V- o& {1 P: @9 A& K/ \2 Y/ g% w/ x' g& `. K* ]
|
|