|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding the page is written in. Pass <c>null</c> or an empty string to have
/// the encoding read from the page's own <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When <c>true</c>, UTF-8 is used if no charset was supplied or detected
/// (or if the supplied/detected name is not supported by the runtime).
/// </param>
/// <returns>
/// The decoded page text, or the literal string <c>"error"</c> if the download or decoding fails.
/// This method does not throw.
/// </returns>
/// <remarks>
/// Some pages cannot be fetched this way (they may require cookies, authentication, etc.).
/// In that case add the required headers, e.g. <c>client.Headers.Add("Cookie", cookie)</c>,
/// or assign a <c>NetworkCredential</c> to <c>client.Credentials</c>.
/// </remarks>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient client = new WebClient())
        {
            // Present a browser-like user agent.
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request.
            client.Credentials = CredentialCache.DefaultCredentials;

            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the system default encoding. The <meta> declaration is
        // plain ASCII, so it can be located even if the rest of the text decodes incorrectly.
        string html = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            charSet = DetectMetaCharSet(html);
        }

        // Callers (and some pages) wrap the name in quotes; strip them.
        charSet = charSet.Replace("\"", "").Replace("'", "").Trim();

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        // Second pass: re-decode the raw bytes with the real encoding when it differs
        // from the default one used above.
        Encoding pageEncoding = ResolveEncodingOrNull(charSet, UseUTF8CharSet);
        if (pageEncoding != null && !ReferenceEquals(pageEncoding, Encoding.Default))
        {
            html = pageEncoding.GetString(pageBytes);
        }

        return html;
    }
    catch (Exception)
    {
        // Documented contract: any failure is reported through the "error" sentinel
        // rather than an exception.
        return "error";
    }
}

/// <summary>
/// Extracts the charset name from the first <c>&lt;meta&gt;</c> tag that declares one.
/// Returns an empty string when no declaration is found.
/// </summary>
private static string DetectMetaCharSet(string html)
{
    // Capture only the charset token itself: stop at whitespace, quotes, ';', '/' or '>'
    // so that attributes following the charset are not swallowed into the name.
    Match metaMatch = Regex.Match(
        html,
        "<meta[^>]*?charset\\s*=\\s*[\"']?\\s*([^\\s\"'/>;]+)",
        RegexOptions.IgnoreCase);

    return metaMatch.Success ? metaMatch.Groups[1].Value : "";
}

/// <summary>
/// Maps a charset name to an <see cref="Encoding"/>. Returns <c>null</c> when the name is empty,
/// or when it is unsupported and <paramref name="fallbackToUtf8"/> is <c>false</c>.
/// </summary>
private static Encoding ResolveEncodingOrNull(string charSetName, bool fallbackToUtf8)
{
    if (string.IsNullOrEmpty(charSetName))
    {
        return null;
    }

    try
    {
        return Encoding.GetEncoding(charSetName);
    }
    catch (ArgumentException)
    {
        // Unknown or unsupported charset name: do not discard a successfully downloaded page.
        return fallbackToUtf8 ? Encoding.UTF8 : null;
    }
}
复制代码
8 O1 M! n# C1 c0 ~3 ~/ ~% i$ P
- Q [: O! b( n- _( s: p( p% _/ K |
|