|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns its content decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the character encoding to decode the page with. When null or empty,
/// the charset declared in the page's own meta tag (if any) is used instead.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or found in the page, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" if the download or the
/// decoding throws (for example when the charset name is not a known encoding).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    string strWebData = "error";
    try
    {
        byte[] myDataBuffer;

        // WebClient is IDisposable; release it as soon as the bytes are downloaded.
        using (WebClient myWebClient = new WebClient())
        {
            // Some pages cannot be fetched with these settings alone (they may need a
            // cookie header, explicit NetworkCredential, etc.); add overloads for such
            // cases as required.
            myWebClient.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Authenticate the request with the default credentials of the current context.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            myDataBuffer = myWebClient.DownloadData(url);
        }

        // First pass: decode with the system default encoding so the markup can be
        // searched for a charset declaration.
        strWebData = Encoding.Default.GetString(myDataBuffer);

        // Capture whatever follows "charset=" inside a meta tag, up to a closing quote.
        Match charSetMatch = Regex.Match(strWebData, "<meta([^<]*)charset=([^<]*)\"", RegexOptions.IgnoreCase | RegexOptions.Multiline);
        string webCharSet = charSetMatch.Groups[2].Value;

        // An explicitly supplied charset wins; otherwise use the one the page declares.
        if (string.IsNullOrEmpty(charSet))
        {
            charSet = webCharSet;
        }

        if (charSet.Length > 0)
        {
            // The captured value may still contain quote characters or surrounding
            // whitespace, neither of which belongs to an encoding name.
            charSet = charSet.Replace("\"", "").Trim();
        }

        if (UseUTF8CharSet && charSet.Length == 0)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            // Look the encoding up once; an unknown name throws and is reported as "error".
            Encoding targetEncoding = Encoding.GetEncoding(charSet);
            if (targetEncoding != Encoding.Default)
            {
                // Second pass: re-decode the raw bytes with the resolved encoding.
                strWebData = targetEncoding.GetString(myDataBuffer);
            }
        }
    }
    catch (Exception)
    {
        // Callers rely on the "error" sentinel rather than on an exception.
        strWebData = "error";
    }

    return strWebData;
}
复制代码
& \3 f, Z3 s9 @6 v; q3 u& d- q7 @: x2 O
|
|