|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个gethtml,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and returns it decoded as text.
/// </summary>
/// <param name="url">Address of the page to download.</param>
/// <param name="charSet">
/// Name of the encoding to decode the page with. When null or empty, the encoding
/// is sniffed from the page's <c>&lt;meta ... charset=...&gt;</c> declaration.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or detected, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string <c>"error"</c> when the download
/// (or any other step) fails. If the charset name is not recognised by
/// <see cref="Encoding.GetEncoding(string)"/>, the text decoded with
/// <see cref="Encoding.Default"/> is returned instead of failing.
/// </returns>
/// <remarks>
/// Some pages cannot be fetched this way (for example when they require a cookie);
/// such cases need extra request headers, e.g. <c>Headers.Add("Cookie", cookie)</c>,
/// or explicit <c>NetworkCredential</c> values instead of the default credentials.
/// </remarks>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    string strWebData = "error";
    try
    {
        byte[] myDataBuffer;

        // WebClient is IDisposable; release it as soon as the bytes are in memory.
        using (WebClient myWebClient = new WebClient())
        {
            myWebClient.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Credentials used to authenticate the request.
            myWebClient.Credentials = CredentialCache.DefaultCredentials;

            // Download the resource as raw bytes so it can be decoded more than once.
            myDataBuffer = myWebClient.DownloadData(url);
        }

        // First pass: decode with the default encoding. The <meta> declaration is
        // ASCII, so it can be located even when the rest of the text is mis-decoded.
        strWebData = Encoding.Default.GetString(myDataBuffer);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the encoding token after "charset=", with or without
            // quotes, so attributes that follow it in the same tag are not included.
            Match charSetMatch = Regex.Match(
                strWebData,
                @"<meta[^>]*?charset\s*=\s*[""']?\s*([A-Za-z0-9_\-]+)",
                RegexOptions.IgnoreCase);
            charSet = charSetMatch.Success ? charSetMatch.Groups[1].Value : "";
        }

        // Strip any quotes that came along with a caller-supplied name.
        charSet = charSet.Replace("\"", "").Trim();

        if (charSet.Length == 0 && UseUTF8CharSet)
        {
            charSet = "utf-8";
        }

        if (charSet.Length > 0)
        {
            try
            {
                Encoding targetEncoding = Encoding.GetEncoding(charSet);

                // Second pass only when the target differs from the first-pass encoding.
                if (!targetEncoding.Equals(Encoding.Default))
                {
                    strWebData = targetEncoding.GetString(myDataBuffer);
                }
            }
            catch (ArgumentException)
            {
                // Unknown/unsupported charset name: keep the default-decoded text
                // rather than discarding a successful download.
            }
        }
    }
    catch (Exception)
    {
        // Callers rely on the "error" sentinel instead of an exception.
        strWebData = "error";
    }

    return strWebData;
}
复制代码
|
|