|
|
发表于 2016-2-26 16:23:12
|
显示全部楼层
缺少一个 getHtml 方法,用下面这个:
/// <summary>
/// Downloads the resource at <paramref name="url"/> and decodes the raw bytes into a string.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <param name="charSet">
/// Name of the page's character encoding. When null or empty, the encoding is
/// sniffed from the first <c>&lt;meta ... charset=...&gt;</c> tag found in the page.
/// </param>
/// <param name="UseUTF8CharSet">
/// When true and no charset was supplied or detected, the page is decoded as UTF-8.
/// </param>
/// <returns>
/// The decoded page text, or the literal string "error" when the download or
/// decoding fails for any reason (this method never throws).
/// </returns>
public string getHtml(string url, string charSet, bool UseUTF8CharSet)
{
    try
    {
        byte[] pageBytes;

        // WebClient is IDisposable; release it as soon as the bytes are in hand.
        using (WebClient client = new WebClient())
        {
            // Some pages cannot be fetched without extra request state (cookies, a
            // particular encoding, ...). Handle those case by case, for example:
            //   client.Headers.Add("Cookie", cookie);
            // Add overloads for such cases as needed.
            client.Headers.Add("User-agent", "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; .NET4.0C; .NET4.0E)");

            // Authenticate with the default credentials. If the server requires an
            // explicit user name and password, assign a NetworkCredential instead.
            client.Credentials = CredentialCache.DefaultCredentials;

            pageBytes = client.DownloadData(url);
        }

        // First pass: decode with the default encoding so the markup can be
        // searched for a charset declaration.
        string pageText = Encoding.Default.GetString(pageBytes);

        if (string.IsNullOrEmpty(charSet))
        {
            // Capture only the charset token itself. Stopping at quotes, whitespace,
            // ';', '/', '<' and '>' keeps trailing attributes out of the capture.
            Match charSetMatch = Regex.Match(
                pageText,
                "<meta[^<>]*charset\\s*=\\s*[\"']?([^\\s\"'<>;/]+)",
                RegexOptions.IgnoreCase);
            charSet = charSetMatch.Success ? charSetMatch.Groups[1].Value : "";
        }

        // A caller-supplied name may still carry quotes or padding.
        charSet = charSet.Replace("\"", "").Trim();

        if (charSet.Length == 0 && UseUTF8CharSet)
        {
            charSet = "utf-8";
        }

        if (charSet.Length == 0)
        {
            // Nothing better is known; keep the default-encoding decode.
            return pageText;
        }

        // Second pass: decode the same bytes with the resolved encoding.
        // GetEncoding throws for an unknown name, which the catch below maps to "error".
        return Encoding.GetEncoding(charSet).GetString(pageBytes);
    }
    catch (Exception)
    {
        // Deliberate best-effort contract: callers check for the "error" sentinel
        // instead of handling exceptions.
        return "error";
    }
}
复制代码
|
|