博客
关于我
强烈建议你试试无所不能的chatGPT,快点击我
.net网站数据抓取
阅读量:5304 次
发布时间:2019-06-14

本文共 7594 字,大约阅读时间需要 25 分钟。

最近的项目需要抓取人民币汇率中间价的数据,所以就写了个简单的爬虫来抓取数据。抓取的网站为:http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action

// CaptureData(): fetches the RMB central-parity exchange-rate page and stores rows that are not yet in the database.
//
// Flow, as far as the surviving text shows:
//   1. Sets isExecuting = true and logs a start message through SetLogging.
//   2. Creates a dbContext and opens a transaction with db.Database.BeginTransaction().
//   3. Builds a log title: today's date when executeType == "true", otherwise the startTime/endTime range.
//   4. Builds the query URL on http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action with
//      projectBean.startDate / projectBean.endDate (today, or startTime/endTime) and queryYN=true.
//   5. Issues a GET via HttpWebRequest (HTTP/1.1, browser-like User-Agent/Accept headers, Timeout = 100000,
//      Accept-Encoding "gzip, deflate", a fresh CookieContainer) and reads the whole body as utf-8.
//   6. If the body is non-empty: strips \n, \r, \t and backslashes, applies regex/string replacements,
//      matches the table and its rows, and collects each row's cell values into dataList.
//   7. Looks up an existing RMB_EXCHANGERATE by TIME; when none is found, adds a new entity with a GUID ID,
//      TIME from dataList[0], 23 decimal currency columns from dataList[1]..dataList[23], and CREATETIME = now.
//   8. Calls db.SaveChanges() and trans.Commit(), then logs "<title>成功"; for an empty body logs "<title>为空".
//   9. On any exception: rolls the transaction back, logs logTemplate2(ex, title + "失败"), and calls
//      CaptureData() again when ex.Message == "请求超时". isExecuting is reset to false on both paths.
//
// NOTE(review): this listing was flattened onto a few lines and appears to have lost text during
//   HTML-to-text extraction: the regex pattern literals are cut or empty, the generic arguments of
//   List / OfType are missing, and the "for(int i=0;i" loop plus the start of the lookup query are
//   truncated. Inline "//" comments now swallow the rest of their line. Restore from the original
//   article before compiling; the code text below is left exactly as found.
// NOTE(review): "Accept-Encoding: gzip, deflate" is sent, but no AutomaticDecompression setting is
//   visible; the body may arrive compressed and be read as utf-8 text -- confirm.
// NOTE(review): response, stream, db and trans are not disposed in the visible code -- confirm whether
//   using-blocks are needed.
// NOTE(review): the empty-body branch neither commits nor rolls back the transaction -- confirm intent.
// NOTE(review): the timeout retry recurses with no attempt limit and depends on a localized exception
//   message; isExecuting is reset in two places rather than in a finally block -- consider revising.
// NOTE(review): resultStr is declared but not used in the visible code.
#region 执行数据抓取(人民币汇率中间价)/// /// 执行数据抓取(人民币汇率中间价)/// public void CaptureData(){            isExecuting = true;            StringBuilder msg = new StringBuilder();            msg.AppendFormat("执行时间:{0}\r\n", DateTime.Now);            msg.Append("开始抓取人民币汇率中间价...\r\n\r\n");            SetLogging(msg.ToString());            var db = new dbContext();            var trans = db.Database.BeginTransaction();            string title = "";            DateTime dt = DateTime.Now;            if (executeType == "true")            {                title += dt.ToString("yyyy-MM-dd") + "的数据抓取";            }            else            {                title += "时间范围为:开始时间为" + startTime + ",结束时间为" + endTime + "的数据抓取";            }            try            {                string url = "";                string basePath = "http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action";                if(executeType == "true")                {                    var date = DateTime.Now.ToString("yyyy-MM-dd");                    url = basePath + "?projectBean.startDate=" + date + "&projectBean.endDate=" + date + "&queryYN=true";                }                else                {                    url = basePath + "?projectBean.startDate=" + startTime + "&projectBean.endDate=" + endTime + "&queryYN=true";                }                //string url = "http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action?projectBean.startDate=2017-03-15&projectBean.endDate=2017-03-15&queryYN=true";                //发送请求                HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url);                request.Method = "GET";                request.ProtocolVersion = HttpVersion.Version11;                request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36";                request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8";                
request.Timeout = 100000;                  request.Headers.Add("Accept-Encoding", "gzip, deflate");                request.Headers.Add("Accept-Language", "zh-CN,zh;q=0.8");                request.Headers.Add("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3");                request.CookieContainer = new CookieContainer();                //接收请求                HttpWebResponse response = (HttpWebResponse)request.GetResponse();                Stream stream = response.GetResponseStream();                string resultStr = ""; //返回字符串,若翻译失败则返回空字符串                using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("utf-8")))                {                    //开始解释结果                    string result = reader.ReadToEnd();                    if(!string.IsNullOrEmpty(result))                    {                        result = result.Replace("\n","");       //过滤\n转换为空                        result = result.Replace("\r", "");      //过滤\r转换为空                        result = result.Replace("\t", "");      //过滤\t转换为空                        result = result.Replace("\\", "");      //过滤\转换为空                                                                                    result = Regex.Replace(result, @"
", "", RegexOptions.IgnoreCase); //过滤注释 result = result.Replace(" ", ""); //过滤nbsp标签 string tableHtml = Regex.Match(result, "
.*
").ToString(); MatchCollection trHtmls = Regex.Matches(tableHtml, "(.*?)"); foreach (Match tr in trHtmls) { #region 插入一条信息 Regex reg2 = new Regex("
(.*?)"); MatchCollection mc2 = reg2.Matches(tr.Value); List
dataList = new List
(); Match[] marr = mc2.OfType
().ToArray(); for(int i=0;i
p.TIME == dateTime).FirstOrDefault(); if(item == null) { RMB_EXCHANGERATE obj = new RMB_EXCHANGERATE(); obj.ID = Guid.NewGuid().ToString(); obj.TIME = Convert.ToDateTime(dataList[0]); obj.USD = Convert.ToDecimal(dataList[1]); obj.EUR = Convert.ToDecimal(dataList[2]); obj.JPY = Convert.ToDecimal(dataList[3]); obj.HKD = Convert.ToDecimal(dataList[4]); obj.GBP = Convert.ToDecimal(dataList[5]); obj.MYR = Convert.ToDecimal(dataList[6]); obj.SUR = Convert.ToDecimal(dataList[7]); obj.ZAR = Convert.ToDecimal(dataList[8]); obj.KRW = Convert.ToDecimal(dataList[9]); obj.AED = Convert.ToDecimal(dataList[10]); obj.SR = Convert.ToDecimal(dataList[11]); obj.HUF = Convert.ToDecimal(dataList[12]); obj.PLN = Convert.ToDecimal(dataList[13]); obj.DKK = Convert.ToDecimal(dataList[14]); obj.SEK = Convert.ToDecimal(dataList[15]); obj.NOK = Convert.ToDecimal(dataList[16]); obj.ITL = Convert.ToDecimal(dataList[17]); obj.PHP = Convert.ToDecimal(dataList[18]); obj.AUD = Convert.ToDecimal(dataList[19]); obj.CAD = Convert.ToDecimal(dataList[20]); obj.NZD = Convert.ToDecimal(dataList[21]); obj.SGD = Convert.ToDecimal(dataList[22]); obj.CHF = Convert.ToDecimal(dataList[23]); obj.CREATETIME = DateTime.Now; db.RMB_EXCHANGERATE.Add(obj); } #endregion } db.SaveChanges(); trans.Commit(); StringBuilder msg2 = new StringBuilder(); msg2.AppendFormat("执行时间:{0}\r\n", DateTime.Now); msg2.AppendFormat("{0}成功\r\n\r\n",title); SetLogging(msg2.ToString()); } else { StringBuilder msg2 = new StringBuilder(); msg2.AppendFormat("执行时间:{0}\r\n", DateTime.Now); msg2.AppendFormat("{0}为空\r\n\r\n\r\n",title); SetLogging(msg2.ToString()); } } isExecuting = false; //无论执行成功还是失败,完成后都要恢复状态 } catch (Exception ex) { trans.Rollback(); var message = logTemplate2(ex, title+"失败"); SetLogging(message); if (ex.Message == "请求超时") { //循环抓取 CaptureData(); } isExecuting = false; //无论执行成功还是失败,完成后都要恢复状态 } } #endregion

 

 

转载于:https://www.cnblogs.com/kehaocheng/p/7503812.html

你可能感兴趣的文章
POJ 1015 Jury Compromise(双塔dp)
查看>>
论三星输入法的好坏
查看>>
Linux 终端连接工具 XShell v6.0.01 企业便携版
查看>>
JS写一个简单日历
查看>>
LCA的两种求法
查看>>
Python 发 邮件
查看>>
mysql忘记密码的解决办法
查看>>
全面分析Java的垃圾回收机制2
查看>>
[Code Festival 2017 qual A] C: Palindromic Matrix
查看>>
修改博客园css样式
查看>>
Python3 高阶函数
查看>>
初始面向对象
查看>>
docker一键安装
查看>>
leetcode Letter Combinations of a Phone Number
查看>>
Exercise 34: Accessing Elements Of Lists
查看>>
angular中的代码执行顺序和$scope.$digest();
查看>>
ALS算法 (面试准备)
查看>>
思达BI软件Style Intelligence实例教程—房地产分析
查看>>
Unity 3D 如何修改新建脚本中的 C# 默认创建的 Script 脚本格式
查看>>
Unity 5.4 测试版本新特性---因吹丝停
查看>>