最近的项目需要抓取人民币汇率中间价的数据,所以写了一个简单的爬虫来抓取数据。抓取的网站为国家外汇管理局的人民币汇率中间价查询页面:http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action
#region 执行数据抓取(人民币汇率中间价)////// 执行数据抓取(人民币汇率中间价)/// public void CaptureData(){ isExecuting = true; StringBuilder msg = new StringBuilder(); msg.AppendFormat("执行时间:{0}\r\n", DateTime.Now); msg.Append("开始抓取人民币汇率中间价...\r\n\r\n"); SetLogging(msg.ToString()); var db = new dbContext(); var trans = db.Database.BeginTransaction(); string title = ""; DateTime dt = DateTime.Now; if (executeType == "true") { title += dt.ToString("yyyy-MM-dd") + "的数据抓取"; } else { title += "时间范围为:开始时间为" + startTime + ",结束时间为" + endTime + "的数据抓取"; } try { string url = ""; string basePath = "http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action"; if(executeType == "true") { var date = DateTime.Now.ToString("yyyy-MM-dd"); url = basePath + "?projectBean.startDate=" + date + "&projectBean.endDate=" + date + "&queryYN=true"; } else { url = basePath + "?projectBean.startDate=" + startTime + "&projectBean.endDate=" + endTime + "&queryYN=true"; } //string url = "http://www.safe.gov.cn/AppStructured/view/project_RMBQuery.action?projectBean.startDate=2017-03-15&projectBean.endDate=2017-03-15&queryYN=true"; //发送请求 HttpWebRequest request = (HttpWebRequest)HttpWebRequest.Create(url); request.Method = "GET"; request.ProtocolVersion = HttpVersion.Version11; request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36"; request.Accept = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; request.Timeout = 100000; request.Headers.Add("Accept-Encoding", "gzip, deflate"); request.Headers.Add("Accept-Language", "zh-CN,zh;q=0.8"); request.Headers.Add("Accept-Charset", "GBK,utf-8;q=0.7,*;q=0.3"); request.CookieContainer = new CookieContainer(); //接收请求 HttpWebResponse response = (HttpWebResponse)request.GetResponse(); Stream stream = response.GetResponseStream(); string resultStr = ""; //返回字符串,若翻译失败则返回空字符串 using (StreamReader reader = new StreamReader(stream, Encoding.GetEncoding("utf-8"))) { //开始解释结果 string 
result = reader.ReadToEnd(); if(!string.IsNullOrEmpty(result)) { result = result.Replace("\n",""); //过滤\n转换为空 result = result.Replace("\r", ""); //过滤\r转换为空 result = result.Replace("\t", ""); //过滤\t转换为空 result = result.Replace("\\", ""); //过滤\转换为空 result = Regex.Replace(result, @" ", "", RegexOptions.IgnoreCase); //过滤注释 result = result.Replace(" ", ""); //过滤nbsp标签 string tableHtml = Regex.Match(result, "