Demo entry 6770192

py

   

Submitted by anonymous on Nov 12, 2018 at 09:21
Language: Python. Code size: 846 Bytes.

采集万方网站专利数据代码
# 导入采集网站数据需要的相关库
import urllib.request  
import time
import socket
# Download a range of search-result pages from the Wanfang patent site and
# save each page's HTML to its own local text file (test<page>.txt).

# The search URL is assembled as URL_PREFIX + <page number> + URL_SUFFIX.
# The query part is already percent-encoded; it includes the date range
# 2005/01/01-2005/12/31 and asks for 50 results per page.
URL_PREFIX = "http://patentool.wanfangdata.com.cn/Patent/Search?Query=%E7%94%B3%E8%AF%B7%E6%97%A5%E6%9C%9F%3A2005%2F01%2F01-2005%2F12%2F31%20%E4%B8%93%E5%88%A9%E6%9D%83%E4%BA%BA%3A%E8%82%A1%E4%BB%BD%E6%9C%89%E9%99%90%E5%85%AC%E5%8F%B8%20%E4%B8%93%E5%88%A9%E7%B1%BB%E5%9E%8B%3A%E5%8F%91%E6%98%8E%E4%B8%93%E5%88%A9&Page="
URL_SUFFIX = "&Count=50&SortBy=sortby%20relevance&DisplayWay=list"

# Process-wide socket timeout (seconds) so a stalled request cannot hang
# the run indefinitely.
socket.setdefaulttimeout(300)

# Pause (seconds) before every request to avoid hammering the server.
REQUEST_DELAY = 2

# Fetch pages 291..470 inclusive, one request per page.
for page in range(291, 471):
    page_url = URL_PREFIX + str(page) + URL_SUFFIX
    time.sleep(REQUEST_DELAY)

    # The context manager closes the HTTP response even if read() raises.
    with urllib.request.urlopen(page_url) as response:
        raw_html = response.read()

    # Decode as UTF-8; undecodable bytes become U+FFFD instead of raising.
    html_text = raw_html.decode('UTF-8', 'replace')

    # Write the page to its own file. The original only closed the last
    # file (close() was outside the loop); `with` closes and flushes every
    # file as soon as it has been written.
    out_name = "test" + str(page) + ".txt"
    with open(out_name, "w", encoding='utf-8') as out_file:
        out_file.write(html_text)

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).