Demo entry 6602500

spider

   

Submitted by anonymous on Jun 05, 2017 at 05:23
Language: Python 3. Code size: 2.8 kB.

# Note: supply your own student ID and password to login() before scraping.
# -*-encoding:utf-8-*-
# coding=utf-8
__author__ = 'ysc'
import requests
#import csv
from bs4 import BeautifulSoup

class ScrapeGrade:
    """Log in through a CAS-style login form and dump a listing page to a file.

    A single ``requests.Session`` is shared by all requests so that cookies
    set during :meth:`login` are sent again by :meth:`Print`.

    NOTE(review): despite the class name, the page fetched by :meth:`Print`
    is ``findAllBroadcastMessageAction.do`` -- presumably a message listing
    rather than grades; confirm against the live site.
    """

    # Endpoints used when the caller does not supply its own.
    DEFAULT_AUTH_URL = "http://ids.xidian.edu.cn/authserver/login?service=http%3A%2F%2Fjwxt.xidian.edu.cn%2Fcaslogin.jsp"
    DEFAULT_LOG_URL = "http://jwxt.xidian.edu.cn/caslogin.jsp"
    # Page scraped by Print().
    MESSAGES_URL = "http://yjsxt.xidian.edu.cn/info/findAllBroadcastMessageAction.do?flag=findAll"
    # A blank line is written to the output after this many values.
    VALUES_PER_RECORD = 6

    def __init__(self, auth_url=None, log_url=None):
        """Create the scraper and its HTTP session.

        Args:
            auth_url: URL of the login form; falls back to DEFAULT_AUTH_URL
                when omitted or empty.
            log_url: URL requested right after the login form is posted;
                falls back to DEFAULT_LOG_URL when omitted or empty.
        """
        # Each URL gets its default independently, so supplying only one of
        # them no longer discards it or leaves the other set to None.
        self.auth_url = auth_url or self.DEFAULT_AUTH_URL
        self.log_url = log_url or self.DEFAULT_LOG_URL
        self.session = requests.Session()

    @staticmethod
    def _hidden_field(form, name):
        """Return the ``value`` attribute of the element named *name* in *form*."""
        field = form.find(attrs={"name": name})
        value = field.get("value") if field is not None else None
        if value is None:
            raise ValueError(
                "login page has no %r field with a value; "
                "the page layout may have changed" % name
            )
        return value

    def login(self, id='学号', password='密码'):
        """Submit the login form and then request ``log_url``.

        The login page is fetched first because the form carries two hidden
        fields (``lt`` and ``execution``) that must be echoed back in the POST.

        Args:
            id: account name sent as the ``username`` form field.
            password: password sent as the ``password`` form field.

        Raises:
            ValueError: if the login page lacks the ``lt`` or ``execution``
                field.
        """
        login_page = self.session.get(self.auth_url)
        form = BeautifulSoup(login_page.text, "html.parser")
        params = {
            'username': id,
            'password': password,
            "submit": "",
            "lt": self._hidden_field(form, "lt"),
            "execution": self._hidden_field(form, "execution"),
            "_eventId": "submit",
            "rmShown": '1',
        }
        headers = {
            'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
            'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
            "Accept-Encoding": "gzip, deflate",
            # The referer is the page the form was loaded from, so it follows
            # a custom auth_url instead of always naming the default one.
            "Referer": self.auth_url,
            "Content-Type": "application/x-www-form-urlencoded",
        }
        self.session.post(self.auth_url, data=params, headers=headers)
        # Requested only for its effect on the session; the body is unused.
        self.session.get(self.log_url)

    def Print(self, path='./results.txt'):
        """Fetch the listing page, print every ``<li>`` value and save them.

        Each value has its surrounding whitespace stripped and all spaces,
        newlines, tabs and carriage returns removed. Values are written
        comma-terminated, with a blank line after every VALUES_PER_RECORD
        values.

        Args:
            path: output file; it is overwritten and encoded as UTF-8.

        Raises:
            ValueError: if the page contains no ``<table class="row">``.
        """
        page = self.session.get(self.MESSAGES_URL)
        soup = BeautifulSoup(page.text, "html.parser")
        table = soup.find("table", {"class": "row"})
        if table is None:
            # Checked before the file is opened so a failed scrape does not
            # truncate an earlier results file.
            raise ValueError(
                "no <table class=\"row\"> found in the page; "
                "login may have failed or the page layout changed"
            )

        # Original note: the values were meant to be processed and stored in
        # Redis; this implementation only writes them to a text file.
        count = 0
        # An explicit encoding keeps non-ASCII text from failing on platforms
        # whose default encoding cannot represent it; `with` closes the file
        # even if an error occurs part-way through.
        with open(path, 'wt', encoding='utf-8') as out:
            for cell in table.find_all("td"):
                for item in cell.find_all("li"):
                    value = item.get_text().strip()
                    for ch in (' ', '\n', '\t', '\r'):
                        value = value.replace(ch, '')
                    print(value)
                    out.write(value)
                    out.write(',')
                    count += 1
                    if count % self.VALUES_PER_RECORD == 0:
                        out.write('\n\n')


if __name__ == '__main__':
    import getpass
    import os

    # Credentials are taken from the environment, or prompted for, so that no
    # real account details are stored in the source file.
    student_id = os.environ.get('XIDIAN_ID') or input('Student ID: ')
    password = os.environ.get('XIDIAN_PASSWORD') or getpass.getpass('Password: ')

    # Create the scraper with its default endpoints.
    sg = ScrapeGrade()
    # Log in with the supplied student ID and password.
    sg.login(id=student_id, password=password)
    sg.Print()

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).