Demo entry 6347735

test

   

Submitted by anonymous on Feb 16, 2017 at 12:48
Language: Python. Code size: 16.5 kB.

# -*- coding: utf-8 -*-

from selenium import webdriver
import os
import re
import pandas as pd
import os.path
import math
import datetime
import glob

def amedas(year_list,month_list,region,point):
    """Scrape daily observations for one station and merge them into its CSV.

    For every (year, month) pair the JMA ``daily_a1.php`` page is loaded
    through the module-level Selenium ``driver``.  Each ``<tr class='mtx'>``
    row after the first three is parsed into one record: year, month, day
    and five numeric cells (positions 0, 3, 4, 5 and 12 of the data line;
    presumably precipitation, mean/max/min temperature and sunshine
    duration, judging by the original variable names).  Cells holding one
    of the "no data" marks are stored as 0.

    The scraped rows are left-merged with ``<station>_ave.csv`` (folder
    ``data_ave``) and with the module-level ``table_tim`` frame, then
    appended to ``<station>.csv`` in folder ``data``; only dates that are
    not already present in that file are added.

    Args:
        year_list: years to fetch, paired element-wise with ``month_list``.
        month_list: months to fetch.
        region: value for the ``prec_no`` URL parameter.
        point: station code string (``block_no``); double quotes are stripped.

    Returns:
        list of str: one ``region+point+year+month`` entry per month whose
        parsing stopped early because a row could not be read.

    Raises:
        ValueError: if no row at all could be scraped (the original code
            failed in this case too, on an unbound name or a column-count
            mismatch).

    Side effects: changes the current working directory.
    """
    missing_marks=(u"--",u"×",u"///")

    def to_number(cell):
        # "No data" marks are recorded as 0, everything else must parse as a float.
        if cell in missing_marks:
            return 0.0
        return float(cell)

    station=str(point.replace('"',''))
    base_dir=os.path.dirname(__file__)

    tit=None
    rows=[]
    rst_log=[]

    for (year,month) in zip(year_list,month_list):

        url="http://www.data.jma.go.jp/obd/stats/etrn/view/daily_a1.php?prec_no="+str(region)+"&block_no="+station+"&year="+str(year)+"&month="+str(month)+"&day=&view=p1"
        driver.get(url)

        tmp_matrix=driver.find_elements_by_xpath("//tr[@class='mtx']")

        if tit is None:
            # Column titles are taken once, from the header rows of the first page.
            head_0=tmp_matrix[0].text.split(" ")
            head_1=tmp_matrix[1].text.split(" ")

            tit=[u"年",u"月",u"日",head_0[1]]
            tit.extend(head_1[i]+head_0[2] for i in range(2,5))
            tit.append(head_0[4].replace("\n",""))

        for row in tmp_matrix[3:]:
            try:
                # Drop the annotation characters ] * ) $ and collapse double whitespace
                # so the cells can be split on single spaces.
                text=re.sub(r'[]*)*$]', '', row.text)
                text=re.sub(r'\s{2}', ' ', text)

                parts=text.split("\n")
                day=int(parts[0])
                cells=parts[1].split(" ")

                rows.append([year,month,day,
                             to_number(cells[0]),
                             to_number(cells[3]),
                             to_number(cells[4]),
                             to_number(cells[5]),
                             to_number(cells[12])])
            except Exception:
                # Best effort: remember the month and skip its remaining rows.
                # (Exception, not a bare except, so Ctrl-C still stops the run.)
                rst_log.append(str(region)+str(point)+str(year)+str(month))
                break

    if not rows:
        raise ValueError("no daily rows could be scraped for station "+station)

    result_now=pd.DataFrame(rows,columns=tit)

    os.chdir(base_dir+r"\data_ave")
    data_ave=pd.read_csv(station+'_ave.csv',encoding="shift-jis")

    result_now=pd.merge(result_now,data_ave,on=[u'月',u'日'],how='left')
    result_now=pd.merge(result_now,table_tim,on=[u'年',u'月',u'日'],how='left')

    os.chdir(base_dir+r"\data")
    csv_name=station+'.csv'

    if os.path.isfile(csv_name):
        old_data=pd.read_csv(csv_name,encoding="shift-jis")

        # Keep only the dates that exist in the new data but not in the old file.
        koshin=pd.merge(old_data,result_now,how='outer',on=[u'年',u'月',u'日'],indicator=True)
        koshin=koshin[koshin._merge=='right_only']
        # Discard the old file's copies of the overlapping columns (suffix ending in "x") ...
        koshin=koshin.loc[:,~koshin.columns.str.endswith(u"x")]
        # ... and the trailing _merge indicator column.
        koshin=koshin.iloc[:,0:(len(koshin.columns)-1)]

        koshin.columns=old_data.columns
        result_now=pd.concat([old_data, koshin])

    result_now.to_csv(csv_name, index=False,encoding='shift-jis')

    return rst_log





def other(year_list,month_list,region,point):
    """Scrape daily observations for one non-AMeDAS station and merge them into its CSV.

    For every (year, month) pair the JMA ``daily_s1.php`` page is loaded
    through the module-level Selenium ``driver``.  Each ``<tr class='mtx'>``
    row after the first four is parsed into one record: year, month, day
    and five numeric cells (positions 2, 5, 6, 7 and 15 of the data line;
    presumably precipitation, mean/max/min temperature and sunshine
    duration, judging by the original variable names).  Cells holding one
    of the "no data" marks are stored as 0.

    The scraped rows are left-merged with ``<station>_ave.csv`` (folder
    ``data_ave``) and with the module-level ``table_tim`` frame, then
    appended to ``<station>.csv`` in folder ``data``; only dates that are
    not already present in that file are added.

    Args:
        year_list: years to fetch, paired element-wise with ``month_list``.
        month_list: months to fetch.
        region: value for the ``prec_no`` URL parameter.
        point: station code string (``block_no``); double quotes are stripped.

    Returns:
        list of str: one ``region+point+year+month`` entry per month whose
        parsing stopped early because a row could not be read.

    Raises:
        ValueError: if no row at all could be scraped (the original code
            failed in this case too, on an unbound name or a column-count
            mismatch).

    Side effects: changes the current working directory.
    """
    missing_marks=(u"--",u"×",u"///")

    def to_number(cell):
        # "No data" marks are recorded as 0, everything else must parse as a float.
        if cell in missing_marks:
            return 0.0
        return float(cell)

    station=str(point.replace('"',''))
    base_dir=os.path.dirname(__file__)

    tit=None
    rows=[]
    rst_log=[]

    for (year,month) in zip(year_list,month_list):

        url="http://www.data.jma.go.jp/obd/stats/etrn/view/daily_s1.php?prec_no="+str(region)+"&block_no="+station+"&year="+str(year)+"&month="+str(month)+"&day=&view=p1"
        driver.get(url)

        tmp_matrix=driver.find_elements_by_xpath("//tr[@class='mtx']")

        if tit is None:
            # Column titles are taken once, from the header rows of the first page.
            head_0=tmp_matrix[0].text.split(" ")
            head_1=tmp_matrix[2].text.split(" ")

            tit=[u"年",u"月",u"日",head_0[2]]
            tit.extend(head_1[i]+head_0[3] for i in range(4,7))
            tit.append(head_0[6].replace("\n",""))

        for row in tmp_matrix[4:]:
            try:
                # Drop the annotation characters ] * ) $ and collapse double whitespace
                # so the cells can be split on single spaces.
                text=re.sub(r'[]*)*$]', '', row.text)
                text=re.sub(r'\s{2}', ' ', text)

                parts=text.split("\n")
                day=int(parts[0])
                cells=parts[1].split(" ")

                rows.append([year,month,day,
                             to_number(cells[2]),
                             to_number(cells[5]),
                             to_number(cells[6]),
                             to_number(cells[7]),
                             to_number(cells[15])])
            except Exception:
                # Best effort: remember the month and skip its remaining rows.
                # (Exception, not a bare except, so Ctrl-C still stops the run.)
                rst_log.append(str(region)+str(point)+str(year)+str(month))
                break

    if not rows:
        raise ValueError("no daily rows could be scraped for station "+station)

    result_now=pd.DataFrame(rows,columns=tit)

    os.chdir(base_dir+r"\data_ave")
    data_ave=pd.read_csv(station+'_ave.csv',encoding="shift-jis")

    result_now=pd.merge(result_now,data_ave,on=[u'月',u'日'],how='left')
    result_now=pd.merge(result_now,table_tim,on=[u'年',u'月',u'日'],how='left')

    os.chdir(base_dir+r"\data")
    csv_name=station+'.csv'

    if os.path.isfile(csv_name):
        old_data=pd.read_csv(csv_name,encoding="shift-jis")

        # Keep only the dates that exist in the new data but not in the old file.
        koshin=pd.merge(old_data,result_now,how='outer',on=[u'年',u'月',u'日'],indicator=True)
        koshin=koshin[koshin._merge=='right_only']
        # Discard the old file's copies of the overlapping columns (suffix ending in "x") ...
        koshin=koshin.loc[:,~koshin.columns.str.endswith(u"x")]
        # ... and the trailing _merge indicator column.
        koshin=koshin.iloc[:,0:(len(koshin.columns)-1)]

        koshin.columns=old_data.columns
        result_now=pd.concat([old_data, koshin])

    result_now.to_csv(csv_name, index=False,encoding='shift-jis')

    return rst_log



def amedas_ave(region,point,name):
    """Scrape the per-day average table for one station and save it as ``<name>_ave.csv``.

    The JMA ``nml_amd_d.php`` page is loaded once per month (1-12) through
    the module-level Selenium ``driver``.  From every ``<tr class='mtx'>``
    row after the first three, the first six space-separated fields are
    kept (with the day-suffix character removed) and prefixed with the
    month number.  The URL also embeds the module-level ``year`` value.

    Args:
        region: value for the ``prec_no`` URL parameter.
        point: station code string (``block_no``); double quotes are stripped.
        name: base name of the output file; double quotes are stripped.

    Side effects: changes the current working directory to ``data_ave``
    and writes the CSV there (Shift-JIS, no index).
    """
    tit=[u"月",u"日",u"降水量(mm)_平均",u"平均気温(℃)_平均",u"最高気温(℃)_平均",u"最低気温(℃)_平均",u"日照時間(h)_平均"]
    field_count=6

    collected=[]
    for month in range(1,13):
        url=("http://www.data.jma.go.jp/obd/stats/etrn/view/nml_amd_d.php?prec_no="+str(region)
             +"&block_no="+str(point.replace('"',''))
             +"&year="+str(year)
             +"&month="+str(month)
             +"&day=&view=p1")
        driver.get(url)

        table_rows=driver.find_elements_by_xpath("//tr[@class='mtx']")

        # The first three matched rows are skipped; the rest are one row per day.
        for table_row in table_rows[3:]:
            fields=table_row.text.split(" ")
            record=[month]
            record.extend(fields[k].replace(u'日','') for k in range(field_count))
            collected.append(record)

    frame=pd.DataFrame(collected)
    frame.columns=tit

    os.chdir(os.path.dirname(__file__)+r"\data_ave")
    frame.to_csv(str(name.replace('"',''))+'_ave.csv', index=False,encoding='shift-jis')



def other_ave(region,point,name):
    """Scrape the per-day average table for one non-AMeDAS station and save it as ``<name>_ave.csv``.

    The JMA ``nml_sfc_d.php`` page is loaded once per month (1-12) through
    the module-level Selenium ``driver``.  From every ``<tr class='mtx'>``
    row after the first three, the first six space-separated fields are
    kept (with the day-suffix character removed) and prefixed with the
    month number.  The URL also embeds the module-level ``year`` value.

    Args:
        region: value for the ``prec_no`` URL parameter.
        point: station code string (``block_no``); double quotes are stripped.
        name: base name of the output file; double quotes are stripped.

    Side effects: changes the current working directory to ``data_ave``
    and writes the CSV there (Shift-JIS, no index).
    """
    tit=[u"月",u"日",u"降水量(mm)_平均",u"平均気温(℃)_平均",u"最高気温(℃)_平均",u"最低気温(℃)_平均",u"日照時間(h)_平均"]
    field_count=6

    collected=[]
    for month in range(1,13):
        url=("http://www.data.jma.go.jp/obd/stats/etrn/view/nml_sfc_d.php?prec_no="+str(region)
             +"&block_no="+str(point.replace('"',''))
             +"&year="+str(year)
             +"&month="+str(month)
             +"&day=&view=p1")
        driver.get(url)

        table_rows=driver.find_elements_by_xpath("//tr[@class='mtx']")

        # The first three matched rows are skipped; the rest are one row per day.
        for table_row in table_rows[3:]:
            fields=table_row.text.split(" ")
            record=[month]
            record.extend(fields[k].replace(u'日','') for k in range(field_count))
            collected.append(record)

    frame=pd.DataFrame(collected)
    frame.columns=tit

    os.chdir(os.path.dirname(__file__)+r"\data_ave")
    frame.to_csv(str(name.replace('"',''))+'_ave.csv', index=False,encoding='shift-jis')

def data_diffe_ave():
    """Write, for every CSV in ``data``, the observed-minus-average values to ``data_diffe_ave``.

    Each input file is expected to contain the five measurement columns,
    their ``_平均`` counterparts, the date columns and the ``時系列``
    column.  The output file keeps the same name and holds the date
    columns, one ``_平均差`` column per measurement and ``時系列``.

    Side effects: changes the current working directory.
    """
    script_dir=os.path.dirname(__file__)
    measures=[u"降水量(mm)",u"平均気温(℃)",u"最高気温(℃)",u"最低気温(℃)",u"日照時間(h)"]

    os.chdir(script_dir+r"\data")

    for csv_name in glob.glob("*.csv"):

        os.chdir(script_dir+r"\data")
        source=pd.read_csv(csv_name,encoding="shift-jis")

        # One single-column frame per measurement: observed value minus its average.
        diff_frames=[
            pd.DataFrame(source[measure]-source[measure+u"_平均"],columns=[measure+u"_平均差"])
            for measure in measures
        ]

        pieces=[source[[u"年",u"月",u"日"]]]
        pieces.extend(diff_frames)
        pieces.append(source[u"時系列"])
        output=pd.concat(pieces,axis=1)

        os.chdir(script_dir+r"\data_diffe_ave")
        output.to_csv(csv_name,index=False,encoding="shift-jis")

def data_integration():
    """Accumulate the daily differences over a moving window for every product code.

    ``conversion_table.csv`` and ``time_table.csv`` are read from the
    parent folder of this script.  For each distinct product code (with
    its accumulation period and time lag) and each CSV name found in
    ``data``, the matching file in ``data_diffe_ave`` is read and, for a
    window that slides one step at a time along the ``時系列`` sequence,
    the five ``_平均差`` columns are summed and rounded to two decimals.
    Each sum is stamped with the date whose sequence number is
    ``window end + time lag`` and the result is written to
    ``data_integration/<product code>_<file name>``.

    Fixes relative to the previous version: the product loop now reads the
    row it is iterating over (it always read row 0 before), and the
    removed pandas ``.ix`` indexer is no longer used.

    Side effects: changes the current working directory.
    """
    base_dir=os.path.dirname(__file__)

    os.chdir(os.path.abspath(os.path.join(base_dir, os.pardir)))
    table_con=pd.read_csv("conversion_table.csv",encoding="shift-jis")
    table_con=table_con.drop_duplicates([u"品目コード"])
    table_con=table_con.loc[:,[u"品目コード",u"積算期間",u"タイムラグ"]]
    table_tim=pd.read_csv("time_table.csv",encoding="shift-jis")

    # File names are listed from "data"; the files themselves are read from "data_diffe_ave".
    os.chdir(base_dir+r"\data")
    flist=glob.glob('*.csv')

    tit=[u"年",u"月",u"日",u"降水量(mm)_積算",u"平均気温(℃)_積算",u"最高気温(℃)_積算",u"最低気温(℃)_積算" ,u"日照時間(h)_積算",u"時系列"]
    diff_cols=[u"降水量(mm)_平均差",u"平均気温(℃)_平均差",u"最高気温(℃)_平均差",u"最低気温(℃)_平均差",u"日照時間(h)_平均差"]

    for row_no in range(len(table_con)):

        p_code=table_con.iloc[row_no,0]
        set_term=table_con.iloc[row_no,1]
        time_lag=table_con.iloc[row_no,2]

        for f in flist:
            os.chdir(base_dir+r"\data_diffe_ave")
            data=pd.read_csv(f,encoding="shift-jis")

            rows=[]
            term=set_term
            date_no=1
            # date_no is the sequence number the current window is stamped with;
            # the loop ends once it has passed the end of the time table.
            while date_no <=(len(table_tim)-1):
                # Window of set_term consecutive sequence numbers ending at term.
                in_window=(data[u"時系列"]<=term)&(data[u"時系列"]>=term-(set_term-1))
                data_sum=data[in_window].loc[:,diff_cols].sum()

                date_no=term+time_lag
                date_row=table_tim[table_tim[u"時系列"]==date_no]

                # Positional access: assumes time_table.csv columns are
                # year, month, day, sequence number in that order - TODO confirm.
                record=[int(date_row.iloc[0,0]),int(date_row.iloc[0,1]),int(date_row.iloc[0,2])]
                record.extend(round(value*100)/100 for value in data_sum)
                record.append(int(date_row.iloc[0,3]))
                rows.append(record)

                term = term + 1

            # Build the frame once instead of concatenating inside the loop.
            result=pd.DataFrame(rows,columns=tit)

            os.chdir(base_dir+r"\data_integration")
            result.to_csv(str(p_code)+"_"+f,index=False,encoding='shift-jis')


#-------------------------------------------------------------main-----------------------------------------------------------

# Work from the script's own folder so the relative file names below resolve.
os.chdir(os.path.dirname(__file__))
today=datetime.datetime.today()
year_now=today.year
month_now=today.month

# Newest month to fetch, encoded as YYYYMM.
trg=year_now*100+month_now

# latest.txt holds the YYYYMM written at the end of a previous run;
# on the very first run the preset file is used instead.
latest_file='latest.txt' if os.path.isfile('latest.txt') else 'latest_preset.txt'
with open(latest_file) as f:
    latest = f.read()

y_set=int(latest[0:4])
m_set=int(latest[4:6])

ylist=[]
mlist=[]

# NOTE: `year` is also read as a module-level global by amedas_ave() and
# other_ave() when they build their URLs, so it must stay defined here.
year=1900
month=1

# Build the list of (year, month) pairs from the stored month up to the current month.
flg_m=0
for y in range(y_set,year_now+1):
    year=y

    for m in range(1,13):

        # Months before the stored start month (first year only) are skipped.
        if flg_m==0 and m_set==m:
            flg_m=1

        if flg_m==1:
            month=m

            if year*100+month<=trg:
                ylist.append(year)
                mlist.append(month)
            else:
                break

os.chdir(os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)))

# table_tim and driver are read as module-level globals by the scraping functions.
table_tim=pd.read_csv("time_table.csv",encoding="shift-jis")

table_con=pd.read_csv("conversion_table.csv",encoding="shift-jis")


table_con=table_con.drop_duplicates([u"観測コード"])
region=table_con[u"地方コード"]
point=table_con[u"観測コード"]

table_con_ave=table_con.drop_duplicates([u"観測コード_平均値"])
region_ave=table_con_ave[u"地方コード_平均値"]
point_ave=table_con_ave[u"観測コード_平均値"]
name_ave=table_con_ave[u"観測コード"]

driver = webdriver.Chrome(r"C:\Python27\chromedriver.exe")
rst_log=pd.DataFrame()

try:
    # NOTE(review): zip() stops at the shortest series, and table_con_ave can be
    # shorter than table_con after the second drop_duplicates - confirm the
    # two tables always line up one-to-one.
    for (r,p,r_ave,p_ave,n_ave) in zip(region,point,region_ave,point_ave,name_ave):

        os.chdir(os.path.dirname(__file__)+r"\data_ave")

        # The averages file is scraped only once per station.
        if not os.path.isfile(str(n_ave.replace('"',''))+'_ave.csv'):

            # Four-character station codes go to the amedas_* scrapers, all others to other_*.
            if len(str(p_ave.replace('"','')))==4:
                amedas_ave(r_ave,p_ave,n_ave)
            else:
                other_ave(r_ave,p_ave,n_ave)

        if len(str(p.replace('"','')))==4:
            log = amedas(ylist,mlist,r,p)
        else:
            log = other(ylist,mlist,r,p)

        # NOTE(review): a station's log is kept only when it has two or more
        # entries, so a single failed month is not reported - confirm intended.
        if len(pd.DataFrame(log))>=2:
            rst_log=pd.concat([rst_log,pd.DataFrame(log)])
finally:
    # Close the browser even when scraping fails part-way.
    driver.quit()


os.chdir(os.path.dirname(__file__))
if len(rst_log)!=0:
    # `datetime` is the module here, so the class must be spelled out
    # (datetime.now() raised AttributeError).
    rst_log.to_csv('error_log'+datetime.datetime.now().strftime("%Y%m%d")+".txt", index=False,header=False,encoding='shift-jis')

# Remember the last fetched month (YYYYMM) as the starting point of the next run.
with open('latest.txt', 'w') as f:
    f.write(str(ylist[-1]*100+mlist[-1]))

data_diffe_ave()

data_integration()

This snippet took 0.03 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).