Demo entry 5851671

py

   

Submitted by wang on Jul 19, 2016 at 20:06
Language: Python. Code size: 2.8 kB.

#coding: utf-8

import tushare as ts
import pandas as pd
import os
import sys
# Python 2 only: force the process-wide default string encoding to UTF-8 so
# implicit str/unicode conversions of non-ASCII text do not fail.
# ``reload`` is needed because ``site`` removes ``sys.setdefaultencoding``
# at start-up. Python 3 has no builtin ``reload`` and already defaults to
# UTF-8, so the hack is skipped there instead of raising NameError.
if sys.version_info[0] == 2:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf8')

# Local cache of the CSI 300 stock codes: when the Excel file is not present
# in the project directory, fetch the constituents with tushare and store
# only their ``code`` column on disk.
if not os.path.isfile('hs300_code.xlsx'):
    constituents = ts.get_hs300s()
    constituents[['code']].to_excel('./hs300_code.xlsx')

# Load the cached CSI 300 stock codes into a DataFrame.
hs300_df = pd.read_excel('hs300_code.xlsx')

# Build whichever CSI 300 workbooks are missing from the project directory.
# Each one is the merge of the CSI 300 code table with an all-A-share
# workbook (``pd.merge`` defaults: inner join on the columns both share):
#   * hs300_factors.xlsx <- A_factors.xlsx (financial factors)
#   * hs300_earning.xlsx <- A.xls (presumably per-stock return ratios --
#     TODO confirm against the workbook)
for cached_name, source_name, output_path in (
        ('hs300_factors.xlsx', 'A_factors.xlsx', './hs300_factors.xlsx'),
        ('hs300_earning.xlsx', 'A.xls', './hs300_earning.xlsx')):
    if os.path.isfile(cached_name):
        continue
    all_a_shares = pd.read_excel(source_name)
    pd.merge(hs300_df, all_a_shares).to_excel(output_path)

# Always work from the on-disk copies, whether freshly written or cached.
hs300_earning = pd.read_excel('hs300_earning.xlsx')
hs300_factors = pd.read_excel('hs300_factors.xlsx')
# Sort the factor rows by date so that the positional slicing done later
# walks through the table in date order.
# ``DataFrame.sort(columns=...)`` was deprecated in pandas 0.17 and removed
# in 0.20; ``sort_values(by=...)`` is the equivalent call.
hs300_factors.sort_values(by='date', inplace=True)

# To regenerate the sorted Excel file, uncomment the next line.
# hs300_factors.to_excel('./hs300_factors.xlsx')


# Index the earnings table by stock code to make per-stock lookups easy.
hs300_earning.set_index('code', inplace=True, drop=True)


# Factor-sorted group returns.
#
# ``hs300_factors`` (already sorted by date) is processed in consecutive
# slices of STOCKS_PER_MONTH rows -- the code treats each slice as one
# month. Within a month, for every factor column, the rows are ranked by
# that factor and cut into buckets of STOCKS_PER_GROUP rows. Each bucket
# gets its market-cap-weighted average return in a new
# ``earning_ratio_avg`` column, and all buckets are written to
# ./hs300_factors_result.xlsx.
#
# Fixes relative to the previous version:
#   * ``.lco`` typo (AttributeError on the first pass) removed;
#   * the factor sort is actually applied (its result used to be discarded);
#   * every factor sees the full month (the month frame used to be consumed
#     while processing the first factor, leaving nothing for the others);
#   * the weighted average is computed per bucket instead of accumulating
#     across buckets, and is normalised by the bucket's total market cap
#     rather than by a per-code groupby Series;
#   * each stock's market cap comes from its own row in the bucket (the
#     factor table is not indexed by code, so ``.loc[code]`` could not work);
#   * ``//`` keeps the loop bounds integral on Python 3 as well as Python 2;
#   * buckets are collected in a list and concatenated once
#     (``DataFrame.append`` is quadratic in a loop and gone in pandas 2).
#
# NOTE(review): output rows are not tagged with the factor or bucket number;
# they appear in month -> factor -> bucket order. Add columns if downstream
# analysis needs to tell them apart.

STOCKS_PER_MONTH = 300   # rows per monthly slice
STOCKS_PER_GROUP = 60    # rows per factor-ranked bucket
# Columns from this position on are treated as factors.
# NOTE(review): assumes the first four columns are identifiers -- confirm
# against the layout of hs300_factors.xlsx.
FIRST_FACTOR_COLUMN = 4
# Position (after ``code`` became the index) of the earnings column that
# belongs to the first month. NOTE(review): magic offset taken from the
# original script -- confirm against hs300_earning.xlsx.
EARNING_COLUMN_OFFSET = 146

group_frames = []

for i in range(len(hs300_factors) // STOCKS_PER_MONTH):
    month_start = i * STOCKS_PER_MONTH
    month_factors = hs300_factors.iloc[month_start:month_start + STOCKS_PER_MONTH]
    # Returns of every stock for this month, indexed by stock code.
    month_returns = hs300_earning.iloc[:, i + EARNING_COLUMN_OFFSET]

    for factor in month_factors.columns[FIRST_FACTOR_COLUMN:]:
        # ``sort_values`` returns a new frame, so ``month_factors`` stays
        # intact for the next factor.
        ranked = month_factors.sort_values(by=factor)

        for j in range(len(ranked) // STOCKS_PER_GROUP):
            group_start = j * STOCKS_PER_GROUP
            # Copy so that adding a column does not write into a slice.
            group = ranked.iloc[group_start:group_start + STOCKS_PER_GROUP].copy()

            market_caps = group['market_cap'].values
            market_cap_sum = market_caps.sum()
            # Look the bucket's stocks up by code; plain arrays keep the
            # arithmetic positional and let a missing return show up as
            # NaN in the average instead of being silently skipped.
            stock_returns = month_returns.loc[group['code'].values].values
            earning_ratio_avg = (stock_returns * market_caps).sum() / market_cap_sum

            # Same value on every row of the bucket.
            group['earning_ratio_avg'] = earning_ratio_avg
            group_frames.append(group)

# ``pd.concat`` rejects an empty list; fall back to an empty frame so the
# output file is still written when there is less than one full month.
if group_frames:
    hs300_factors_result = pd.concat(group_frames)
else:
    hs300_factors_result = pd.DataFrame()

hs300_factors_result.to_excel('./hs300_factors_result.xlsx')

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).