Demo entry 6627023

123

   

Submitted by anonymous on Jun 27, 2017 at 17:08
Language: Python 3. Code size: 1.7 kB.

import os
import pandas as pd
import numpy as np
import time
import gensim 
import jieba.analyse

# Seed keywords: the 'key_words' column of the CSV, read as UTF-8.
# The default text mode already applies universal newlines, so the legacy
# 'rU' flag (rejected by open() since Python 3.11) is not needed, and the
# `with` block closes the handle once pandas has parsed the file.
# NOTE(review): `error_bad_lines` is deprecated since pandas 1.3 and removed
# in 2.0; switch to on_bad_lines='skip' when upgrading pandas.
with open('VBA_key_words.csv', 'r', encoding='utf-8') as key_words_file:
	VBA_key_data = pd.read_csv(key_words_file, error_bad_lines=False)
old_key_words = VBA_key_data['key_words']

# Load the saved Word2Vec model from disk and print its summary.
model = gensim.models.Word2Vec.load('w2v_trained_basedata')
print(model)
# Short alias used by built_key_list() for its similarity lookups.
ms = model.most_similar

def extract(similar_list):
	"""Return the first element of every entry in *similar_list*.

	Args:
		similar_list: Iterable of indexable entries; presumably the
			``(word, similarity)`` pairs returned by ``most_similar``
			(verify against the gensim version in use).

	Returns:
		A list holding ``entry[0]`` for each entry, in input order.
	"""
	return [item[0] for item in similar_list]

def built_key_list(old_key_list):
	"""Collect the similar words of every keyword in *old_key_list*.

	For each keyword the module-level ``ms`` lookup is asked for its top 20
	entries, and the words pulled out by ``extract`` are appended to one flat
	list. Duplicates are kept; keywords whose lookup fails are skipped.

	Args:
		old_key_list: Iterable of keywords to look up.

	Returns:
		A flat list of the extracted words, in lookup order.
	"""
	key_list = []
	for item in old_key_list:
		try:
			similar_list = ms(item, topn=20)
		except Exception:
			# Best effort: skip keywords the lookup cannot handle (presumably
			# words missing from the model vocabulary). Unlike a bare
			# ``except`` this lets KeyboardInterrupt/SystemExit propagate.
			continue
		key_list.extend(extract(similar_list))
	return key_list

# Expand the seed keywords, then drop duplicates. dict.fromkeys keeps the
# first-seen order, so the result is identical on every run; the iteration
# order of a set of strings changes between interpreter runs because of
# hash randomization.
new_key_words_test = built_key_list(old_key_words)
new_key_words = list(dict.fromkeys(new_key_words_test))
import pickle

#How to read/write with pickle dump!
#pickle.dump(new_key_words, open("new_key_words.pkl", "wb"))
# obj2 = pickle.load(open("tmp.pkl", "rb"))
#pickle.close
#np.asarray(obj2) prints the items vertically this way

import os  
import codecs  

# How to read/write a TXT file:
# write the keywords to a UTF-8 text file as one space-separated line.
# The `with` block closes the file even if the write fails.
with codecs.open('new_key_words.txt', 'w', 'utf-8') as fp:
	fp.write(' '.join(new_key_words))

# fp = codecs.open('new_key_words.txt', 'r', 'utf-8')
# new_key_words = fp.readline()
# new_key_words = new_key_words.split(' ')
# fp.close()




# f = open('cdays_test.txt')
# result = new_key_words
# for line in f.readlines():
# 	line = line.strip()
# 	if not len(line) or line.startswith('#'):
# 		continue
# result.append(line) 
# open('cdays-4-result.txt','w').write('%s' % '\n'.join(result))        # save to the result file

This snippet took 0.00 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).