Demo entry 6627092

123

   

Submitted by anonymous on Jun 27, 2017 at 21:27
Language: Python 3. Code size: 1.8 kB.

import os
import pandas as pd
import numpy as np
import time
import os
import gensim
import jieba.analyse

# Load both master sheets as UTF-8 text.
# Each file is opened in a `with` block so the handle is closed as soon as
# pandas has parsed it (the original left both handles open).
# Plain 'r' replaces 'rU': universal newlines are already the default for text
# mode in Python 3, and the 'U' flag is rejected by Python 3.11+.
# NOTE(review): `error_bad_lines` was deprecated in pandas 1.3 and removed in
# 2.0 in favour of `on_bad_lines='skip'`; switch once the minimum supported
# pandas version allows it.
with open('MB Online master sheet LV.csv', 'r', encoding='utf-8') as online_file:
	online_data_read = pd.read_csv(online_file, error_bad_lines=False)
with open('CAC Mastersheet LV.csv', 'r', encoding='utf-8') as cac_file:
	cac_data_read = pd.read_csv(cac_file, error_bad_lines=False)



from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
import jieba
# Replace NaN cells with the placeholder string 'Error'.
online_data = online_data_read.replace(np.nan,'Error', regex=True)
cac_data = cac_data_read.replace(np.nan,'Error', regex=True)

# Online rows contribute "Title。Description"; CAC rows contribute only
# "Description". Both are stacked into one Series with a fresh 0..n-1 index.
online_data = online_data["Title"]+ "。" + online_data["Description"]
cac_data = cac_data["Description"]
X = pd.concat([cac_data,online_data],ignore_index=True)

# Build the whole corpus in one pass, prefixing every document with '/'.
# This yields exactly the same text as the previous index-based `+=` loop
# (including the leading '/'), without re-copying the growing string on
# every iteration.
X_tog = ''.join('/' + text for text in X)

# Characters treated as fragment boundaries.
# NOTE(review): ',' and '?' each appear twice; the duplicates were presumably
# full-width variants that got normalised somewhere along the way. Confirm
# against the original script before relying on this list.
replace_list = [',',',','!','?','?','。',' ']

# Map every separator to '/' in a single pass. All entries must be single
# characters for str.maketrans; the replacement '/' is not itself a separator,
# so this is equivalent to replacing them one after another.
X_tog = X_tog.translate(str.maketrans({separator: '/' for separator in replace_list}))

# Split into fragments; empty strings (from adjacent separators and the
# leading '/') are kept, as before.
X_tog = X_tog.split('/')
from collections import defaultdict
import sys



# Reference timestamp: the elapsed time since this point is printed after
# the tokenization step further down.
t1 = time.time()
import jieba
import jieba.analyse
import re


def cut_words_w2v(test_contents):
	"""Tokenize each text with jieba and return one token list per input.

	Args:
		test_contents: Iterable of text fragments to segment.

	Returns:
		A list with one entry per input item, in input order. Each entry is
		the list of tokens produced by ``jieba.cut``; if segmentation of an
		item fails, that entry is ``["Error"]`` instead.
	"""
	target = []
	for content in test_contents:
		try:
			# jieba.cut is lazy, so the result must be materialised inside
			# the try block for tokenization errors to be caught here.
			tokens = list(jieba.cut(content))
		except Exception:
			# Best-effort fallback for items that cannot be segmented.
			# `Exception` (rather than a bare except) lets KeyboardInterrupt
			# and SystemExit propagate so a long run can still be aborted.
			tokens = ["Error"]
		target.append(tokens)
	return target
# Segment every fragment, then report the time elapsed since t1 was taken.
X_cut_w2v = cut_words_w2v(X_tog)
print("Cut_w2v finished in: %.3fs" % (time.time() - t1))

# Train Word2Vec on the tokenized fragments (min_count=1) and save the model
# to disk.
model = gensim.models.Word2Vec(
	X_cut_w2v,
	min_count=1,
)
model.save('w2v_trained_basedata')

# Query the 20 nearest neighbours of '发动机' (engine). The return value is
# not stored or printed here. Sample output from an earlier run:
#   [('变速箱', 0.7456176280975342), ('涡轮', 0.6495620012283325),
#    ('发电机', 0.6316986083984375), ('排气管', 0.6208968758583069),
#    ('皮带', 0.5802627801895142), ('方向机', 0.5778461694717407),
#    ('发送机', 0.5695295333862305), ('ESP', 0.5532896518707275),
#    ('分动箱', 0.5531634092330933), ('仪表盘', 0.5469154715538025)]
model.most_similar('发动机', topn=20)

# Plain dict built by pairing model.index2word with model.syn0 entry by entry.
w2v = {
	word: vector
	for word, vector in zip(model.index2word, model.syn0)
}

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).