# Demo entry 6796414

**ex3.2**

Submitted by **anonymous**
on May 14, 2019 at 21:48

Language: Python 3. Code size: 2.4 kB.

"""Book recommendations from an LDA model fitted on a book x user rating matrix.

Run as a script, this trains the model on ``ratings.csv``, pickles it to
``lda_books.pkl``, reloads it and prints the books most similar to
"The Secret Garden".
"""
import pickle

import lda
import numpy as np
import pandas as pd
import scipy.sparse
from sklearn.metrics.pairwise import cosine_similarity

book_path = 'C:/Users/shani/Documents/data saience/books.csv'
rating_path = 'C:/Users/shani/Documents/data saience/ratings.csv'


def build_corpus(ratings):
    """Build the binary book x user matrix that LDA is fitted on.

    Row ``b`` / column ``u`` is 1 when user ``u`` rated book ``b``. Raw ids
    are used directly as matrix indices, so row number == ``book_id``; this
    is what ``find_similar_books`` relies on.

    Args:
        ratings: DataFrame with non-negative integer ``book_id`` and
            ``user_id`` columns.

    Returns:
        A ``scipy.sparse.lil_matrix`` of shape
        ``(max book_id + 1, max user_id + 1)``.

    Raises:
        ValueError: if ``ratings`` is empty or contains a negative id.
    """
    if ratings.empty:
        raise ValueError("ratings is empty; nothing to build a corpus from")

    book_ids = np.asarray(ratings['book_id'])
    user_ids = np.asarray(ratings['user_id'])
    if book_ids.min() < 0 or user_ids.min() < 0:
        # Negative indices would silently wrap around to the end of the matrix.
        raise ValueError("book_id and user_id must be non-negative")

    # Size by the largest id rather than by the number of distinct ids:
    # indexing by raw id needs max + 1 slots, which equals the distinct count
    # only when ids are exactly 0..n-1 (1-based or sparse ids would overflow).
    shape = (int(book_ids.max()) + 1, int(user_ids.max()) + 1)
    # Builtin int replaces the np.int alias, which NumPy 1.24 removed.
    corpus = scipy.sparse.lil_matrix(shape, dtype=int)
    corpus[book_ids, user_ids] = 1
    return corpus


def find_similar_books(book, lda_model, sample_movie, recommendation_size):
    """Return the books whose topic distribution is closest to a given title.

    Args:
        book: catalog DataFrame with ``book_id`` and ``original_title``
            columns.
        lda_model: fitted model whose ``doc_topic_`` array has one row per
            book, with row number equal to ``book_id``.
        sample_movie: ``original_title`` of the book to find neighbours for
            (the parameter name is historical; it is a book title).
        recommendation_size: how many results to return.

    Returns:
        ``None`` if no catalog row has that title. Otherwise a list of
        ``(original_title, cosine_similarity)`` tuples, most similar first.
        The queried book is compared against its own row too, so it normally
        appears first.
    """
    matching_ids = np.array(book['book_id'][book['original_title'] == sample_movie])
    if len(matching_ids) == 0:
        return None  # title not present in the catalog

    query_row = matching_ids[0]  # first match wins when a title is duplicated
    query_vector = lda_model.doc_topic_[query_row, :]

    # Similarity between the query book and every row of the model.
    similarity = cosine_similarity(query_vector.reshape(1, -1), lda_model.doc_topic_)
    ranked_rows = similarity.argsort()[0][::-1]  # descending similarity

    # Model rows are book ids, so titles must be looked up by book_id and not
    # by the DataFrame's own index label (the two only coincide when book_id
    # happens to equal the row label).
    titles_by_id = book.drop_duplicates('book_id').set_index('book_id')['original_title']
    # Skip rows that have no catalog entry (e.g. unused id slots in the matrix).
    known_rows = [row for row in ranked_rows if row in titles_by_id.index]

    return [(titles_by_id.loc[row], similarity[0, row])
            for row in known_rows[:recommendation_size]]


def main():
    """Train, persist, reload and query the LDA book model."""
    # load user ratings
    ratings = pd.read_csv(rating_path, encoding='utf-8')
    # NOTE: a filter keeping only rows with rating >= 0 was left disabled here.
    print(ratings.head())

    corpus = build_corpus(ratings)
    print(corpus.shape)

    # run LDA
    lda_model = lda.LDA(n_topics=20, n_iter=500, random_state=1)
    lda_model.fit(corpus)

    # save on pkl
    with open("lda_books.pkl", "wb") as pic_file:
        pickle.dump(lda_model, pic_file)

    # use lda without running
    # NOTE: pickle executes arbitrary code on load; only open files you wrote.
    with open("lda_books.pkl", "rb") as pic_file:
        lda_model = pickle.load(pic_file)

    # load list of books
    book = pd.read_csv(book_path, encoding='utf-8')
    print(book.head())

    print(find_similar_books(book, lda_model, "The Secret Garden", 10))


if __name__ == "__main__":
    main()

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.