Demo entry 6796414

ex3.2

   

Submitted by anonymous on May 14, 2019 at 21:48
Language: Python 3. Code size: 2.4 kB.

import pickle
import numpy as np
import pandas as pd
import lda
import scipy.sparse

book_path = 'C:/Users/shani/Documents/data saience/books.csv'
rating_path = 'C:/Users/shani/Documents/data saience/ratings.csv'

# Load the user ratings table; the code below reads its 'user_id' and
# 'book_id' columns.
ratings = pd.read_csv(rating_path, encoding='utf-8')
# Optional filter, currently disabled:
# ratings = ratings[ratings['rating'] >= 0]
print(ratings.head())

# One row per distinct user / book that appears in the ratings.
users = pd.DataFrame(ratings['user_id'].unique(), columns=['user_id'])
books = pd.DataFrame(ratings['book_id'].unique(), columns=['book_id'])

number_of_books = len(books)
number_of_users = len(users)

# Binary book x user matrix: corpus[b, u] == 1 when user u rated book b.
# The raw id values are used directly as row/column indices, so the matrix
# must reach up to the largest id (+1), not merely the number of distinct
# ids; otherwise any id >= the distinct count (e.g. 1-based ids) raises
# IndexError on assignment. Ids that never occur stay as all-zero rows/columns.
# `int` replaces the `np.int` alias, which was removed in NumPy 1.24.
corpus_shape = (int(ratings['book_id'].max()) + 1,
                int(ratings['user_id'].max()) + 1)
corpus = scipy.sparse.lil_matrix(corpus_shape, dtype=int)

corpus[ratings['book_id'].to_numpy(), ratings['user_id'].to_numpy()] = 1

print(corpus.shape)

# Fit a 20-topic LDA model (500 iterations, fixed random_state) on the corpus.
lda_model = lda.LDA(n_topics=20, n_iter=500, random_state=1)
lda_model.fit(corpus)

# Persist the fitted model to a pickle file.
with open("lda_books.pkl", "wb") as model_out:
    pickle.dump(lda_model, model_out)

# Reload the model from disk; running only this part lets the script reuse a
# previously saved model without fitting again.
# NOTE(review): pickle.load executes arbitrary code from the file, so only
# load a pickle this script produced.
with open("lda_books.pkl", "rb") as model_in:
    lda_model = pickle.load(model_in)


from sklearn.metrics.pairwise import cosine_similarity

# Load the book metadata table and show its first rows.
book = pd.read_csv(book_path, encoding='utf-8')
book_preview = book.head()
print(book_preview)


def find_similar_books(book, lda_model, sample_movie, recommendation_size):
    '''Return the books whose topic vectors are closest to a given title.

    A row index of ``lda_model.doc_topic_`` is treated as a ``book_id``:
    the query row is selected by the book's ``book_id`` and each result row
    is translated back to a title through the ``book_id`` column.

    Receives:
        book: DataFrame with 'book_id' and 'original_title' columns
        lda_model: fitted model exposing ``doc_topic_`` (one row per document)
        sample_movie: original title of the query book (parameter name kept
            for backward compatibility)
        recommendation_size: maximum number of results to return

    Returns:
        None when the title is not in ``book`` or its book_id has no row in
        the model; otherwise a list of ``(original_title, similarity)``
        tuples ordered from most to least similar. The query book is not
        filtered out of the results.
    '''
    doc_topic = lda_model.doc_topic_

    matching_ids = book.loc[book['original_title'] == sample_movie, 'book_id'].to_numpy()
    if len(matching_ids) == 0:
        return None  # didn't find the book.

    query_id = matching_ids[0]  # first match wins when titles repeat
    if not 0 <= query_id < doc_topic.shape[0]:
        return None  # the model has no row for this book_id

    # cosine similarity between the query book's topic vector and every row
    query_vector = doc_topic[query_id, :].reshape(1, -1)
    similarity = cosine_similarity(query_vector, doc_topic)[0]

    ranked_rows = similarity.argsort()[::-1]  # most similar first

    # Titles must be looked up by book_id. Indexing book['original_title']
    # with a bare integer uses the DataFrame's own row labels, which are not
    # the book ids, and so returns the wrong title.
    title_by_id = book.drop_duplicates('book_id').set_index('book_id')['original_title']

    recommended_book = []
    for row in ranked_rows:
        if len(recommended_book) >= recommendation_size:
            break
        if row in title_by_id.index:  # skip rows with no book metadata
            recommended_book.append((title_by_id.loc[row], similarity[row]))
    return recommended_book

# Demo: print up to 10 recommendations for "The Secret Garden"
# (prints None when the title is not found).
similar_books = find_similar_books(book, lda_model, "The Secret Garden", 10)
print(similar_books)

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).