Demo entry 6747837

py

   

Submitted by anonymous on Jun 06, 2018 at 14:58
Language: Python 3. Code size: 1.6 kB.

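# Data tenderer for "train.txt".
# Writes four files: "users.pkl", "items.pkl", "scores.pkl" and "matrix.pkl".
# Returns a tuple with the lengths of the saved users, items and scores lists
# (the lists are parallel: one entry per rating record with a non-zero score):
# return (len(users), len(items), len(scores))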
def tender_data_train(filename):
    """Parse a training file and pickle its rating records.

    The file is read as a sequence of user blocks. Each block starts with a
    header line ``<user id>|<number of ratings>`` and is followed by that many
    rating lines of the form ``<item id>  <score>`` (two-space separator).
    Records whose score is 0 are dropped; every other record contributes one
    entry to each of the parallel ``users``, ``items`` and ``scores`` lists.

    Side effects: writes "users.pkl", "items.pkl", "scores.pkl" and
    "matrix.pkl" through ``save_obj_to_pickle``.

    Args:
        filename: path of the training file to read.

    Returns:
        A tuple ``(len(users), len(items), len(scores))`` with the lengths of
        the three saved lists (they are always equal).

    Raises:
        ValueError: if a field is not an integer or the file ends in the
            middle of a user's rating block.
        IndexError: if a header or rating line has fewer than two fields.
    """
    users = []
    items = []
    scores = []

    # The context manager guarantees the file is closed even when parsing fails.
    with open(filename) as f:
        for line in f:
            header = line.strip()
            if not header:
                # Tolerate blank lines between blocks / at the end of the file
                # instead of crashing on int("").
                continue

            header_fields = header.split("|")
            user_id = int(header_fields[0])
            num_ratings = int(header_fields[1])  # rating lines that follow

            for _ in range(num_ratings):
                subline = next(f, "")
                if not subline:
                    raise ValueError(
                        "unexpected end of file: user {} declares {} ratings".format(
                            user_id, num_ratings
                        )
                    )

                rating_fields = subline.strip().split("  ")
                item_id = int(rating_fields[0])
                score = int(rating_fields[1])

                if score != 0:  # valid score, save record to the lists
                    users.append(user_id)
                    items.append(item_id)
                    scores.append(score)

    save_obj_to_pickle(users, "users.pkl")
    save_obj_to_pickle(items, "items.pkl")
    save_obj_to_pickle(scores, "scores.pkl")

    # NOTE(review): totalItems and totalUsers are not defined in this function;
    # presumably they are module-level values set elsewhere -- confirm.
    # NOTE(review): asfptype() appears to be deprecated/removed in recent SciPy
    # releases -- confirm against the installed version.
    save_obj_to_pickle(sparse.coo_matrix((scores, (items, users)), shape=(totalItems, totalUsers)).asfptype(),
                       "matrix.pkl")

    return (len(users), len(items), len(scores))

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).