Demo entry 6782129

houchaoqun

   

Submitted by anonymous on Jan 08, 2019 at 16:53
Language: Python. Code size: 3.5 kB.

# -*- coding: utf-8 -*-
#
# Copyright © dawnranger.
#
# 2018-05-08 10:15 <dawnranger123@gmail.com>
#
# Distributed under terms of the MIT license.
from __future__ import division, print_function
import numpy as np
import torch
from torch.utils.data import Dataset


def load_mnist(path='./data/mnist.npz'):
    """Load the MNIST archive and return all samples as flat, scaled vectors.

    The train and test splits stored in the ``.npz`` archive are concatenated
    (train first), every image is flattened to one row, and pixel values are
    divided by 255.

    # Arguments
        path: location of an ``.npz`` archive holding the arrays
            ``x_train``, ``y_train``, ``x_test`` and ``y_test``.

    # Return
        x: numpy.ndarray of dtype float32 with shape `(n_samples, n_features)`
        y: numpy.ndarray of dtype int32 with shape `(n_samples,)`
    """
    # The context manager closes the archive even if one of the keys is
    # missing and the lookup raises.
    with np.load(path) as f:
        x_train, y_train = f['x_train'], f['y_train']
        x_test, y_test = f['x_test'], f['y_test']

    x = np.concatenate((x_train, x_test))
    y = np.concatenate((y_train, y_test)).astype(np.int32)
    # Flatten each sample to a single row before scaling.
    x = x.reshape((x.shape[0], -1)).astype(np.float32)
    x = np.divide(x, 255.)
    print('MNIST samples', x.shape)

    return x, y


class MnistDataset(Dataset):
    """Dataset over the samples returned by ``load_mnist``.

    Indexing yields a ``(sample, label, index)`` triple of tensors.
    """

    def __init__(self):
        samples, labels = load_mnist()
        self.x = samples
        self.y = labels

    def __len__(self):
        # One item per row of the sample matrix.
        n_samples = self.x.shape[0]
        return n_samples

    def __getitem__(self, idx):
        # Each piece is wrapped in a fresh ndarray before being handed to
        # torch, so scalars (the label and the index) become 0-d tensors.
        sample = torch.from_numpy(np.array(self.x[idx]))
        label = torch.from_numpy(np.array(self.y[idx]))
        position = torch.from_numpy(np.array(idx))
        return sample, label, position

def load_images(dataset_path):
    """Load every image found directly under ``dataset_path`` as a flat vector.

    Each directory entry is read as an image, resized to a square whose side
    equals the image height, flattened, and divided by 255. The entry name is
    recorded as that sample's label.

    # Arguments
        dataset_path: directory containing all (unlabeled) instances, not
            only a train split.

    # Return
        x: numpy.ndarray with one flattened image per row
        y: numpy.ndarray of entry names, aligned one-to-one with `x`
    """
    import os

    import matplotlib.image as mImage

    # NOTE(review): `skt` is not imported anywhere in the visible file; the
    # call signature suggests `skimage.transform` -- confirm and import it at
    # module level.
    x_list = []
    y_list = []
    for entry in os.listdir(dataset_path):
        image = mImage.imread(os.path.join(dataset_path, entry))
        side = image.shape[0]
        if image.ndim == 3:
            image = skt.resize(image, (side, side, 3))
        elif image.ndim == 2:
            # Grayscale: keep the output two-dimensional so the flattened
            # length is side * side.
            image = skt.resize(image, (side, side))

        # Scale exactly once (the batch used to be divided by 255 a second
        # time after the loop).
        # NOTE(review): the resize step may already rescale integer images to
        # [0, 1]; confirm whether this division is still wanted.
        x_list.append(np.divide(image.reshape(-1), 255.))
        # Exactly one label per image keeps x and y aligned.
        y_list.append(entry)

    x_list = np.array(x_list)
    y_list = np.array(y_list)
    print("x_list.shape = {} ||| y_list.shape = {}".format(x_list.shape, y_list.shape))
    print('{}: Image samples = {}'.format(dataset_path, x_list.shape))

    return x_list, y_list



class ImageDataset(Dataset):
    """Dataset over the images that ``load_images`` reads from a directory.

    Indexing yields a ``(sample, label, index)`` triple of tensors.
    """

    def __init__(self, dataset_path):
        loaded = load_images(dataset_path)
        self.x, self.y = loaded

    def __len__(self):
        # The leading axis of the sample array counts the items.
        return self.x.shape[0]

    def __getitem__(self, idx):
        # NOTE(review): torch.from_numpy presumably rejects non-numeric
        # arrays, so string labels would fail here -- confirm what
        # load_images puts in self.y.
        pieces = (self.x[idx], self.y[idx], idx)
        sample, label, position = (
            torch.from_numpy(np.array(piece)) for piece in pieces
        )
        return sample, label, position



#######################################################
# Evaluation Criterion
#######################################################


def cluster_acc(y_true, y_pred):
    """
    Calculate clustering accuracy under the best one-to-one mapping between
    predicted cluster ids and true labels. Requires SciPy installed.

    # Arguments
        y_true: true labels, array-like with shape `(n_samples,)`
        y_pred: predicted labels, array-like with shape `(n_samples,)`

    # Return
        accuracy, in [0,1]

    # Raises
        ValueError: if the inputs are empty
    """
    # Imported lazily so the module loads without SciPy. This replaces
    # sklearn.utils.linear_assignment_, which current scikit-learn releases
    # no longer ship.
    from scipy.optimize import linear_sum_assignment

    # Both label arrays are used as indices, so both must be integers.
    y_true = np.asarray(y_true).astype(np.int64).ravel()
    y_pred = np.asarray(y_pred).astype(np.int64).ravel()
    assert y_pred.size == y_true.size
    if y_pred.size == 0:
        raise ValueError("cluster_acc requires at least one sample")

    D = max(y_pred.max(), y_true.max()) + 1
    # w[p, t] counts the samples predicted as cluster p whose true label is t.
    w = np.zeros((D, D), dtype=np.int64)
    np.add.at(w, (y_pred, y_true), 1)

    # The solver minimises cost, so flip the counts to maximise the overlap.
    rows, cols = linear_sum_assignment(w.max() - w)
    return w[rows, cols].sum() * 1.0 / y_pred.size

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).