Demo entry 6628222

龙弋

   

Submitted by a on Jul 02, 2017 at 17:19
Language: C++. Code size: 5.4 kB.

#include <iostream>
#include <fstream>
#include <string>
#include <ctime>
#include <cmath>
#include <windows.h>

#define TRAIN_NUM 1866819
#define TEST_NUM 282796
#define LOOP_NUM 200
using namespace std;


// One sparse sample: `num` (key, value) pairs stored in parallel arrays.
// Only the first `num` slots of `key` and `value` are meaningful.
// NOTE(review): the lookup in improve() stops at the first key greater than
// the one it wants, so keys are presumably stored in ascending order -- confirm
// against the data files.
struct truple {
    enum { CAPACITY = 201 };   // number of slots in each of the two arrays

    int num;                   // count of pairs currently stored
    int key[CAPACITY];         // feature ids; used as indices into theta
    double value[CAPACITY];    // feature values, parallel to key[]
};

// Shared state, allocated and released in main() and read by improve().

// Parsed training rows (TRAIN_NUM entries once allocated).
truple *train;
// Parsed test rows (TEST_NUM entries once allocated).
truple *test;
// label[m] is the class digit read from the first character of training line m.
int *label;
// hx[m] caches the sigmoid output of the current model for training row m.
double *hx;
// Model parameters: theta[0] is the intercept, theta[k] multiplies feature key k.
double theta[202];
// Step size applied to every parameter update.
double a = 0.01;

    // improve theta[i]
// Applies one batch gradient-descent step to a single parameter.
//
// Param points at the index of the parameter to update. Index 0 is the
// intercept, whose gradient is the plain sum of residuals (hx - label); any
// other index only accumulates rows that actually contain that feature key,
// weighted by the stored feature value. The step is scaled by the learning
// rate `a` and averaged over TRAIN_NUM rows. Always returns 0.
//
// Reads the globals hx, label and train; writes only theta[index].
DWORD WINAPI improve(LPVOID Param) {
    const int target = *static_cast<DWORD*>(Param);
    double gradient = 0;

    if (target != 0) {
        for (int row = 0; row < TRAIN_NUM; row++) {
            const truple &sample = train[row];
            const int count = sample.num;

            // Skip keys below the target; the scan stops at the first key
            // that is greater than or equal to it.
            int pos = 0;
            while (pos < count && sample.key[pos] < target) {
                pos++;
            }
            if (pos < count && sample.key[pos] == target) {
                gradient += (hx[row] - label[row]) * sample.value[pos];
            }
        }
    } else {
        for (int row = 0; row < TRAIN_NUM; row++) {
            gradient += hx[row] - label[row];
        }
    }

    theta[target] -= a * gradient / TRAIN_NUM;
    return 0;
}

int main() {
    ifstream train_set;
    ofstream output;
    string s;
    int begin, i, j, length, maohao, key;
    int n, m;
    double auc, bestAUC = 0, bestTheta[202];
    DWORD ThreadId;
    HANDLE ThreadHandle;
    ifstream test_set;
    
    test_set.open("test_data.txt");
    train_set.open("train_data.txt");
    output.open("submission.txt");
    test = new truple[TEST_NUM];
    train = new truple[TRAIN_NUM];
    label = new int[TRAIN_NUM];
    hx = new double[TRAIN_NUM];

    for (i = 0; i < TRAIN_NUM; i++) {
        train[i].num = 0;
        getline(train_set, s);
        label[i] = s[0] - '0';
        begin = 2;
        length = s.length();
        for (j = begin + 1; j < length; j++) {
            if (s[j] == ':') {
                maohao = j;
            } else if (s[j] == ' ') {
                train[i].key[train[i].num] = atoi(s.substr(begin, maohao - begin).c_str());
                train[i].value[train[i].num] = atof(s.substr(maohao + 1, j - maohao - 1).c_str());
                train[i].num++;
                begin = j + 1;
            }
        }
        train[i].key[train[i].num] = atoi(s.substr(begin, maohao - begin).c_str());
        train[i].value[train[i].num] = atof(s.substr(maohao + 1, length - maohao - 1).c_str());
        train[i].num++;
    }

    for (i = 0; i < TEST_NUM; i++) {
        test[i].num = 0;
        getline(test_set, s);
        begin = 1;
        length = s.length();
        while (s[begin++] != ' ') { }
        for (j = begin; j < length; j++) {
            if (s[j] == ':') {
                maohao = j;
            } else if (s[j] == ' ') {
                test[i].key[test[i].num] = atoi(s.substr(begin, maohao - begin).c_str());
                test[i].value[test[i].num] = atof(s.substr(maohao + 1, j - maohao - 1).c_str());
                test[i].num++;
                begin = j + 1;
            }
        }
        test[i].key[test[i].num] = atoi(s.substr(begin, maohao - begin).c_str());
        test[i].value[test[i].num] = atof(s.substr(maohao + 1, length - maohao - 1).c_str());
        test[i].num++;
    }
    train_set.close();
    test_set.close();
    int index[202], num;
    double h;
    srand(time(NULL));
    
    for (j = 0; j < 202; j++) {
        index[j] = j;
        theta[j] = double(rand() % 100) / 100;
    }
    for (n = 0; n < LOOP_NUM; n++) {
        for (m = 0; m < TRAIN_NUM; m++) {
            hx[m] = theta[0];
            num = train[m].num;
            for (j = 0; j < num; j++) {
                hx[m] += theta[train[m].key[j]] * train[m].value[j];
            }
            hx[m] = 1.0 / (1.0 + exp(-1 * hx[m]));
        }
        for (j = 0; j < 202; j++) {
            QueueUserWorkItem(&improve, index + j, 0);
        }
           // test training set
        int tmp, hit = 0, max = TRAIN_NUM / 10, indexx;
        for (i = 0; i < max; i++) {
            h = theta[0];
            indexx = rand() % 10 + i * 10;
            num = train[indexx].num;
            for (j = 0; j < num; j++) {
                h += theta[train[indexx].key[j]] * train[indexx].value[j];
            }
            h = 1.0 / (1.0 + exp(-1 * h));
            if (h >= 0.5) tmp = 1;
            else tmp = 0;
            if (tmp == label[indexx]) hit++;
        }
        auc = (double)hit / max;
        cout << '[' << n << '/' << LOOP_NUM << ']' << ": hit " << hit << " in " << max
             << ", AUC: "<< auc << endl;
        if (auc > bestAUC) {
            bestAUC = auc;
            for (j = 0; j < 202; j++) {
                bestTheta[j] = theta[j];
            }
        }
    }
    

    output.setf(ios::fixed, ios::floatfield);
    output.precision(6);
    output << "id,label\n";
    for (m = 0; m < TEST_NUM; m++) {
        h = bestTheta[0];
        num = test[m].num;
        for (j = 0; j < num; j++) {
            h += bestTheta[test[m].key[j]] * test[m].value[j];
        }
        h = 1.0 / (1.0 + exp(-1 * h));
        output << m << ',' << h << endl;
    }
    cout << "Best AUC is " << bestAUC << ".\n";
    
    delete []hx;
    delete []train;
    delete []label;
    delete []test;
    output.close();
    return 0;
}

This snippet took 0.02 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).