Python实现之RankGauss

作者:luozhipeng   发表日期:2017-12-17  浏览:140次


import pandas as pd
import numpy as np 
from scipy.special import erfinv 

def RankGauss(dtrain, dtest, cols):
    """Apply a RankGauss transform to selected columns of train and test.

    Missing values in both frames are first replaced with -1.  The two
    frames are then stacked so that ranks are computed over their combined
    values.  For each column in ``cols``, every value is replaced by its
    1-based rank among the column's *sorted unique* values (ties share a
    rank), the rank is scaled into the open interval (0, 1) by dividing by
    ``n_unique + 1``, and the result is mapped through
    ``erfinv(2 * q - 1)``.

    Args:
        dtrain: Training DataFrame.
        dtest: Test DataFrame with the same columns as ``dtrain``.
        cols: Iterable of column names to transform.

    Returns:
        A ``(dtrain, dtest)`` tuple of transformed frames, split back at the
        original row count of ``dtrain``.  The inputs are not modified.
    """
    dtrain = dtrain.fillna(-1)
    dtest = dtest.fillna(-1)

    size = dtrain.shape[0]
    df = pd.concat([dtrain, dtest], axis=0)

    for col in cols:
        values = df[col].values
        # np.unique returns the unique values in sorted order, so ``inverse``
        # is a true (dense, 0-based) rank.  A plain ``set`` iterates in hash
        # order, which would make the transform non-monotonic.
        uniques, inverse = np.unique(values, return_inverse=True)
        # Explicit float conversion keeps the division a true division
        # regardless of the column's integer dtype.
        ranks = inverse.astype(np.float64) + 1.0
        quantiles = ranks / (len(uniques) + 1)
        # quantiles lie strictly inside (0, 1), so the erfinv argument stays
        # strictly inside (-1, 1) and the output is always finite.
        df[col] = erfinv(2.0 * quantiles - 1.0)

    dtrain = df.iloc[:size, :]
    dtest = df.iloc[size:, :]
    return dtrain, dtest

本文固定链接: http://www.luozhipeng.com/?p=1299
转载请注明: luozhipeng 2017-12-17 于 罗志鹏的BLOG 发表

上一篇: :下一篇
返回顶部