Update to numpy #48

Open
wants to merge 1 commit into base: master
4 changes: 2 additions & 2 deletions FAQ.readme
@@ -11,10 +11,10 @@ contributed by Rachael Mansbach:

I've been using the glmnet python package (great code by the way!) and I've discovered that when trying to run this code with more up-to-date versions of numpy it errors out on line 260 of cvglmnet.py:

ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(nobs/nfolds))])
ma = np.tile(np.arange(nfolds), [1, int(np.floor(nobs/nfolds))])

This is due to nobs/nfolds being computed as float division rather than integer division. It is easily solved by changing the line to

ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(int(nobs/nfolds)))])
ma = np.tile(np.arange(nfolds), [1, int(np.floor(int(nobs/nfolds)))])

but I did have to install from source to get it to work. It might be worth a note in the installation section or a code update?
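
For reference, a minimal sketch of the workaround described above, using hypothetical nobs and nfolds values; the // floor-division operator is an equivalent way to obtain an integer repetition count:

import numpy as np

# Hypothetical sizes, for illustration only.
nobs, nfolds = 103, 10

# In Python 3, nobs/nfolds is true (float) division, so the repetition count
# has to be forced back to an integer before it reaches np.tile.
reps = int(np.floor(int(nobs / nfolds)))   # the corrected expression quoted above
assert reps == nobs // nfolds              # floor division gives the same count

ma = np.tile(np.arange(nfolds), [1, reps])
assert ma.shape == (1, nfolds * reps)
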
Binary file added build/lib/glmnet_python/GLMnet.so
Binary file not shown.
40 changes: 40 additions & 0 deletions build/lib/glmnet_python/__init__.py
@@ -0,0 +1,40 @@
from __future__ import absolute_import
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__)))

from .glmnetSet import glmnetSet
from .glmnet import glmnet
from .glmnetPlot import glmnetPlot
from .glmnetPrint import glmnetPrint
from .glmnetCoef import glmnetCoef
from .glmnetPredict import glmnetPredict
from .cvglmnet import cvglmnet
from .cvglmnetCoef import cvglmnetCoef
from .cvglmnetPlot import cvglmnetPlot
from .cvglmnetPredict import cvglmnetPredict
from .coxnet import coxnet
from .cvelnet import cvelnet
from .cvlognet import cvlognet
from .cvmultnet import cvmultnet
from .fishnet import fishnet
from .glmnetControl import glmnetControl
from .lognet import lognet
from .printDict import printDict
from .wtmean import wtmean
from .cvcompute import cvcompute
from .cvfishnet import cvfishnet
from .cvmrelnet import cvmrelnet
from .elnet import elnet
from .loadGlmLib import loadGlmLib
from .mrelnet import mrelnet
from .structtype import structtype
from .dataprocess import dataprocess

__all__ = ['glmnet', 'glmnetPlot', 'glmnetPrint', 'glmnetCoef', 'glmnetPredict', 'cvglmnet', 'cvglmnetCoef',
'cvglmnetPlot', 'cvglmnetPredict' , 'coxnet', 'cvelnet', 'cvlognet', 'cvmultnet', 'fishnet',
'glmnetControl', 'lognet', 'printDict', 'wtmean', 'cvcompute', 'cvfishnet', 'cvmrelnet', 'elnet',
'glmnetSet', 'loadGlmLib', 'mrelnet', 'structtype', 'dataprocess']

#__version__ = get_versions()['version']
#del get_versions
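
The __init__.py above only re-exports the package's public functions. A minimal usage sketch, assuming the package is importable as glmnet_python and that the keyword-style calls below match its MATLAB-like interface:

import numpy as np
from glmnet_python import glmnet, cvglmnet   # names re-exported by __all__ above

# Hypothetical data, for illustration only.
x = np.random.rand(100, 10)
y = np.random.rand(100, 1)

fit = glmnet(x = x, y = y, family = 'gaussian')      # assumed entry point
cvfit = cvglmnet(x = x, y = y, family = 'gaussian')  # assumed cross-validation wrapper
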
186 changes: 186 additions & 0 deletions build/lib/glmnet_python/coxnet.py
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
"""
Internal function called by glmnet. See also glmnet, cvglmnet

time -- column 0
status -- column 1
"""
# import packages/methods
import numpy as np
import ctypes
from loadGlmLib import loadGlmLib

def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm,
nobs, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam,
thresh, isd, maxit, family):

# load shared fortran library
glmlib = loadGlmLib()

# pre-process data
ty = y[:, 0]
tevent = y[:, 1]
if np.any(ty <= 0):
raise ValueError('negative event time not permitted for cox family')
if len(offset) == 0:
offset = ty*0
is_offset = False
else:
is_offset = True

# now convert types and allocate memory before calling
# glmnet fortran library
######################################
# --------- PROCESS INPUTS -----------
######################################
# force inputs into fortran order and numpy float64
copyFlag = False
x = x.astype(dtype = np.float64, order = 'F', copy = copyFlag)
irs = irs.astype(dtype = np.int32, order = 'F', copy = copyFlag)
pcs = pcs.astype(dtype = np.int32, order = 'F', copy = copyFlag)
ty = ty.astype(dtype = np.float64, order = 'F', copy = copyFlag)
tevent = tevent.astype(dtype = np.float64, order = 'F', copy = copyFlag)
offset = offset.astype(dtype = np.float64, order = 'F', copy = copyFlag)
weights = weights.astype(dtype = np.float64, order = 'F', copy = copyFlag)
jd = jd.astype(dtype = np.int32, order = 'F', copy = copyFlag)
vp = vp.astype(dtype = np.float64, order = 'F', copy = copyFlag)
cl = cl.astype(dtype = np.float64, order = 'F', copy = copyFlag)
ulam = ulam.astype(dtype = np.float64, order = 'F', copy = copyFlag)

######################################
# --------- ALLOCATE OUTPUTS ---------
######################################
# lmu
lmu = -1
lmu_r = ctypes.c_int(lmu)
# ca
ca = np.zeros([nx, nlam], dtype = np.float64)
ca = ca.astype(dtype = np.float64, order = 'F', copy = False)
ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# ia
ia = -1*np.ones([nx], dtype = np.int32)
ia = ia.astype(dtype = np.int32, order = 'F', copy = False)
ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
# nin
nin = -1*np.ones([nlam], dtype = np.int32)
nin = nin.astype(dtype = np.int32, order = 'F', copy = False)
nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
# dev
dev = -1*np.ones([nlam], dtype = np.float64)
dev = dev.astype(dtype = np.float64, order = 'F', copy = False)
dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# alm
alm = -1*np.ones([nlam], dtype = np.float64)
alm = alm.astype(dtype = np.float64, order = 'F', copy = False)
alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# nlp
nlp = -1
nlp_r = ctypes.c_int(nlp)
# jerr
jerr = -1
jerr_r = ctypes.c_int(jerr)
# dev0
dev0 = -1
dev0_r = ctypes.c_double(dev0)

# ###################################
# main glmnet fortran caller
# ###################################
if is_sparse:
# no sparse coxnet implemented
raise ValueError('Cox model not implemented for sparse x in glmnet')

else:
# call fortran coxnet routine
glmlib.coxnet_(
ctypes.byref(ctypes.c_double(parm)),
ctypes.byref(ctypes.c_int(nobs)),
ctypes.byref(ctypes.c_int(nvars)),
x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ty.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
tevent.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.byref(ctypes.c_int(ne)),
ctypes.byref(ctypes.c_int(nx)),
ctypes.byref(ctypes.c_int(nlam)),
ctypes.byref(ctypes.c_double(flmin)),
ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.byref(ctypes.c_double(thresh)),
ctypes.byref(ctypes.c_int(maxit)),
ctypes.byref(ctypes.c_int(isd)),
ctypes.byref(lmu_r),
ca_r,
ia_r,
nin_r,
ctypes.byref(dev0_r),
dev_r,
alm_r,
ctypes.byref(nlp_r),
ctypes.byref(jerr_r)
)

# ###################################
# post process results
# ###################################

# check for error
if (jerr_r.value > 0):
raise ValueError("Fatal glmnet error in library call: error code = {}".format(jerr_r.value))
elif (jerr_r.value < 0):
print("Warning: Non-fatal error in glmnet library call: error code = ", jerr_r.value)
print("Check results for accuracy. Partial or no results returned.")

# clip output to correct sizes
lmu = lmu_r.value
ca = ca[0:nx, 0:lmu]
ia = ia[0:nx]
nin = nin[0:lmu]
dev = dev[0:lmu]
alm = alm[0:lmu]

# ninmax
ninmax = max(nin)
# fix first value of alm (from inf to correct value)
if ulam[0] == 0.0:
t1 = np.log(alm[1])
t2 = np.log(alm[2])
alm[0] = np.exp(2*t1 - t2)
# create return fit dictionary
if ninmax > 0:
ca = ca[0:ninmax, :]
df = np.sum(np.absolute(ca) > 0, axis=0)
ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran
oja = np.argsort(ja)
ja1 = ja[oja]
beta = np.zeros([nvars, lmu], dtype = np.float64)
beta[ja1, :] = ca[oja, :]
else:
beta = np.zeros([nvars, lmu], dtype = np.float64)
df = np.zeros([1, lmu], dtype = np.float64)

fit = dict()
fit['beta'] = beta
fit['dev'] = dev
fit['nulldev'] = dev0_r.value
fit['df']= df
fit['lambdau'] = alm
fit['npasses'] = nlp_r.value
fit['jerr'] = jerr_r.value
fit['dim'] = np.array([nvars, lmu], dtype = np.integer)
fit['offset'] = is_offset
fit['class'] = 'coxnet'

# ###################################
# return to caller
# ###################################

return fit
#-----------------------------------------
# end of method coxnet
#-----------------------------------------
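
As the docstring above notes, coxnet expects the response matrix with event times in column 0 and censoring status in column 1, and it rejects non-positive times. A small sketch of assembling such a response; calling it through the public glmnet wrapper with family = 'cox' is an assumption here, since coxnet itself is internal:

import numpy as np

# Hypothetical survival data: times must be strictly positive,
# status is 1 for an observed event and 0 for censoring.
time = np.array([4.5, 7.2, 1.3, 9.9])
status = np.array([1.0, 0.0, 1.0, 1.0])
y = np.column_stack([time, status])    # column 0 = time, column 1 = status

assert not np.any(y[:, 0] <= 0)        # otherwise coxnet raises ValueError

# from glmnet_python import glmnet
# fit = glmnet(x = x, y = y, family = 'cox')   # assumed public entry point; x must be dense,
#                                              # since sparse x is not supported for the Cox family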

34 changes: 34 additions & 0 deletions build/lib/glmnet_python/cvcompute.py
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""
Internal glmnet function. See also cvglmnet.

Compute the weighted mean and SD within folds, and hence the SE of the mean
"""
import numpy as np
from wtmean import wtmean

def cvcompute(mat, weights, foldid, nlams):
if len(weights.shape) > 1:
weights = np.reshape(weights, [weights.shape[0], ])
wisum = np.bincount(foldid, weights = weights)
nfolds = np.amax(foldid) + 1
outmat = np.ones([nfolds, mat.shape[1]])*np.NaN
good = np.zeros([nfolds, mat.shape[1]])
mat[np.isinf(mat)] = np.NaN
for i in range(nfolds):
tf = foldid == i
mati = mat[tf, ]
wi = weights[tf, ]
outmat[i, :] = wtmean(mati, wi)
good[i, 0:nlams[i]] = 1
N = np.sum(good, axis = 0)
cvcpt = dict()
cvcpt['cvraw'] = outmat
cvcpt['weights'] = wisum
cvcpt['N'] = N

return(cvcpt)

# end of cvcompute
#=========================
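
cvcompute above groups the per-observation loss matrix by fold and takes a weighted mean within each fold. The wtmean helper is not part of this diff, so the sketch below substitutes an ordinary column-wise weighted mean to illustrate the grouping step:

import numpy as np

def weighted_colmean(mat, w):
    # Stand-in for wtmean: weighted mean of each column of mat.
    return np.dot(w, mat) / np.sum(w)

# Hypothetical loss matrix: 4 observations x 2 lambda values, split into 2 folds.
mat = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
weights = np.array([1.0, 1.0, 2.0, 1.0])
foldid = np.array([0, 0, 1, 1])

wisum = np.bincount(foldid, weights = weights)   # total weight per fold
nfolds = np.amax(foldid) + 1
outmat = np.vstack([weighted_colmean(mat[foldid == i, :], weights[foldid == i])
                    for i in range(nfolds)])
# outmat[i, :] is the weighted mean loss in fold i for each lambda,
# matching the outmat filled row by row in the loop above.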

83 changes: 83 additions & 0 deletions build/lib/glmnet_python/cvelnet.py
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""
Internal cvglmnet function. See also cvglmnet.

"""
import numpy as np
from glmnetPredict import glmnetPredict
from wtmean import wtmean
from cvcompute import cvcompute

def cvelnet(fit, \
lambdau, \
x, \
y, \
weights, \
offset, \
foldid, \
ptype, \
grouped, \
keep = False):

typenames = {'deviance':'Mean-Squared Error', 'mse':'Mean-Squared Error',
'mae':'Mean Absolute Error'}
if ptype == 'default':
ptype = 'mse'

ptypeList = ['mse', 'mae', 'deviance']
if not ptype in ptypeList:
print('Warning: only ', ptypeList, "available for Gaussian models; 'mse' used")
ptype = 'mse'
if len(offset) > 0:
y = y - offset

predmat = np.ones([y.size, lambdau.size])*np.NAN
nfolds = np.amax(foldid) + 1
nlams = []
for i in range(nfolds):
which = foldid == i
fitobj = fit[i].copy()
fitobj['offset'] = False
preds = glmnetPredict(fitobj, x[which, ])
nlami = np.size(fit[i]['lambdau'])
predmat[which, 0:nlami] = preds
nlams.append(nlami)
# convert nlams to scipy array
nlams = np.array(nlams, dtype = np.integer)

N = y.shape[0] - np.sum(np.isnan(predmat), axis = 0)
yy = np.tile(y, [1, lambdau.size])

if ptype == 'mse':
cvraw = (yy - predmat)**2
elif ptype == 'deviance':
cvraw = (yy - predmat)**2
elif ptype == 'mae':
cvraw = np.absolute(yy - predmat)

if y.size/nfolds < 3 and grouped == True:
print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold')
grouped = False

if grouped == True:
cvob = cvcompute(cvraw, weights, foldid, nlams)
cvraw = cvob['cvraw']
weights = cvob['weights']
N = cvob['N']

cvm = wtmean(cvraw, weights)
sqccv = (cvraw - cvm)**2
cvsd = np.sqrt(wtmean(sqccv, weights)/(N-1))

result = dict()
result['cvm'] = cvm
result['cvsd'] = cvsd
result['name'] = typenames[ptype]

if keep:
result['fit_preval'] = predmat

return(result)

# end of cvelnet
#=========================
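
The last step of cvelnet above turns the fold means into the reported curve: cvm is the weighted mean error for each lambda and cvsd its standard error across folds. A small numerical sketch of that computation, with a plain weighted mean standing in for wtmean:

import numpy as np

# Hypothetical output of cvcompute: per-fold mean error for each lambda,
# the total weight of each fold, and the number of folds per lambda.
cvraw = np.array([[0.9, 0.7], [1.1, 0.8], [1.0, 0.6]])
weights = np.array([2.0, 3.0, 2.0])
N = np.array([3.0, 3.0])

def wmean(m, w):
    # Stand-in for wtmean: weighted mean over folds (rows).
    return np.dot(w, m) / np.sum(w)

cvm = wmean(cvraw, weights)                                 # cross-validation curve
cvsd = np.sqrt(wmean((cvraw - cvm)**2, weights) / (N - 1))  # its standard error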