Update to numpy #48

Open
wants to merge 1 commit into base: master
4 changes: 2 additions & 2 deletions FAQ.readme
@@ -11,10 +11,10 @@ contributed by Rachael Mansbach:

I've been using the glmnet python package (great code by the way!) and I've discovered that when trying to run this code with more up-to-date versions of numpy it errors out on line 260 of cvglmnet.py:

ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(nobs/nfolds))])
ma = np.tile(np.arange(nfolds), [1, int(np.floor(nobs/nfolds))])

This is due to nobs/nfolds being computed as float division rather than integer division. It is easily solved by changing the line to

ma = scipy.tile(scipy.arange(nfolds), [1, int(scipy.floor(int(nobs/nfolds)))])
ma = np.tile(np.arange(nfolds), [1, int(np.floor(int(nobs/nfolds)))])

but I did have to install from source to get it to work. It might be worth a note in the installation section or a code update?
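
For reference, a minimal sketch of the workaround described above, using hypothetical nobs and nfolds values; the // floor-division operator is an equivalent way to obtain an integer repetition count:

import numpy as np

# Hypothetical sizes, for illustration only.
nobs, nfolds = 103, 10

# In Python 3, nobs/nfolds is true (float) division, so the repetition count
# has to be forced back to an integer before it reaches np.tile.
reps = int(np.floor(int(nobs / nfolds)))   # the corrected expression quoted above
assert reps == nobs // nfolds              # floor division gives the same count

ma = np.tile(np.arange(nfolds), [1, reps])
assert ma.shape == (1, nfolds * reps)
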
Binary file added build/lib/glmnet_python/GLMnet.so
Binary file not shown.
40 changes: 40 additions & 0 deletions build/lib/glmnet_python/__init__.py
@@ -0,0 +1,40 @@
from __future__ import absolute_import
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__)))

from .glmnetSet import glmnetSet
from .glmnet import glmnet
from .glmnetPlot import glmnetPlot
from .glmnetPrint import glmnetPrint
from .glmnetCoef import glmnetCoef
from .glmnetPredict import glmnetPredict
from .cvglmnet import cvglmnet
from .cvglmnetCoef import cvglmnetCoef
from .cvglmnetPlot import cvglmnetPlot
from .cvglmnetPredict import cvglmnetPredict
from .coxnet import coxnet
from .cvelnet import cvelnet
from .cvlognet import cvlognet
from .cvmultnet import cvmultnet
from .fishnet import fishnet
from .glmnetControl import glmnetControl
from .lognet import lognet
from .printDict import printDict
from .wtmean import wtmean
from .cvcompute import cvcompute
from .cvfishnet import cvfishnet
from .cvmrelnet import cvmrelnet
from .elnet import elnet
from .loadGlmLib import loadGlmLib
from .mrelnet import mrelnet
from .structtype import structtype
from .dataprocess import dataprocess

__all__ = ['glmnet', 'glmnetPlot', 'glmnetPrint', 'glmnetCoef', 'glmnetPredict', 'cvglmnet', 'cvglmnetCoef',
'cvglmnetPlot', 'cvglmnetPredict' , 'coxnet', 'cvelnet', 'cvlognet', 'cvmultnet', 'fishnet',
'glmnetControl', 'lognet', 'printDict', 'wtmean', 'cvcompute', 'cvfishnet', 'cvmrelnet', 'elnet',
'glmnetSet', 'loadGlmLib', 'mrelnet', 'structtype', 'dataprocess']

#__version__ = get_versions()['version']
#del get_versions
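
The __init__.py above only re-exports the package's public functions. A minimal usage sketch, assuming the package is importable as glmnet_python and that the keyword-style calls below match its MATLAB-like interface:

import numpy as np
from glmnet_python import glmnet, cvglmnet   # names re-exported by __all__ above

# Hypothetical data, for illustration only.
x = np.random.rand(100, 10)
y = np.random.rand(100, 1)

fit = glmnet(x = x, y = y, family = 'gaussian')      # assumed entry point
cvfit = cvglmnet(x = x, y = y, family = 'gaussian')  # assumed cross-validation wrapper
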
186 changes: 186 additions & 0 deletions build/lib/glmnet_python/coxnet.py
@@ -0,0 +1,186 @@
# -*- coding: utf-8 -*-
"""
Internal function called by glmnet. See also glmnet, cvglmnet

time -- column 0
status -- column 1
"""
# import packages/methods
import numpy as np
import ctypes
from loadGlmLib import loadGlmLib

def coxnet(x, is_sparse, irs, pcs, y, weights, offset, parm,
nobs, nvars, jd, vp, cl, ne, nx, nlam, flmin, ulam,
thresh, isd, maxit, family):

# load shared fortran library
glmlib = loadGlmLib()

# pre-process data
ty = y[:, 0]
tevent = y[:, 1]
if np.any(ty <= 0):
raise ValueError('negative event time not permitted for cox family')
if len(offset) == 0:
offset = ty*0
is_offset = False
else:
is_offset = True

# now convert types and allocate memory before calling
# glmnet fortran library
######################################
# --------- PROCESS INPUTS -----------
######################################
# force inputs into fortran order and numpy float64
copyFlag = False
x = x.astype(dtype = np.float64, order = 'F', copy = copyFlag)
irs = irs.astype(dtype = np.int32, order = 'F', copy = copyFlag)
pcs = pcs.astype(dtype = np.int32, order = 'F', copy = copyFlag)
ty = ty.astype(dtype = np.float64, order = 'F', copy = copyFlag)
tevent = tevent.astype(dtype = np.float64, order = 'F', copy = copyFlag)
offset = offset.astype(dtype = np.float64, order = 'F', copy = copyFlag)
weights = weights.astype(dtype = np.float64, order = 'F', copy = copyFlag)
jd = jd.astype(dtype = np.int32, order = 'F', copy = copyFlag)
vp = vp.astype(dtype = np.float64, order = 'F', copy = copyFlag)
cl = cl.astype(dtype = np.float64, order = 'F', copy = copyFlag)
ulam = ulam.astype(dtype = np.float64, order = 'F', copy = copyFlag)

######################################
# --------- ALLOCATE OUTPUTS ---------
######################################
# lmu
lmu = -1
lmu_r = ctypes.c_int(lmu)
# ca
ca = np.zeros([nx, nlam], dtype = np.float64)
ca = ca.astype(dtype = np.float64, order = 'F', copy = False)
ca_r = ca.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# ia
ia = -1*np.ones([nx], dtype = np.int32)
ia = ia.astype(dtype = np.int32, order = 'F', copy = False)
ia_r = ia.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
# nin
nin = -1*np.ones([nlam], dtype = np.int32)
nin = nin.astype(dtype = np.int32, order = 'F', copy = False)
nin_r = nin.ctypes.data_as(ctypes.POINTER(ctypes.c_int))
# dev
dev = -1*np.ones([nlam], dtype = np.float64)
dev = dev.astype(dtype = np.float64, order = 'F', copy = False)
dev_r = dev.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# alm
alm = -1*np.ones([nlam], dtype = np.float64)
alm = alm.astype(dtype = np.float64, order = 'F', copy = False)
alm_r = alm.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
# nlp
nlp = -1
nlp_r = ctypes.c_int(nlp)
# jerr
jerr = -1
jerr_r = ctypes.c_int(jerr)
# dev0
dev0 = -1
dev0_r = ctypes.c_double(dev0)

# ###################################
# main glmnet fortran caller
# ###################################
if is_sparse:
# no sparse coxnet implemented
raise ValueError('Cox model not implemented for sparse x in glmnet')

else:
# call fortran coxnet routine
glmlib.coxnet_(
ctypes.byref(ctypes.c_double(parm)),
ctypes.byref(ctypes.c_int(nobs)),
ctypes.byref(ctypes.c_int(nvars)),
x.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ty.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
tevent.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
offset.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
weights.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
jd.ctypes.data_as(ctypes.POINTER(ctypes.c_int)),
vp.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
cl.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.byref(ctypes.c_int(ne)),
ctypes.byref(ctypes.c_int(nx)),
ctypes.byref(ctypes.c_int(nlam)),
ctypes.byref(ctypes.c_double(flmin)),
ulam.ctypes.data_as(ctypes.POINTER(ctypes.c_double)),
ctypes.byref(ctypes.c_double(thresh)),
ctypes.byref(ctypes.c_int(maxit)),
ctypes.byref(ctypes.c_int(isd)),
ctypes.byref(lmu_r),
ca_r,
ia_r,
nin_r,
ctypes.byref(dev0_r),
dev_r,
alm_r,
ctypes.byref(nlp_r),
ctypes.byref(jerr_r)
)

# ###################################
# post process results
# ###################################

# check for error
if (jerr_r.value > 0):
raise ValueError("Fatal glmnet error in library call: error code = {}".format(jerr_r.value))
elif (jerr_r.value < 0):
print("Warning: Non-fatal error in glmnet library call: error code = ", jerr_r.value)
print("Check results for accuracy. Partial or no results returned.")

# clip output to correct sizes
lmu = lmu_r.value
ca = ca[0:nx, 0:lmu]
ia = ia[0:nx]
nin = nin[0:lmu]
dev = dev[0:lmu]
alm = alm[0:lmu]

# ninmax
ninmax = max(nin)
# fix first value of alm (from inf to correct value)
if ulam[0] == 0.0:
t1 = np.log(alm[1])
t2 = np.log(alm[2])
alm[0] = np.exp(2*t1 - t2)
# create return fit dictionary
if ninmax > 0:
ca = ca[0:ninmax, :]
df = np.sum(np.absolute(ca) > 0, axis=0)
ja = ia[0:ninmax] - 1 # ia is 1-indexed in fortran
oja = np.argsort(ja)
ja1 = ja[oja]
beta = np.zeros([nvars, lmu], dtype = np.float64)
beta[ja1, :] = ca[oja, :]
else:
beta = np.zeros([nvars, lmu], dtype = np.float64)
df = np.zeros([1, lmu], dtype = np.float64)

fit = dict()
fit['beta'] = beta
fit['dev'] = dev
fit['nulldev'] = dev0_r.value
fit['df']= df
fit['lambdau'] = alm
fit['npasses'] = nlp_r.value
fit['jerr'] = jerr_r.value
fit['dim'] = np.array([nvars, lmu], dtype = np.integer)
fit['offset'] = is_offset
fit['class'] = 'coxnet'

# ###################################
# return to caller
# ###################################

return fit
#-----------------------------------------
# end of method coxnet
#-----------------------------------------
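
As the docstring above notes, coxnet expects the response matrix with event times in column 0 and censoring status in column 1, and it rejects non-positive times. A small sketch of assembling such a response; calling it through the public glmnet wrapper with family = 'cox' is an assumption here, since coxnet itself is internal:

import numpy as np

# Hypothetical survival data: times must be strictly positive,
# status is 1 for an observed event and 0 for censoring.
time = np.array([4.5, 7.2, 1.3, 9.9])
status = np.array([1.0, 0.0, 1.0, 1.0])
y = np.column_stack([time, status])    # column 0 = time, column 1 = status

assert not np.any(y[:, 0] <= 0)        # otherwise coxnet raises ValueError

# from glmnet_python import glmnet
# fit = glmnet(x = x, y = y, family = 'cox')   # assumed public entry point; x must be dense,
#                                              # since sparse x is not supported for the Cox family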

34 changes: 34 additions & 0 deletions build/lib/glmnet_python/cvcompute.py
@@ -0,0 +1,34 @@
# -*- coding: utf-8 -*-
"""
Internal glmnet function. See also cvglmnet.

Compute the weighted mean and SD within folds, and hence the SE of the mean
"""
import numpy as np
from wtmean import wtmean

def cvcompute(mat, weights, foldid, nlams):
if len(weights.shape) > 1:
weights = np.reshape(weights, [weights.shape[0], ])
wisum = np.bincount(foldid, weights = weights)
nfolds = np.amax(foldid) + 1
outmat = np.ones([nfolds, mat.shape[1]])*np.NaN
good = np.zeros([nfolds, mat.shape[1]])
mat[np.isinf(mat)] = np.NaN
for i in range(nfolds):
tf = foldid == i
mati = mat[tf, ]
wi = weights[tf, ]
outmat[i, :] = wtmean(mati, wi)
good[i, 0:nlams[i]] = 1
N = np.sum(good, axis = 0)
cvcpt = dict()
cvcpt['cvraw'] = outmat
cvcpt['weights'] = wisum
cvcpt['N'] = N

return(cvcpt)

# end of cvcompute
#=========================
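
cvcompute above groups the per-observation loss matrix by fold and takes a weighted mean within each fold. The wtmean helper is not part of this diff, so the sketch below substitutes an ordinary column-wise weighted mean to illustrate the grouping step:

import numpy as np

def weighted_colmean(mat, w):
    # Stand-in for wtmean: weighted mean of each column of mat.
    return np.dot(w, mat) / np.sum(w)

# Hypothetical loss matrix: 4 observations x 2 lambda values, split into 2 folds.
mat = np.array([[1.0, 2.0], [3.0, 4.0], [5.0, 6.0], [7.0, 8.0]])
weights = np.array([1.0, 1.0, 2.0, 1.0])
foldid = np.array([0, 0, 1, 1])

wisum = np.bincount(foldid, weights = weights)   # total weight per fold
nfolds = np.amax(foldid) + 1
outmat = np.vstack([weighted_colmean(mat[foldid == i, :], weights[foldid == i])
                    for i in range(nfolds)])
# outmat[i, :] is the weighted mean loss in fold i for each lambda,
# matching the outmat filled row by row in the loop above.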

83 changes: 83 additions & 0 deletions build/lib/glmnet_python/cvelnet.py
@@ -0,0 +1,83 @@
# -*- coding: utf-8 -*-
"""
Internal cvglmnet function. See also cvglmnet.

"""
import numpy as np
from glmnetPredict import glmnetPredict
from wtmean import wtmean
from cvcompute import cvcompute

def cvelnet(fit, \
lambdau, \
x, \
y, \
weights, \
offset, \
foldid, \
ptype, \
grouped, \
keep = False):

typenames = {'deviance':'Mean-Squared Error', 'mse':'Mean-Squared Error',
'mae':'Mean Absolute Error'}
if ptype == 'default':
ptype = 'mse'

ptypeList = ['mse', 'mae', 'deviance']
if not ptype in ptypeList:
print('Warning: only ', ptypeList, "available for Gaussian models; 'mse' used")
ptype = 'mse'
if len(offset) > 0:
y = y - offset

predmat = np.ones([y.size, lambdau.size])*np.NAN
nfolds = np.amax(foldid) + 1
nlams = []
for i in range(nfolds):
which = foldid == i
fitobj = fit[i].copy()
fitobj['offset'] = False
preds = glmnetPredict(fitobj, x[which, ])
nlami = np.size(fit[i]['lambdau'])
predmat[which, 0:nlami] = preds
nlams.append(nlami)
# convert nlams to scipy array
nlams = np.array(nlams, dtype = np.integer)

N = y.shape[0] - np.sum(np.isnan(predmat), axis = 0)
yy = np.tile(y, [1, lambdau.size])

if ptype == 'mse':
cvraw = (yy - predmat)**2
elif ptype == 'deviance':
cvraw = (yy - predmat)**2
elif ptype == 'mae':
cvraw = np.absolute(yy - predmat)

if y.size/nfolds < 3 and grouped == True:
print('Option grouped=false enforced in cv.glmnet, since < 3 observations per fold')
grouped = False

if grouped == True:
cvob = cvcompute(cvraw, weights, foldid, nlams)
cvraw = cvob['cvraw']
weights = cvob['weights']
N = cvob['N']

cvm = wtmean(cvraw, weights)
sqccv = (cvraw - cvm)**2
cvsd = np.sqrt(wtmean(sqccv, weights)/(N-1))

result = dict()
result['cvm'] = cvm
result['cvsd'] = cvsd
result['name'] = typenames[ptype]

if keep:
result['fit_preval'] = predmat

return(result)

# end of cvelnet
#=========================
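
The last step of cvelnet above turns the fold means into the reported curve: cvm is the weighted mean error for each lambda and cvsd its standard error across folds. A small numerical sketch of that computation, with a plain weighted mean standing in for wtmean:

import numpy as np

# Hypothetical output of cvcompute: per-fold mean error for each lambda,
# the total weight of each fold, and the number of folds per lambda.
cvraw = np.array([[0.9, 0.7], [1.1, 0.8], [1.0, 0.6]])
weights = np.array([2.0, 3.0, 2.0])
N = np.array([3.0, 3.0])

def wmean(m, w):
    # Stand-in for wtmean: weighted mean over folds (rows).
    return np.dot(w, m) / np.sum(w)

cvm = wmean(cvraw, weights)                                 # cross-validation curve
cvsd = np.sqrt(wmean((cvraw - cvm)**2, weights) / (N - 1))  # its standard error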