-
Notifications
You must be signed in to change notification settings - Fork 5
/
sampleSY.py
76 lines (60 loc) · 2.64 KB
/
sampleSY.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Copyright (C) 2019 Albert Berahas, Majid Jahani, Martin Takáč
#
# All Rights Reserved.
#
# Authors: Albert Berahas, Majid Jahani, Martin Takáč
#
# Please cite:
#
# A. S. Berahas, M. Jahani, and M. Takáč, "Quasi-Newton Methods for
# Deep Learning: Forget the Past, Just Sample." (2019). Lehigh University.
# http://arxiv.org/abs/1901.09997
# ==========================================================================
import numpy as np
from numpy import linalg as LA
import tensorflow as tf
def sample_pairs_SY_SLSR1(X,y,num_weights,mmr,radius,eps,dnn,numHessEval,sess):
    """Sample (S, Y) pairs for the S-LSR1 method.

    Draws ``mmr`` random directions, evaluates ``dnn.Hvs`` on all of them in
    a single ``sess.run`` call, and keeps only the pairs that pass the
    relative test

        |(y - B s)^T s| > eps * ||y - B s|| * ||s||

    where ``B s = Y M^{-1} Y^T s`` is built from the pairs accepted so far
    and ``M = D + L + L^T`` (``D`` = diagonal and ``L`` = strict lower
    triangle of ``S^T Y``).

    Args:
        X: Value fed to ``dnn.x``.
        y: Value fed to ``dnn.y``.
        num_weights: Length of each sampled direction (rows of S and Y).
        mmr: Number of candidate directions to sample.
        radius: Scale factor applied to the standard-normal samples.
        eps: Tolerance of the acceptance test.
        dnn: Object exposing the ``x``, ``y``, ``vecs`` feed keys and the
            ``Hvs`` fetch (presumably Hessian-vector products - confirm
            against the network class).
        numHessEval: Running evaluation counter; incremented by one here.
        sess: Object whose ``run(fetches, feed_dict=...)`` evaluates
            ``dnn.Hvs``.

    Returns:
        Tuple ``(S, Y, counterSucc, numHessEval)`` where ``S`` and ``Y`` are
        ``(num_weights, counterSucc)`` arrays of the accepted pairs.

    Raises:
        numpy.linalg.LinAlgError: If ``M`` is singular.
    """
    # Candidate directions: one column per sample.
    Stemp = radius * np.random.randn(num_weights, mmr)
    # NOTE: np.squeeze drops every length-1 axis, so the column indexing
    # below needs both mmr > 1 and num_weights > 1.
    Ytemp = np.squeeze(
        sess.run([dnn.Hvs], feed_dict={dnn.x: X, dnn.y: y, dnn.vecs: Stemp})
    ).T
    numHessEval += 1  # all candidates are evaluated in one batched call

    S = np.zeros((num_weights, 0))
    Y = np.zeros((num_weights, 0))
    counterSucc = 0

    for idx in range(mmr):
        sk = Stemp[:, idx]
        yk = Ytemp[:, idx]

        # Strict lower triangle of S^T Y, i.e. L[i, j] = S[:, i] . Y[:, j]
        # for j < i, computed in one shot instead of a Python double loop.
        L = np.tril(S.T.dot(Y), k=-1)
        D = np.diag(np.sum(S * Y, axis=0))
        M = D + L + L.T
        Minv = np.linalg.inv(M)

        # B_k s_k from the pairs accepted so far (zero vector while empty).
        Bksk = np.squeeze(np.matmul(Y, np.matmul(Minv, np.matmul(Y.T, sk))))
        residual = yk - Bksk
        yk_BkskDotsk = residual.dot(sk)

        if np.abs(np.squeeze(yk_BkskDotsk)) > (
                eps * (LA.norm(residual) * LA.norm(sk))):
            counterSucc += 1
            S = np.append(S, sk.reshape(num_weights, 1), axis=1)
            Y = np.append(Y, yk.reshape(num_weights, 1), axis=1)

    return S, Y, counterSucc, numHessEval
def sample_pairs_SY_SLBFGS(X,y,num_weights,mmr,radius,eps,dnn,numHessEval,sess):
    """Sample (S, Y) pairs for the S-LBFGS method.

    Draws ``mmr`` random directions, evaluates ``dnn.Hvs`` on all of them in
    a single ``sess.run`` call, and keeps only the pairs that satisfy the
    curvature test ``s^T y > eps * ||s|| * ||y||``.

    Args:
        X: Value fed to ``dnn.x``.
        y: Value fed to ``dnn.y``.
        num_weights: Length of each sampled direction (rows of S and Y).
        mmr: Number of candidate directions to sample.
        radius: Scale factor applied to the standard-normal samples.
        eps: Tolerance of the curvature test.
        dnn: Object exposing the ``x``, ``y``, ``vecs`` feed keys and the
            ``Hvs`` fetch (presumably Hessian-vector products - confirm
            against the network class).
        numHessEval: Running evaluation counter; incremented by one here.
        sess: Object whose ``run(fetches, feed_dict=...)`` evaluates
            ``dnn.Hvs``.

    Returns:
        Tuple ``(S, Y, counterSucc, numHessEval, gamma_k)``. ``S`` and ``Y``
        are ``(num_weights, counterSucc)`` arrays of the accepted pairs.
        ``gamma_k`` is ``s^T y / y^T y`` of the last accepted pair, or
        ``1.0`` when no pair was accepted.
    """
    # Candidate directions: one column per sample.
    Stemp = radius * np.random.randn(num_weights, mmr)
    # NOTE: np.squeeze drops every length-1 axis, so the column indexing
    # below needs both mmr > 1 and num_weights > 1.
    Ytemp = np.squeeze(
        sess.run([dnn.Hvs], feed_dict={dnn.x: X, dnn.y: y, dnn.vecs: Stemp})
    ).T
    numHessEval += 1  # all candidates are evaluated in one batched call

    S = np.zeros((num_weights, 0))
    Y = np.zeros((num_weights, 0))
    counterSucc = 0
    # Neutral fallback so the return below is always defined, even when
    # every candidate fails the curvature test.
    gamma_k = 1.0

    for idx in range(mmr):
        sk = Stemp[:, idx]
        yk = Ytemp[:, idx]
        sTy = yk.dot(sk)

        if sTy > eps * (LA.norm(sk) * LA.norm(yk)):
            gamma_k = np.squeeze(sTy / yk.dot(yk))
            S = np.append(S, sk.reshape(num_weights, 1), axis=1)
            Y = np.append(Y, yk.reshape(num_weights, 1), axis=1)
            counterSucc += 1

    return S, Y, counterSucc, numHessEval, gamma_k