First addition of nonlinear optimization methods
Finite differentiation: Hessian and Jacobian
Line-search algorithms
Gradient descent non-linear optimization
gusmaogabriels committed May 14, 2017
1 parent a3580e7 commit a30c16b
Showing 10 changed files with 440 additions and 8 deletions.
15 changes: 7 additions & 8 deletions main.py
@@ -6,15 +6,14 @@
"""

import optinpy
"""
A = [[1,2,3],[2,3,1],[3,4,1],[0,2,1]]
b = [1,2,3,-1]
c = [2,3,1]

S = optinpy.simplex(A,b,c)
S.pivot(0,1)
"""
A = [[-3,2,-4,-5],[4,-2,5,3],[2,4,1,2],[3,2,-2,4]]
b = [-10,10,10,15]
c = [-2,-2,-3,-3]

S = optinpy.simplex(A,b,c,lb=[-10,-10,-10,-10],ub=[5,5,5,5])
#S.dual()
"""
n = optinpy.graph()
bs = zip(range(1,6),[5,-8,0,10,-7])
for b in bs:
@@ -43,7 +42,7 @@
arcs2 = optinpy.mst.kruskal(n2,verbose=True)
print("Boruvka's")
arcs3 = optinpy.mst.boruvka(n2,verbose=True)

"""
""" #SHORTEST PATH
n = optinpy.graph()
connections = [[1,2,2],[1,3,4],[1,4,5],[2,4,2],[3,4,1]]
4 changes: 4 additions & 0 deletions optinpy/__init__.py
@@ -16,5 +16,9 @@

import numpy as np
from .graph import *
from .simplex import *
from .mcfp import *
from .sp import *
from .mst import *

__all__ = ['np','graph','__author__','__version__']
5 changes: 5 additions & 0 deletions optinpy/finite_diff/__init__.py
@@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import, print_function
from .finite_diff import jacobian, hessian


80 changes: 80 additions & 0 deletions optinpy/finite_diff/finite_diff.py
@@ -0,0 +1,80 @@
# -*- coding: utf-8 -*-

def jacobian(fun,x0,epsilon=1e-6,algorithm='central'):
'''
Jacobian calculator
..fun as callable object; must be a function of x0 and return a single number
..x0 as a numeric array; point at which the jacobian should be estimated
    ..epsilon as a numeric value; perturbation from which the jacobian is estimated
..algorithm as a string among 'central', 'forward', 'backward'; algorithm to be used to estimate the jacobian
'''
grad = []
if algorithm == 'central':
evpoints = [-1,1]
elif algorithm == 'forward':
evpoints = [0,1]
elif algorithm == 'backward':
evpoints = [-1,0]
else:
raise Exception("Algorithm must be either 'central', 'forward' or 'backward'.")
for i in range(len(x0)):
fvals = []
for _x in evpoints:
x0[i] += _x*epsilon
fvals += [fun(x0)]
x0[i] -= _x*epsilon
grad += [(fvals[1]-fvals[0])/((evpoints[1]-evpoints[0])*epsilon)]
return grad

def hessian(fun,x0,epsilon=1e-6,algorithm='central'):
'''
hessian calculator
..fun as callable object; must be a function of x0 and return a single number
..x0 as a numeric array; point at which the hessian should be estimated
    ..epsilon as a numeric value; perturbation from which the hessian is estimated
..algorithm as a string among 'central', 'forward', 'backward'; algorithm to be used to estimate the hessian
'''
hessian = [[None for j in range(len(x0))] for i in range(len(x0))]
if algorithm == 'central':
evpoints_ij = [[1,1],[1,-1],[-1,1],[-1,-1]]
evpoints_ii = [[2,0],[1,0],[0,0],[-1,0],[-2,0]]
coeffs_ij = [1,-1,-1,1]
coeffs_ii = [-1,16,-30,16,-1]
denom_ij = 4*epsilon**2
denom_ii = 12*epsilon**2
elif algorithm == 'forward':
evpoints_ij = [[1,1],[1,0],[0,1],[0,0]]
evpoints_ii = evpoints_ij
coeffs_ii = [1,-1,-1,1]
coeffs_ij = coeffs_ii
denom_ij = epsilon**2
denom_ii = denom_ij
elif algorithm == 'backward':
evpoints_ij = [[-1,-1],[-1,0],[0,-1],[0,0]]
evpoints_ii = evpoints_ij
coeffs_ii = [1,-1,-1,1]
coeffs_ij = coeffs_ii
denom_ij = epsilon**2
denom_ii = denom_ij
else:
raise Exception("Algorithm must be either 'central', 'forward' or 'backward'.")
for i in range(0,len(x0)):
fvals = []
for points in evpoints_ii:
x0[i] += points[0]*epsilon
x0[i] += points[1]*epsilon
fvals += [fun(x0)]
x0[i] -= points[0]*epsilon
x0[i] -= points[1]*epsilon
hessian[i][i] = sum([fvals[k]*coeffs_ii[k] for k in range(len(fvals))])/denom_ii
for j in range(i+1,len(x0)):
fvals = []
for points in evpoints_ij:
x0[i] += points[0]*epsilon
x0[j] += points[1]*epsilon
fvals += [fun(x0)]
x0[i] -= points[0]*epsilon
x0[j] -= points[1]*epsilon
hessian[i][j] = sum([fvals[k]*coeffs_ij[k] for k in range(len(fvals))])/denom_ij
hessian[j][i] = hessian[i][j]
return hessian
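
Both estimators can be sanity-checked against a function whose derivatives are known in closed form. A minimal sketch (not part of the commit), assuming the finite_diff package re-exports jacobian and hessian as in its __init__.py above; a larger perturbation is used for the hessian, since epsilon**2 at the 1e-6 default sits at the limit of double precision:

from optinpy.finite_diff import jacobian, hessian
f = lambda x: x[0]**2 + 3*x[0]*x[1]        # grad = [2*x + 3*y, 3*x], hessian = [[2, 3], [3, 0]]
print(jacobian(f, [1.0, 2.0], epsilon=1e-6, algorithm='central'))   # ~ [8.0, 3.0]
print(hessian(f, [1.0, 2.0], epsilon=1e-4, algorithm='central'))    # ~ [[2.0, 3.0], [3.0, 0.0]]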
5 changes: 5 additions & 0 deletions optinpy/linesearch/__init__.py
@@ -0,0 +1,5 @@
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import, print_function

from ..finite_diff import jacobian, hessian
from .linesearch import xstep, backtracking, interp23, unimodality, golden_ratio
141 changes: 141 additions & 0 deletions optinpy/linesearch/linesearch.py
@@ -0,0 +1,141 @@
# -*- coding: utf-8 -*-

from __future__ import division, print_function

import numpy as np

from . import jacobian, hessian

def xstep(x0,d,alpha):
    '''
    returns x1 = x0 + alpha*d, given x0, d and alpha
    '''
    return [x_i + alpha*d_i for x_i, d_i in zip(x0, d)]

def __armijo(fun,x0,d,dd,alpha,c):
    '''
    Checks whether Armijo's (sufficient decrease) condition holds,
    i.e. fun(x0+alpha*d) <= fun(x0) + c*alpha*dd
    '''
    return fun(xstep(x0,d,alpha)) <= fun(x0)+c*alpha*dd

def backtracking(fun,x0,alpha=1,rho=0.5,c=1e-4,**kwargs):
    '''
    backtracking algorithm, arg_min(alpha) fun(x0+alpha*d)
    ..fun as callable object; must be a function of x0 and return a single number
    ..x0 as a numeric array; point at which the jacobian should be estimated
    ..alpha as a numeric value; maximum step from x_i
    ..rho as a numeric value; rate of decrement in alpha
    ..c as a numeric value; constant of the Armijo (sufficient decrease) condition
    ..**kwargs as a dictionary with the jacobian function's parameters
    '''
d = np.array(jacobian(fun,x0,**kwargs)) # jacobian as numpy array
    dd = np.dot(-d,d) # directional derivative along the steepest-descent direction -d
iters = 0
while not __armijo(fun,x0,-d,dd,alpha,c):# Armijo's Condition
alpha = rho*alpha
iters += 1
return {'x':xstep(x0,-d,alpha), 'f':fun(xstep(x0,-d,alpha)), 'alpha':alpha, 'iterations':iters}

def interp23(fun,x0,alpha=1,c=1e-4,alpha_min=0.1,rho=0.5,**kwargs):
    '''
    quadratic/cubic interpolation algorithm, arg_min(alpha) fun(x0+alpha*d)
    ..fun as callable object; must be a function of x0 and return a single number
    ..x0 as a numeric array; point at which the jacobian should be estimated
    ..alpha as a numeric value; maximum step from x_i
    ..c as a numeric value; constant of the Armijo (sufficient decrease) condition
    ..alpha_min as a numeric value; lower limit for alpha below which a plain backtracking step is tried instead
    ..rho as a numeric value; rate of decrement in alpha for the backtracking fallback
    ..**kwargs as a dictionary with the jacobian function's parameters
    '''
alpha0 = alpha
d = np.array(jacobian(fun,x0,**kwargs))
dd = np.dot(-d,d)
iters = {'first_order':0,'second_order':0,'third_order':0}
iters['first_order'] += 1
if __armijo(fun,x0,-d,dd,alpha0,c):
return {'x':xstep(x0,-d,alpha0), 'f':fun(xstep(x0,-d,alpha0)), 'alpha':alpha0, 'iterations':iters}
else:
# second order approximation
iters['second_order'] += 1
alpha1 = -(dd*alpha0**2)/(2*(fun(xstep(x0,-d,alpha0))-fun(x0)-dd*alpha0))
if __armijo(fun,x0,-d,dd,alpha1,c) and alpha1 > alpha_min:# Armijo's Condition
return {'x':xstep(x0,-d,alpha1), 'f':fun(xstep(x0,-d,alpha1)), 'alpha':alpha1, 'iterations':iters}
else:
alpha1 = alpha0*rho
iters['second_order'] += 1
if __armijo(fun,x0,-d,dd,alpha1,c) and alpha1 > alpha_min: # check whether a single backtracking iteration gives rise to a reasonable stepsize
return {'x':xstep(x0,-d,alpha1), 'f':fun(xstep(x0,-d,alpha1)), 'alpha':alpha1, 'iterations':iters}
else:
while not __armijo(fun,x0,-d,dd,alpha1,c):
iters['third_order'] += 1
coeff = (1/(alpha0**2*alpha1**2*(alpha1-alpha0)))
m = [[alpha0**2,-alpha1**2],[-alpha0**3,alpha1**3]]
v = [fun(xstep(x0,-d,alpha1))-fun(x0)-alpha1*dd,fun(xstep(x0,-d,alpha0))-fun(x0)-alpha0*dd]
a, b = coeff*np.dot(m,v)
alpha0 = alpha1
alpha1 = (-b+(b**2-3*a*dd)**0.5)/(3*a)
return {'x':xstep(x0,-d,alpha1), 'f':fun(xstep(x0,-d,alpha1)), 'alpha':alpha1, 'iterations':iters}
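
# Note on interp23 above (explanatory comment, not part of the original commit): with dd = phi'(0),
# the second-order step fits a quadratic phi(a) ~= phi(0) + dd*a + q*a**2 through phi(alpha0) and takes
# its minimizer, alpha1 = -dd*alpha0**2 / (2*(phi(alpha0) - phi(0) - dd*alpha0)); the third-order loop
# fits a cubic a*x**3 + b*x**2 + dd*x + phi(0) through the two latest trial points and takes its
# minimizer, alpha1 = (-b + (b**2 - 3*a*dd)**0.5) / (3*a).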

def unimodality(fun,x0,b,threshold=0.01):
    '''
    unimodality algorithm, arg_min(alpha) fun(x0+alpha*d)
    ..fun as callable object; must be strictly quasiconvex over the interval [a,b], with a = 0
    ..x0 as a numeric array; starting point
    ..b as a numeric value; upper bound of the interval
    ..threshold as a numeric value; minimum mean-relative difference between the interval bounds below which the procedure stops
    '''
interv = [0,b]
d = np.array(jacobian(fun,x0))
iters = 0
    while 2*abs((interv[-1]-interv[0])/(abs(interv[-1])+abs(interv[0]))) > threshold:
iters += 1
        x = np.sort(np.random.uniform(*interv+[2])) # draw alpha and beta uniformly distributed over the interval
phi = [fun(xstep(x0,-d,x_)) for x_ in x]
if phi[0] > phi[1]:
interv[0] = x[0]
else:
interv[1] = x[1]
alpha = (interv[1]+interv[0])/2.0
return {'x':xstep(x0,-d,alpha), 'f':fun(xstep(x0,-d,alpha)), 'alpha':alpha, 'iterations':iters}

def golden_ratio(fun,x0,b,threshold=0.01):
    '''
    golden-ratio (golden-section) algorithm, arg_min(alpha) fun(x0+alpha*d)
    ..fun as callable object (function)
    ..x0 as a numeric array; starting point
    ..b as a numeric value; upper bound for the alpha domain in Re (b>0)
    ..threshold as a numeric value; minimum mean-relative difference between the interval bounds below which the procedure stops
    '''
d = np.array(jacobian(fun,x0))
r = (5**.5-1)/2.0
interv = [0,b]
alpha = interv[0] + (1-r)*b
beta = interv[0] + r*b
phi = [fun(xstep(x0,-d,x_)) for x_ in [alpha,beta]]
iters = 0
    while 2*abs((interv[-1]-interv[0])/(abs(interv[-1])+abs(interv[0]))) > threshold:
iters += 1
if phi[0] > phi[1]:
interv[0] = alpha
alpha = beta
phi[0] = phi[1]
            beta = interv[0] + r*(interv[1]-interv[0])
phi[1] = fun(xstep(x0,-d,beta))
else:
interv[1] = beta
beta = alpha
phi[1] = phi[0]
alpha = interv[0] + (1-r)*(interv[1]-interv[0])
phi[0] = fun(xstep(x0,-d,alpha))
alpha = (interv[1]+interv[0])/2.0
return {'x':xstep(x0,-d,alpha), 'f':fun(xstep(x0,-d,alpha)), 'alpha':alpha, 'iterations':iters}






f = lambda x : (1-x[0])**2 + 100*(x[1]-x[0]**2)**2  # Rosenbrock function
f_aurea = lambda x : np.exp(-x[0])+x[0]**2

if __name__ == '__main__':
    print('backtracking\n', backtracking(f,[0,0],1,rho=0.6))
    print('backtracking\n', backtracking(f,[0,0],1,rho=0.5))
    print('interpolate\n', interp23(f,[0,0],1))
    print('unimodality\n', unimodality(f,[0,0],1))
    print('golden_ratio\n', golden_ratio(f,[0,0],1))
    print('golden_ratio\n', golden_ratio(f_aurea,[0],1))
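
Each of the routines above returns a dict with the accepted step size 'alpha', the resulting point 'x', its objective value 'f' and an iteration count, so a single steepest-descent update can be chained by hand. A minimal sketch of that usage (not part of the commit), assuming the linesearch package re-exports these names as in its __init__.py above:

from optinpy.linesearch import backtracking
f = lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2   # Rosenbrock, as in the demo above
step = backtracking(f, [0.0, 0.0], alpha=1, rho=0.5, c=1e-4)
x1 = step['x']                                           # iterate after one Armijo-backtracked step
print(step['alpha'], step['f'], step['iterations'])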
6 changes: 6 additions & 0 deletions optinpy/nonlinear/__init__.py
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import, print_function
from ..finite_diff import jacobian, hessian
from ..linesearch import xstep, backtracking, interp23, unimodality, golden_ratio


33 changes: 33 additions & 0 deletions optinpy/nonlinear/non_linear.py
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-

from __future__ import division, print_function

import numpy as np

from . import jacobian, hessian, xstep, backtracking, interp23, unimodality, golden_ratio

def gradient_descent(fun,x0,threshold,jacobian_kwargs={},linesearch_kwargs={}):
    '''
    gradient descent method
    ..fun as callable object; must be a function of x0 and return a single number
    ..x0 as a numeric array; point from which to start
    ..threshold as a numeric value; threshold on the squared gradient norm at which to stop the iterations
    ..jacobian_kwargs as a dict object with the jacobian function parameters
    ..linesearch_kwargs as a dict object with the line-search method definition and its parameters
    '''
    if len(linesearch_kwargs) == 0:
        linesearch_kwargs = {'method':'backtracking','kwargs':{'alpha':1,'rho':0.5,'c':1e-4},'jacobian_kwargs':{'algorithm':'central','epsilon':1e-6}}
    if len(jacobian_kwargs) == 0:
        jacobian_kwargs = {'algorithm':'central','epsilon':1e-6}
    # only the backtracking line search is wired in so far; merge its own parameters
    # with the jacobian parameters it forwards internally
    ls_kwargs = dict(linesearch_kwargs['kwargs'])
    ls_kwargs.update(linesearch_kwargs['jacobian_kwargs'])
    iters = 0
    d = np.array(jacobian(fun,x0,**jacobian_kwargs))
    x = x0
    while np.dot(d,d) > threshold:
        alpha = backtracking(fun,x,**ls_kwargs)['alpha']
        x = xstep(x,-d,alpha)
        d = np.array(jacobian(fun,x,**jacobian_kwargs))
        iters += 1
    return {'x':x, 'f':fun(x), 'iterations':iters}

f = lambda x : (1-x[0])**2 + 100*(x[1]-x[0]**2)**2  # Rosenbrock function

if __name__ == '__main__':
    print('gradient_descent\n', gradient_descent(f,[0,0],0.1))
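
The nested dictionaries mirror the defaults hard-coded above, so non-default settings follow the same structure. A minimal usage sketch (not part of the commit), importing the module directly since nonlinear/__init__.py does not re-export gradient_descent:

from optinpy.nonlinear.non_linear import gradient_descent
f = lambda x: (1 - x[0])**2 + 100*(x[1] - x[0]**2)**2
result = gradient_descent(f, [0.0, 0.0], threshold=1e-2,
                          jacobian_kwargs={'algorithm': 'forward', 'epsilon': 1e-7},
                          linesearch_kwargs={'method': 'backtracking',
                                             'kwargs': {'alpha': 1, 'rho': 0.5, 'c': 1e-4},
                                             'jacobian_kwargs': {'algorithm': 'forward', 'epsilon': 1e-7}})
print(result['x'], result['f'], result['iterations'])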
6 changes: 6 additions & 0 deletions optinpy/simplex/__init__.py
@@ -0,0 +1,6 @@
# -*- coding: utf-8 -*-
from __future__ import division, absolute_import, print_function

from .. import __author__, __version__, np
from .base import simplex
