// Copyright (C) 2008 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_OPTIMIZATIOn_H_
#define DLIB_OPTIMIZATIOn_H_
#include <cmath>
#include <limits>
#include "optimization_abstract.h"
#include "optimization_search_strategies.h"
#include "optimization_stop_strategies.h"
#include "optimization_line_search.h"
namespace dlib
{
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Functions that transform other functions
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
template <typename funct>
class central_differences
{
    /*
        Function object that approximates the derivative of a wrapped function f
        with the symmetric difference quotient

            (f(x + eps) - f(x - eps)) / (2*eps)

        For matrix arguments the quotient is evaluated once per element, perturbing
        only that element, which yields one partial derivative per element.

        Only a reference to f is stored (see the private members), so the wrapped
        function object has to outlive this object.
    */
public:
    // Compilation fails here if funct is a plain function type.  Pass a function
    // object, or a pointer to the global function obtained with the & operator.
    // (The check exists because gcc 4.0 has a bug that silently corrupts return
    // values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);

    // fn:   the function to differentiate (held by reference, not copied)
    // step: the perturbation added to and subtracted from each coordinate
    central_differences(const funct& fn, double step = 1e-7) : f(fn), eps(step) {}

    // Approximate gradient of f at the matrix x.  The result has x.size() elements.
    template <typename T>
    typename T::matrix_type operator()(const T& x) const
    {
        // T must be some sort of dlib matrix
        COMPILE_TIME_ASSERT(is_matrix<T>::value);

        typedef typename T::matrix_type vector_type;

        vector_type gradient(x.size());
        vector_type probe(x);   // scratch copy of x that gets perturbed in place
        const double two_eps = 2*eps;

        for (long k = 0; k < x.size(); ++k)
        {
            const double center = probe(k);

            probe(k) += eps;
            const double forward = f(probe);

            probe(k) = center - eps;
            const double backward = f(probe);

            gradient(k) = (forward - backward)/two_eps;

            // put the element back so the next coordinate starts from x again
            probe(k) = center;
        }

        return gradient;
    }

    // Same as the overload above, but for functions of the form f(item, x).  item
    // is forwarded untouched; only x is differentiated.
    template <typename T, typename U>
    typename U::matrix_type operator()(const T& item, const U& x) const
    {
        // U must be some sort of dlib matrix
        COMPILE_TIME_ASSERT(is_matrix<U>::value);

        typedef typename U::matrix_type vector_type;

        vector_type gradient(x.size());
        vector_type probe(x);   // scratch copy of x that gets perturbed in place
        const double two_eps = 2*eps;

        for (long k = 0; k < x.size(); ++k)
        {
            const double center = probe(k);

            probe(k) += eps;
            const double forward = f(item, probe);

            probe(k) = center - eps;
            const double backward = f(item, probe);

            gradient(k) = (forward - backward)/two_eps;

            // put the element back so the next coordinate starts from x again
            probe(k) = center;
        }

        return gradient;
    }

    // Approximate derivative of a function of a single scalar variable.
    double operator()(const double& x) const
    {
        return (f(x+eps) - f(x-eps)) / (2*eps);
    }

private:
    const funct& f;     // the wrapped function; referenced, never copied
    const double eps;   // perturbation used by the difference quotient
};
// Factory helper: wraps f in a central_differences object, relying on the default
// step size declared by the central_differences constructor.  The returned object
// refers to f rather than copying it, so f must remain alive while it is used.
template <typename funct>
const central_differences<funct> derivative(
    const funct& f
)
{
    return central_differences<funct>(f);
}
// Factory helper: wraps f in a central_differences object that uses eps as the
// finite difference step.  eps must be > 0 (checked by DLIB_ASSERT).  The returned
// object refers to f rather than copying it, so f must remain alive while it is used.
template <typename funct>
const central_differences<funct> derivative(
    const funct& f,
    double eps
)
{
    // Compilation fails here if funct is a plain function type.  Pass a function
    // object, or a pointer to the global function obtained with the & operator.
    // (The check exists because gcc 4.0 has a bug that silently corrupts return
    // values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);

    DLIB_ASSERT (
        eps > 0,
        "\tcentral_differences derivative(f,eps)"
        << "\n\tYou must give an epsilon > 0"
        << "\n\teps: " << eps
        );

    return central_differences<funct>(f,eps);
}
// ----------------------------------------------------------------------------------------
template <typename funct>
class negate_function_object
{
    /*
        Function object adaptor: calling it with x yields -f(x), where f is the
        wrapped function.

        Only a reference to f is stored (see the private member), so the wrapped
        function object has to outlive this object.
    */
public:
    // Compilation fails here if funct is a plain function type.  Pass a function
    // object, or a pointer to the global function obtained with the & operator.
    // (The check exists because gcc 4.0 has a bug that silently corrupts return
    // values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);

    // fn: the function whose output should be negated (held by reference)
    negate_function_object(const funct& fn) : f(fn) {}

    // Evaluates the wrapped function at x and flips the sign of the result.
    template <typename T>
    double operator()(const T& x) const
    {
        return -f(x);
    }

private:
    const funct& f;     // the wrapped function; referenced, never copied
};
// Factory helper: returns a negate_function_object wrapping f, i.e. a callable
// that evaluates to -f(x).  The result refers to f rather than copying it, so f
// must remain alive while the returned object is used.
template <typename funct>
const negate_function_object<funct> negate_function(
    const funct& f
)
{
    return negate_function_object<funct>(f);
}
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Functions that perform unconstrained optimization
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// Iteratively moves the column vector x towards a minimizer of f.
//
//   search_strategy - supplies the search direction and the line search settings
//                     (wolfe rho/sigma, max line search iterations)
//   stop_strategy   - asked before every iteration whether to keep searching
//   f, der          - the objective function and its derivative
//   x               - starting point on entry; updated in place
//   min_f           - the search also ends once the objective value is no longer
//                     greater than this
//
// Returns the objective value most recently recorded in the local f_value.
template <
    typename search_strategy_type,
    typename stop_strategy_type,
    typename funct,
    typename funct_der,
    typename T
    >
double find_min (
    search_strategy_type search_strategy,
    stop_strategy_type stop_strategy,
    const funct& f,
    const funct_der& der,
    T& x,
    double min_f
)
{
    // Compilation fails here if funct or funct_der is a plain function type.  Pass
    // a function object, or a pointer to the global function obtained with the &
    // operator.  (The check exists because gcc 4.0 has a bug that silently corrupts
    // return values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);
    COMPILE_TIME_ASSERT(is_function<funct_der>::value == false);
    COMPILE_TIME_ASSERT(is_matrix<T>::value);

    DLIB_ASSERT (
        is_col_vector(x),
        "\tdouble find_min()"
        << "\n\tYou have to supply column vectors to this function"
        << "\n\tx.nc(): " << x.nc()
        );

    T gradient;
    T direction;

    double f_value = f(x);
    gradient = der(x);

    for (;;)
    {
        // The stop strategy is always consulted first, then the min_f bound.
        if (!stop_strategy.should_continue_search(x, f_value, gradient))
            break;
        if (!(f_value > min_f))
            break;

        direction = search_strategy.get_next_direction(x, f_value, gradient);

        // f_value and gradient are handed to make_line_search_function() in
        // addition to f/der; presumably the line search writes the latest function
        // value and gradient back into them (confirm in optimization_line_search.h).
        const double step = line_search(
            make_line_search_function(f, x, direction, f_value),
            f_value,
            make_line_search_function(der, x, direction, gradient),
            dot(gradient, direction),   // initial gradient for the line search
            search_strategy.get_wolfe_rho(),
            search_strategy.get_wolfe_sigma(),
            min_f,
            search_strategy.get_max_line_search_iterations());

        // Take the search step indicated by the above line search
        x += step*direction;
    }

    return f_value;
}
// ----------------------------------------------------------------------------------------
// Iteratively moves the column vector x towards a maximizer of f.  This mirrors
// find_min(): it works on -f and -der so that the same minimization loop ends up
// searching for a maximum.
//
//   search_strategy - supplies the search direction and the line search settings
//                     (wolfe rho/sigma, max line search iterations)
//   stop_strategy   - asked before every iteration whether to keep searching; it
//                     is shown the negated function value and gradient
//   f, der          - the objective function and its derivative
//   x               - starting point on entry; updated in place
//   max_f           - the search also ends once -f_value is no longer below this
//
// Returns the (un-negated) objective value most recently recorded.
template <
    typename search_strategy_type,
    typename stop_strategy_type,
    typename funct,
    typename funct_der,
    typename T
    >
double find_max (
    search_strategy_type search_strategy,
    stop_strategy_type stop_strategy,
    const funct& f,
    const funct_der& der,
    T& x,
    double max_f
)
{
    // Compilation fails here if funct or funct_der is a plain function type.  Pass
    // a function object, or a pointer to the global function obtained with the &
    // operator.  (The check exists because gcc 4.0 has a bug that silently corrupts
    // return values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);
    COMPILE_TIME_ASSERT(is_function<funct_der>::value == false);
    COMPILE_TIME_ASSERT(is_matrix<T>::value);

    DLIB_ASSERT (
        is_col_vector(x),
        "\tdouble find_max()"
        << "\n\tYou have to supply column vectors to this function"
        << "\n\tx.nc(): " << x.nc()
        );

    T gradient;
    T direction;

    // Between iterations f_value and gradient hold the NEGATED objective value and
    // derivative, which turns the maximization into a minimization.
    double f_value = -f(x);
    gradient = -der(x);

    for (;;)
    {
        // The stop strategy is always consulted first, then the max_f bound.
        if (!stop_strategy.should_continue_search(x, f_value, gradient))
            break;
        if (!(f_value > -max_f))
            break;

        direction = search_strategy.get_next_direction(x, f_value, gradient);

        const double step = line_search(
            negate_function(make_line_search_function(f, x, direction, f_value)),
            f_value,
            negate_function(make_line_search_function(der, x, direction, gradient)),
            dot(gradient, direction),   // initial gradient for the line search
            search_strategy.get_wolfe_rho(),
            search_strategy.get_wolfe_sigma(),
            -max_f,
            search_strategy.get_max_line_search_iterations()
            );

        // Take the search step indicated by the above line search
        x += step*direction;

        // The line search outputs stored in gradient and f_value come from the
        // unnegated versions of f() and der(), so flip their signs to restore the
        // negated convention used by this loop.
        gradient *= -1;
        f_value *= -1;
    }

    // undo the negation so the caller sees the real objective value
    return -f_value;
}
// ----------------------------------------------------------------------------------------
// Same idea as find_min(), except that no derivative function is required: every
// derivative is approximated numerically via derivative(..., derivative_eps).
//
//   search_strategy - supplies the search direction and the line search settings
//                     (wolfe rho/sigma, max line search iterations)
//   stop_strategy   - asked before every iteration whether to keep searching
//   f               - the objective function
//   x               - starting point on entry; updated in place
//   min_f           - the search also ends once the objective value is no longer
//                     greater than this
//   derivative_eps  - finite difference step, must be > 0
//
// Returns the objective value most recently recorded in the local f_value.
template <
    typename search_strategy_type,
    typename stop_strategy_type,
    typename funct,
    typename T
    >
double find_min_using_approximate_derivatives (
    search_strategy_type search_strategy,
    stop_strategy_type stop_strategy,
    const funct& f,
    T& x,
    double min_f,
    double derivative_eps = 1e-7
)
{
    // Compilation fails here if funct is a plain function type.  Pass a function
    // object, or a pointer to the global function obtained with the & operator.
    // (The check exists because gcc 4.0 has a bug that silently corrupts return
    // values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);
    COMPILE_TIME_ASSERT(is_matrix<T>::value);

    DLIB_ASSERT (
        is_col_vector(x) && derivative_eps > 0,
        "\tdouble find_min_using_approximate_derivatives()"
        << "\n\tYou have to supply column vectors to this function"
        << "\n\tx.nc(): " << x.nc()
        << "\n\tderivative_eps: " << derivative_eps
        );

    T gradient;
    T direction;

    double f_value = f(x);
    gradient = derivative(f,derivative_eps)(x);

    for (;;)
    {
        // The stop strategy is always consulted first, then the min_f bound.
        if (!stop_strategy.should_continue_search(x, f_value, gradient))
            break;
        if (!(f_value > min_f))
            break;

        direction = search_strategy.get_next_direction(x, f_value, gradient);

        // f_value is handed to make_line_search_function() along with f; presumably
        // the line search writes the latest function value back into it (confirm in
        // optimization_line_search.h).  The derivative along the search line is a
        // numerical approximation of the line function itself.
        const double step = line_search(
            make_line_search_function(f, x, direction, f_value),
            f_value,
            derivative(make_line_search_function(f, x, direction), derivative_eps),
            // Initial gradient for the line search.  Sometimes the commented out
            // expression below is a better way of determining it.
            dot(gradient, direction),
            //derivative(make_line_search_function(f,x,s),derivative_eps)(0),
            search_strategy.get_wolfe_rho(),
            search_strategy.get_wolfe_sigma(),
            min_f,
            search_strategy.get_max_line_search_iterations()
            );

        // Take the search step indicated by the above line search
        x += step*direction;

        // re-approximate the gradient at the new location
        gradient = derivative(f,derivative_eps)(x);
    }

    return f_value;
}
// ----------------------------------------------------------------------------------------
// Maximizes f without a user supplied derivative.  Implemented by minimizing -f
// with find_min_using_approximate_derivatives() and flipping the sign of the
// value it returns.
//
//   search_strategy - forwarded unchanged to the minimizer
//   stop_strategy   - forwarded unchanged to the minimizer
//   f               - the objective function
//   x               - starting point on entry; updated in place by the minimizer
//   max_f           - forwarded to the minimizer as -max_f
//   derivative_eps  - finite difference step, must be > 0
template <
    typename search_strategy_type,
    typename stop_strategy_type,
    typename funct,
    typename T
    >
double find_max_using_approximate_derivatives (
    search_strategy_type search_strategy,
    stop_strategy_type stop_strategy,
    const funct& f,
    T& x,
    double max_f,
    double derivative_eps = 1e-7
)
{
    // Compilation fails here if funct is a plain function type.  Pass a function
    // object, or a pointer to the global function obtained with the & operator.
    // (The check exists because gcc 4.0 has a bug that silently corrupts return
    // values from functions that are invoked through a reference.)
    COMPILE_TIME_ASSERT(is_function<funct>::value == false);
    COMPILE_TIME_ASSERT(is_matrix<T>::value);

    DLIB_ASSERT (
        is_col_vector(x) && derivative_eps > 0,
        "\tdouble find_max_using_approximate_derivatives()"
        << "\n\tYou have to supply column vectors to this function"
        << "\n\tx.nc(): " << x.nc()
        << "\n\tderivative_eps: " << derivative_eps
        );

    // Negate the objective and the bound, then let the find_min version of this
    // function do the work.  The temporary produced by negate_function(f) stays
    // alive until the end of this full expression, i.e. for the whole call.
    const double negated_result = find_min_using_approximate_derivatives(
        search_strategy,
        stop_strategy,
        negate_function(f),
        x,
        -max_f,
        derivative_eps
        );

    return -negated_result;
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_OPTIMIZATIOn_H_