Commit 5da049e4 authored by Niklas Rieken

finished theory part, w/o proofs and figures

parent e58e79b0
@@ -16,6 +16,7 @@
\usetikzlibrary{calc}
\usepackage{listings}
\usepackage{enumitem}
\usepackage{algorithm2e}
\usepackage{wrapfig}
%\usepackage{bbold}
\usepackage{bbm}
@@ -27,6 +28,7 @@
\let\epsilon\varepsilon
\let\rho\varrho
\let\theta\vartheta
\let\phi\varphi
\DeclareMathOperator{\spa}{span}
\DeclareMathOperator{\aff}{aff}
\DeclareMathOperator{\conv}{conv}
@@ -403,7 +405,194 @@ Let $f\colon \mathbb{R}^n \to \mathbb{R}^m$ be a function and $K$ be a proper cone
Without further discussion, we state that many results for convex functions extend to $K$-convexity.
\subsection{Convex Optimization}
We consider general optimization problems in \emph{standard form}:
\begin{align*}
\text{minimize } & f_0(x)\\
\text{subject to } & f_i(x) \leq 0 & i \in \{1, \ldots, m\}\\
& h_i(x) = 0 & i \in \{1, \ldots, p\}.
\end{align*}
A feasible point $x$ is \emph{(globally) optimal} if $f_0(x) = \inf \{f_0(y) : y \text{ feasible}\}$. It is \emph{locally optimal} if there exists $R > 0$ such that $x$ is optimal among all feasible $z$ with $\norm{z-x} \leq R$. The \emph{feasibility problem} can be stated in standard form:
\begin{align*}
\text{minimize } & 0\\
\text{subject to } & f_i(x) \leq 0 & i \in \{1, \ldots, m\}\\
& h_i(x) = 0 & i \in \{1, \ldots, p\}.
\end{align*}
An optimization problem is called \emph{convex} if $f_i$ is convex for all $i \in \{0, \ldots, m\}$ and $h_i$ is affine for all $i \in \{0, \ldots, p\}$.
\begin{theorem}
Any locally optimal point of a convex optimization problem is globally optimal.
%TODO proof
\end{theorem}
For a convex problem with differentiable $f_0$, a point $x$ is optimal iff
\begin{enumerate}
\item $x$ is feasible and
\item $\nabla f_0(x)^T (y-x) \geq 0$ for all feasible $y$.
\end{enumerate}
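For instance, in the unconstrained case every $y$ is feasible; choosing $y = x - t\nabla f_0(x)$ for small $t > 0$ turns the second condition into $-t\norm{\nabla f_0(x)}^2 \geq 0$, so the criterion reduces to the familiar condition $\nabla f_0(x) = 0$.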
\begin{figure}
%TODO iso contours and gradient directions
\end{figure}
In order to simplify convex problems, we can use some common transformations:
\begin{itemize}
\item Eliminating equality constraints ($Ax = b \iff x = Fz + x_0$ for a particular solution $x_0$ and a matrix $F$ whose columns span $\ker A$; then minimize over $z$); see the small example after this list.
\item Introducing equality constraints.
\item Introducing slack variables for linear inequalities $a_i^T x + s_i = b_i, s_i \geq 0$.
\item Epigraph form: minimize $t$ subject to $f_0(x) - t \leq 0$, $f_i(x) \leq 0$ for $i \in \{1, \ldots, m\}$, and $Ax = b$.
\end{itemize}
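As a small example of the first transformation, consider minimizing some $f_0(x_1, x_2)$ subject to $x_1 + x_2 = 1$. A particular solution is $x_0 = (1, 0)^T$ and the null space of $A = \begin{pmatrix}1 & 1\end{pmatrix}$ is spanned by $(1, -1)^T$, so every feasible point has the form $x = x_0 + z(1, -1)^T$, and the problem becomes the unconstrained minimization of $f_0(1 + z, -z)$ over $z \in \mathbb{R}$.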
A \emph{quasiconvex optimization problem} in standard form reads
\begin{align*}
\text{minimize } & f_0(x)\\
\text{subject to } & f_i(x) \leq 0 & i \in \{1, \ldots, m\}\\
& Ax = b
\end{align*}
with $f_0$ quasiconvex, and $f_i$ convex for all $i \in \{1, \ldots, m\}$.
\begin{theorem}
If $f_0$ is quasiconvex, then there exists a family of functions $(\phi_t)_t$ such that
\begin{enumerate}
\item $\phi_t(x)$ is convex in $x$ (for fixed $t$) and
\item the $t$-sublevel set of $f_0$ is the $0$-sublevel set of $\phi_t$, i.e.\ $f_0(x) \leq t$ iff $\phi_t(x) \leq 0$.
\end{enumerate}
%TODO proof
\end{theorem}
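A standard way to obtain such a family: if $f_0(x) = \frac{p(x)}{q(x)}$ with $p$ convex, $p(x) \geq 0$, $q$ concave and $q(x) > 0$ on the domain, then $f_0$ is quasiconvex, and for $t \geq 0$ the function $\phi_t(x) = p(x) - t\, q(x)$ has both properties: it is convex in $x$, and since $q(x) > 0$ we have $f_0(x) \leq t$ iff $\phi_t(x) \leq 0$.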
We can solve quasiconvex (and hence also convex) problems via bisection on $t$, solving a feasibility problem in each step:
\begin{align*}
\text{minimize } & 0\\
\text{subject to } & f_i(x) \leq 0 & i \in \{1, \ldots, m\}\\
& Ax = b\\
& \phi_t(x) \leq 0
\end{align*}
If this problem is feasible for a fixed $t$, then $t \geq p^\ast$ (the optimal value); if it is infeasible, then $t < p^\ast$.
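A minimal sketch of the resulting bisection method, assuming an initial interval $[l, u]$ with $l \leq p^\ast \leq u$ and a tolerance $\epsilon > 0$ are given:
\begin{algorithm}
	\While{$u - l > \epsilon$}{
		$t \coloneqq \frac{l + u}{2}$.\\
		Solve the feasibility problem above for this $t$.\\
		\eIf{feasible}{
			$u \coloneqq t$.
		}{
			$l \coloneqq t$.
		}
	}
\end{algorithm}
This requires exactly $\left\lceil \log_2 \frac{u - l}{\epsilon} \right\rceil$ iterations.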
We now approach the main theorems of convex optimization, which show that in many cases optimality of a given solution can be certified via duality.
Given a standard form problem, not necessarily convex
\begin{align*}
\text{minimize } & f_0(x)\\
\text{subject to } & f_i(x) \leq 0 & i \in \{1, \ldots, m\}\\
& h_i(x) = 0 & i \in \{1, \ldots, p\}
\end{align*}
with $x \in D \subseteq \mathbb{R}^n$ and optimal value $p^\ast$, we define the \emph{Lagrangian}
$$
L\colon \mathbb{R}^n \times \mathbb{R}^m \times \mathbb{R}^p \to \mathbb{R}
$$
with $\dom L = D \times \mathbb{R}^m \times \mathbb{R}^p$ by
$$
L(x, \lambda, \nu) = f_0(x) + \sum_{i=1}^m \lambda_i f_i(x) + \sum_{i=1}^p \nu_i h_i(x)
$$
and the \emph{Lagrange dual function} $g\colon \mathbb{R}^m \times \mathbb{R}^p \to \mathbb{R}$ by
$$
g(\lambda, \nu) = \inf_{x \in D} \underbrace{L(x, \lambda, \nu)}_{\text{affine in } (\lambda, \nu)}.
$$
Note that $g$ is concave. We call $\lambda, \nu$ \emph{Lagrange dual variables} (or \emph{Lagrange multipliers}).
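As a small worked example without inequality constraints, consider minimizing $x^T x$ subject to $Ax = b$. The Lagrangian is $L(x, \nu) = x^T x + \nu^T (Ax - b)$; setting its gradient w.r.t.\ $x$ to zero gives $x = -\frac{1}{2} A^T \nu$ and hence
$$
g(\nu) = -\frac{1}{4} \nu^T A A^T \nu - b^T \nu,
$$
which is indeed a concave function of $\nu$.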
\begin{theorem}[Lower Bound Property]
If $\lambda \succeq 0$, then $g(\lambda, \nu) \leq p^\ast$.
%TODO proof
\end{theorem}
The following computation shows how the dual function and the conjugate relate for the problem of minimizing $f_0(x)$ subject to $Ax \preceq b$, $Cx = d$.
\begin{align*}
g(\lambda, \nu) &= \inf_{x \in D} \left(f_0(x) + (A^T\lambda + C^T\nu)^Tx - b^T\lambda - d^T\nu\right)\\
&= -f_0^\ast (-A^T\lambda - C^T\nu) - b^T\lambda - d^T\nu.
\end{align*}
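For instance, for a linear objective $f_0(x) = c^T x$ we have $f_0^\ast(y) = 0$ if $y = c$ and $f_0^\ast(y) = \infty$ otherwise, so the formula above gives
$$
g(\lambda, \nu) = \begin{cases} -b^T\lambda - d^T\nu & \text{if } A^T\lambda + C^T\nu + c = 0,\\ -\infty & \text{otherwise.} \end{cases}
$$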
The \emph{Lagrange dual problem} is given by
\begin{align*}
\text{maximize } & g(\lambda, \nu)\\
\text{subject to } & \lambda \succeq 0.
\end{align*}
Solving the Lagrange dual problem yields the best such lower bound on $p^\ast$, which we denote by $d^\ast$. Note that the dual problem is always convex even if the primal problem is not. The Lagrange dual variables $\lambda, \nu$ are \emph{dual feasible} if $\lambda \succeq 0$ and $(\lambda, \nu) \in \dom g$.
\begin{theorem}[Weak Duality]
$d^\ast \leq p^\ast$.
%TODO proof
\end{theorem}
Weak duality always holds, for convex and non-convex problems alike, and can be used to find non-trivial lower bounds for difficult problems. We call the difference $p^\ast - d^\ast$ between the primal and the dual optimal value the \emph{(optimal) duality gap}.
The following statement does not hold in general, but it usually holds for convex problems.
\begin{theorem}[Strong Duality]
$d^\ast = p^\ast$.
\end{theorem}
To make the ``usually'' in the previous statement more precise, consider \emph{Slater's constraint qualification}: Strong duality holds for a convex problem in standard form if it is strictly feasible, i.e.\ there exists $x \in \inter D$ such that $f_i(x) < 0$ for all $i \in \{1, \ldots, m\}$ and $Ax = b$.
Another strong connection between the primal and the dual problem is due to the following theorem.
\begin{theorem}[Complementary Slackness]
Assume strong duality holds and $x^\ast, \lambda^\ast, \nu^\ast$ are optimal for the primal, respectively, dual problem. Then $\lambda^\ast_i f_i(x^\ast) = 0$ for all $i \in \{1, \ldots, m\}$.
\begin{proof}
\begin{align*}
f_0(x^\ast) &= g(\lambda^\ast, \nu^\ast)\\
&= \inf_{x} \left(f_0(x) + \sum_{i=1}^m \lambda_i^\ast f_i(x) + \sum_{i=1}^p \nu_i^\ast h_i(x)\right)\\
&\leq f_0(x^\ast) + \underbrace{\sum_{i=1}^m \lambda_i^\ast f_i(x^\ast)}_{\leq 0} + \underbrace{\sum_{i=1}^p \nu_i^\ast h_i(x^\ast)}_{= 0}\\
&\leq f_0(x^\ast).
\end{align*}
Hence equality holds throughout the chain; since each term $\lambda_i^\ast f_i(x^\ast)$ is nonpositive, every single term must be zero.
\end{proof}
\end{theorem}
Strong duality and the absence of a feasible descent direction at optimal points lead to the next theorem, which generalizes the first-order optimality condition $f'(x) = 0$ (not to be confused with the first-order convexity condition from the previous section).
\begin{theorem}[Karush, Kuhn, Tucker]
Assume strong duality holds and $x, \lambda, \nu$ are optimal. Then
\begin{enumerate}
\item primal feasibility, i.e. $f_i(x) \leq 0$ for $i \in \{1, \ldots, m\}$, $h_i(x) = 0$ for $i \in \{1, \ldots, p\}$ holds,
\item dual feasibility, i.e. $\lambda_i \geq 0$ for $i \in \{1, \ldots, m\}$ holds,
\item complementary slackness, i.e. $\lambda_i f_i(x) = 0$ for $i \in \{1, \ldots, m\}$ holds, and
\item the gradient of the Lagrangian w.r.t.\ $x$ vanishes, i.e.
$$
\nabla f_0(x) + \sum_{i=1}^m \lambda_i \nabla f_i(x) + \sum_{i=1}^p \nu_i \nabla h_i(x) = 0.
$$
\end{enumerate}
%TODO proof
\end{theorem}
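As an illustration, consider the equality-constrained quadratic program of minimizing $\frac{1}{2} x^T P x + q^T x$ subject to $Ax = b$ with $P$ positive semidefinite (here Slater's condition reduces to feasibility, so strong duality holds). There are no inequality constraints, so the KKT conditions reduce to primal feasibility together with the vanishing gradient of the Lagrangian, i.e.\ the linear system
$$
\begin{pmatrix} P & A^T\\ A & 0 \end{pmatrix} \begin{pmatrix} x\\ \nu \end{pmatrix} = \begin{pmatrix} -q\\ b \end{pmatrix},
$$
whose solutions are exactly the optimal primal-dual pairs.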
\begin{figure}
%TODO again isolines and gradients
\end{figure}
We can even solve the primal problem via the dual: Assume we have optimal duals $(\lambda^\ast, \nu^\ast)$. Then it suffices to minimize $f_0(x) + \sum_{i=1}^m \lambda_i^\ast f_i(x) + \sum_{i=1}^p \nu_i^\ast h_i(x)$ over $x$; if the minimizer is unique and primal feasible, it is primal optimal.
\begin{lemma}
Let $x$ be primal feasible and $(\lambda, \nu)$ dual feasible. Then
$$
f_0(x) - p^\ast \leq f_0(x) - g(\lambda, \nu).
$$
%TODO proof
\end{lemma}
Thus, $p^\ast \in [g(\lambda, \nu), f_0(x)]$ and $d^\ast \in [g(\lambda, \nu), f_0(x)]$, i.e.\ a duality gap $f_0(x) - g(\lambda, \nu)$ of $0$ certifies that $x$ is optimal.
\section{Algorithms}
For unconstrained problems, we can simply use the optimality condition $\nabla f_0(x^\ast) = 0$ as a starting point. We briefly describe a few algorithms that compute a solution numerically; solving exactly is in general not possible.
The main idea of our algorithms is very generic and can be described in a few lines of pseudocode.
\begin{algorithm}
\While{stopping criterion not met}{
Determine descent direction $\Delta x$.\\
Line search: choose step size $t$.\\
$x \coloneqq x + t \Delta x$.
}
\end{algorithm}
This generic approach works for convex problems: from $\nabla f(x^{(k)})^T (y - x^{(k)}) \geq 0$ we can derive $f(y) \geq f(x^{(k)})$ by the first-order convexity condition. Hence, the search direction must satisfy $\nabla f(x^{(k)})^T \Delta x^{(k)} < 0$.
For the line search there exist two general approaches.
\begin{description}
\item[exact line search] $t \coloneqq \arg\min_{t>0} f(x+t\Delta x)$ via binary search
\item[backtracking line search] with parameters $\alpha \in \left]0, \frac{1}{2}\right[, \beta \in \left]0, 1\right[$. Starting from $t = 1$, backtrack until the sufficient decrease condition $f(x + t\Delta x) < f(x) + \alpha t \nabla f(x)^T \Delta x$ holds.
\vspace{-2em}
\begin{algorithm}
$t \coloneqq 1$.\\
\While{$f(x+t\Delta x) \geq f(x) + \alpha t \nabla f(x)^T \Delta x$}{
$t \coloneqq \beta t$.
}
\end{algorithm}
\vspace{-2em}
\end{description}
\begin{figure}
%TODO line searches
\end{figure}
To determine the descent direction, the obvious starting point is the gradient descent method, that is, choose $\Delta x = -\nabla f(x)$.
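For a first impression of its behaviour, consider the quadratic $f(x) = \frac{1}{2} x^T Q x$ with $Q$ symmetric positive definite: with a fixed step size $t$, gradient descent performs the update $x^{(k+1)} = (I - tQ) x^{(k)}$, which converges to the minimizer $x^\ast = 0$ exactly when $0 < t < \frac{2}{\lambda_{\max}(Q)}$.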
% eof
\end{document}