# -*- coding: utf-8 -*-
# Copyright (C) 2020-2023 by SCICO Developers
# All rights reserved. BSD 3-clause License.
# This file is part of the SCICO package. Details of the copyright and
# user license can be found in the 'LICENSE' file distributed with the
# package.
"""Proximal Gradient Method auxiliary classes."""
# Needed to annotate a class method that returns the encapsulating class;
# see https://www.python.org/dev/peps/pep-0563/
from __future__ import annotations
from typing import Optional, Union
import jax
import scico.numpy as snp
import scico.optimize.pgm as sop
from scico.numpy import Array, BlockArray
class PGMStepSize:
r"""Base class for computing the PGM step size.
Base class for computing the reciprocal of the step size for PGM
solvers.
The PGM solver implemented by :class:`.PGM` addresses a general
proximal gradient form that requires the specification of a step size
for the gradient descent step. This class is a base class for methods
that estimate the reciprocal of the step size (:math:`L` in PGM
equations).
Attributes:
pgm (:class:`.PGM`): PGM solver object to which the solver is
attached.
"""
[docs] def internal_init(self, pgm: sop.PGM):
"""Second stage initializer to be called by :meth:`.PGM.__init__`.
Args:
pgm: Reference to :class:`.PGM` object to which the
:class:`.StepSize` object is to be attached.
"""
self.pgm = pgm
[docs] def update(self, v: Union[Array, BlockArray]) -> float:
"""Hook for updating the step size in derived classes.
Hook for updating the reciprocal of the step size in derived
classes. The base class does not compute any update.
Args:
v: Current solution or current extrapolation (if accelerated
PGM).
Returns:
Current reciprocal of the step size.
"""
return self.pgm.L
class BBStepSize(PGMStepSize):
r"""Scheme for step size estimation based on Barzilai-Borwein method.
The Barzilai-Borwein method :cite:`barzilai-1988-stepsize` estimates
the step size :math:`\alpha` as
.. math::
\mb{\Delta x} = \mb{x}_k - \mb{x}_{k-1} \; \\
\mb{\Delta g} = \nabla f(\mb{x}_k) - \nabla f (\mb{x}_{k-1}) \; \\
\alpha = \frac{\mb{\Delta x}^T \mb{\Delta g}}{\mb{\Delta g}^T
\mb{\Delta g}} \;\;.
Since the PGM solver uses the reciprocal of the step size, the value
:math:`L = 1 / \alpha` is returned.
When applied to complex-valued problems, only the real part of the
inner product is used. When the inner product is negative, the
previous iterate is used instead.
Attributes:
pgm (:class:`.PGM`): PGM solver object to which the solver is
attached.
"""
def __init__(self):
"""Initialize a :class:`BBStepSize` object."""
self.xprev = None
self.gradprev = None
[docs] def update(self, v: Union[Array, BlockArray]) -> float:
"""Update the reciprocal of the step size.
Args:
v: Current solution or current extrapolation (if accelerated
PGM).
Returns:
Updated reciprocal of the step size.
"""
if self.xprev is None:
# Solution and gradient of previous iterate are required.
# For first iteration these variables are stored and current estimate is returned.
self.xprev = v
self.gradprev = self.pgm.f.grad(self.xprev)
L = self.pgm.L
else:
Δx = v - self.xprev
gradv = self.pgm.f.grad(v)
Δg = gradv - self.gradprev
# Taking real part of inner products in case of complex-value problem.
den = snp.real(snp.sum(Δx.conj() * Δg))
num = snp.real(snp.sum(Δg.conj() * Δg))
L = num / den
# Revert to previous iterate if update results in nan or negative value.
if snp.isnan(L) or L <= 0.0:
L = self.pgm.L
# Store current state and gradient for next update.
self.xprev = v
self.gradprev = gradv
return L
class AdaptiveBBStepSize(PGMStepSize):
r"""Adaptive Barzilai-Borwein method to determine step size.
Adaptive Barzilai-Borwein method to determine step size in PGM, as
introduced in :cite:`zhou-2006-adaptive`.
The adaptive step size rule computes
.. math::
\mb{\Delta x} = \mb{x}_k - \mb{x}_{k-1} \; \\
\mb{\Delta g} = \nabla f(\mb{x}_k) - \nabla f (\mb{x}_{k-1}) \; \\
\alpha^{\mathrm{BB1}} = \frac{\mb{\Delta x}^T \mb{\Delta x}}
{\mb{\Delta x}^T \mb{\Delta g}} \; \\
\alpha^{\mathrm{BB2}} = \frac{\mb{\Delta x}^T \mb{\Delta g}}
{\mb{\Delta g}^T \mb{\Delta g}} \;\;.
The determination of the new steps size is made via the rule
.. math::
\alpha = \left\{ \begin{matrix} \alpha^{\mathrm{BB2}} &
\mathrm{~if~} \alpha^{\mathrm{BB2}} / \alpha^{\mathrm{BB1}}
< \kappa \; \\
\alpha^{\mathrm{BB1}} & \mathrm{~otherwise} \end{matrix}
\right . \;,
with :math:`\kappa \in (0, 1)`.
Since the PGM solver uses the reciprocal of the step size, the value
:math:`L = 1 / \alpha` is returned.
When applied to complex-valued problems, only the real part of the
inner product is used. When the inner product is negative, the
previous iterate is used instead.
Attributes:
pgm (:class:`.PGM`): PGM solver object to which the solver is
attached.
"""
def __init__(self, kappa: float = 0.5):
r"""Initialize a :class:`AdaptiveBBStepSize` object.
Args:
kappa : Threshold for step size selection :math:`\kappa`.
"""
self.kappa: float = kappa
self.xprev: Union[Array, BlockArray] = None
self.gradprev: Union[Array, BlockArray] = None
self.Lbb1prev: Optional[float] = None
self.Lbb2prev: Optional[float] = None
[docs] def update(self, v: Union[Array, BlockArray]) -> float:
"""Update the reciprocal of the step size.
Args:
v: Current solution or current extrapolation (if accelerated
PGM).
Returns:
Updated reciprocal of the step size.
"""
if self.xprev is None:
# Solution and gradient of previous iterate are required.
# For first iteration these variables are stored and current estimate is returned.
self.xprev = v
self.gradprev = self.pgm.f.grad(self.xprev)
L = self.pgm.L
else:
Δx = v - self.xprev
gradv = self.pgm.f.grad(v)
Δg = gradv - self.gradprev
# Taking real part of inner products in case of complex-value problem.
innerxx = snp.real(snp.sum(Δx.conj() * Δx))
innerxg = snp.real(snp.sum(Δx.conj() * Δg))
innergg = snp.real(snp.sum(Δg.conj() * Δg))
Lbb1 = innerxg / innerxx
# Revert to previous iterate if computation results in nan or negative value.
if snp.isnan(Lbb1) or Lbb1 <= 0.0:
Lbb1 = self.Lbb1prev
Lbb2 = innergg / innerxg
# Revert to previous iterate if computation results in nan or negative value.
if snp.isnan(Lbb2) or Lbb2 <= 0.0:
Lbb2 = self.Lbb2prev
# If possible, apply adaptive selection rule, if not, revert to previous iterate
if Lbb1 is not None and Lbb2 is not None:
if (Lbb1 / Lbb2) < self.kappa:
L = Lbb2
else:
L = Lbb1
else:
L = self.pgm.L
# Store current state and gradient for next update.
self.xprev = v
self.gradprev = gradv
# Store current estimates of Barzilai-Borwein 1 (Lbb1) and Barzilai-Borwein 2 (Lbb2).
self.Lbb1prev = Lbb1
self.Lbb2prev = Lbb2
return L
class LineSearchStepSize(PGMStepSize):
r"""Line search for estimating the step size for PGM solvers.
Line search for estimating the reciprocal of step size for PGM
solvers. The line search strategy described in :cite:`beck-2009-fast`
estimates :math:`L` such that :math:`f(\mb{x}) <= \hat{f}_{L}(\mb{x})`
is satisfied with :math:`\hat{f}_{L}` a quadratic approximation to
:math:`f` defined as
.. math::
\hat{f}_{L}(\mb{x}, \mb{y}) = f(\mb{y}) + \nabla f(\mb{y})^H
(\mb{x} - \mb{y}) + \frac{L}{2} \left\| \mb{x} - \mb{y}
\right\|_2^2 \;,
with :math:`\mb{x}` the potential new update and :math:`\mb{y}` the
current solution or current extrapolation (if accelerated PGM).
Attributes:
pgm (:class:`.PGM`): PGM solver object to which the solver is
attached.
"""
def __init__(self, gamma_u: float = 1.2, maxiter: int = 50):
r"""Initialize a :class:`LineSearchStepSize` object.
Args:
gamma_u: Rate of increment in :math:`L`.
maxiter: Maximum iterations in line search.
"""
self.gamma_u: float = gamma_u
self.maxiter: int = maxiter
def g_prox(v, gradv, L):
return self.pgm.g.prox(v - 1.0 / L * gradv, 1.0 / L)
self.g_prox = jax.jit(g_prox)
[docs] def update(self, v: Union[Array, BlockArray]) -> float:
"""Update the reciprocal of the step size.
Args:
v: Current solution or current extrapolation (if accelerated
PGM).
Returns:
Updated reciprocal of the step size.
"""
gradv = self.pgm.f.grad(v)
L = self.pgm.L
it = 0
while it < self.maxiter:
z = self.g_prox(v, gradv, L)
fz = self.pgm.f(z)
fquad = self.pgm.f_quad_approx(z, v, L)
if fz <= fquad:
break
else:
L *= self.gamma_u
it += 1
return L
class RobustLineSearchStepSize(LineSearchStepSize):
r"""Robust line search for estimating the accelerated PGM step size.
A robust line search for estimating the reciprocal of step size for
accelerated PGM solvers.
The robust line search strategy described in :cite:`florea-2017-robust`
estimates :math:`L` such that :math:`f(\mb{x}) <= \hat{f}_{L}(\mb{x})`
is satisfied with :math:`\hat{f}_{L}` a quadratic approximation to
:math:`f` defined as
.. math::
\hat{f}_{L}(\mb{x}, \mb{y}) = f(\mb{y}) + \nabla f(\mb{y})^H
(\mb{x} - \mb{y}) + \frac{L}{2} \left\| \mb{x} - \mb{y}
\right\|_2^2 \;,
with :math:`\mb{x}` the potential new update and :math:`\mb{y}` the
auxiliary extrapolation state.
Attributes:
pgm (:class:`.PGM`): PGM solver object to which the solver is
attached.
"""
def __init__(self, gamma_d: float = 0.9, gamma_u: float = 2.0, maxiter: int = 50):
r"""Initialize a :class:`RobustLineSearchStepSize` object.
Args:
gamma_d: Rate of decrement in :math:`L`.
gamma_u: Rate of increment in :math:`L`.
maxiter: Maximum iterations in line search.
"""
super(RobustLineSearchStepSize, self).__init__(gamma_u, maxiter)
self.gamma_d: float = gamma_d
self.Tk: float = 0.0
# State needed for computing auxiliary extrapolation sequence in robust line search.
self.Zrb: Union[Array, BlockArray] = None
#: Current estimate of solution in robust line search.
self.Z: Union[Array, BlockArray] = None
[docs] def update(self, v: Union[Array, BlockArray]) -> float:
"""Update the reciprocal of the step size.
Args:
v: Current solution or current extrapolation (if accelerated
PGM).
Returns:
Updated reciprocal of the step size.
"""
if self.Zrb is None:
self.Zrb = self.pgm.x
L = self.pgm.L * self.gamma_d
it = 0
while it < self.maxiter:
t = (1.0 + snp.sqrt(1.0 + 4.0 * L * self.Tk)) / (2.0 * L)
T = self.Tk + t
# Auxiliary extrapolation sequence.
y = (self.Tk * self.pgm.x + t * self.Zrb) / T
# New update based on auxiliary extrapolation and current L estimate.
z = self.pgm.x_step(y, L)
fz = self.pgm.f(z)
fquad = self.pgm.f_quad_approx(z, y, L)
if fz <= fquad:
break
else:
L *= self.gamma_u
it += 1
self.Tk = T
self.Zrb += t * L * (z - y)
self.Z = z
return L