import numpy as np

# A small set covering instance: c[j] is the cost of column (set) j,
# b is the coverage requirement, and A[i, j] = 1 iff column j covers row i.
c = np.array([2, 3, 4, 5])
b = np.array([1, 1, 1])
A = np.array([[1, 0, 1, 0],
              [1, 0, 0, 1],
              [0, 1, 1, 1]])
For reference, let's first solve the original problem and its linear relaxation with Gurobi. We use a matrix representation of the problem, which Gurobi handles directly.
import gurobipy as gp

def solve_SCP(vtype=gp.GRB.BINARY):
    try:
        # Model
        m = gp.Model("SCP")
        m.setParam(gp.GRB.Param.OutputFlag, 0)
        # Create variables
        x = m.addMVar(shape=c.shape[0], vtype=vtype, name="x")
        # Set objective
        m.setObjective(c @ x, gp.GRB.MINIMIZE)
        # Add constraints
        m.addConstr(A @ x >= b, name="c")
        # Optimize model
        m.optimize()
        print(x.X)
        print('Obj: %g' % m.objVal)
    except gp.GurobiError as e:
        print('Error code ' + str(e.errno) + ": " + str(e))
    except AttributeError:
        print('Encountered an attribute error')
solve_SCP()
solve_SCP(vtype=gp.GRB.CONTINUOUS)
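For this small instance both runs report an optimal value of 5: the binary model yields $x=(1,1,0,0)$, and the LP relaxation attains the same optimal value (easily checked via LP duality), so the LP bound is already tight here.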
We relax all three constraints: $$ \begin{aligned} z_{LB}=\min &\sum_{j=1}^n c_j x_j - \sum_{i=1}^m\lambda_i\left(\sum_{j=1}^n a_{ij} x_j-1\right)\\ &x_j\in \{0,1\} \qquad j=1,\ldots,n \end{aligned} $$ Collecting the coefficient of each $x_j$: $$ \begin{aligned} z_{LB}=\min &\sum_{j=1}^n\left[c_j - \sum_{i=1}^m \lambda_i a_{ij}\right]x_j + \sum^m_{i=1}\lambda_i\\ &x_j\in \{0,1\} \qquad j=1,\ldots,n \end{aligned} $$
Defining $C_j = c_j - \sum_{i=1}^m \lambda_i a_{ij}$ for $j=1,\ldots,n$, i.e., $C_j$ is the coefficient of $x_j$ in the objective function of LR, we have that LR becomes: $$ \begin{aligned} z_{LB}=\min &\sum_{j=1}^n C_jx_j + \sum_{i=1}^m \lambda_i\\ & x_j\in \{0,1\} \qquad j=1,\ldots,n \end{aligned} $$
Now the solution $x_j$ to LR can be found by inspection, namely: $$ x_j= \begin{cases} 1 & \text{if } C_j \leq 0\\ 0 & \text{otherwise} \end{cases} $$ with the solution value $z_{LB}$ of LR given by: $$z_{LB} = \sum_{j=1}^n C_jx_j + \sum_{i=1}^m\lambda_i$$ where $z_{LB}$ is a lower bound on the optimal solution value of the original SCP.
def solve_lagrangian(lambdas):
    # Lagrangian cost of each column: C_j = c_j - sum_i lambda_i * a_ij
    C = c - lambdas @ A
    # Solve LR by inspection: select column j iff C_j <= 0
    x = np.zeros(c.shape[0])
    x[C <= 0] = 1
    z_LB = C @ x + np.sum(lambdas)
    return (x, z_LB)
lambdas = np.array([1.5,1.6,2.2])
solve_lagrangian(lambdas)
The solution is not feasible for the original problem, since the third row is not covered. Hence, it cannot be optimal either.
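We can verify this with a quick numerical check on the instance data:
x, z_LB = solve_lagrangian(lambdas)
print(A @ x >= b)  # the third entry is False: row 3 is not covered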
A common fallacy in Lagrangean relaxation is to believe that, if the solution to LR is feasible for the original problem, then it is also optimal for the original problem. This is incorrect, as the next example shows.
solve_lagrangian(np.array([10., 10., 10.]))
The solution is feasible but not optimal for the original problem: substituting $x$ into the objective function of the original problem we obtain a value of 14, which is not the optimal value (we know that the optimal value is 5).
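Again, this is easy to check numerically:
x, z_LB = solve_lagrangian(np.array([10., 10., 10.]))
print(np.all(A @ x >= b))  # True: feasible for the original problem
print(c @ x)               # 14.0, far from the optimal value 5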
A solution $x$ to a Lagrangean lower bound program is only optimal for the original problem if:
1. $x$ is feasible for the original problem; and
2. $cx = cx + \lambda(b - Ax)$, i.e., $\lambda(b - Ax) = 0$ (complementary slackness).
If we are relaxing equality constraints ($Ax=b$) then any solution to the Lagrangean lower bound program which is feasible for the original problem automatically satisfies both 1. and 2. above and so is optimal. But with inequality constraints we can have 1. without 2.
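These two conditions are easy to test in code. A minimal sketch for our instance (the helper name is ours, not from the text):
def is_optimal_for_original(x, lambdas):
    # Condition 1: feasibility for the original problem
    feasible = np.all(A @ x >= b)
    # Condition 2: complementary slackness, lambda @ (b - A x) = 0
    slack = np.isclose(lambdas @ (b - A @ x), 0)
    return feasible and slack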
We now set out to find the values of $\vec \lambda$ that yield the best lower bound from the Lagrangian lower bound problem. That is, we solve the Lagrangian dual problem. Note that the text writes the update of $\lambda$ as: $$\lambda_i=\max\{\lambda_i+\theta g_i,0\} \text{ for } i=1,\ldots,m$$ while in the slides we write: $$\lambda_i=\max\{\lambda_i-\theta g_i,0\} \text{ for } i=1,\ldots,m$$ where $g_i = b_i - \sum_{j=1}^n a_{ij}x_j$ is the $i$-th component of the subgradient and $\theta$ the step size. The first form is for a maximization problem, as is the case here, while the second is for a minimization problem.
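In vectorized NumPy form, the maximization update reads as follows (assuming `x` is the current LR solution and `theta` the current step size):
g = b - A @ x                                 # subgradient at x
lambdas = np.maximum(lambdas + theta * g, 0)  # project onto lambda >= 0
This is exactly the update performed componentwise in the subgradient iteration implemented below.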
We will need to calculate an upper bound. For this, we design a heuristic procedure that can also be used to derive a feasible solution for the original problem from a solution to the LR problem.
def heuristic_sol(x):
    # Greedy repair heuristic (inefficient implementation): while some row
    # is uncovered, add the cheapest column covering the first uncovered row.
    x_h = np.copy(x)
    uncovered = A @ x_h - b < 0
    while np.any(uncovered):
        i = np.where(uncovered)[0][0]
        potential = [j for j in range(A.shape[1]) if A[i, j] > 0 and x_h[j] < 1]
        x_h[potential[np.argmin(c[potential])]] = 1
        uncovered = A @ x_h - b < 0
    z_UB = c @ x_h
    return x_h, z_UB
x_h, z_UB = heuristic_sol(np.array([0, 0, 0, 0]))
x_h, z_UB
Here the solution found is already optimal.
import math

def subgradient_iteration(lambdas, z_UB, epsilon=0.1, mu=2):
    # Note: lambdas is modified in place.
    x, z_LR = solve_lagrangian(lambdas)
    subgradient = b - A @ x
    # Stop if the subgradient is essentially zero
    if np.linalg.norm(subgradient) < 0.002:
        return x, z_LR, subgradient, lambdas
    # Polyak-type step size
    theta = mu * (z_UB - z_LR) / np.sum(subgradient**2)
    for i in range(lambdas.shape[0]):
        if np.abs(subgradient[i]) > epsilon:
            lambdas[i] = max(lambdas[i] + theta * subgradient[i], 0)
    print(f"subgradient: {subgradient}, step: {subgradient*theta}, lambdas: {lambdas}")
    return x, z_LR, subgradient, lambdas
Let's try one single iteration of the subgradient optimization algorithm so that we can check the calculations by hand:
subgradient_iteration(np.array([1.5, 1.6, 2.2]), 6)
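Working through the numbers by hand: $C = c - \lambda A = (-1.1,\ 0.8,\ 0.3,\ 1.2)$, so $x=(1,0,0,0)$ and $z_{LR} = -1.1 + (1.5+1.6+2.2) = 4.2$. The subgradient is $g = b - Ax = (0,0,1)$, hence $\theta = 2\,(6-4.2)/1 = 3.6$ and only $\lambda_3$ is updated: $\lambda_3 = \max\{2.2 + 3.6,\, 0\} = 5.8$.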
The iterative procedure is given below:
%matplotlib inline
import matplotlib.pyplot as plt

def solve_lagrangian_dual(UB=math.inf, lambdas=np.zeros(A.shape[0], dtype='float'), mu=2, iterations=10):
    z_LR = np.zeros(iterations)
    z_LB = np.zeros(iterations)
    z_HR = np.zeros(iterations, dtype='float')
    z_UB = np.zeros(iterations + 1, dtype='float')
    _lambdas = [lambdas.copy()]  # copy: subgradient_iteration mutates lambdas in place
    z_LB[-1] = -math.inf  # so that z_LB[t-1] is well defined at t = 0
    if math.isinf(UB):
        # no upper bound supplied: compute one with the greedy heuristic
        z_UB[0] = heuristic_sol(np.zeros(c.shape[0]))[1]
    else:
        z_UB[0] = UB
    for t in range(iterations):
        x, z_LR[t], subgradient, lambdas = subgradient_iteration(lambdas, z_UB[t], mu=mu)
        if np.linalg.norm(subgradient) < 0.002:
            iterations = t
            break
        z_HR[t] = heuristic_sol(x)[1]
        z_LB[t] = max(z_LR[t], z_LB[t-1])
        z_UB[t+1] = min(z_HR[t], z_UB[t])
        _lambdas.append(lambdas.copy())
        if t % math.ceil(iterations/2) == 0:
            print("mu halved")
            mu = mu / 2
    plt.plot(range(iterations), z_LR[:iterations], marker='o', color='blue', label="z_LR")
    plt.plot(range(iterations), z_LB[:iterations], marker='o', color='red', label="z_LB")
    plt.plot(range(iterations), z_UB[:iterations], marker='o', color='green', label="z_UB")
    plt.plot(range(iterations), z_HR[:iterations], marker='o', color='magenta', label="z_HR")
    plt.ylabel('z')
    plt.xlabel('iteration')
    plt.legend()
    plt.show()
    plt.plot(range(iterations), [l[0] for l in _lambdas[:-1]], 'o-')
    plt.plot(range(iterations), [l[1] for l in _lambdas[:-1]], 'o-')
    plt.plot(range(iterations), [l[2] for l in _lambdas[:-1]], 'o-')
    plt.xlabel('iteration')
    plt.ylabel('lambdas')
    plt.show()
solve_lagrangian_dual(UB=6, lambdas=np.array([1., 1., 1.]), mu=1)
After some tuning of the initial parameters we get the desired outcome: the Lagrangian dual converges towards a maximum that, for this problem, corresponds to the optimal solution of the original problem. Note that this is a lucky case. Indeed, there are two conditions that appear here: