Using CVXPY to solve a convex problem but getting an optimal_inaccurate result


When I use cvxpy to solve the convex problem below, I get a confusing result. The source code is:

import numpy as np
import cvxpy as cp

eta = 10e-27

# CVXPY to solve convex problem
def mycvx1(A,ds,ts,tr,Cm):

    M=A.shape[0]
    N=A.shape[1]

    def sum_cost(f):
        sum_cost=0
        for m in range(M):
            for n in range(N):
                sum_cost+=A[m][n]*(ts[0][n]*eta*f[m][n]**2+ds[0][n]/tr[m][n]+ts[0][n]*cp.inv_pos(f[m][n])) # the first optimization goal
                #sum_cost += A[m][n] * (ts[0][n] * eta * f[m][n] ** 2 + ds[0][n] / tr[m][n] + ts[0][n] * cp.inv_pos(f[m][n])) + (1-A[m][n])*f[m][n]**2 # the second optimization goal for mycvx2 function
        return sum_cost

    f=cp.Variable((M,N))
    objective = cp.Minimize(sum_cost(f))
    constraints = [ f >= 0 , cp.sum(f,axis=1) <= Cm.reshape((M,))]
    prob = cp.Problem(objective, constraints)
    prob.solve()
    print(prob.status)
    return prob.value,f.value

def mycvx2(A,ds,ts,tr,Cm):

    M=A.shape[0]
    N=A.shape[1]

    def sum_cost(f):
        sum_cost=0
        for m in range(M):
            for n in range(N):
                # the only difference from mycvx1
                sum_cost += A[m][n] * (ts[0][n] * eta * f[m][n] ** 2 + ds[0][n] / tr[m][n] + ts[0][n] * cp.inv_pos(f[m][n])) + (1-A[m][n])*f[m][n]**2

        return sum_cost


    f=cp.Variable((M,N))
    objective = cp.Minimize(sum_cost(f))
    constraints = [ f >= 0 , cp.sum(f,axis=1) <= Cm.reshape((M,))]
    prob = cp.Problem(objective, constraints)
    prob.solve()
    print(prob.status)
    return prob.value,f.value

# These are used for verification (the same calculations as the two optimization objectives above)
def sumcost1(A,ds,ts,tr,f):
    sum_cost=0
    for m in range(M):
        for n in range(N):
            sum_cost += A[m][n]*(ts[0][n]*eta*f[m][n]**2+ds[0][n]/tr[m][n]+ts[0][n]/f[m][n])
    return sum_cost

def sumcost2(A,ds,ts,tr,f):
    sum_cost=0
    for m in range(M):
        for n in range(N):
            sum_cost += A[m][n] * (
                        ts[0][n] * eta * f[m][n] ** 2 + ds[0][n] / tr[m][n] + ts[0][n] /f[m][n]) + (
                                    1 - A[m][n]) * f[m][n] ** 2
    return sum_cost

if __name__ == "__main__":
    M = 1
    N = 2
    A = np.array([[1, 0]])
    Task_L = np.random.uniform(500 * 8, 2000 * 8, [1, N])
    Task_rho = np.random.uniform(10, 20, [1, N])
    C_m = np.random.uniform(5e6, 20e6, [1, M])

    min = 50000
    max = 300000
    trans_rate = np.random.uniform(min, max, [M, N])

    cost,fre=mycvx1(A,Task_L,Task_L*Task_rho,trans_rate,C_m)
    cost,fre2=mycvx2(A,Task_L,Task_L*Task_rho,trans_rate,C_m)  # mycvx2 only swaps in the second optimization goal (the commented-out line in mycvx1)
    print('cost:',cost)
    print('fre:',fre)
    print('cost2',cost)
    print('fre2',fre2)

    print(sumcost1(A,Task_L,Task_L*Task_rho,trans_rate,fre))
    print(sumcost2(A,Task_L,Task_L*Task_rho,trans_rate,fre2))

I have tried both optimization goals. From the code, they should give the same optimal value and the same solved variable (the first element of the f array), because the extra term (1-A[m][n])*f[m][n]**2 only serves to force f[m][n]=0 whenever A[m][n]=0. However, I get this result:

optimal_inaccurate
optimal_inaccurate
cost: 0.25349947243930165
fre: [[1420093.15693927 1420093.10825906]]
cost2 0.25349947243930165
fre2 [[1.68237567e+06 6.23146051e-03]]
0.2760806151859887 (oddly, this differs from the cost printed above?)
0.25349947243930165

There are two problems. First, the first element of the variable f differs between the two runs. Second, when I substitute the solved variables back into the original objective, the value I compute differs from the one reported by the solver.

Is there anything wrong with my code? Or is it related to the optimal_inaccurate status, and should I use a different solver?

Also, is there a better way to implement the constraint that f[m][n]=0 whenever A[m][n]=0?

Answer by Reinderien:

I'm less familiar with cvxpy, so I'll demonstrate with scipy.

Your results are not exactly reproducible because you have not set a fixed random seed.
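For example, a minimal sketch (any fixed seed would do) of seeding NumPy's generator at the top of the script so that runs are comparable:

import numpy as np

rng = np.random.default_rng(seed=0)  # fixed seed => the same draws every run

# drop-in replacements for the np.random.uniform(...) calls in the question
Task_L = rng.uniform(500 * 8, 2000 * 8, (1, 2))
Task_rho = rng.uniform(10, 20, (1, 2))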

Your cost function can be simplified significantly: it's basically two dot products with vectors that are constant over one instance of the problem, plus a constant term. In this form the Jacobian is easy to calculate. I've written regression tests to demonstrate that the simplified form is equivalent, and I run two of them - one with the same problem size you've shown and one larger.
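
As a minimal sketch of the identity involved (separate from the regression tests below), the double loop from the question collapses to two dot products plus a constant:

import numpy as np

# Sketch: sum_mn A*ts*eta*f**2 + A*ds/tr + A*ts/f  ==  coef1.(f**2) + coef2.(1/f) + Adstr
rng = np.random.default_rng(1)
M, N, eta = 2, 3, 1e-26
A = rng.uniform(0, 1, (M, N))
ds, ts = rng.uniform(1, 10, (1, N)), rng.uniform(1, 10, (1, N))
tr = rng.uniform(1, 10, (M, N))
f = rng.uniform(0.5, 2.0, (M, N))

looped = sum(
    A[m][n] * (ts[0][n] * eta * f[m][n]**2 + ds[0][n] / tr[m][n] + ts[0][n] / f[m][n])
    for m in range(M) for n in range(N)
)

coef2 = (A * ts).ravel()          # constant over one problem instance
coef1 = coef2 * eta
Adstr = (A / tr @ ds.T).sum()     # constant term, independent of f
vectorised = coef1.dot(f.ravel()**2) + coef2.dot(1 / f.ravel()) + Adstr

assert np.isclose(looped, vectorised)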

Your constraint can be re-expressed as a linear product. If the problem size were much larger than what you've shown, it would be worth constructing the constraint's A with the sparse library.
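
For illustration, a sketch of that sparse construction (the sizes and bounds here are placeholders, not the question's data):

import numpy as np
from scipy import sparse
from scipy.optimize import LinearConstraint

M, N = 200, 300            # placeholder sizes
Cm = np.full(M, 1e7)       # placeholder per-row capacities

# Row m selects the N entries of f that belong to row m; this is the sparse
# equivalent of np.repeat(np.eye(M), N, axis=1).
A_con = sparse.kron(sparse.eye(M), np.ones((1, N)), format='csr')

constraint = LinearConstraint(A=A_con, ub=Cm)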

import numpy as np
from numpy.random import default_rng, Generator
from scipy.optimize import check_grad, minimize, Bounds, LinearConstraint, OptimizeResult


def sum_cost(
    f: np.ndarray,
    Adstr: float,
    coef1: np.ndarray,
    coef2: np.ndarray,
) -> float:
    return coef1.dot(f**2) + coef2.dot(1/f) + Adstr


def cost_jac(
    f: np.ndarray,
    Adstr: float,
    coef1: np.ndarray,
    coef2: np.ndarray,
) -> np.ndarray:
    return 2*coef1*f - coef2/f**2


def mycvx1(
    A: np.ndarray,
    ds: np.ndarray,
    ts: np.ndarray,
    tr: np.ndarray,
    Cm: np.ndarray,
    eta: float = 1e-26,
) -> tuple[np.ndarray, OptimizeResult]:
    M, N = A.shape
    Adstr = (A/tr @ ds.T).sum()
    coef2 = (A*ts).ravel()
    coef1 = coef2 * eta

    #  A      x         Cm
    # (M MN) (MN 1) <= (M 1)
    constraint = LinearConstraint(
        A=np.repeat(np.eye(M), N, axis=1),
        ub=Cm.ravel(),
    )

    result = minimize(
        fun=sum_cost, jac=cost_jac,
        x0=np.ones(M*N),
        args=(Adstr, coef1, coef2),
        bounds=Bounds(lb=0),
        constraints=constraint,
        options={'maxiter': 1_000},
    )
    assert result.success, result.message
    f = result.x.reshape((M, N))

    # Debugging only
    const1 = f.sum(axis=1)
    const2 = constraint.A @ result.x
    assert np.allclose(const1, const2)
    assert np.all(Cm.ravel() - const1 >= -1e-6)

    return f, result


def regression_test(
    rand: Generator,
    A: np.ndarray,
    ds: np.ndarray,
    ts: np.ndarray,
    tr: np.ndarray,
) -> None:
    M, N = A.shape
    f = rand.uniform(0, 2, (M, N)).ravel()
    Adstr = (A/tr @ ds.T).sum()
    eta = 1e-9
    coef2 = (A*ts).ravel()
    coef1 = coef2 * eta

    err = check_grad(sum_cost, cost_jac, f, Adstr, coef1, coef2)
    against = np.abs(cost_jac(f, Adstr, coef1, coef2)).sum()
    relerr = err / against
    assert relerr < 1e-6

    actual = sum_cost(f, Adstr, coef1, coef2)
    expected = {
        (12, 17): 42776357.3605468,
        (1, 2): 41017.668055850976,
    }[A.shape]
    assert np.isclose(actual, expected, rtol=1e-15, atol=0)


def test(M: int = 1, N: int = 2) -> None:
    rand = default_rng(seed=0)

    if M == 2 and N == 1:
        A = np.array([(1, 0)])
    else:
        A = rand.uniform(low=0, high=1, size=(M, N))

    Task_L = rand.uniform(500*8, 2_000*8, (1, N))
    Task_rho = rand.uniform(10, 20, (1, N))
    C_m = rand.uniform(5e6, 20e6, (1, M))
    trans_rate = rand.uniform(50_000, 300_000, (M, N))
    ds = Task_L
    ts = Task_L * Task_rho
    tr = trans_rate

    regression_test(rand, A, ds, ts, tr)

    x, result = mycvx1(A=A, ds=ds, ts=ts, tr=tr, Cm=C_m)
    print(f'{result.message} in {result.nit} iterations; cost={result.fun:.5f}')


if __name__ == '__main__':
    test(12, 17)
    test()
Optimization terminated successfully in 192 iterations; cost=405.34608
Optimization terminated successfully in 52 iterations; cost=0.02895

In fact, for your example data, a better strategy is to set x0 to the analytic optimum (derived from the gradient) when that satisfies the constraint, or right on the constraint hyperplane otherwise. It gives a lower cost and iterates only once; this may change for more realistic data. The effect is most visible with higher Cm on the same order of magnitude as (0.5/eta)**(1/3).
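
As a sketch of the algebra behind that optimum (per entry the cost is c*(eta*f**2 + 1/f) plus a constant, with c = A*ts > 0):

import numpy as np

eta = 1e-26

# d/df [eta*f**2 + 1/f] = 2*eta*f - 1/f**2 = 0   =>   f**3 = 1/(2*eta)
optimum = (0.5 / eta) ** (1 / 3)

# numeric check that the per-entry gradient vanishes there (up to rounding)
assert abs(2 * eta * optimum - 1 / optimum**2) < 1e-12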

import numpy as np
from numpy.random import default_rng, Generator
from scipy.optimize import check_grad, minimize, Bounds, LinearConstraint, OptimizeResult


def sum_cost(
    f: np.ndarray,
    Adstr: float,
    coef1: np.ndarray,
    coef2: np.ndarray,
) -> float:
    return coef1.dot(f**2) + coef2.dot(1/f) + Adstr


def cost_jac(
    f: np.ndarray,
    Adstr: float,
    coef1: np.ndarray,
    coef2: np.ndarray,
) -> np.ndarray:
    return 2*coef1*f - coef2/f**2


def mycvx1(
    A: np.ndarray,
    ds: np.ndarray,
    ts: np.ndarray,
    tr: np.ndarray,
    Cm: np.ndarray,
    eta: float = 1e-26,
) -> tuple[np.ndarray, OptimizeResult]:
    M, N = A.shape
    Adstr = (A/tr @ ds.T).sum()
    coef2 = (A*ts).ravel()
    coef1 = coef2 * eta

    #  A      f         Cm
    # (M MN) (MN 1) <= (M 1)
    constraint = LinearConstraint(
        A=np.repeat(np.eye(M), N, axis=1),
        ub=Cm.ravel(),
    )

    # Use analytic optimum when it obeys the constraint
    # Use Cm otherwise.
    optimum = (0.5/eta)**(1/3)
    x0 = np.broadcast_to(
        np.minimum(Cm.T/N, optimum),
        shape=(M, N),
    )

    result = minimize(
        fun=sum_cost, jac=cost_jac,
        x0=x0.ravel(),
        args=(Adstr, coef1, coef2),
        bounds=Bounds(lb=0),
        constraints=constraint,
    )
    assert result.success, result.message
    f = result.x.reshape((M, N))

    # Debugging only
    const1 = f.sum(axis=1)
    const2 = constraint.A @ result.x
    assert np.allclose(const1, const2)
    assert np.all(Cm.ravel() - const1 >= -1e-15)

    if np.allclose(f, x0):
        print('Warning: f == initial; no optimization needed')

    return f, result


def regression_test(
    rand: Generator,
    A: np.ndarray,
    ds: np.ndarray,
    ts: np.ndarray,
    tr: np.ndarray,
) -> None:
    M, N = A.shape
    f = rand.uniform(0, 2, (M, N)).ravel()
    Adstr = (A/tr @ ds.T).sum()
    eta = 1e-9
    coef2 = (A*ts).ravel()
    coef1 = coef2 * eta

    err = check_grad(sum_cost, cost_jac, f, Adstr, coef1, coef2)
    against = np.abs(cost_jac(f, Adstr, coef1, coef2)).sum()
    relerr = err / against
    assert relerr < 1e-6

    actual = sum_cost(f, Adstr, coef1, coef2)
    expected = {
        (12, 17): 42776357.3605468,
        (1, 2): 41017.668055850976,
    }[A.shape]
    assert np.isclose(actual, expected, rtol=1e-15, atol=0)


def test(M: int = 1, N: int = 2) -> None:
    rand = default_rng(seed=0)

    if M == 2 and N == 1:
        A = np.array([(1, 0)])
    else:
        A = rand.uniform(low=0, high=1, size=(M, N))

    Task_L = rand.uniform(500*8, 2_000*8, (1, N))
    Task_rho = rand.uniform(10, 20, (1, N))
    C_m = rand.uniform(1e9, 1e10, (1, M))
    trans_rate = rand.uniform(50_000, 300_000, (M, N))
    ds = Task_L
    ts = Task_L * Task_rho
    tr = trans_rate

    regression_test(rand, A, ds, ts, tr)

    x, result = mycvx1(A=A, ds=ds, ts=ts, tr=tr, Cm=C_m)
    print(f'{result.message} in {result.nit} iterations; cost={result.fun:.5f}')


if __name__ == '__main__':
    test(12, 17)
    test()
Warning: f == initial; no optimization needed
Optimization terminated successfully in 1 iterations; cost=7.98333
Warning: f == initial; no optimization needed
Optimization terminated successfully in 1 iterations; cost=0.01870