openopt · matteofrigo · Mar 29, 2021 · Apr 2, 2021 · Apr 3, 2021 · Apr 6, 2021
diff --git a/copt/penalty.py b/copt/penalty.py
@@ -55,14 +55,128 @@ def _prox_L1(x, i, indices, indptr, d, step_size):
 
 class GroupL1:
     """
-    Group Lasso penalty
+    Group Lasso penalty for non-overlapping groups
 
     Args:
         alpha: float
             Constant multiplying this loss
 
         blocks: list of lists
 
+        weights: (optional)
+            - If not passed, each group gets the same penalty (=1). (default)
+            - If 'nf', each groups gets a penalty equal to the square root of
+              the number of features indexed in it.
+            - If 'nfi', each group gets a penalty equal to the inverse of the
+              square root of the number of features indexed in it.
+            - If iterable, the i-th group gets a penalty equal to the i-th
+              entry of the passed iterable.
+
+    """
+
+    def __init__(self, alpha, groups, weights=None):
+        self.alpha = alpha
+
+        sum_groups = np.sum([len(g) for g in groups])
+        all_indices = list(groups[0])
+        for g in groups[1:]:
+            all_indices.extend(list(g))
+        n_unique = np.unique(all_indices).size
+        if sum_groups != n_unique:
+            raise ValueError('Groups must not overlap.')
+        self.groups = [list(g) for g in groups]
+
+        if weights is None:
+            self.weights = np.ones(len(self.groups), dtype=np.float32)
+        elif isinstance(weights, str):
+            if weights == 'nf':
+                self.weights = np.asarray([np.sqrt(len(g)) for g in
+                                           self.groups])
+            elif weights == 'nfi':
+                self.weights = np.asarray([1 / np.sqrt(len(g)) for g in
+                                           self.groups])
+        else:
+            if len(weights) != len(self.groups):
+                raise ValueError('Length of weights must be equal to number '
+                                 'of groups.')
+            self.weights = np.asarray(weights)
+
+    def __call__(self, x):
+        return self.alpha * np.sum([w * np.linalg.norm(x[g]) for w, g in
+                                    zip(self.weights, self.groups)])
+
+    def prox(self, x, step_size):
+        out = x.copy()
+        for w, g in zip(self.weights, self.groups):
+            norm = np.linalg.norm(x[g])
+            r = w * self.alpha * step_size
+            if norm > r:
+                out[g] -= r * out[g] / norm
+            else:
+                out[g] = 0
+        return out
+
+    def prox_factory(self, n_features):
+        B_data = np.zeros(n_features)
+        B_indices = np.zeros(n_features, dtype=np.int32)
+        B_indptr = np.zeros(n_features + 1, dtype=np.int32)
+
+        feature_pointer = 0
+        block_pointer = 0
+        for g in self.groups:
+            for atom in g:
+                B_data[feature_pointer] = 1.
+                B_indices[feature_pointer] = atom
+                feature_pointer += 1
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + len(g)
+            block_pointer += 1
+
+        excluded_indices = np.ones(n_features, dtype=np.int32)
+        excluded_indices[B_indices[: feature_pointer + 1]] = 0.
+        for i in np.where(excluded_indices)[0]:
+            B_data[feature_pointer] = -1.
+            B_indices[feature_pointer] = i
+            feature_pointer += 1
+
+            B_indptr[block_pointer + 1] = B_indptr[block_pointer] + 1
+            block_pointer += 1
+
+        B_indptr = B_indptr[: block_pointer + 1]
+        B = sparse.csr_matrix((B_data, B_indices, B_indptr))
+
+        alpha = self.alpha
+
+        @njit
+        def _prox_gl(x, i, indices, indptr, d, step_size):
+            for b in range(indptr[i], indptr[i + 1]):
+                h = indices[b]
+                if B_data[B_indices[B_indptr[h]]] <= 0:
+                    continue
+                ss = step_size * d[h]
+                norm = 0.0
+                for j in range(B_indptr[h], B_indptr[h + 1]):
+                    j_idx = B_indices[j]
+                    norm += x[j_idx] ** 2
+                norm = np.sqrt(norm)
+                if norm > alpha * ss * self.weights[h]:
+                    for j in range(B_indptr[h], B_indptr[h + 1]):
+                        j_idx = B_indices[j]
+                        x[j_idx] *= 1 - alpha * ss / norm
+                else:
+                    for j in range(B_indptr[h], B_indptr[h + 1]):
+                        j_idx = B_indices[j]
+                        x[j_idx] = 0.0
+
+        return _prox_gl, B
+
+
+class OGroupL1:
+    """
+    Overlapping Group Lasso penalty
+    Args:
+        alpha: float
+            Constant multiplying this loss
+        blocks: list of lists
     """
 
     def __init__(self, alpha, groups):
@@ -81,7 +195,6 @@ def __call__(self, x):
     def prox(self, x, step_size):
         out = x.copy()
         for g in self.groups:
-
             norm = np.linalg.norm(x[g])
             if norm > self.alpha * step_size:
                 out[g] -= step_size * self.alpha * out[g] / norm
@@ -304,4 +417,4 @@ def prox(self, x, step_size):
             self.n_cols,
             max_iter=self.max_iter,
             tol=self.tol,
-        )
+        )
diff --git a/tests/test_penalties.py b/tests/test_penalties.py
@@ -1,14 +1,15 @@
 import numpy as np
-import copt as cp
+import pytest
+from numpy import testing
+
 import copt.constraint
 import copt.penalty
 from copt import tv_prox
-from numpy import testing
-import pytest
 
 proximal_penalties = [
     copt.penalty.L1Norm(1.0),
     copt.penalty.GroupL1(1.0, np.array_split(np.arange(16), 5)),
+    copt.penalty.OGroupL1(1.0, np.array_split(np.arange(16), 5)),
     copt.penalty.TraceNorm(1.0, (4, 4)),
     copt.constraint.TraceBall(1.0, (4, 4)),
     copt.penalty.TotalVariation2D(1.0, (4, 4)),
@@ -17,8 +18,92 @@
 
 
 def test_GroupL1():
+    # non-overlapping
+    groups = [(0, 1), (1, 2)]
+    with np.testing.assert_raises(ValueError):
+        copt.penalty.GroupL1(1.0, groups)
+
+    # converts group type from tuple to list
+    groups = [(0, 1), (2, 3)]
+    pen = copt.penalty.GroupL1(1.0, groups)
+    for g in pen.groups:
+        np.testing.assert_(isinstance(g, list))
+
+    # same number of groups and weights
+    groups = [[0, 1], [2, 3]]
+    weights = [1, 2, 3]
+    with np.testing.assert_raises(ValueError):
+        copt.penalty.GroupL1(1.0, groups, weights)
+
+    # evaluation of penalty and prox
+    x = np.array([0.01, 0.5, 3, 4])
+    weights = np.array([10, .2])
+    g0 = copt.penalty.GroupL1(1, groups, weights)
+    # eval
+    result = g0(x)
+    gt = (weights[0] * np.linalg.norm(x[groups[0]], 2) +
+          weights[1] * np.linalg.norm(x[groups[1]], 2))
+    np.testing.assert_almost_equal(result, gt)
+    # prox
+    gt = x.copy()
+    # the first group has norm lower than the corresponding weight
+    gt[groups[0]] = 0
+    # the second group has norm higher than the corresponding weight
+    gt[groups[1]] -= (x[groups[1]] * weights[1] /
+                      np.linalg.norm(x[groups[1]]))
+    prox = g0.prox(x, 1)
+    np.testing.assert_almost_equal(prox, gt)
+
+    # default weights
+    g1 = copt.penalty.GroupL1(1, groups)
+    gt = np.array([1., 1])
+    np.testing.assert_almost_equal(g1.weights, gt)
+
+    # weights: sqrt(|g|)
+    g2 = copt.penalty.GroupL1(1.0, groups, 'nf')
+    gt = np.array([np.sqrt(2), np.sqrt(2)])
+    np.testing.assert_almost_equal(g2.weights, gt)
+
+    # weights: sqrt(|g|) ** -1
+    g3 = copt.penalty.GroupL1(1.0, groups, 'nfi')
+    gt = 1. / gt
+    np.testing.assert_almost_equal(g3.weights, gt)
+
+    # custom weights
+    gt = np.random.rand(len(groups))
+    g4 = copt.penalty.GroupL1(1.0, groups, gt)
+    np.testing.assert_almost_equal(g4.weights, gt)
+    expected = (np.linalg.norm(x[[0, 1]]) * gt[0] +
+                np.linalg.norm(x[[2, 3]]) * gt[1])
+    np.testing.assert_almost_equal(g4(x), expected)
+
+    # sparse proximal
+    _, B = g1.prox_factory(5)
+    gt = np.array(
+        [
+            [1.0, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 1.0, 1.0, 0.0],
+            [0.0, 0.0, 0.0, 0.0, -1.0],
+        ]
+    )
+    np.testing.assert_almost_equal(B.toarray(), gt)
+
+    groups = [(0, 1), (3, 4)]
+    g5 = copt.penalty.GroupL1(1.0, groups)
+    _, B = g5.prox_factory(5)
+    gt = np.array(
+        [
+            [1.0, 1.0, 0.0, 0.0, 0.0],
+            [0.0, 0.0, 0.0, 1.0, 1.0],
+            [0.0, 0.0, -1.0, 0.0, 0.0],
+        ]
+    )
+    np.testing.assert_almost_equal(B.toarray(), gt)
+
+
+def test_OverlappingGroupL1():
     groups = [(0, 1), (2, 3)]
-    g1 = copt.penalty.GroupL1(1.0, groups)
+    g1 = copt.penalty.OGroupL1(1.0, groups)
     _, B = g1.prox_factory(5)
     assert np.all(
         B.toarray()
@@ -32,7 +117,7 @@ def test_GroupL1():
     )
 
     groups = [(0, 1), (3, 4)]
-    g2 = copt.penalty.GroupL1(1.0, groups)
+    g2 = copt.penalty.OGroupL1(1.0, groups)
     _, B = g2.prox_factory(5)
     assert np.all(
         B.toarray()
@@ -46,7 +131,6 @@ def test_GroupL1():
     )
 
 
-#
 #     for blocks in [[(0, 1), (2, 3)], ]:
 #         pen = cp.utils.GroupL1(1., blocks)
 #         counter = 0
@@ -99,7 +183,8 @@ def tv_norm(x, n_rows, n_cols):
 
     for nrun in range(20):
         x = np.random.randn(n_features)
-        x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10, max_iter=10000)
+        x_next = tv_prox.prox_tv2d(x, gamma, n_rows, n_cols, tol=1e-10,
+                                   max_iter=10000)
         diff_obj = tv_norm(x, n_rows, n_cols) - tv_norm(x_next, n_rows, n_cols)
         testing.assert_array_less(
             ((x - x_next) ** 2).sum() / gamma, (1 + epsilon) * diff_obj
@@ -137,8 +222,8 @@ def test_three_inequality(pen):
 
         lhs = 2 * (pen(xi) - pen(u))
         rhs = (
-            np.linalg.norm(u - z) ** 2
-            - np.linalg.norm(u - xi) ** 2
-            - np.linalg.norm(xi - z) ** 2
+                np.linalg.norm(u - z) ** 2
+                - np.linalg.norm(u - xi) ** 2
+                - np.linalg.norm(xi - z) ** 2
         )
         assert lhs <= rhs, pen
diff --git a/tests/test_randomized.py b/tests/test_randomized.py
@@ -195,8 +195,8 @@ def test_vrtos_ogl():
     groups_2 = [np.arange(5, 10)]
     f = copt.loss.LogLoss(A, b, alpha)
     for beta in np.logspace(-3, 3, 3):
-        p_1 = copt.penalty.GroupL1(beta, groups_1)
-        p_2 = copt.penalty.GroupL1(beta, groups_2)
+        p_1 = copt.penalty.OGroupL1(beta, groups_1)
+        p_2 = copt.penalty.OGroupL1(beta, groups_2)
         L = cp.utils.get_max_lipschitz(A, "logloss") + alpha / density
 
         opt_vrtos = cp.minimize_vrtos(