Add support for predictions without the k budget constraint

mwydmuch · Jan 23, 2024 · 3271e3e · 3271e3e
1 parent 42e1179
commit 3271e3e
Show file tree

Hide file tree

Showing 7 changed files with 273 additions and 427 deletions.
diff --git a/xcolumns/block_coordinate.py b/xcolumns/block_coordinate.py
@@ -4,7 +4,7 @@
 from typing import Callable
 
 import numpy as np
-from scipy.sparse import csr_matrix, load_npz, save_npz
+from scipy.sparse import csr_matrix
 from tqdm import tqdm, trange
 
 from .default_types import *
@@ -78,6 +78,7 @@ def _calculate_utility(
 
     return utility_aggregation_func(bin_utilities)
 
+
 def _calculate_binary_gains(
     bin_utility_func,
     p_Etp: np.ndarray,
@@ -161,11 +162,14 @@ def bc_with_0approx_np_step(
     if maximize:
         gains = -gains
 
-    top_k = np.argpartition(gains, k)[:k]
-
     # Update prediction
     y_pred_i[:] = 0.0
-    y_pred_i[top_k] = 1.0
+
+    if k > 0:
+        top_k = np.argpartition(gains, k)[:k]
+        y_pred_i[top_k] = 1.0
+    else: 
+        y_pred_i[gains <= 0] = 1.0
 
     # Update local confusion matrix
     if not only_pred:
@@ -496,13 +500,13 @@ def _calculate_coverage_utility(
 
     cov_utility = 1 - np.mean(Ef)
     if alpha < 1:
-        precision_at_k = 0
-        if isinstance(y_proba, np.ndarray):
-            precision_at_k = (np.sum(y_pred * y_proba, axis=0) / n / k).sum()
-        elif isinstance(y_proba, csr_matrix):
-            precision_at_k = (
-                np.asarray(calculate_tp_csr(y_proba, y_pred) / n / k).ravel().sum()
-            )
+        precision_at_k = (calculate_tp(y_proba, y_pred) / n / k).sum()
+        # if isinstance(y_proba, np.ndarray):
+        #     precision_at_k = (np.sum(y_pred * y_proba, axis=0) / n / k).sum()
+        # elif isinstance(y_proba, csr_matrix):
+        #     precision_at_k = (
+        #         np.asarray(calculate_tp_csr(y_proba, y_pred) / n / k).ravel().sum()
+        #     )
         cov_utility = alpha * cov_utility + (1 - alpha) * precision_at_k
 
     return cov_utility
@@ -567,7 +571,7 @@ def bc_coverage(
             if isinstance(y_proba, np.ndarray):
                 Ef = np.product(1 - y_pred * y_proba, axis=0)
             elif isinstance(y_proba, csr_matrix):
-                Ef = numba_calculate_prod_1_sparse_mat_mul_ones_minus_mat(
+                Ef = numba_calculate_prod_0_sparse_mat_mul_ones_minus_mat(
                     *unpack_csr_matrices(y_pred, y_proba), n, m
                 )
 

diff --git a/xcolumns/frank_wolfe.py b/xcolumns/frank_wolfe.py
@@ -6,6 +6,7 @@
 from scipy.sparse import csr_matrix
 
 from .default_types import *
+from .metrics_on_conf_matrix import *
 from .utils import *
 from .weighted_prediction import predict_weighted_per_instance
 
@@ -17,7 +18,7 @@ def _get_grad_as_numpy(t):
         return np.zeros(t.shape, dtype=FLOAT_TYPE)
 
 
-def _utility_func_with_gradient(utility_func, tp, fp, fn, tn):
+def utility_func_with_gradient(utility_func, tp, fp, fn, tn):
     tp = torch.tensor(tp, requires_grad=True, dtype=TORCH_FLOAT_TYPE)
     fp = torch.tensor(fp, requires_grad=True, dtype=TORCH_FLOAT_TYPE)
     fn = torch.tensor(fn, requires_grad=True, dtype=TORCH_FLOAT_TYPE)
@@ -58,8 +59,8 @@ def find_optimal_randomized_classifier_using_frank_wolfe(
     init_classifier: Union[str, Tuple[np.ndarray, np.ndarray]] = "topk", # or "random"
     search_for_best_alpha: bool = True,
     alpha_search_algo: str = "lin",
-    alpha_eps: float = 0.001,
-    alpha_lin_search_step: float = 0.001,
+    alpha_eps: float = 0.00001,
+    alpha_lin_search_step: float = 0.00001,
     skip_tn=False,
     verbose: bool = True,
     return_meta: bool = False,
@@ -84,9 +85,10 @@ def find_optimal_randomized_classifier_using_frank_wolfe(
     log(f"  Initializing initial {init_classifier} classifier ...")
     if init_classifier == "topk":
         classifiers_a[0] = np.ones(m, dtype=FLOAT_TYPE)
+        classifiers_b[0] = np.full(m, -0.5, dtype=FLOAT_TYPE)
     elif init_classifier == "random":
-        classifiers_a[0] = np.random.rand(m, dtype=FLOAT_TYPE)
-        classifiers_b[0] = np.random.rand(m, dtype=FLOAT_TYPE)
+        classifiers_a[0] = np.random.rand(m)
+        classifiers_b[0] = np.random.rand(m)
     else:
         raise ValueError(f"Unsuported type of init_classifier: {init_classifier}")
     y_pred_i = predict_weighted_per_instance(y_proba, k, a=classifiers_a[0], b=classifiers_b[0])
@@ -107,11 +109,11 @@ def find_optimal_randomized_classifier_using_frank_wolfe(
 
     for i in range(1, max_iters):
         log(f"  Starting iteration {i} ...")
-        old_utility, Gtp, Gfp, Gfn, Gtn = _utility_func_with_gradient(utility_func, tp, fp, fn, tn)
+        old_utility, Gtp, Gfp, Gfn, Gtn = utility_func_with_gradient(utility_func, tp, fp, fn, tn)
 
         classifiers_a[i] = Gtp - Gfp - Gfn + Gtn
         classifiers_b[i] = Gfp - Gtn
-        y_pred_i = predict_weighted_per_instance(y_proba, k, a=classifiers_a[i], b=classifiers_b[i])
+        y_pred_i = predict_weighted_per_instance(y_proba, k, t=0.0, a=classifiers_a[i], b=classifiers_b[i])
         tp_i, fp_i, fn_i, tn_i = calculate_confusion_matrix(
             y_true, y_pred_i, normalize=True, skip_tn=skip_tn
         )
@@ -144,7 +146,7 @@ def find_optimal_randomized_classifier_using_frank_wolfe(
             meta["utilities"].append(new_utility)
             meta["iters"] = i
 
-        log(f"   Iteration {i} finished, utility: (1 - {alpha}) * {old_utility} + {alpha} * {utility_i} -> {new_utility}")
+        log(f"   Iteration {i} finished, alpha: {alpha}, utility: {old_utility} -> {new_utility}")
 
         if alpha < alpha_eps:
             print(f"  Stopping because alpha is smaller than {alpha_eps}")
@@ -179,8 +181,12 @@ def _predict_using_randomized_classifier_np(
     for i in range(n):
         c_i = np.random.choice(classifiers_range, p=classifiers_proba)
         gains = y_proba[i] * classifiers_a[c_i] + classifiers_b[c_i]
-        top_k = np.argpartition(-gains, k)[:k]
-        result[i, top_k] = 1.0
+
+        if k > 0:
+            top_k = np.argpartition(-gains, k)[:k]
+            result[i, top_k] = 1.0
+        else:
+            result[i, gains > 0] = 1.0
 
     return result
 

diff --git a/xcolumns/metrics_on_conf_matrix.py b/xcolumns/metrics_on_conf_matrix.py
@@ -1,6 +1,132 @@
-import numpy as np
-from typing import Union
 from numbers import Number
+from typing import Tuple, Union, Callable
+
+import numpy as np
+from scipy.sparse import csr_matrix
+
+from .default_types import *
+from .numba_csr_methods import *
+from .utils import *
+
+
+def _calculate_tp_np(y_true: np.ndarray, y_pred: np.ndarray):
+    return np.sum(y_true * y_pred, axis=0)
+
+
+# Alternative version, performance is similar
+# def _calculate_tp_csr(y_true: csr_matrix, y_pred: csr_matrix):
+#     return (y_pred.multiply(y_true)).sum(axis=0)
+
+
+def _calculate_tp_csr(y_true: csr_matrix, y_pred: csr_matrix):
+    n, m = y_true.shape
+    return numba_calculate_sum_0_sparse_mat_mul_mat(
+        *unpack_csr_matrices(y_pred, y_true), n, m
+    )
+
+
+def _calculate_fp_np(y_true: np.ndarray, y_pred: np.ndarray):
+    return np.sum((1 - y_true) * y_pred, axis=0)
+
+
+def _calculate_fp_csr_slow(y_true: csr_matrix, y_pred: csr_matrix):
+    n, m = y_true.shape
+    fp = np.zeros(m, dtype=FLOAT_TYPE)
+    dense_ones = np.ones(m, dtype=FLOAT_TYPE)
+    for i in range(n):
+        fp += y_pred[i].multiply(dense_ones - y_true[i])
+    return fp
+
+
+def _calculate_fn_np(y_true: np.ndarray, y_pred: np.ndarray):
+    return np.sum(y_true * (1 - y_pred), axis=0)
+
+
+def _calculate_fp_csr(y_true: csr_matrix, y_pred: csr_matrix):
+    n, m = y_true.shape
+    return numba_calculate_sum_0_sparse_mat_mul_ones_minus_mat(
+        *unpack_csr_matrices(y_pred, y_true), n, m
+    )
+
+
+def _calculate_fn_csr_slow(y_true: csr_matrix, y_pred: csr_matrix):
+    n, m = y_true.shape
+    fn = np.zeros(m, dtype=FLOAT_TYPE)
+    dense_ones = np.ones(m, dtype=FLOAT_TYPE)
+    for i in range(n):
+        fn += y_true[i].multiply(dense_ones - y_pred[i])
+
+    return fn
+
+
+def _calculate_fn_csr(y_true: csr_matrix, y_pred: csr_matrix):
+    n, m = y_true.shape
+    return numba_calculate_sum_0_sparse_mat_mul_ones_minus_mat(
+        *unpack_csr_matrices(y_true, y_pred), n, m
+    )
+
+
+def _calculate_tn_np(y_true: np.ndarray, y_pred: np.ndarray):
+    return np.sum((1 - y_true) * (1 - y_pred), axis=0)
+
+
+def _calculate_conf_mat_entry(
+        y_true: Union[np.ndarray, csr_matrix], 
+        y_pred: Union[np.ndarray, csr_matrix], 
+        func_for_np: Callable,
+        func_for_csr: Callable,
+        normalize: bool = False):
+
+    if y_true.shape != y_pred.shape:
+        raise ValueError("y_true and y_pred must have the same shape")
+
+    if isinstance(y_true, np.ndarray) and isinstance(y_pred, np.ndarray):
+        val = func_for_np(y_true, y_pred)
+    elif isinstance(y_true, csr_matrix) and isinstance(y_pred, csr_matrix):
+        val = func_for_csr(y_true, y_pred)
+    else:
+        raise ValueError("y_true and y_pred must be both dense or both sparse")
+
+    if normalize:
+        val /= y_true.shape[0]
+
+    return val
+
+
+def calculate_tp(y_true: Union[np.ndarray, csr_matrix], y_pred: Union[np.ndarray, csr_matrix], normalize: bool = False):
+    return _calculate_conf_mat_entry(y_true, y_pred, _calculate_tp_np, _calculate_tp_csr, normalize=normalize)
+
+
+def calculate_fp(y_true: Union[np.ndarray, csr_matrix], y_pred: Union[np.ndarray, csr_matrix], normalize: bool = False):
+    return _calculate_conf_mat_entry(y_true, y_pred, _calculate_fp_np, _calculate_fp_csr, normalize=normalize)
+
+
+def calculate_fn(y_true: Union[np.ndarray, csr_matrix], y_pred: Union[np.ndarray, csr_matrix], normalize: bool = False):
+    return _calculate_conf_mat_entry(y_true, y_pred, _calculate_fn_np, _calculate_fn_csr, normalize=normalize)
+
+
+def calculate_confusion_matrix(
+    y_true: Union[np.ndarray, csr_matrix],
+    y_pred: Union[np.ndarray, csr_matrix],
+    normalize: bool = False,
+    skip_tn: bool = False,
+):
+    """
+    Calculate confusion matrix for true and prediction.
+    """
+
+    tp = calculate_tp(y_true, y_pred, normalize=normalize)
+    fp = calculate_fp(y_true, y_pred, normalize=normalize)
+    fn = calculate_fn(y_true, y_pred, normalize=normalize)
+
+    n, m = y_true.shape
+
+    if skip_tn:
+        tn = np.zeros(m, dtype=FLOAT_TYPE)
+    else:
+        tn = np.full(m, 1 if normalize else n, dtype=FLOAT_TYPE) - tp - fp - fn
+
+    return tp, fp, fn, tn
 
 
 def bin_precision_at_k_on_conf_matrix(tp: Union[Number, np.ndarray], fp: Union[Number, np.ndarray], fn: Union[Number, np.ndarray], tn: Union[Number, np.ndarray], k: int):