Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor: [sparse_weights] get for predict #4651

Merged
merged 25 commits
Nov 17, 2023
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions test/core.vwtest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6021,7 +6021,7 @@
{
"id": 465,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and saving model",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -f standard_sparse_model.vw -q::",
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There are a total of 3 tests using --sparse_weights; this might be an opportunity to beef up the coverage — perhaps with unit tests?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Apart from that, it would be good to add some sparse benchmarks to master and see whether they are affected by this change.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

benchmarks added, tests can be added in a separate PR

"diff_files": {
"stderr": "train-sets/ref/sparse_save_check.stderr",
"stdout": "train-sets/ref/sparse_save_check.stdout"
Expand All @@ -6033,7 +6033,7 @@
{
"id": 466,
"desc": "cb_explore_adf with epsilon-greedy exploration using --sparse_weights and loading model",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw",
"vw_command": "--cb_explore_adf --epsilon 0.1 -d train-sets/cb_test.ldf --noconstant --sparse_weights -i standard_sparse_model.vw -q::",
"diff_files": {
"stderr": "train-sets/ref/sparse_load_check.stderr",
"stdout": "train-sets/ref/sparse_load_check.stdout"
Expand Down
9 changes: 5 additions & 4 deletions test/train-sets/ref/sparse_load_check.stderr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
creating quadratic features for pairs: ::
using no cache
Reading datafile = train-sets/cb_test.ldf
num sources = 1
Expand All @@ -6,17 +7,17 @@ learning rate = 0.5
initial_t = 3
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 15
0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 6
0.066667 0.066667 1 1.0 0:1:0.5 1:0.48 60
0.033333 0.000000 2 2.0 1:0:0.5 1:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.033333
total feature number = 27
total feature number = 96
3 changes: 3 additions & 0 deletions test/train-sets/ref/sparse_load_check.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
[warning] model file has set of {-q, --cubic, --interactions} settings stored, but they'll be OVERRIDDEN by set of {-q, --cubic, --interactions} settings from command line.
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
9 changes: 5 additions & 4 deletions test/train-sets/ref/sparse_save_check.stderr
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
creating quadratic features for pairs: ::
final_regressor = standard_sparse_model.vw
using no cache
Reading datafile = train-sets/cb_test.ldf
Expand All @@ -7,17 +8,17 @@ learning rate = 0.5
initial_t = 0
power_t = 0.5
cb_type = mtr
Enabled learners: gd, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Enabled learners: gd, generate_interactions, scorer-identity, csoaa_ldf-rank, cb_adf, cb_explore_adf_greedy, shared_feature_merger
Input label = CB
Output pred = ACTION_PROBS
average since example example current current current
loss last counter weight label predict features
0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 15
0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 6
0.666667 0.666667 1 1.0 0:1:0.5 0:0.33 60
0.333333 0.000000 2 2.0 1:0:0.5 1:0.95 18

finished run
number of examples = 3
weighted example sum = 3.000000
weighted label sum = 0.000000
average loss = 0.333333
total feature number = 27
total feature number = 96
2 changes: 2 additions & 0 deletions test/train-sets/ref/sparse_save_check.stdout
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
[warning] Any duplicate namespace interactions will be removed
You can use --leave_duplicate_interactions to disable this behaviour.
6 changes: 6 additions & 0 deletions vowpalwabbit/core/include/vw/core/array_parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,12 @@ class parameters
else { return dense_weights[i]; }
}

// Lookup accessor: returns the weight at index i, dispatching to the sparse or
// dense backing store. Unlike operator[], the sparse path uses
// sparse_weights.get(), which does not insert a new map entry on a miss.
inline VW::weight& get(size_t i)
bassmang marked this conversation as resolved.
Show resolved Hide resolved
{
if (sparse) { return sparse_weights.get(i); }
else { return dense_weights.get(i); }
}

template <typename Lambda>
void set_default(Lambda&& default_func)
{
Expand Down
3 changes: 3 additions & 0 deletions vowpalwabbit/core/include/vw/core/array_parameters_dense.h
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,9 @@ class dense_parameters
inline const VW::weight& operator[](size_t i) const { return _begin.get()[i & _weight_mask]; }
inline VW::weight& operator[](size_t i) { return _begin.get()[i & _weight_mask]; }

// get() mirrors operator[] for the dense store: the index is masked into the
// table range and the corresponding weight is returned by reference.
inline const VW::weight& get(size_t i) const
{
  const size_t masked = i & _weight_mask;
  return _begin.get()[masked];
}
inline VW::weight& get(size_t i)
{
  const size_t masked = i & _weight_mask;
  return _begin.get()[masked];
}

VW_ATTR(nodiscard) static dense_parameters shallow_copy(const dense_parameters& input);
VW_ATTR(nodiscard) static dense_parameters deep_copy(const dense_parameters& input);

Expand Down
9 changes: 6 additions & 3 deletions vowpalwabbit/core/include/vw/core/array_parameters_sparse.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@

#include <cstddef>
#include <functional>
#include <map>
#include <memory>
#include <unordered_map>

namespace VW
{
Expand All @@ -20,7 +20,7 @@ class sparse_parameters;
namespace details
{

using weight_map = std::unordered_map<uint64_t, std::shared_ptr<VW::weight>>;
using weight_map = std::map<uint64_t, std::shared_ptr<VW::weight>>;
bassmang marked this conversation as resolved.
Show resolved Hide resolved

template <typename T>
class sparse_iterator
Expand Down Expand Up @@ -82,9 +82,11 @@ class sparse_parameters
const_iterator cend() const { return const_iterator(_map.end()); }

// operator[]: element access that may lazily create the entry for index i
// (delegates to get_or_default_and_get).
inline VW::weight& operator[](size_t i) { return *(get_or_default_and_get(i)); }

inline const VW::weight& operator[](size_t i) const { return *(get_or_default_and_get(i)); }

// get(): non-inserting lookup (delegates to get_impl) — a missing index yields
// a default-valued weight without growing the underlying map.
inline VW::weight& get(size_t i) { return *(get_impl(i)); };
inline const VW::weight& get(size_t i) const { return *(get_impl(i)); };

// strided_index: element k maps to raw index (k << _stride_shift); uses the
// inserting operator[] path.
inline VW::weight& strided_index(size_t index) { return operator[](index << _stride_shift); }
inline const VW::weight& strided_index(size_t index) const { return operator[](index << _stride_shift); }

Expand Down Expand Up @@ -119,6 +121,7 @@ class sparse_parameters
// It is marked const so it can be used from both const and non const operator[]
// The map itself is mutable to facilitate this
VW::weight* get_or_default_and_get(size_t i) const;
// Non-inserting counterpart used by get(): a miss returns a pointer to a
// default value instead of adding an entry to the map.
VW::weight* get_impl(size_t i) const;
};
} // namespace VW
using sparse_parameters VW_DEPRECATED("sparse_parameters moved into VW namespace") = VW::sparse_parameters;
4 changes: 2 additions & 2 deletions vowpalwabbit/core/include/vw/core/gd_predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ inline void foreach_feature(WeightsT& weights, const VW::features& fs, DataT& da
{
for (const auto& f : fs)
{
VW::weight& w = weights[(f.index() + offset)];
VW::weight& w = weights[f.index() + offset];
FuncT(dat, mult * f.value(), w);
}
}
Expand All @@ -46,7 +46,7 @@ template <class DataT, void (*FuncT)(DataT&, float, float), class WeightsT>
inline void foreach_feature(
const WeightsT& weights, const VW::features& fs, DataT& dat, uint64_t offset = 0, float mult = 1.)
{
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights[static_cast<size_t>(f.index() + offset)]); }
for (const auto& f : fs) { FuncT(dat, mult * f.value(), weights.get(static_cast<size_t>(f.index() + offset))); }
}

template <class DataT, class WeightOrIndexT, void (*FuncT)(DataT&, float, WeightOrIndexT),
Expand Down
2 changes: 1 addition & 1 deletion vowpalwabbit/core/include/vw/core/interactions_predict.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ inline void call_func_t(DataT& dat, WeightsT& weights, const float ft_value, con
template <class DataT, void (*FuncT)(DataT&, const float, float), class WeightsT>
inline void call_func_t(DataT& dat, const WeightsT& weights, const float ft_value, const uint64_t ft_idx)
{
FuncT(dat, ft_value, weights[static_cast<size_t>(ft_idx)]);
FuncT(dat, ft_value, weights.get(static_cast<size_t>(ft_idx)));
}

template <class DataT, void (*FuncT)(DataT&, float, uint64_t), class WeightsT>
Expand Down
15 changes: 15 additions & 0 deletions vowpalwabbit/core/src/array_parameters_sparse.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,21 @@ VW::weight* VW::sparse_parameters::get_or_default_and_get(size_t i) const
return iter->second.get();
}

// Non-inserting lookup used on the predict path. Unlike
// get_or_default_and_get, a miss does NOT add an entry to _map; instead a
// pointer to a function-local default value is returned.
VW::weight* VW::sparse_parameters::get_impl(size_t i) const
{
// NOTE(review): this static is shared across all instances and all calls; when
// _default_func is set it is rewritten on every miss, so two outstanding
// pointers from get() can alias and concurrent callers would race on it —
// confirm this path is only used single-threaded with value-semantics reads.
static VW::weight default_value = 0.0f;

uint64_t index = i & _weight_mask;
auto iter = _map.find(index);
rajan-chari marked this conversation as resolved.
Show resolved Hide resolved
if (iter == _map.end())
{
// Populate the default via the registered initializer, if any.
if (_default_func != nullptr) { _default_func(&default_value, index); }
return &default_value;
}

return iter->second.get();
}

VW::sparse_parameters::sparse_parameters(size_t length, uint32_t stride_shift)
: _weight_mask((length << stride_shift) - 1), _stride_shift(stride_shift), _default_func(nullptr)
{
Expand Down
1 change: 1 addition & 0 deletions vowpalwabbit/core/src/reductions/cb/cb_explore_adf_rnd.cc
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@ class lazy_gaussian
{
public:
inline float operator[](uint64_t index) const { return VW::details::merand48_boxmuller(index); }
inline float get(uint64_t index) const { return VW::details::merand48_boxmuller(index); }
bassmang marked this conversation as resolved.
Show resolved Hide resolved
};

inline void vec_add_with_norm(std::pair<float, float>& p, float fx, float fw)
Expand Down
Loading