Skip to content

Commit

Permalink
[mod] Refactor stress data
Browse files Browse the repository at this point in the history
BREAKING for the very small minority of folks that use `nippy/stress-data`.

Changes:

1. Make `nippy/stress-data` a function

   It's unnecessarily wasteful to generate and store all this data when it's not
   being used in the common case.

2. Make data deterministic

   The stress data will now generally be stable by default between different versions
   of Nippy, etc. This will help support an upcoming test for stable serialized output.
  • Loading branch information
ptaoussanis committed Jan 15, 2024
1 parent bf6d640 commit 0004fc1
Show file tree
Hide file tree
Showing 4 changed files with 180 additions and 204 deletions.
175 changes: 81 additions & 94 deletions src/taoensso/nippy.clj
Original file line number Diff line number Diff line change
Expand Up @@ -1941,101 +1941,88 @@
;; Custom type used as a stress-data value. Instances compare by the value of
;; their single `my-data` field.
(deftype StressType [my-data]
  Object
  ;; Check the class first so that comparing against an arbitrary object
  ;; returns false rather than throwing ClassCastException on the
  ;; ^StressType cast below.
  (equals [a b]
    (and (instance? StressType b)
         (= (.-my-data a) (.-my-data ^StressType b))))
  ;; Keep hashCode consistent with the value-based equals above, so that
  ;; equal instances hash alike (e.g. as set members or map keys).
  (hashCode [_] (hash my-data)))

(def stress-data "Reference data used for tests & benchmarks"
{:nil nil
:true true
:false false
:boxed-false (Boolean. false)

:char \ಬ
:str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
:str-long (apply str (range 1000))
:kw :keyword
:kw-ns ::keyword
:kw-long (keyword
(apply str "kw" (range 1000))
(apply str "kw" (range 1000)))

:sym 'foo
:sym-ns 'foo/bar
:sym-long (symbol
(apply str "sym" (range 1000))
(apply str "sym" (range 1000)))

:regex #"^(https?:)?//(www\?|\?)?"

;;; Try to reflect real-world data:
:many-small-numbers (vec (range 200))
:many-small-keywords (->> (java.util.Locale/getISOLanguages)
(mapv keyword))
:many-small-strings (->> (java.util.Locale/getISOCountries)
(mapv #(.getDisplayCountry (java.util.Locale. "en" %))))

:queue (enc/queue [:a :b :c :d :e :f :g])
:queue-empty (enc/queue)
:sorted-set (sorted-set 1 2 3 4 5)
:sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3)

:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 '(()))))
:vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]]
:subvec (subvec [1 2 3 4 5 6 7 8] 2 8)
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}}
:map-entry (clojure.lang.MapEntry. "key" "val")
:set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}}
:meta (with-meta {:a :A} {:metakey :metaval})
:nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}}
#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{}}] #{:a :b}}
[1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5]]]]]]

:lazy-seq (repeatedly 1000 rand)
:lazy-seq-empty (map identity '())

:byte (byte 16)
:short (short 42)
:integer (int 3)
:long (long 3)
:bigint (bigint 31415926535897932384626433832795)

:float (float 3.14)
:double (double 3.14)
:bigdec (bigdec 3.1415926535897932384626433832795)

:ratio 22/7
:uri (java.net.URI. "https://clojure.org/reference/data_structures")
:uuid (java.util.UUID/randomUUID)
:util-date (java.util.Date.)
:sql-date (java.sql.Date/valueOf "2023-06-21")

;;; JVM 8+
:time-instant (enc/compile-if java.time.Instant (java.time.Instant/now) nil)
:time-duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) nil)
:time-period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) nil)

:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
:objects (object-array [1 "two" {:data "data"}])

:stress-record (StressRecord. "data")
:stress-type (StressType. "data")
;; Builds the reference data map anew on every call.
;; Takes one options map; when `:comparable?` is truthy, the entries listed in
;; the `dissoc` at the bottom are removed from the result.
(defn stress-data
"Returns map of reference stress data for use by tests, benchmarks, etc."
[{:keys [comparable?] :as opts}] ; `opts` is bound but not otherwise used in this body
(let [rng (java.util.Random. 123456) ; Seeded for determinism
;; Local `rand-nth` shadows the core fn of the same name and draws its index
;; from the seeded `rng` above.
rand-nth (fn [coll] (nth coll (.nextInt rng (count coll))))
all
{:nil nil
:true true
:false false
:false-boxed (Boolean. false)

;; Characters, strings, keywords, symbols
:char \ಬ
:str-short "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"
:str-long (reduce str (range 1024))
:kw :keyword
:kw-ns ::keyword
:sym 'foo
:sym-ns 'foo/bar
:kw-long (keyword (reduce str "_" (range 128)) (reduce str "_" (range 128)))
:sym-long (symbol (reduce str "_" (range 128)) (reduce str "_" (range 128)))

;; Numbers
:byte (byte 16)
:short (short 42)
:integer (int 3)
:long (long 3)
:float (float 3.1415926535897932384626433832795)
:double (double 3.1415926535897932384626433832795)
:bigdec (bigdec 3.1415926535897932384626433832795)
:bigint (bigint 31415926535897932384626433832795)
:ratio 22/7

;; Collections, including nested and empty ones
:list (list 1 2 3 4 5 (list 6 7 8 (list 9 10 (list) ())))
:vector [1 2 3 4 5 [6 7 8 [9 10 [[]]]]]
:subvec (subvec [1 2 3 4 5 6 7 8] 2 8)
:map {:a 1 :b 2 :c 3 :d {:e 4 :f {:g 5 :h 6 :i 7 :j {{} {}}}}}
:map-entry (clojure.lang.MapEntry. "key" "val")
:set #{1 2 3 4 5 #{6 7 8 #{9 10 #{#{}}}}}
:meta (with-meta {:a :A} {:metakey :metaval})
:nested [#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}}
#{{1 [:a :b] 2 [:c :d] 3 [:e :f]} [#{{[] ()}}] #{:a :b}}
[1 [1 2 [1 2 3 [1 2 3 4 [1 2 3 4 5 "ಬಾ ಇಲ್ಲಿ ಸಂಭವಿಸ"] {} #{} [] ()]]]]]

;; Regex, sorted collections, lazy seqs, queues
:regex #"^(https?:)?//(www\?|\?)?"
:sorted-set (sorted-set 1 2 3 4 5)
:sorted-map (sorted-map :b 2 :a 1 :d 4 :c 3)
:lazy-seq-empty (map identity ())
:lazy-seq (repeatedly 64 #(do nil))
:queue-empty (enc/queue)
:queue (enc/queue [:a :b :c :d :e :f :g])

;; Fixed UUID and URI, the custom record and type, and arrays
:uuid (java.util.UUID. 7232453380187312026 -7067939076204274491)
:uri (java.net.URI. "https://clojure.org")
:defrecord (StressRecord. "data")
:deftype (StressType. "data")
:bytes (byte-array [(byte 1) (byte 2) (byte 3)])
:objects (object-array [1 "two" {:data "data"}])

;; Dates and times are pinned to fixed values rather than "now".
;; NOTE(review): `enc/compile-if` presumably yields ::skip when the named
;; java.time class can't be resolved -- confirm against encore.
:util-date (java.util.Date. 1577884455500)
:sql-date (java.sql.Date. 1577884455500)
:instant (enc/compile-if java.time.Instant (java.time.Instant/parse "2020-01-01T13:14:15.50Z") ::skip)
:duration (enc/compile-if java.time.Duration (java.time.Duration/ofSeconds 100 100) ::skip)
:period (enc/compile-if java.time.Period (java.time.Period/of 1 1 1) ::skip)

:throwable (Throwable. "Msg")
:exception (Exception. "Msg")
:ex-info (ex-info "Msg" {:data "data"})

;; Vectors of 512 elements each, picked via the seeded local `rand-nth`;
;; `vec` realizes each `repeatedly` seq immediately.
:many-longs (vec (repeatedly 512 #(rand-nth (range 10))))
:many-doubles (vec (repeatedly 512 #(double (rand-nth (range 10)))))
:many-strings (vec (repeatedly 512 #(rand-nth ["foo" "bar" "baz" "qux"])))
:many-keywords (vec (repeatedly 512
#(keyword
(rand-nth ["foo" "bar" "baz" "qux" nil])
(rand-nth ["foo" "bar" "baz" "qux" ]))))}]

;; The removed entries hold arrays, Throwables and a regex pattern, none of
;; which compare by value under `=`.
(if comparable?
(dissoc all :bytes :objects :throwable :exception :ex-info :regex)
(do all))))

;; Serializable
:throwable (Throwable. "Yolo")
:exception (try (/ 1 0) (catch Exception e e))
:ex-info (ex-info "ExInfo" {:data "data"})})

(def stress-data-comparable
  "Subset of `stress-data` without the entries that break roundtrip equality."
  (apply dissoc stress-data
    [:bytes :objects :throwable :exception :ex-info :regex]))

(comment (let [data stress-data-comparable] (= (thaw (freeze data)) data)))

(def stress-data-benchable
  "Subset of `stress-data-comparable` without the entries that break the
  reader or other utils that get benched against."
  (let [unbenchable-keys
        [:queue :queue-empty
         :stress-record :stress-type
         :time-instant :time-duration :time-period
         :byte :uri]]
    (apply dissoc stress-data-comparable unbenchable-keys)))
(comment
[(= (stress-data {:comparable? true}) (stress-data {:comparable? true}))
(let [d (stress-data {:comparable? true})] (= (thaw (freeze d)) d))])

;;;; Tools

Expand Down
40 changes: 19 additions & 21 deletions test/taoensso/nippy_benchmarks.clj
Original file line number Diff line number Diff line change
Expand Up @@ -27,30 +27,29 @@

;;;; Benchable data

(def data
  "Benchable subset of `nippy/stress-data-comparable`: an entry is removed
  when its value throws during the `freeze-reader`/`thaw-reader` or the
  `freeze-fress`/`thaw-fress` roundtrip."
  (let [all-entries nippy/stress-data-comparable
        keep-if-benchable
        (fn [acc entry-key entry-val]
          (try
            (thaw-reader (freeze-reader entry-val))
            (thaw-fress  (freeze-fress  entry-val))
            acc
            (catch Throwable _
              (dissoc acc entry-key))))]
    (reduce-kv keep-if-benchable all-entries all-entries)))
(def bench-data
  "Stress data entries usable for benching: starts from the comparable stress
  data and removes every entry whose value throws during the reader or fress
  roundtrip."
  (letfn [(roundtrips? [v]
            (try
              (thaw-reader (freeze-reader v))
              (thaw-fress  (freeze-fress  v))
              true
              (catch Throwable _ false)))]
    (let [candidates (nippy/stress-data {:comparable? true})]
      (reduce-kv
        (fn [kept k v] (if (roundtrips? v) kept (dissoc kept k)))
        candidates
        candidates))))

(comment
(clojure.set/difference
(set (keys nippy/stress-data-comparable))
(set (keys data))))
(set (keys (nippy/stress-data {:comparable? true})))
(set (keys bench-data))))

;;;;

(defn- bench1
[{:keys [laps warmup] :or {laps 1e4, warmup 25e3}} freezer thawer sizer]
(let [data-frozen (freezer data)
time-freeze (enc/bench laps {:warmup-laps warmup} (freezer data))
(let [data-frozen (freezer bench-data)
time-freeze (enc/bench laps {:warmup-laps warmup} (freezer bench-data))
time-thaw (enc/bench laps {:warmup-laps warmup} (thawer data-frozen))
data-size (sizer data-frozen)]

Expand Down Expand Up @@ -127,19 +126,18 @@

;;;; Compressors

(let [_ (require '[taoensso.nippy :as nippy])
data (nippy/freeze nippy/stress-data-comparable {:compressor nil})]
(let [bench-data (nippy/freeze (nippy/stress-data {:comparable? true}) {:compressor nil})]

(defn bench1-compressor
[{:keys [laps warmup] :or {laps 1e4, warmup 2e4}} compressor]
(let [data-compressed (compr/compress compressor data)
time-compress (enc/bench laps {:warmup-laps warmup} (compr/compress compressor data))
(let [data-compressed (compr/compress compressor bench-data)
time-compress (enc/bench laps {:warmup-laps warmup} (compr/compress compressor bench-data))
time-decompress (enc/bench laps {:warmup-laps warmup} (compr/decompress compressor data-compressed))]

{:round (+ time-compress time-decompress)
:compress time-compress
:decompress time-decompress
:ratio (enc/round2 (/ (count data-compressed) (count data)))}))
:ratio (enc/round2 (/ (count data-compressed) (count bench-data)))}))

(defn bench-compressors [bench1-opts lzma-opts]
(merge
Expand Down
32 changes: 18 additions & 14 deletions test/taoensso/nippy_tests.clj
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

;;;; Config, etc.

;; Shared fixture: taken from the `nippy/stress-data-comparable` var.
(def test-data nippy/stress-data-comparable)
;; Same var name, defined by calling `nippy/stress-data` with `:comparable? true`.
(def test-data (nippy/stress-data {:comparable? true}))
;; Generator built with `recursive-gen` from `container-type` and `any-equatable`.
;; NOTE(review): `tc-gens` is presumably the test.check generators alias -- confirm in the ns form.
(def tc-gen-recursive-any-equatable
(tc-gens/recursive-gen tc-gens/container-type
tc-gens/any-equatable))
Expand All @@ -35,7 +35,13 @@
;;;; Core

(deftest _core
[(println (str "Clojure version: " *clojure-version*))
(println (str "Clojure version: " *clojure-version*))
[(is (= test-data test-data) "Test data is comparable")
(is (=
(nippy/stress-data {:comparable? true})
(nippy/stress-data {:comparable? true}))
"Stress data is deterministic")

(is (= test-data ((comp thaw freeze) test-data)))
(is (= test-data ((comp #(thaw % {:no-header? true
:compressor nippy/lz4-compressor
Expand All @@ -47,8 +53,9 @@
#(freeze % {:password [:salted "p"]}))
test-data)))

(is (= (vec (:objects nippy/stress-data))
((comp vec thaw freeze) (:objects nippy/stress-data))))
(let [d (nippy/stress-data {})]
[(is (= (vec (:bytes d)) ((comp vec thaw freeze) (:bytes d))))
(is (= (vec (:objects d)) ((comp vec thaw freeze) (:objects d))))])

(is (= test-data ((comp #(thaw % {:compressor nippy/lzma2-compressor})
#(freeze % {:compressor nippy/lzma2-compressor}))
Expand Down Expand Up @@ -141,18 +148,15 @@
;;;; Caching

(deftest _caching
(let [stress [nippy/stress-data-comparable
nippy/stress-data-comparable
nippy/stress-data-comparable
nippy/stress-data-comparable]
cached (mapv nippy/cache stress)
cached (mapv nippy/cache stress) ; <=1 wrap auto-enforced
(let [test-data* [test-data test-data test-data test-data] ; Data with duplicates
cached (mapv nippy/cache test-data*)
cached (mapv nippy/cache test-data*) ; <=1 wrap auto-enforced
]

[(is (= stress (thaw (freeze stress {:compressor nil}))))
(is (= stress (thaw (freeze cached {:compressor nil}))))
(let [size-stress (count (freeze stress {:compressor nil}))
size-cached (count (freeze cached {:compressor nil}))]
[(is (= test-data* (thaw (freeze test-data* {:compressor nil}))))
(is (= test-data* (thaw (freeze cached {:compressor nil}))))
(let [size-stress (count (freeze test-data* {:compressor nil}))
size-cached (count (freeze cached {:compressor nil}))]
(is (>= size-stress (* 3 size-cached)))
(is (< size-stress (* 4 size-cached))))]))

Expand Down
Loading

0 comments on commit 0004fc1

Please sign in to comment.