diff --git a/.github/workflows/modules-zstd.yml b/.github/workflows/modules-zstd.yml index 002a0c23c7..b3294364b5 100644 --- a/.github/workflows/modules-zstd.yml +++ b/.github/workflows/modules-zstd.yml @@ -38,9 +38,9 @@ jobs: run: | bazel build -c opt --test_output=errors -- //xls/dslx:interpreter_main //xls/dslx/ir_convert:ir_converter_main //xls/tools:opt_main //xls/tools:codegen_main - - name: Build ZSTD Module (opt) - run: | - bazel build -c opt --test_output=errors -- //xls/modules/zstd:all + # - name: Build ZSTD Module (opt) + # run: | + # bazel build -c opt --test_output=errors -- //xls/modules/zstd:all - name: Test ZSTD Module - DSLX Tests (opt) if: ${{ !cancelled() }} @@ -52,30 +52,35 @@ jobs: run: | bazel test -c opt --test_output=errors -- $(bazel query 'filter(".*_cc_test", kind(rule, //xls/modules/zstd/...))') - - name: Build ZSTD verilog targets (opt) - if: ${{ !cancelled() }} - run: | - bazel build -c opt -- $(bazel query 'filter(".*_verilog", kind(rule, //xls/modules/zstd/...))') + # - name: Build ZSTD verilog targets (opt) + # if: ${{ !cancelled() }} + # run: | + # bazel build -c opt -- $(bazel query 'filter(".*_verilog", kind(rule, //xls/modules/zstd/...))') - - name: Build and run ZSTD IR benchmark rules (opt) - if: ${{ !cancelled() }} - run: | - for target in $(bazel query 'filter(".*_ir_benchmark$", kind(rule, //xls/modules/zstd/...))'); - do - echo "running $target"; - bazel run -c opt $target -- --logtostderr; - done + # - name: Build and run ZSTD IR benchmark rules (opt) + # if: ${{ !cancelled() }} + # run: | + # for target in $(bazel query 'filter(".*_ir_benchmark$", kind(rule, //xls/modules/zstd/...))'); + # do + # echo "running $target"; + # bazel run -c opt $target -- --logtostderr; + # done - - name: Build and run synthesis benchmarks of the ZSTD module (opt) - if: ${{ !cancelled() }} - run: | - for target in $(bazel query 'filter(".*_benchmark_synth$", kind(rule, //xls/modules/zstd/...))'); - do - echo "running $target"; - bazel run -c opt $target -- --logtostderr; - done + # - name: Build and run synthesis benchmarks of the ZSTD module (opt) + # if: ${{ !cancelled() }} + # run: | + # for target in $(bazel query 'filter(".*_benchmark_synth$", kind(rule, //xls/modules/zstd/...))'); + # do + # echo "running $target"; + # bazel run -c opt $target -- --logtostderr; + # done - - name: Build ZSTD place and route targets (opt) + - name: Build and test verilog simulation of the ZSTD module components (opt) if: ${{ !cancelled() }} run: | - bazel build -c opt -- $(bazel query 'filter(".*_place_and_route", kind(rule, //xls/modules/zstd/...))') + bazel test -c opt --test_output=errors -- $(bazel query 'filter(".*_cocotb_test", kind(rule, //xls/modules/zstd/...))') + + # - name: Build ZSTD place and route targets (opt) + # if: ${{ !cancelled() }} + # run: | + # bazel build -c opt -- $(bazel query 'filter(".*_place_and_route", kind(rule, //xls/modules/zstd/...))') diff --git a/dependency_support/pip_requirements.in b/dependency_support/pip_requirements.in index 2382e41330..6170edf844 100644 --- a/dependency_support/pip_requirements.in +++ b/dependency_support/pip_requirements.in @@ -8,6 +8,11 @@ termcolor==1.1.0 psutil==5.7.0 portpicker==1.3.1 pyyaml==6.0.1 +pytest==8.2.2 +cocotb==1.9.0 +cocotbext-axi==0.1.24 +cocotb_bus==0.2.1 +zstandard==0.23.0 # Note: numpy and scipy version availability seems to differ between Ubuntu # versions that we want to support (e.g. 
18.04 vs 20.04), so we accept a diff --git a/dependency_support/pip_requirements_lock.txt b/dependency_support/pip_requirements_lock.txt index ac0bff0f4f..ad142b9a5d 100644 --- a/dependency_support/pip_requirements_lock.txt +++ b/dependency_support/pip_requirements_lock.txt @@ -14,10 +14,68 @@ click==8.1.3 \ # via # -r dependency_support/pip_requirements.in # flask +cocotb==1.9.0 \ + --hash=sha256:02a58ef6c941114964096e7c039bdd4e67e63816cfd2f6a9af6a34cd92b00e8e \ + --hash=sha256:0819794ef5e8fd14fee0b265933226cf600e85edc2f1a749b4d5f8fa2d31ce4e \ + --hash=sha256:0ba35617a677ff65a1273411a3dfdfc5f587128ad8cb9e941ab0eb17ec8fb3e2 \ + --hash=sha256:17556e3a23562f64d577d0eb117fe02e384aedee997b29497b5c395f5010ff82 \ + --hash=sha256:19b4e27b53a16e0b9c4cc5227c7f9d4dccac06e431a4f937e9f5513350196333 \ + --hash=sha256:1a0381ced5590a726032ba2265c6b70ac12cfb49edb152be86a081bb7d104751 \ + --hash=sha256:1aff68cf77059448a9a3278079037e34b50c8c2aee466d984295fa7fe699d390 \ + --hash=sha256:277281420fd6fc3002bb85d6bec497bd20ff3a3905d4b5f1301faf975f750ede \ + --hash=sha256:2daf743320331615f4e8ffb877ab0b04e6f913b911bb11bf9dbc1d876d9c4220 \ + --hash=sha256:2e9bcdbfba3e99c9297bd0d74ba781772d89d2c86e893980784ada252bd1a0f8 \ + --hash=sha256:3058c977f9d4e1f6333d505947f34b9142910719f1d8631c40a151dd86bad727 \ + --hash=sha256:5832d894419a9e8fe5c242e3ac86588e16e2cb379822dcb154bfec8544ae858e \ + --hash=sha256:598b841ed0809e5c64d8c383b8035f6ace5a6f9013f680cdc6981221911c005d \ + --hash=sha256:5a5c91027d7652aaf10e101743edd6b1e832039a19af75fca301275ef30f01d4 \ + --hash=sha256:61418f619af72c8cca8de622785b4f4bfc17ace09981de6eb44feae560cf3bbb \ + --hash=sha256:784c914c8df3fd79cfb148d2bcd17c4b2703c89af1278ed98773afb57ceea3e6 \ + --hash=sha256:87a19d3012f505ba7fda37483b851ef0ca40290ad8a9b28a820b84f8574287bb \ + --hash=sha256:89503f0749362d36b6fab8636710f1848943c21f9d488672921bac21e9edd29f \ + --hash=sha256:89e5189fd393918c27af2daefdcb13df4d52fa761f065d5964d2c4ff5c0642fb \ + --hash=sha256:8cb4b0edf8f0b47c3b604b461cb574fc75fd97efa893cbaf828f4f2f71cf459e \ + --hash=sha256:94e884e16186899ad5b4d131c3f7ff0a2277e67ea0660754e8810a4bbf2d610e \ + --hash=sha256:997dbca2a2cd933fd0a44d9fadeebc1e8a40701db15ea06f207811933dceb350 \ + --hash=sha256:a7cea13cb2fe4f5ca735490846342885117778a73008a67ed9cac667aaaf3f0d \ + --hash=sha256:a84edfbfa57dc6e16845a55feb0b4e1c8b6bbfa5ef1ab6768beba8d81e0546aa \ + --hash=sha256:a95b5e5708a3629d319d2b655d11345cc7e97fea9bdc9bc1df7435926ac30966 \ + --hash=sha256:aa6818c39ca1ce699e4bb1d84899c4f98c2d25c7671bd6c7beee3b1ee9d68834 \ + --hash=sha256:ab99bf7e055780b57419d4133fd4dca9c72a03b766a3e2200552f10498eb8845 \ + --hash=sha256:b966f5560a494fd99f95a1562f9326ca20c35bb118d4e6b50db41da8e4a6f718 \ + --hash=sha256:bc44a7708a5a63d3059a622c2fb90831dc33534c3343e971f5a6c78905097baa \ + --hash=sha256:c11e21d291ba2f889e33c21d76e9aec6ffdfb5666053dc34452666579daa675b \ + --hash=sha256:c848de13583478d71cc91e528e17c051ca6a3b92e89d703ac5015f17cab1287b \ + --hash=sha256:d944aa5509a0f0786d6f30554a2f8b1f229847f9ac9988879d7a05497739f668 \ + --hash=sha256:f50862153e1364f6edeaef9d70505093549fa097e9b2555ea46d1e4f94ac3287 \ + --hash=sha256:f74c598e230e1035103f6e3a97dd7a0e1bcacf7f3ea7481cd3bcde477b74e379 \ + --hash=sha256:fcb81c6c37e11b0729768dd8e192a9cfb809778699ab1fe89f4d92ba0beb3092 \ + --hash=sha256:ff2ddc8b304eb7076ceead2534a1b9828df771798fa9c2601ea983c86d23ec08 + # via + # -r dependency_support/pip_requirements.in + # cocotb-bus + # cocotbext-axi +cocotb-bus==0.2.1 \ + 
--hash=sha256:a197aa4b0e0ad28469c8877b41b3fb2ec0206da9f491b9276d1578ce6dd8aa8d + # via + # -r dependency_support/pip_requirements.in + # cocotbext-axi +cocotbext-axi==0.1.24 \ + --hash=sha256:3ed62dcaf9448833176826507c5bc5c346431c4846a731e409d87c862d960593 \ + --hash=sha256:533ba6c7503c6302bdb9ef86e43a549ad5da876eafb1adce23d39751c54cced4 + # via -r dependency_support/pip_requirements.in +find-libpython==0.4.0 \ + --hash=sha256:034a4253bd57da3408aefc59aeac1650150f6c1f42e10fdd31615cf1df0842e3 \ + --hash=sha256:46f9cdcd397ddb563b2d7592ded3796a41c1df5222443bd9d981721c906c03e6 + # via cocotb flask==2.3.2 \ --hash=sha256:77fd4e1249d8c9923de34907236b747ced06e5467ecac1a7bb7115ae0e9670b0 \ --hash=sha256:8c2f9abd47a9e8df7f0c3f091ce9497d011dc3b31effcf4c85a6e2b50f4114ef # via -r dependency_support/pip_requirements.in +iniconfig==2.0.0 \ + --hash=sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3 \ + --hash=sha256:b6a85871a79d2e3b22d2d1b94ac2824226a63c6b741c88f7ae975f18b6778374 + # via pytest itsdangerous==2.1.2 \ --hash=sha256:2c2349112351b88699d8d4b6b075022c0808887cb7ad10069318a8b0bc88db44 \ --hash=sha256:5dbbc68b317e5e42f327f9021763545dc3fc3bfe22e6deb96aaf1fc38874156a @@ -107,6 +165,14 @@ numpy==1.24.4 \ # via # -r dependency_support/pip_requirements.in # scipy +packaging==24.1 \ + --hash=sha256:026ed72c8ed3fcce5bf8950572258698927fd1dbda10a5e981cdf0ac37f4f002 \ + --hash=sha256:5b8f2217dbdbd2f7f384c41c628544e6d52f2d0f53c6d0c3ea61aa5d1d7ff124 + # via pytest +pluggy==1.5.0 \ + --hash=sha256:2cffa88e94fdc978c4c574f15f9e59b7f4201d439195c3715ca9e2486f1d0cf1 \ + --hash=sha256:44e1ad92c8ca002de6377e165f3e0f1be63266ab4d554740532335b9d75ea669 + # via pytest portpicker==1.3.1 \ --hash=sha256:d2cdc776873635ed421315c4d22e63280042456bbfa07397817e687b142b9667 # via -r dependency_support/pip_requirements.in @@ -123,6 +189,10 @@ psutil==5.7.0 \ --hash=sha256:e2d0c5b07c6fe5a87fa27b7855017edb0d52ee73b71e6ee368fae268605cc3f5 \ --hash=sha256:f344ca230dd8e8d5eee16827596f1c22ec0876127c28e800d7ae20ed44c4b310 # via -r dependency_support/pip_requirements.in +pytest==8.2.2 \ + --hash=sha256:c434598117762e2bd304e526244f67bf66bbd7b5d6cf22138be51ff661980343 \ + --hash=sha256:de4bb8104e201939ccdc688b27a89a7be2079b22e2bd2b07f806b6ba71117977 + # via -r dependency_support/pip_requirements.in pyyaml==6.0.1 \ --hash=sha256:04ac92ad1925b2cff1db0cfebffb6ffc43457495c9b3c39d3fcae417d7125dc5 \ --hash=sha256:062582fca9fabdd2c8b54a3ef1c978d786e0f6b3a1510e0ac93ef59e0ddae2bc \ @@ -207,3 +277,102 @@ werkzeug==3.0.6 \ # via # -r dependency_support/pip_requirements.in # flask +zstandard==0.23.0 \ + --hash=sha256:034b88913ecc1b097f528e42b539453fa82c3557e414b3de9d5632c80439a473 \ + --hash=sha256:0a7f0804bb3799414af278e9ad51be25edf67f78f916e08afdb983e74161b916 \ + --hash=sha256:11e3bf3c924853a2d5835b24f03eeba7fc9b07d8ca499e247e06ff5676461a15 \ + --hash=sha256:12a289832e520c6bd4dcaad68e944b86da3bad0d339ef7989fb7e88f92e96072 \ + --hash=sha256:1516c8c37d3a053b01c1c15b182f3b5f5eef19ced9b930b684a73bad121addf4 \ + --hash=sha256:157e89ceb4054029a289fb504c98c6a9fe8010f1680de0201b3eb5dc20aa6d9e \ + --hash=sha256:1bfe8de1da6d104f15a60d4a8a768288f66aa953bbe00d027398b93fb9680b26 \ + --hash=sha256:1e172f57cd78c20f13a3415cc8dfe24bf388614324d25539146594c16d78fcc8 \ + --hash=sha256:1fd7e0f1cfb70eb2f95a19b472ee7ad6d9a0a992ec0ae53286870c104ca939e5 \ + --hash=sha256:203d236f4c94cd8379d1ea61db2fce20730b4c38d7f1c34506a31b34edc87bdd \ + --hash=sha256:27d3ef2252d2e62476389ca8f9b0cf2bbafb082a3b6bfe9d90cbcbb5529ecf7c \ + 
--hash=sha256:29a2bc7c1b09b0af938b7a8343174b987ae021705acabcbae560166567f5a8db \ + --hash=sha256:2ef230a8fd217a2015bc91b74f6b3b7d6522ba48be29ad4ea0ca3a3775bf7dd5 \ + --hash=sha256:2ef3775758346d9ac6214123887d25c7061c92afe1f2b354f9388e9e4d48acfc \ + --hash=sha256:2f146f50723defec2975fb7e388ae3a024eb7151542d1599527ec2aa9cacb152 \ + --hash=sha256:2fb4535137de7e244c230e24f9d1ec194f61721c86ebea04e1581d9d06ea1269 \ + --hash=sha256:32ba3b5ccde2d581b1e6aa952c836a6291e8435d788f656fe5976445865ae045 \ + --hash=sha256:34895a41273ad33347b2fc70e1bff4240556de3c46c6ea430a7ed91f9042aa4e \ + --hash=sha256:379b378ae694ba78cef921581ebd420c938936a153ded602c4fea612b7eaa90d \ + --hash=sha256:38302b78a850ff82656beaddeb0bb989a0322a8bbb1bf1ab10c17506681d772a \ + --hash=sha256:3aa014d55c3af933c1315eb4bb06dd0459661cc0b15cd61077afa6489bec63bb \ + --hash=sha256:4051e406288b8cdbb993798b9a45c59a4896b6ecee2f875424ec10276a895740 \ + --hash=sha256:40b33d93c6eddf02d2c19f5773196068d875c41ca25730e8288e9b672897c105 \ + --hash=sha256:43da0f0092281bf501f9c5f6f3b4c975a8a0ea82de49ba3f7100e64d422a1274 \ + --hash=sha256:445e4cb5048b04e90ce96a79b4b63140e3f4ab5f662321975679b5f6360b90e2 \ + --hash=sha256:48ef6a43b1846f6025dde6ed9fee0c24e1149c1c25f7fb0a0585572b2f3adc58 \ + --hash=sha256:50a80baba0285386f97ea36239855f6020ce452456605f262b2d33ac35c7770b \ + --hash=sha256:519fbf169dfac1222a76ba8861ef4ac7f0530c35dd79ba5727014613f91613d4 \ + --hash=sha256:53dd9d5e3d29f95acd5de6802e909ada8d8d8cfa37a3ac64836f3bc4bc5512db \ + --hash=sha256:53ea7cdc96c6eb56e76bb06894bcfb5dfa93b7adcf59d61c6b92674e24e2dd5e \ + --hash=sha256:576856e8594e6649aee06ddbfc738fec6a834f7c85bf7cadd1c53d4a58186ef9 \ + --hash=sha256:59556bf80a7094d0cfb9f5e50bb2db27fefb75d5138bb16fb052b61b0e0eeeb0 \ + --hash=sha256:5d41d5e025f1e0bccae4928981e71b2334c60f580bdc8345f824e7c0a4c2a813 \ + --hash=sha256:61062387ad820c654b6a6b5f0b94484fa19515e0c5116faf29f41a6bc91ded6e \ + --hash=sha256:61f89436cbfede4bc4e91b4397eaa3e2108ebe96d05e93d6ccc95ab5714be512 \ + --hash=sha256:62136da96a973bd2557f06ddd4e8e807f9e13cbb0bfb9cc06cfe6d98ea90dfe0 \ + --hash=sha256:64585e1dba664dc67c7cdabd56c1e5685233fbb1fc1966cfba2a340ec0dfff7b \ + --hash=sha256:65308f4b4890aa12d9b6ad9f2844b7ee42c7f7a4fd3390425b242ffc57498f48 \ + --hash=sha256:66b689c107857eceabf2cf3d3fc699c3c0fe8ccd18df2219d978c0283e4c508a \ + --hash=sha256:6a41c120c3dbc0d81a8e8adc73312d668cd34acd7725f036992b1b72d22c1772 \ + --hash=sha256:6f77fa49079891a4aab203d0b1744acc85577ed16d767b52fc089d83faf8d8ed \ + --hash=sha256:72c68dda124a1a138340fb62fa21b9bf4848437d9ca60bd35db36f2d3345f373 \ + --hash=sha256:752bf8a74412b9892f4e5b58f2f890a039f57037f52c89a740757ebd807f33ea \ + --hash=sha256:76e79bc28a65f467e0409098fa2c4376931fd3207fbeb6b956c7c476d53746dd \ + --hash=sha256:774d45b1fac1461f48698a9d4b5fa19a69d47ece02fa469825b442263f04021f \ + --hash=sha256:77da4c6bfa20dd5ea25cbf12c76f181a8e8cd7ea231c673828d0386b1740b8dc \ + --hash=sha256:77ea385f7dd5b5676d7fd943292ffa18fbf5c72ba98f7d09fc1fb9e819b34c23 \ + --hash=sha256:80080816b4f52a9d886e67f1f96912891074903238fe54f2de8b786f86baded2 \ + --hash=sha256:80a539906390591dd39ebb8d773771dc4db82ace6372c4d41e2d293f8e32b8db \ + --hash=sha256:82d17e94d735c99621bf8ebf9995f870a6b3e6d14543b99e201ae046dfe7de70 \ + --hash=sha256:837bb6764be6919963ef41235fd56a6486b132ea64afe5fafb4cb279ac44f259 \ + --hash=sha256:84433dddea68571a6d6bd4fbf8ff398236031149116a7fff6f777ff95cad3df9 \ + --hash=sha256:8c24f21fa2af4bb9f2c492a86fe0c34e6d2c63812a839590edaf177b7398f700 \ + 
--hash=sha256:8ed7d27cb56b3e058d3cf684d7200703bcae623e1dcc06ed1e18ecda39fee003 \ + --hash=sha256:9206649ec587e6b02bd124fb7799b86cddec350f6f6c14bc82a2b70183e708ba \ + --hash=sha256:983b6efd649723474f29ed42e1467f90a35a74793437d0bc64a5bf482bedfa0a \ + --hash=sha256:98da17ce9cbf3bfe4617e836d561e433f871129e3a7ac16d6ef4c680f13a839c \ + --hash=sha256:9c236e635582742fee16603042553d276cca506e824fa2e6489db04039521e90 \ + --hash=sha256:9da6bc32faac9a293ddfdcb9108d4b20416219461e4ec64dfea8383cac186690 \ + --hash=sha256:a05e6d6218461eb1b4771d973728f0133b2a4613a6779995df557f70794fd60f \ + --hash=sha256:a0817825b900fcd43ac5d05b8b3079937073d2b1ff9cf89427590718b70dd840 \ + --hash=sha256:a4ae99c57668ca1e78597d8b06d5af837f377f340f4cce993b551b2d7731778d \ + --hash=sha256:a8c86881813a78a6f4508ef9daf9d4995b8ac2d147dcb1a450448941398091c9 \ + --hash=sha256:a8fffdbd9d1408006baaf02f1068d7dd1f016c6bcb7538682622c556e7b68e35 \ + --hash=sha256:a9b07268d0c3ca5c170a385a0ab9fb7fdd9f5fd866be004c4ea39e44edce47dd \ + --hash=sha256:ab19a2d91963ed9e42b4e8d77cd847ae8381576585bad79dbd0a8837a9f6620a \ + --hash=sha256:ac184f87ff521f4840e6ea0b10c0ec90c6b1dcd0bad2f1e4a9a1b4fa177982ea \ + --hash=sha256:b0e166f698c5a3e914947388c162be2583e0c638a4703fc6a543e23a88dea3c1 \ + --hash=sha256:b2170c7e0367dde86a2647ed5b6f57394ea7f53545746104c6b09fc1f4223573 \ + --hash=sha256:b2d8c62d08e7255f68f7a740bae85b3c9b8e5466baa9cbf7f57f1cde0ac6bc09 \ + --hash=sha256:b4567955a6bc1b20e9c31612e615af6b53733491aeaa19a6b3b37f3b65477094 \ + --hash=sha256:b69bb4f51daf461b15e7b3db033160937d3ff88303a7bc808c67bbc1eaf98c78 \ + --hash=sha256:b8c0bd73aeac689beacd4e7667d48c299f61b959475cdbb91e7d3d88d27c56b9 \ + --hash=sha256:be9b5b8659dff1f913039c2feee1aca499cfbc19e98fa12bc85e037c17ec6ca5 \ + --hash=sha256:bf0a05b6059c0528477fba9054d09179beb63744355cab9f38059548fedd46a9 \ + --hash=sha256:c16842b846a8d2a145223f520b7e18b57c8f476924bda92aeee3a88d11cfc391 \ + --hash=sha256:c363b53e257246a954ebc7c488304b5592b9c53fbe74d03bc1c64dda153fb847 \ + --hash=sha256:c7c517d74bea1a6afd39aa612fa025e6b8011982a0897768a2f7c8ab4ebb78a2 \ + --hash=sha256:d20fd853fbb5807c8e84c136c278827b6167ded66c72ec6f9a14b863d809211c \ + --hash=sha256:d2240ddc86b74966c34554c49d00eaafa8200a18d3a5b6ffbf7da63b11d74ee2 \ + --hash=sha256:d477ed829077cd945b01fc3115edd132c47e6540ddcd96ca169facff28173057 \ + --hash=sha256:d50d31bfedd53a928fed6707b15a8dbeef011bb6366297cc435accc888b27c20 \ + --hash=sha256:dc1d33abb8a0d754ea4763bad944fd965d3d95b5baef6b121c0c9013eaf1907d \ + --hash=sha256:dc5d1a49d3f8262be192589a4b72f0d03b72dcf46c51ad5852a4fdc67be7b9e4 \ + --hash=sha256:e2d1a054f8f0a191004675755448d12be47fa9bebbcffa3cdf01db19f2d30a54 \ + --hash=sha256:e7792606d606c8df5277c32ccb58f29b9b8603bf83b48639b7aedf6df4fe8171 \ + --hash=sha256:ed1708dbf4d2e3a1c5c69110ba2b4eb6678262028afd6c6fbcc5a8dac9cda68e \ + --hash=sha256:f2d4380bf5f62daabd7b751ea2339c1a21d1c9463f1feb7fc2bdcea2c29c3160 \ + --hash=sha256:f3513916e8c645d0610815c257cbfd3242adfd5c4cfa78be514e5a3ebb42a41b \ + --hash=sha256:f8346bfa098532bc1fb6c7ef06783e969d87a99dd1d2a5a18a892c1d7a643c58 \ + --hash=sha256:f83fa6cae3fff8e98691248c9320356971b59678a17f20656a9e59cd32cee6d8 \ + --hash=sha256:fa6ce8b52c5987b3e34d5674b0ab529a4602b632ebab0a93b07bfb4dfc8f8a33 \ + --hash=sha256:fb2b1ecfef1e67897d336de3a0e3f52478182d6a47eda86cbd42504c5cbd009a \ + --hash=sha256:fc9ca1c9718cb3b06634c7c8dec57d24e9438b2aa9a0f02b8bb36bf478538880 \ + --hash=sha256:fd30d9c67d13d891f2360b2a120186729c111238ac63b43dbd37a5a40670b8ca \ + 
--hash=sha256:fd7699e8fd9969f455ef2926221e0233f81a2542921471382e77a9e2f2b57f4b \ + --hash=sha256:fe3b385d996ee0822fd46528d9f0443b880d4d05528fd26a9119a54ec3f91c69 + # via -r dependency_support/pip_requirements.in diff --git a/xls/modules/rle/rle_common.x b/xls/modules/rle/rle_common.x index 8b1217ff2c..9410c3e9e9 100644 --- a/xls/modules/rle/rle_common.x +++ b/xls/modules/rle/rle_common.x @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +import std; + // This file defines RLE common data structures // @@ -24,6 +26,15 @@ pub struct PlainData { last: bool, // flush RLE } +// Structure contains multiple uncompressed symbols. +// Structure is used as an output from a advanced RLE decoder. +// FIXME: add default value DATA_WIDTH_LOG2: u32 = {std::clog2(DATA_WIDTH + u32:1) } (https://github.com/google/xls/issues/1425) +pub struct PlainDataWithLen { + symbols: uN[DATA_WIDTH], + length: uN[DATA_WIDTH_LOG2], + last: bool, +} + // Structure contains compressed (symbol, counter) pairs. // Structure is used as an output from RLE encoder and // as an input to RLE decoder. diff --git a/xls/modules/zstd/BUILD b/xls/modules/zstd/BUILD index 8717497922..990e53aa10 100644 --- a/xls/modules/zstd/BUILD +++ b/xls/modules/zstd/BUILD @@ -17,11 +17,11 @@ load("@rules_hdl//place_and_route:build_defs.bzl", "place_and_route") load("@rules_hdl//synthesis:build_defs.bzl", "benchmark_synth", "synthesize_rtl") load("@rules_hdl//verilog:providers.bzl", "verilog_library") +load("@xls_pip_deps//:requirements.bzl", "requirement") load( "//xls/build_rules:xls_build_defs.bzl", "xls_benchmark_ir", "xls_benchmark_verilog", - "xls_dslx_ir", "xls_dslx_library", "xls_dslx_test", "xls_dslx_verilog", @@ -33,6 +33,34 @@ package( licenses = ["notice"], ) +exports_files(["xls_fifo_wrapper.v"]) + +CLOCK_PERIOD_PS = "750" +# Clock periods for modules that exceed the 750ps critical path in IR benchmark + +common_codegen_args = { + "delay_model": "asap7", + "reset": "rst", + "worst_case_throughput": "1", + "use_system_verilog": "false", + "clock_period_ps": CLOCK_PERIOD_PS, + "clock_margin_percent": "20", + "multi_proc": "true", +} + +xls_dslx_library( + name = "math_dslx", + srcs = [ + "math.x", + ], +) + +xls_dslx_test( + name = "math_dslx_test", + library = ":math_dslx", + tags = ["manual"], +) + xls_dslx_library( name = "buffer_dslx", srcs = [ @@ -64,23 +92,17 @@ xls_dslx_test( tags = ["manual"], ) +window_buffer_codegen_args = common_codegen_args | { + "module_name": "WindowBuffer64", + "clock_period_ps": "0", + "pipeline_stages": "1", +} + xls_dslx_verilog( name = "window_buffer_verilog", - codegen_args = { - "module_name": "WindowBuffer64", - "delay_model": "asap7", - "pipeline_stages": "2", - "reset": "rst", - "use_system_verilog": "false", - }, + codegen_args = window_buffer_codegen_args, dslx_top = "WindowBuffer64", library = ":window_buffer_dslx", - # TODO: 2024-01-25: Workaround for https://github.com/google/xls/issues/869 - # Force proc inlining and set last internal proc as top proc for IR optimization - opt_ir_args = { - "inline_procs": "true", - "top": "__window_buffer__WindowBuffer64__WindowBuffer_0__64_32_48_next", - }, tags = ["manual"], verilog_file = "window_buffer.v", ) @@ -88,9 +110,9 @@ xls_dslx_verilog( xls_benchmark_ir( name = "window_buffer_opt_ir_benchmark", src = ":window_buffer_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "2", - "delay_model": "asap7", + benchmark_ir_args = window_buffer_codegen_args | { + 
"pipeline_stages": "10", + "top": "__window_buffer__WindowBuffer64__WindowBuffer_0__64_32_48_next", }, tags = ["manual"], ) @@ -121,7 +143,7 @@ benchmark_synth( place_and_route( name = "window_buffer_place_and_route", - clock_period = "750", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", @@ -131,23 +153,6 @@ place_and_route( target_die_utilization_percentage = "10", ) -xls_dslx_library( - name = "magic_dslx", - srcs = [ - "magic.x", - ], - deps = [ - ":buffer_dslx", - ], -) - -xls_dslx_test( - name = "magic_dslx_test", - dslx_test_args = {"compare": "jit"}, - library = ":magic_dslx", - tags = ["manual"], -) - cc_library( name = "data_generator", srcs = ["data_generator.cc"], @@ -169,133 +174,182 @@ cc_library( ) xls_dslx_library( - name = "frame_header_dslx", + name = "common_dslx", srcs = [ - "frame_header.x", - ], - deps = [ - ":buffer_dslx", + "common.x", ], + deps = [], ) xls_dslx_library( - name = "common_dslx", + name = "frame_header_dec_dslx", srcs = [ - "common.x", + "frame_header_dec.x", + ], + deps = [ + "//xls/modules/zstd/memory:axi_dslx", + "//xls/modules/zstd/memory:mem_reader_dslx", ], - deps = [], ) xls_dslx_test( - name = "frame_header_dslx_test", - dslx_test_args = {"compare": "jit"}, - library = ":frame_header_dslx", + name = "frame_header_dec_dslx_test", + library = ":frame_header_dec_dslx", tags = ["manual"], ) -xls_dslx_library( - name = "frame_header_test_dslx", +frame_header_dec_codegen_args = common_codegen_args | { + "module_name": "FrameHeaderDecoder", + "clock_period_ps": "0", + "pipeline_stages": "6", +} + +xls_dslx_verilog( + name = "frame_header_dec_verilog", + codegen_args = frame_header_dec_codegen_args, + dslx_top = "FrameHeaderDecoderInst", + library = ":frame_header_dec_dslx", + tags = ["manual"], + verilog_file = "frame_header_dec.v", +) + +xls_benchmark_ir( + name = "frame_header_dec_opt_ir_benchmark", + src = ":frame_header_dec_verilog.opt.ir", + benchmark_ir_args = frame_header_dec_codegen_args | { + "top": "__frame_header_dec__FrameHeaderDecoderInst__FrameHeaderDecoder_0__16_32_30_5_next", + "pipeline_stages": "10", + }, + tags = ["manual"], +) + +verilog_library( + name = "frame_header_dec_verilog_lib", srcs = [ - "frame_header_test.x", + ":frame_header_dec.v", ], + tags = ["manual"], +) + +synthesize_rtl( + name = "frame_header_dec_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "FrameHeaderDecoder", deps = [ - ":buffer_dslx", - ":frame_header_dslx", + ":frame_header_dec_verilog_lib", ], ) -cc_test( - name = "frame_header_cc_test", +benchmark_synth( + name = "frame_header_dec_benchmark_synth", + synth_target = ":frame_header_dec_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "frame_header_dec_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":frame_header_dec_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "block_header_dslx", srcs = [ - "frame_header_test.cc", + "block_header.x", ], - data = [ - ":frame_header_test_dslx", + deps = [ + ":common_dslx", + ], +) + +xls_dslx_test( + name = "block_header_dslx_test", + dslx_test_args = {"compare": "jit"}, + library = ":block_header_dslx", + tags = ["manual"], +) + +xls_dslx_library( + name = "block_header_dec_dslx", + srcs = 
[ + "block_header_dec.x", ], - shard_count = 50, deps = [ - ":data_generator", - "//xls/common:xls_gunit_main", - "//xls/common/file:filesystem", - "//xls/common/file:get_runfile_path", - "//xls/common/fuzzing:fuzztest", - "//xls/common/status:matchers", - "//xls/common/status:ret_check", - "//xls/dslx:create_import_data", - "//xls/dslx:import_data", - "//xls/dslx:parse_and_typecheck", - "//xls/dslx/ir_convert:convert_options", - "//xls/dslx/ir_convert:ir_converter", - "//xls/dslx/type_system:parametric_env", - "//xls/ir:bits", - "//xls/ir:value", - "//xls/simulation:sim_test_base", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest", - "@zstd", + ":block_header_dslx", + ":common_dslx", + "//xls/modules/zstd/memory:mem_reader_dslx", ], ) +xls_dslx_test( + name = "block_header_dec_dslx_test", + library = ":block_header_dec_dslx", + tags = ["manual"], +) + +block_header_dec_codegen_args = common_codegen_args | { + "module_name": "BlockHeaderDec", + "pipeline_stages": "1", +} + xls_dslx_verilog( - name = "frame_header_verilog", - codegen_args = { - "module_name": "FrameHeaderDecoder", - "delay_model": "asap7", - "pipeline_stages": "9", - "reset": "rst", - "reset_data_path": "false", - "use_system_verilog": "false", - }, - dslx_top = "parse_frame_header_128", - library = ":frame_header_test_dslx", + name = "block_header_dec_verilog", + codegen_args = block_header_dec_codegen_args, + dslx_top = "BlockHeaderDecoderInst", + library = ":block_header_dec_dslx", tags = ["manual"], - verilog_file = "frame_header.v", + verilog_file = "block_header_dec.v", ) xls_benchmark_ir( - name = "frame_header_opt_ir_benchmark", - src = ":frame_header_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "9", - "delay_model": "asap7", + name = "block_header_dec_opt_ir_benchmark", + src = ":block_header_dec_verilog.opt.ir", + benchmark_ir_args = block_header_dec_codegen_args | { + "pipeline_stages": "10", + "top": "__block_header_dec__BlockHeaderDecoderInst__BlockHeaderDecoder_0__16_64_next", }, tags = ["manual"], ) verilog_library( - name = "frame_header_verilog_lib", + name = "block_header_dec_verilog_lib", srcs = [ - ":frame_header.v", + ":block_header_dec.v", ], tags = ["manual"], ) synthesize_rtl( - name = "frame_header_synth_asap7", + name = "block_header_dec_synth_asap7", standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", tags = ["manual"], - top_module = "FrameHeaderDecoder", + top_module = "BlockHeaderDec", deps = [ - ":frame_header_verilog_lib", + ":block_header_dec_verilog_lib", ], ) benchmark_synth( - name = "frame_header_benchmark_synth", - synth_target = ":frame_header_synth_asap7", + name = "block_header_dec_benchmark_synth", + synth_target = ":block_header_dec_synth_asap7", tags = ["manual"], ) place_and_route( - name = "frame_header_place_and_route", - clock_period = "750", + name = "block_header_dec_place_and_route", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", stop_after_step = "global_routing", - synthesized_rtl = ":frame_header_synth_asap7", + synthesized_rtl = ":block_header_dec_synth_asap7", tags = ["manual"], target_die_utilization_percentage = "10", ) @@ -306,8 +360,8 @@ xls_dslx_library( "raw_block_dec.x", ], deps = [ - ":buffer_dslx", ":common_dslx", + "//xls/modules/zstd/memory:mem_reader_dslx", ], ) @@ -318,16 +372,15 @@ xls_dslx_test( tags = ["manual"], ) 
+raw_block_dec_codegen_args = common_codegen_args | { + "module_name": "RawBlockDecoder", + "pipeline_stages": "1", +} + xls_dslx_verilog( name = "raw_block_dec_verilog", - codegen_args = { - "module_name": "RawBlockDecoder", - "delay_model": "asap7", - "pipeline_stages": "2", - "reset": "rst", - "use_system_verilog": "false", - }, - dslx_top = "RawBlockDecoder", + codegen_args = raw_block_dec_codegen_args, + dslx_top = "RawBlockDecoderInst", library = ":raw_block_dec_dslx", tags = ["manual"], verilog_file = "raw_block_dec.v", @@ -336,9 +389,9 @@ xls_dslx_verilog( xls_benchmark_ir( name = "raw_block_dec_opt_ir_benchmark", src = ":raw_block_dec_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "2", - "delay_model": "asap7", + benchmark_ir_args = raw_block_dec_codegen_args | { + "pipeline_stages": "10", + "top": "__raw_block_dec__RawBlockDecoderInst__RawBlockDecoder_0__32_32_next", }, tags = ["manual"], ) @@ -369,7 +422,7 @@ benchmark_synth( place_and_route( name = "raw_block_dec_place_and_route", - clock_period = "750", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", @@ -385,10 +438,7 @@ xls_dslx_library( "rle_block_dec.x", ], deps = [ - ":buffer_dslx", ":common_dslx", - "//xls/modules/rle:rle_common_dslx", - "//xls/modules/rle:rle_dec_dslx", ], ) @@ -399,23 +449,16 @@ xls_dslx_test( tags = ["manual"], ) +rle_block_dec_codegen_args = common_codegen_args | { + "module_name": "RleBlockDecoder", + "pipeline_stages": "1", +} + xls_dslx_verilog( name = "rle_block_dec_verilog", - codegen_args = { - "module_name": "RleBlockDecoder", - "delay_model": "asap7", - "pipeline_stages": "3", - "reset": "rst", - "use_system_verilog": "false", - }, - dslx_top = "RleBlockDecoder", + codegen_args = rle_block_dec_codegen_args, + dslx_top = "RleBlockDecoderInst", library = ":rle_block_dec_dslx", - # TODO: 2024-01-15: Workaround for https://github.com/google/xls/issues/869 - # Force proc inlining and set last internal proc as top proc for IR optimization - opt_ir_args = { - "inline_procs": "true", - "top": "__rle_block_dec__RleBlockDecoder__BatchPacker_0_next", - }, tags = ["manual"], verilog_file = "rle_block_dec.v", ) @@ -423,9 +466,9 @@ xls_dslx_verilog( xls_benchmark_ir( name = "rle_block_dec_opt_ir_benchmark", src = ":rle_block_dec_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "3", - "delay_model": "asap7", + benchmark_ir_args = rle_block_dec_codegen_args | { + "pipeline_stages": "10", + "top": "__rle_block_dec__RleBlockDecoderInst__RleBlockDecoder_0__64_next", }, tags = ["manual"], ) @@ -456,7 +499,7 @@ benchmark_synth( place_and_route( name = "rle_block_dec_place_and_route", - clock_period = "750", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", @@ -466,24 +509,6 @@ place_and_route( target_die_utilization_percentage = "10", ) -xls_dslx_library( - name = "block_header_dslx", - srcs = [ - "block_header.x", - ], - deps = [ - ":buffer_dslx", - ":common_dslx", - ], -) - -xls_dslx_test( - name = "block_header_dslx_test", - dslx_test_args = {"compare": "jit"}, - library = ":block_header_dslx", - tags = ["manual"], -) - xls_dslx_library( name = "dec_mux_dslx", srcs = [ @@ -501,15 +526,15 @@ xls_dslx_test( tags = ["manual"], ) +dec_mux_codegen_args = common_codegen_args | { + "module_name": "DecoderMux", + "clock_period_ps": "0", + "pipeline_stages": "3", +} + xls_dslx_verilog( name = "dec_mux_verilog", - codegen_args = { - "module_name": "DecoderMux", - 
"delay_model": "asap7", - "pipeline_stages": "2", - "reset": "rst", - "use_system_verilog": "false", - }, + codegen_args = dec_mux_codegen_args, dslx_top = "DecoderMux", library = ":dec_mux_dslx", tags = ["manual"], @@ -519,9 +544,8 @@ xls_dslx_verilog( xls_benchmark_ir( name = "dec_mux_opt_ir_benchmark", src = ":dec_mux_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "2", - "delay_model": "asap7", + benchmark_ir_args = dec_mux_codegen_args | { + "pipeline_stages": "10", }, tags = ["manual"], ) @@ -552,7 +576,7 @@ benchmark_synth( place_and_route( name = "dec_mux_place_and_route", - clock_period = "750", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", @@ -563,521 +587,830 @@ place_and_route( ) xls_dslx_library( - name = "dec_demux_dslx", + name = "ram_printer_dslx", + srcs = ["ram_printer.x"], + deps = [ + "//xls/examples:ram_dslx", + ], +) + +xls_dslx_test( + name = "ram_printer_dslx_test", + dslx_test_args = {"compare": "jit"}, + library = ":ram_printer_dslx", + tags = ["manual"], +) + +xls_dslx_library( + name = "sequence_executor_dslx", srcs = [ - "dec_demux.x", + "sequence_executor.x", ], deps = [ - ":block_header_dslx", ":common_dslx", + ":ram_printer_dslx", + "//xls/examples:ram_dslx", + "//xls/modules/zstd/memory:mem_writer_dslx", ], ) xls_dslx_test( - name = "dec_demux_dslx_test", - dslx_test_args = {"compare": "jit"}, - library = ":dec_demux_dslx", + name = "sequence_executor_dslx_test", + dslx_test_args = { + "compare": "none", + }, + library = ":sequence_executor_dslx", tags = ["manual"], ) +sequence_executor_codegen_args = common_codegen_args | { + "module_name": "sequence_executor", + "clock_period_ps": "0", + "generator": "pipeline", + "delay_model": "asap7", + "ram_configurations": ",".join([ + "{ram_name}:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( + latency = 5, + ram_name = "ram{}".format(num), + rd_req = "sequence_executor__rd_req_m{}_s".format(num), + rd_resp = "sequence_executor__rd_resp_m{}_r".format(num), + wr_req = "sequence_executor__wr_req_m{}_s".format(num), + wr_resp = "sequence_executor__wr_resp_m{}_r".format(num), + ) + for num in range(7) + ]), + "pipeline_stages": "6", + "reset": "rst", + "reset_data_path": "true", + "reset_active_low": "false", + "reset_asynchronous": "true", + "flop_inputs": "false", + "flop_single_value_channels": "false", + "flop_outputs": "false", +} + xls_dslx_verilog( - name = "dec_demux_verilog", - codegen_args = { - "module_name": "DecoderDemux", - "delay_model": "asap7", - "pipeline_stages": "2", - "reset": "rst", - "use_system_verilog": "false", + name = "sequence_executor_verilog", + codegen_args = sequence_executor_codegen_args, + dslx_top = "SequenceExecutorZstd", + library = ":sequence_executor_dslx", + opt_ir_args = { + "inline_procs": "true", + "top": "__sequence_executor__SequenceExecutorZstd__SequenceExecutor_0__16_64_64_0_0_0_13_8192_65536_next", }, - dslx_top = "DecoderDemux", - library = ":dec_demux_dslx", tags = ["manual"], - verilog_file = "dec_demux.v", + verilog_file = "sequence_executor.v", ) xls_benchmark_ir( - name = "dec_demux_opt_ir_benchmark", - src = ":dec_demux_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "2", - "delay_model": "asap7", + name = "sequence_executor_opt_ir_benchmark", + src = ":sequence_executor_verilog.opt.ir", + benchmark_ir_args = sequence_executor_codegen_args | { + "pipeline_stages": "10", }, tags = ["manual"], ) +xls_benchmark_verilog( + name = 
"sequence_executor_verilog_benchmark", + tags = ["manual"], + verilog_target = "sequence_executor_verilog", +) + verilog_library( - name = "dec_demux_verilog_lib", + name = "sequence_executor_lib", srcs = [ - ":dec_demux.v", + ":sequence_executor.v", ], tags = ["manual"], ) synthesize_rtl( - name = "dec_demux_synth_asap7", + name = "sequence_executor_asap7", standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", tags = ["manual"], - top_module = "DecoderDemux", + top_module = "sequence_executor", deps = [ - ":dec_demux_verilog_lib", + ":sequence_executor_lib", ], ) benchmark_synth( - name = "dec_demux_benchmark_synth", - synth_target = ":dec_demux_synth_asap7", + name = "sequence_executor_benchmark_synth", + synth_target = ":sequence_executor_asap7", tags = ["manual"], ) place_and_route( - name = "dec_demux_place_and_route", - clock_period = "750", + name = "sequence_executor_place_and_route", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, - min_pin_distance = "0.5", + min_pin_distance = "0.4", placement_density = "0.30", stop_after_step = "global_routing", - synthesized_rtl = ":dec_demux_synth_asap7", + synthesized_rtl = ":sequence_executor_asap7", tags = ["manual"], - target_die_utilization_percentage = "5", + target_die_utilization_percentage = "10", ) xls_dslx_library( - name = "block_dec_dslx", + name = "axi_csr_accessor_dslx", srcs = [ - "block_dec.x", + "axi_csr_accessor.x", ], deps = [ - ":common_dslx", - ":dec_demux_dslx", - ":dec_mux_dslx", - ":raw_block_dec_dslx", - ":rle_block_dec_dslx", + ":csr_config_dslx", + "//xls/modules/zstd/memory:axi_dslx", ], ) xls_dslx_test( - name = "block_dec_dslx_test", - dslx_test_args = {"compare": "jit"}, - library = ":block_dec_dslx", + name = "axi_csr_accessor_dslx_test", + library = ":axi_csr_accessor_dslx", tags = ["manual"], ) +axi_csr_accessor_codegen_args = common_codegen_args | { + "module_name": "AxiCsrAccessor", + "pipeline_stages": "1", +} + xls_dslx_verilog( - name = "block_dec_verilog", - codegen_args = { - "module_name": "BlockDecoder", - "delay_model": "asap7", - "pipeline_stages": "2", - "reset": "rst", - "use_system_verilog": "false", - }, - dslx_top = "BlockDecoder", - library = ":block_dec_dslx", - # TODO: 2024-01-15: Workaround for https://github.com/google/xls/issues/869 - # Force proc inlining and set last internal proc as top proc for IR optimization - opt_ir_args = { - "inline_procs": "true", - "top": "__xls_modules_zstd_dec_mux__BlockDecoder__DecoderMux_0_next", - }, + name = "axi_csr_accessor_verilog", + codegen_args = axi_csr_accessor_codegen_args, + dslx_top = "AxiCsrAccessorInst", + library = ":axi_csr_accessor_dslx", tags = ["manual"], - verilog_file = "block_dec.v", + verilog_file = "axi_csr_accessor.v", ) xls_benchmark_ir( - name = "block_dec_opt_ir_benchmark", - src = ":block_dec_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "2", - "delay_model": "asap7", + name = "axi_csr_accessor_opt_ir_benchmark", + src = ":axi_csr_accessor_verilog.opt.ir", + benchmark_ir_args = axi_csr_accessor_codegen_args | { + "pipeline_stages": "10", + "top": "__axi_csr_accessor__AxiCsrAccessorInst__AxiCsrAccessor_0__16_32_4_4_2_4_16_next", }, tags = ["manual"], ) verilog_library( - name = "block_dec_verilog_lib", + name = "axi_csr_accessor_verilog_lib", srcs = [ - ":block_dec.v", + ":axi_csr_accessor.v", ], tags = ["manual"], ) synthesize_rtl( - name = "block_dec_synth_asap7", + name = "axi_csr_accessor_synth_asap7", standard_cells = 
"@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", tags = ["manual"], - top_module = "BlockDecoder", + top_module = "AxiCsrAccessor", deps = [ - ":block_dec_verilog_lib", + ":axi_csr_accessor_verilog_lib", ], ) benchmark_synth( - name = "block_dec_benchmark_synth", - synth_target = ":block_dec_synth_asap7", + name = "axi_csr_accessor_benchmark_synth", + synth_target = ":axi_csr_accessor_synth_asap7", tags = ["manual"], ) place_and_route( - name = "block_dec_place_and_route", - clock_period = "750", + name = "axi_csr_accessor_place_and_route", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", stop_after_step = "global_routing", - synthesized_rtl = ":block_dec_synth_asap7", + synthesized_rtl = ":axi_csr_accessor_synth_asap7", tags = ["manual"], target_die_utilization_percentage = "10", ) xls_dslx_library( - name = "ram_printer_dslx", - srcs = ["ram_printer.x"], + name = "csr_config_dslx", + srcs = [ + "csr_config.x", + ], deps = [ - "//xls/examples:ram_dslx", + "//xls/modules/zstd/memory:axi_dslx", ], ) xls_dslx_test( - name = "ram_printer_dslx_test", - dslx_test_args = {"compare": "jit"}, - library = ":ram_printer_dslx", + name = "csr_config_dslx_test", + library = ":csr_config_dslx", tags = ["manual"], ) -xls_dslx_library( - name = "sequence_executor_dslx", +csr_config_codegen_args = common_codegen_args | { + "module_name": "CsrConfig", + "pipeline_stages": "3", +} + +xls_dslx_verilog( + name = "csr_config_verilog", + codegen_args = csr_config_codegen_args, + dslx_top = "CsrConfigInst", + library = ":csr_config_dslx", + tags = ["manual"], + verilog_file = "csr_config.v", +) + +xls_benchmark_ir( + name = "csr_config_opt_ir_benchmark", + src = ":csr_config_verilog.opt.ir", + benchmark_ir_args = csr_config_codegen_args | { + "pipeline_stages": "10", + "top": "__csr_config__CsrConfigInst__CsrConfig_0__2_32_4_32_2_4_next", + }, + tags = ["manual"], +) + +verilog_library( + name = "csr_config_verilog_lib", srcs = [ - "sequence_executor.x", + ":csr_config.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "csr_config_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "CsrConfig", + deps = [ + ":csr_config_verilog_lib", ], +) + +benchmark_synth( + name = "csr_config_benchmark_synth", + synth_target = ":csr_config_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "csr_config_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":csr_config_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +py_binary( + name = "zstd_test_frames_generator", + srcs = ["zstd_frame_dslx.py"], + imports = ["."], + main = "zstd_frame_dslx.py", + tags = ["manual"], + visibility = ["//xls:xls_users"], deps = [ - ":common_dslx", - ":ram_printer_dslx", - "//xls/examples:ram_dslx", + requirement("zstandard"), + "//xls/common:runfiles", + "//xls/modules/zstd/cocotb:data_generator", + "@com_google_absl_py//absl:app", + "@com_google_absl_py//absl/flags", + "@com_google_protobuf//:protobuf_python", ], ) +genrule( + name = "zstd_test_frames_generate", + srcs = [], + outs = ["zstd_frame_testcases.x"], + cmd = "$(location :zstd_test_frames_generator) -n 2 --btype RAW RLE -o $@", + tools = [":zstd_test_frames_generator"], +) + +zstd_dec_deps = [ + ":axi_csr_accessor_dslx", 
+ ":block_header_dec_dslx", + ":block_header_dslx", + ":common_dslx", + ":csr_config_dslx", + ":dec_mux_dslx", + ":frame_header_dec_dslx", + ":raw_block_dec_dslx", + ":rle_block_dec_dslx", + ":sequence_executor_dslx", + "//xls/examples:ram_dslx", + "//xls/modules/zstd/memory:mem_reader_dslx", + "//xls/modules/zstd/memory:mem_writer_dslx", + "//xls/modules/zstd/memory:axi_ram_dslx", +] + +xls_dslx_library( + name = "zstd_dec_dslx", + srcs = [ + "zstd_dec.x", + ], + deps = zstd_dec_deps, +) + xls_dslx_test( - name = "sequence_executor_dslx_test", - dslx_test_args = { - "compare": "none", - }, - library = ":sequence_executor_dslx", + name = "zstd_dec_dslx_test", + srcs = [ + "zstd_dec.x", + "zstd_dec_test.x", + "zstd_frame_testcases.x", + ], tags = ["manual"], + deps = zstd_dec_deps, ) +zstd_dec_codegen_args = common_codegen_args | { + "module_name": "ZstdDecoder", + "clock_period_ps": "0", + "pipeline_stages": "10", + "flop_inputs_kind": "skid", + "flop_outputs_kind": "skid", +} + xls_dslx_verilog( - name = "sequence_executor_verilog", - codegen_args = { - "module_name": "sequence_executor", - "generator": "pipeline", - "delay_model": "asap7", - "ram_configurations": ",".join([ - "{ram_name}:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( - latency = 5, - ram_name = "ram{}".format(num), - rd_req = "sequence_executor__rd_req_m{}_s".format(num), - rd_resp = "sequence_executor__rd_resp_m{}_r".format(num), - wr_req = "sequence_executor__wr_req_m{}_s".format(num), - wr_resp = "sequence_executor__wr_resp_m{}_r".format(num), - ) - for num in range(7) - ]), - "pipeline_stages": "8", - "reset": "rst", - "reset_data_path": "true", - "reset_active_low": "false", - "reset_asynchronous": "true", - "flop_inputs": "false", - "flop_single_value_channels": "false", - "flop_outputs": "false", - "worst_case_throughput": "1", - "use_system_verilog": "false", - }, - dslx_top = "SequenceExecutorZstd", - library = ":sequence_executor_dslx", - opt_ir_args = { - "inline_procs": "true", - "top": "__sequence_executor__SequenceExecutorZstd__SequenceExecutor_0__64_0_0_0_13_8192_65536_next", - }, + name = "zstd_dec_verilog", + codegen_args = zstd_dec_codegen_args, + dslx_top = "ZstdDecoderInst", + library = ":zstd_dec_dslx", tags = ["manual"], - verilog_file = "sequence_executor.v", + verilog_file = "zstd_dec.v", +) + +zstd_dec_internal_codegen_args = common_codegen_args | { + "module_name": "ZstdDecoderInternal", + "pipeline_stages": "2", +} + +xls_dslx_verilog( + name = "zstd_dec_internal_verilog", + codegen_args = zstd_dec_internal_codegen_args, + dslx_top = "ZstdDecoderInternalInst", + library = ":zstd_dec_dslx", + tags = ["manual"], + verilog_file = "zstd_dec_internal.v", ) xls_benchmark_ir( - name = "sequence_executor_ir_benchmark", - src = ":sequence_executor_verilog.opt.ir", + name = "zstd_dec_internal_opt_ir_benchmark", + src = ":zstd_dec_internal_verilog.opt.ir", benchmark_ir_args = { - "pipeline_stages": "8", - "delay_model": "asap7", + "top": "__zstd_dec__ZstdDecoderInternalInst__ZstdDecoderInternal_0__16_64_8_4_16_next", + "pipeline_stages": "10", }, tags = ["manual"], ) -xls_benchmark_verilog( - name = "sequence_executor_verilog_benchmark", +verilog_library( + name = "zstd_dec_internal_verilog_lib", + srcs = [ + ":zstd_dec_internal.v", + ], tags = ["manual"], - verilog_target = "sequence_executor_verilog", +) + +synthesize_rtl( + name = "zstd_dec_internal_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = 
"ZstdDecoderInternal", + deps = [ + ":zstd_dec_internal_verilog_lib", + ], +) + +benchmark_synth( + name = "zstd_dec_internal_benchmark_synth", + synth_target = ":zstd_dec_internal_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "zstd_dec_internal_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.35", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":zstd_dec_internal_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", ) verilog_library( - name = "sequence_executor_lib", + name = "zstd_dec_verilog_lib", srcs = [ - ":sequence_executor.v", + ":xls_fifo_wrapper.v", + ":zstd_dec.v", ], tags = ["manual"], ) synthesize_rtl( - name = "sequence_executor_asap7", + name = "zstd_dec_synth_asap7", standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", tags = ["manual"], - top_module = "sequence_executor", + top_module = "ZstdDecoder", deps = [ - ":sequence_executor_lib", + ":zstd_dec_verilog_lib", ], ) benchmark_synth( - name = "sequence_executor_benchmark_synth", - synth_target = ":sequence_executor_asap7", + name = "zstd_dec_benchmark_synth", + synth_target = ":zstd_dec_synth_asap7", tags = ["manual"], ) place_and_route( - name = "sequence_executor_place_and_route", - clock_period = "750", + name = "zstd_dec_place_and_route", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.4", placement_density = "0.30", stop_after_step = "global_routing", - synthesized_rtl = ":sequence_executor_asap7", + synthesized_rtl = ":zstd_dec_synth_asap7", tags = ["manual"], target_die_utilization_percentage = "10", ) +py_test( + name = "zstd_dec_cocotb_test", + srcs = ["zstd_dec_cocotb_test.py"], + data = [ + ":xls_fifo_wrapper.v", + ":zstd_dec.v", + ":zstd_dec_wrapper.v", + "//xls/modules/zstd/external:arbiter.v", + "//xls/modules/zstd/external:axi_crossbar.v", + "//xls/modules/zstd/external:axi_crossbar_addr.v", + "//xls/modules/zstd/external:axi_crossbar_rd.v", + "//xls/modules/zstd/external:axi_crossbar_wr.v", + "//xls/modules/zstd/external:axi_crossbar_wrapper.v", + "//xls/modules/zstd/external:axi_register_rd.v", + "//xls/modules/zstd/external:axi_register_wr.v", + "//xls/modules/zstd/external:priority_encoder.v", + "@com_icarus_iverilog//:iverilog", + "@com_icarus_iverilog//:vvp", + ], + env = {"BUILD_WORKING_DIRECTORY": "sim_build"}, + imports = ["."], + tags = ["manual"], + visibility = ["//xls:xls_users"], + deps = [ + requirement("cocotb"), + requirement("cocotbext-axi"), + requirement("pytest"), + requirement("zstandard"), + "//xls/common:runfiles", + "//xls/modules/zstd/cocotb:channel", + "//xls/modules/zstd/cocotb:data_generator", + "//xls/modules/zstd/cocotb:memory", + "//xls/modules/zstd/cocotb:utils", + "//xls/modules/zstd/cocotb:xlsstruct", + "@com_google_absl_py//absl:app", + "@com_google_absl_py//absl/flags", + "@com_google_protobuf//:protobuf_python", + ], +) + xls_dslx_library( - name = "repacketizer_dslx", + name = "hash_table_dslx", srcs = [ - "repacketizer.x", + "hash_table.x", + ], + deps = [ + "//xls/examples:ram_dslx", + ], +) + +xls_dslx_test( + name = "hash_table_dslx_test", + library = ":hash_table_dslx", +) + +hash_table_codegen_args = common_codegen_args | { + "module_name": "HashTable", + "pipeline_stages": "16", +} + +xls_dslx_verilog( + name = "hash_table_verilog", + codegen_args = hash_table_codegen_args | { + "ram_configurations": 
"ram:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( + latency = 5, + rd_req = "hash_table__ram_read_req_s", + rd_resp = "hash_table__ram_read_resp_r", + wr_req = "hash_table__ram_write_req_s", + wr_resp = "hash_table__ram_write_resp_r", + ), + }, + dslx_top = "HashTableInst", + library = ":hash_table_dslx", + opt_ir_args = { + "inline_procs": "true", + "top": "__hash_table__HashTableInst__HashTable_0__HashTableWriteRespHandler_0_next", + }, + verilog_file = "hash_table.v", +) + +xls_benchmark_ir( + name = "hash_table_opt_ir_benchmark", + src = ":hash_table_verilog.opt.ir", + benchmark_ir_args = hash_table_codegen_args, +) + +xls_benchmark_verilog( + name = "hash_table_verilog_benchmark", + verilog_target = "hash_table_verilog", +) + +verilog_library( + name = "hash_table_verilog_lib", + srcs = [ + ":hash_table.v", + ], +) + +synthesize_rtl( + name = "hash_table_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + top_module = "HashTable", + deps = [ + ":hash_table_verilog_lib", + ], +) + +benchmark_synth( + name = "hash_table_benchmark_synth", + synth_target = ":hash_table_asap7", +) + +place_and_route( + name = "hash_table_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.09", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":hash_table_asap7", + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "aligned_parallel_ram_dslx", + srcs = [ + "aligned_parallel_ram.x", ], deps = [ ":common_dslx", + "//xls/examples:ram_dslx", ], ) xls_dslx_test( - name = "repacketizer_dslx_test", + name = "aligned_parallel_ram_dslx_test", dslx_test_args = {"compare": "jit"}, - library = ":repacketizer_dslx", + library = ":aligned_parallel_ram_dslx", tags = ["manual"], ) +aligned_parallel_ram_codegen_args = common_codegen_args | { + "module_name": "AlignedParallelRam", + "clock_period_ps": "750", + "pipeline_stages": "8", + "worst_case_throughput": "1", +} + xls_dslx_verilog( - name = "repacketizer_verilog", - codegen_args = { - "module_name": "Repacketizer", - "delay_model": "asap7", - "pipeline_stages": "2", - "reset": "rst", - "use_system_verilog": "false", - }, - dslx_top = "Repacketizer", - library = ":repacketizer_dslx", + name = "aligned_parallel_ram_verilog", + codegen_args = aligned_parallel_ram_codegen_args, + dslx_top = "AlignedParallelRamInst", + library = ":aligned_parallel_ram_dslx", tags = ["manual"], - verilog_file = "repacketizer.v", + opt_ir_args = { + "inline_procs": "true", + "top": "__aligned_parallel_ram__AlignedParallelRamInst__AlignedParallelRam_0__10_64_7_8_1_8_128_1024_next", + }, + verilog_file = "aligned_parallel_ram.v", ) xls_benchmark_ir( - name = "repacketizer_opt_ir_benchmark", - src = ":repacketizer_verilog.opt.ir", - benchmark_ir_args = { - "pipeline_stages": "2", - "delay_model": "asap7", - }, + name = "aligned_parallel_ram_opt_ir_benchmark", + src = ":aligned_parallel_ram_verilog.opt.ir", + benchmark_ir_args = aligned_parallel_ram_codegen_args, tags = ["manual"], ) verilog_library( - name = "repacketizer_verilog_lib", + name = "aligned_parallel_ram_verilog_lib", srcs = [ - ":repacketizer.v", + ":aligned_parallel_ram.v", ], tags = ["manual"], ) synthesize_rtl( - name = "repacketizer_synth_asap7", + name = "aligned_parallel_ram_synth_asap7", standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", tags = ["manual"], - top_module = "Repacketizer", + top_module = "AlignedParallelRam", deps = 
[ - ":repacketizer_verilog_lib", + ":aligned_parallel_ram_verilog_lib", ], ) benchmark_synth( - name = "repacketizer_benchmark_synth", - synth_target = ":repacketizer_synth_asap7", + name = "aligned_parallel_ram_benchmark_synth", + synth_target = ":aligned_parallel_ram_synth_asap7", tags = ["manual"], ) place_and_route( - name = "repacketizer_place_and_route", - clock_period = "750", + name = "aligned_parallel_ram_place_and_route", + clock_period = CLOCK_PERIOD_PS, core_padding_microns = 2, min_pin_distance = "0.5", placement_density = "0.30", stop_after_step = "global_routing", - synthesized_rtl = ":repacketizer_synth_asap7", + synthesized_rtl = ":aligned_parallel_ram_synth_asap7", tags = ["manual"], target_die_utilization_percentage = "10", ) xls_dslx_library( - name = "zstd_dec_dslx", + name = "history_buffer_dslx", srcs = [ - "zstd_dec.x", + "history_buffer.x", ], deps = [ - ":block_dec_dslx", - ":block_header_dslx", - ":buffer_dslx", ":common_dslx", - ":frame_header_dslx", - ":frame_header_test_dslx", - ":magic_dslx", - ":ram_printer_dslx", - ":repacketizer_dslx", - ":sequence_executor_dslx", + ":aligned_parallel_ram_dslx", "//xls/examples:ram_dslx", ], ) +xls_dslx_test( + name = "history_buffer_dslx_test", + dslx_test_args = {"compare": "jit"}, + library = ":history_buffer_dslx", + tags = ["manual"], +) + +history_buffer_codegen_args = common_codegen_args | { + "module_name": "HistoryBuffer", + "clock_period_ps": "750", + "pipeline_stages": "8", + "worst_case_throughput": "1", +} + xls_dslx_verilog( - name = "zstd_dec_verilog", - codegen_args = { - "module_name": "ZstdDecoder", - "generator": "pipeline", - "delay_model": "asap7", - "ram_configurations": ",".join([ - "{ram_name}:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( - latency = 5, - ram_name = "ram{}".format(num), - rd_req = "zstd_dec__ram_rd_req_{}_s".format(num), - rd_resp = "zstd_dec__ram_rd_resp_{}_r".format(num), - wr_req = "zstd_dec__ram_wr_req_{}_s".format(num), - wr_resp = "zstd_dec__ram_wr_resp_{}_r".format(num), - ) - for num in range(7) - ]), - "pipeline_stages": "10", - "reset": "rst", - "reset_data_path": "true", - "reset_active_low": "false", - "reset_asynchronous": "true", - "flop_inputs": "false", - "flop_single_value_channels": "false", - "flop_outputs": "false", - "worst_case_throughput": "1", - "use_system_verilog": "false", - }, - dslx_top = "ZstdDecoder", - library = ":zstd_dec_dslx", - # TODO: 2024-01-15: Workaround for https://github.com/google/xls/issues/869 - # Force proc inlining for IR optimization + name = "history_buffer_verilog", + codegen_args = history_buffer_codegen_args, + dslx_top = "HistoryBufferInst", + library = ":history_buffer_dslx", + tags = ["manual"], opt_ir_args = { "inline_procs": "true", + "top": "__history_buffer__HistoryBufferInst__HistoryBuffer_0__64_10_7_8_1_8_128_1024_next", }, - tags = ["manual"], - verilog_file = "zstd_dec.v", + verilog_file = "history_buffer.v", ) -xls_dslx_ir( - name = "zstd_dec_test_ir", - dslx_top = "ZstdDecoderTest", - ir_file = "zstd_dec_test.ir", - library = ":zstd_dec_dslx", +xls_benchmark_ir( + name = "history_buffer_opt_ir_benchmark", + src = ":history_buffer_verilog.opt.ir", + benchmark_ir_args = history_buffer_codegen_args, tags = ["manual"], ) -cc_test( - name = "zstd_dec_cc_test", - size = "large", +verilog_library( + name = "history_buffer_verilog_lib", srcs = [ - "zstd_dec_test.cc", + ":history_buffer.v", ], - data = [ - ":zstd_dec_test.ir", + tags = ["manual"], +) + +synthesize_rtl( + name = "history_buffer_synth_asap7", + 
standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "HistoryBuffer", + deps = [ + ":history_buffer_verilog_lib", + ], +) + +benchmark_synth( + name = "history_buffer_benchmark_synth", + synth_target = ":history_buffer_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "history_buffer_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":history_buffer_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + +xls_dslx_library( + name = "match_finder_dslx", + srcs = [ + "match_finder.x", ], - shard_count = 50, deps = [ - ":data_generator", - "//xls/common:xls_gunit_main", - "//xls/common/file:filesystem", - "//xls/common/file:get_runfile_path", - "//xls/common/status:matchers", - "//xls/common/status:ret_check", - "//xls/interpreter:channel_queue", - "//xls/interpreter:serial_proc_runtime", - "//xls/ir", - "//xls/ir:bits", - "//xls/ir:channel", - "//xls/ir:events", - "//xls/ir:ir_parser", - "//xls/ir:value", - "//xls/jit:jit_proc_runtime", - "@com_google_absl//absl/container:flat_hash_map", - "@com_google_absl//absl/log", - "@com_google_absl//absl/status:statusor", - "@com_google_absl//absl/types:span", - "@com_google_googletest//:gtest", - "@zstd", + "//xls/modules/zstd/memory:mem_reader_dslx", + "//xls/modules/zstd/memory:mem_writer_dslx", + "//xls/modules/zstd/memory:axi_dslx", + "//xls/modules/zstd/memory:axi_ram_dslx", + ":hash_table_dslx", + ":history_buffer_dslx", ], ) -xls_benchmark_ir( - name = "zstd_dec_opt_ir_benchmark", - src = ":zstd_dec_verilog.opt.ir", - benchmark_ir_args = { - #TODO: rewrite ram in opt_ir step to perform valid IR benchmark - "pipeline_stages": "1", - "delay_model": "asap7", +xls_dslx_test( + name = "match_finder_dslx_test", + library = ":match_finder_dslx", +) + +match_finder_codegen_args = common_codegen_args | { + "module_name": "MatchFinder", + "pipeline_stages": "16", +} + +xls_dslx_verilog( + name = "match_finder_verilog", + codegen_args = match_finder_codegen_args | { + "ram_configurations": "ram:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( + latency = 5, + rd_req = "match_finder__ram_read_req_s", + rd_resp = "match_finder__ram_read_resp_r", + wr_req = "match_finder__ram_write_req_s", + wr_resp = "match_finder__ram_write_resp_r", + ), }, - tags = ["manual"], + dslx_top = "MatchFinderInst", + library = ":match_finder_dslx", + opt_ir_args = { + "inline_procs": "true", + "top": "__match_finder__MatchFinderInst__MatchFinder_0__32_64_7_8_10_1024_8_512_10_40_3_next", + }, + verilog_file = "match_finder.v", +) + +xls_benchmark_ir( + name = "match_finder_opt_ir_benchmark", + src = ":match_finder_verilog.opt.ir", + benchmark_ir_args = match_finder_codegen_args, +) + +xls_benchmark_verilog( + name = "match_finder_verilog_benchmark", + verilog_target = "match_finder_verilog", ) verilog_library( - name = "zstd_dec_verilog_lib", + name = "match_finder_verilog_lib", srcs = [ - ":zstd_dec.v", + ":match_finder.v", ], - tags = ["manual"], ) synthesize_rtl( - name = "zstd_dec_synth_asap7", + name = "match_finder_asap7", standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", - tags = ["manual"], - top_module = "ZstdDecoder", + top_module = "MatchFinder", deps = [ - ":zstd_dec_verilog_lib", + ":match_finder_verilog_lib", ], ) benchmark_synth( - name = 
"zstd_dec_benchmark_synth", - synth_target = ":zstd_dec_synth_asap7", - tags = ["manual"], + name = "match_finder_benchmark_synth", + synth_target = ":match_finder_asap7", ) place_and_route( - name = "zstd_dec_place_and_route", + name = "match_finder_place_and_route", clock_period = "750", core_padding_microns = 2, - min_pin_distance = "0.5", + min_pin_distance = "0.09", placement_density = "0.30", stop_after_step = "global_routing", - synthesized_rtl = ":zstd_dec_synth_asap7", - tags = ["manual"], + synthesized_rtl = ":match_finder_asap7", target_die_utilization_percentage = "10", ) diff --git a/xls/modules/zstd/README.md b/xls/modules/zstd/README.md index eff9097fbe..e9a8109687 100644 --- a/xls/modules/zstd/README.md +++ b/xls/modules/zstd/README.md @@ -2,105 +2,204 @@ The ZSTD decoder decompresses the correctly formed ZSTD frames and blocks. It implements the [RFC 8878](https://www.rfc-editor.org/rfc/rfc8878.html) decompression algorithm. -Overview of the decoder architecture is presented on the diagram below. +An overview of the decoder architecture is presented in the diagram below. The decoder comprises: -* frame decoder, -* block dispatcher, -* 3 types of processing units: RAW, RLE, and compressed, -* command aggregator, -* history buffer, -* repacketizer. - -Incoming ZSTD frames are processed in the following order: -1. magic number is detected, -2. frame header is parsed, -3. ZSTD data blocks are being redirected to correct processing unit based on the block header, -4. processing unit results are aggregated in correct order into a stream -and routed to the history buffer, -5. data block outputs are assembled based on the history buffer contents and update history, -6. decoded data is processed by repacketizer in order to prepare the final output of the decoder, -7. (optional) calculated checksum is compared against frame checksum. +* Memory Readers +* Memory Writer, +* Control and Status Registers, +* Frame Header Decoder, +* Block Header Decoder, +* 3 types of processing units: RAW-, RLE-, and Compressed Block Decoders[^1], +* Command Aggregator, + +The Decoder interacts with the environment through a set of ports: +* Memory Interface (AXI) +* CSR Interface (AXI) +* Notify line + +The software controls the core through registers accessible through the `CSR Interface`. +The CSRs are used to configure the decoder and to start the decoding process. + +ZSTD frames to decode are placed in a memory that should be connected to +decoder's `Memory Interface`. + +Once the decoding process is started, the decoder: + +1. Reads the configuration from the CSRs, +2. Decodes the Frame Header, +3. Decodes the Block Header, +4. Decodes the Block Data with the correct processing unit picked based on the Block Type from the Block Header, +4. Aggregates the processing unit results in the correct order into a stream and routes it to the history buffer, +5. Assembles the data block outputs based on the history buffer contents and updates the history, +6. Prepares the final output of the decoder and writes it to the memory, +7. (Optional) Calculates checksum and compares it against the checksum read from the frame.[^2] ![](img/ZSTD_decoder.png) +## Registers description + +The ZSTD Decoder operation is based on the values stored in a set of CSRs accessible to the user through the AXI bus. 
+The registers are defined below: + +| Name | Address | Description | +| ---- | ------- | ----------- | +| Status | 0x0 | Keeps the code describing the current state of the ZSTD Decoder | +| Start | 0x8 | Writing `1` when the decoder is in the `IDLE` state starts the decoding process | +| Reset | 0x10 | Writing `1` will reset the decoder to the `IDLE` state | +| Input Buffer | 0x18 | Keeps the base address for the input buffer that is used for storing the frame to decode | +| Output Buffer | 0x20 | Keeps the base address for the output buffer; the ZSTD Decoder will write the decoded frame into memory starting from this address. | + +### Status codes + +The following is a list of all available status codes that can be written to the `Status` register. + +| Name | Value | Description | +| ---- | ------- | ----------- | +| IDLE | 0 | Previous decoding finished successfully. The decoder waits for the configuration and for a write to the `Start` register. | +| RUNNING | 1 | Decoding process is started | +| READ_CONFIG_OK | 2 | Successfully read configuration from the CSRs | +| FRAME_HEADER_OK | 3 | Successfully decoded frame header | +| FRAME_HEADER_CORRUPTED | 4 | Frame header data is not valid | +| FRAME_HEADER_UNSUPPORTED_WINDOW_SIZE | 5 | The `WindowSize` parameter read from the frame header is not supported in the decoder | +| BLOCK_HEADER_OK | 6 | Successfully read the header of the ZSTD data block | +| BLOCK_HEADER_CORRUPTED | 7 | Block type is `Reserved` | +| BLOCK_HEADER_MEMORY_ACCESS_ERROR | 8 | Failure in communication with the memory | +| RAW_BLOCK_OK | 9 | Successfully decoded raw data block | +| RAW_BLOCK_ERROR | 10 | Failure in communication with the memory | +| RLE_BLOCK_OK | 11 | Successfully decoded RLE data block | + +### Reset handling + +The expected behavior of the `Reset` CSR cannot be achieved solely in the DSLX code. +As of [cb2829ab](https://github.com/google/xls/commit/cb2829ab809c58f21d957a47e400456a8c8f8db1), the XLS toolchain does not support resetting the proc network on the DSLX level. +As a workaround for this issue, the `ZstdDec` proc defines a `reset` output channel that sends a pulse when there is a write to the `Reset` CSR. +The Verilog code that integrates the decoder in a target system must connect this output back to the standard `rst` input of the decoder. +If any external reset signal exists and is intended to be used with the decoder, it should be OR-ed with the `reset` channel output before connecting to the decoder's `rst` input. +Please refer to the diagram of the Verilog wrapper in the [Testing Methodology](#testing-methodology) chapter for an example reset connection. + +## Controlling the decoder from the software + +The configuration done by the software must be carried out when the decoder is in the `IDLE` state. +It is the only time when the decoder will be able to take the configuration values from the CSRs and use those in the decoding process. + +The software should first read the `Status` register to confirm that the decoder is in the `IDLE` state. +In case it is not in the `IDLE` state, it is possible to reset the decoder by writing `1` to the `Reset` register. +Please note that this will stop ongoing decoding and all progress will be lost. + +Then, the software has to reserve the memory for the input buffer and write the frame to decode there. +The address of the buffer should be written into the `Input Buffer` register so that the decoder will know where to look for the frame to decode.
+ +The next step is to reserve the memory space for the decoded frame where the Decoder will write the decompressed data. +The address to that buffer should be written to the `Output Buffer` register. + +Finally, it is possible to start the decoding process by writing `1` to the `Start` register. +This orders the Decoder to read the configuration CSRs and start reading and decoding data stored in the input buffer. +The Decoder transitions to the `RUNNING` state and then to other states that describe the status of the last operation finished in the decoder (see [Status codes](#status-codes) for other possible status codes), which will be visible in the `Status` register. + +When the decoding process is finished, the Decoder transitions back to the `IDLE` state and signals this on the `Notify` IRQ line. +The decoded data is stored under the address configured previously in the `Output Buffer` register. + +In case an error occurs during the decoding process, it is also signaled on the `Notify` IRQ line and the error code is written to the `Status` CSR. + ## ZSTD decoder architecture ### Top level Proc -This state machine is responsible for receiving encoded ZSTD frames, buffering the input and passing it to decoder's internal components based on the state of the proc. -The states defined for the processing of ZSTD frame are as follows: +This state machine is responsible for controlling the operation of the whole decoder. +It uses the configuration data from the CSRs, connects all underlying modules and sends processing requests to those based on the state of the machine. +The states defined for the processing of the ZSTD frame are as follows: ```mermaid stateDiagram - direction LR + [*] --> IDLE + + IDLE --> READ_CONFIG: Start + IDLE --> mem_write_done + + READ_CONFIG --> DECODE_FRAME_HEADER + READ_CONFIG --> mem_write_done + + DECODE_FRAME_HEADER --> DECODE_BLOCK_HEADER + DECODE_FRAME_HEADER --> ERROR + DECODE_FRAME_HEADER --> mem_write_done - [*] --> DECODE_MAGIC_NUMBER + DECODE_BLOCK_HEADER --> DECODE_RAW_BLOCK + DECODE_BLOCK_HEADER --> DECODE_RLE_BLOCK + DECODE_BLOCK_HEADER --> DECODE_COMPRESSED_BLOCK + DECODE_BLOCK_HEADER --> ERROR + DECODE_BLOCK_HEADER --> mem_write_done - DECODE_MAGIC_NUMBER --> DECODE_MAGIC_NUMBER: Not enough data - DECODE_MAGIC_NUMBER --> DECODE_FRAME_HEADER: Got magic number - DECODE_MAGIC_NUMBER --> ERROR: Corrupted + state if_block_last <> + DECODE_RAW_BLOCK --> ERROR + DECODE_RAW_BLOCK --> if_block_last + DECODE_RAW_BLOCK --> mem_write_done - DECODE_FRAME_HEADER --> DECODE_FRAME_HEADER: Not enough data - DECODE_FRAME_HEADER --> DECODE_BLOCK_HEADER: Header decoded - DECODE_FRAME_HEADER --> ERROR: Unsupported window size - DECODE_FRAME_HEADER --> ERROR: Corrupted + DECODE_RLE_BLOCK --> ERROR + DECODE_RLE_BLOCK --> if_block_last + DECODE_RLE_BLOCK --> mem_write_done - DECODE_BLOCK_HEADER --> DECODE_BLOCK_HEADER: Not enough data - DECODE_BLOCK_HEADER --> FEED_BLOCK_DECODER: Feed raw data - DECODE_BLOCK_HEADER --> FEED_BLOCK_DECODER: Feed RLE data - DECODE_BLOCK_HEADER --> FEED_BLOCK_DECODER: Feed compressed data - DECODE_BLOCK_HEADER --> ERROR: Corrupted + DECODE_COMPRESSED_BLOCK --> ERROR + DECODE_COMPRESSED_BLOCK --> if_block_last + DECODE_COMPRESSED_BLOCK --> mem_write_done - state if_decode_checksum <> - state if_block_done <> + if_block_last --> DECODE_BLOCK_HEADER: Not last block in the frame + if_block_last --> DECODE_CHECKSUM: Last block in the frame - FEED_BLOCK_DECODER --> if_decode_checksum: Is the checksum available?
- if_decode_checksum --> DECODE_CHECKSUM: True - if_decode_checksum --> DECODE_MAGIC_NUMBER: False - FEED_BLOCK_DECODER --> if_block_done: Is the block decoding done? - if_block_done --> DECODE_BLOCK_HEADER: Decode next block - if_block_done --> FEED_BLOCK_DECODER: Continue feeding + DECODE_CHECKSUM --> mem_write_done - DECODE_CHECKSUM --> DECODE_MAGIC_NUMBER: Frame decoded + state mem_write_done <> + mem_write_done --> FINISH: Frame written to the memory - ERROR --> [*] + FINISH --> IDLE + ERROR --> IDLE ``` -After going through initial stages of decoding magic number and frame header, decoder starts the block division process. -It decodes block headers to calculate how many bytes must be sent to the block dispatcher and when the current frame's last data block is being processed. -Knowing that, it starts feeding the block decoder with data required for decoding current block. -After transmitting all data required for current block, it loops around to the block header decoding state and when next block header is not found it decodes checksum when it was requested in frame header or finishes ZSTD frame decoding and loops around to magic number decoding. - -### ZSTD frame header decoder -This part of the design starts with detecting the ZSTD magic number. -Then it parses and decodes the frame header's content and checks the header's correctness. -If the frame header has the checksum option enabled, this will enable `DECODE_CHECKSUM` stage at the end of the frame decoding where the frame's checksum will be computed and compared with the checksum embedded at the end of the frame stream. - -### Block dispatcher (demux) -At this stage, block headers are parsed and removed from the block data stream. -Based on parse values, it directs the block data stream to either RAW, RLE or compressed block sections. -For this task it uses an 8 byte native interface: a 64-bit data bus and a 64-bit length field that contains the number of correct bits on the data bus. -It also attaches a unique block ID value to each processed data block. -The IDs are sequential starting from 0 and are reset only after receiving and processing the current frame's last data block. - -### RAW -This proc passes the received data directly to its output channel. +After going through the initial stage of reading the configuration from the CSRs, the decoder sends the processing requests to the underlying parts of the decoder. +The processing requests contain the addresses in the memory where particular parts of the encoded ZSTD frames reside. +The decoder, based on responses from consecutive internal modules, calculates offsets from the base address that was written to `Input Buffer` CSR and forms the requests for the next internal modules, e.g.: for `BlockHeaderDecoder` or any of the processing units (`RawBlockDecoder`, `RleBlockDecoder`, `CompressedBlockDecoder`). + +Each of the internal modules waits for the processing request. +Once received, the module fetches the data from the memory starting from the address received in the processing request. +`MemReader` procs are used by those modules to communicate with the external memory through the AXI interface. +Internal modules decode the acquired parts of the frame and return responses with the results back to the top level proc. + +The processing units also output the decoded blocks of data through a stream-based interface to the `SequenceExecutor` proc. 
+This proc performs the last step of the decoding before the final output is sent out back to the memory under the address stored in the `Output Buffer` CSR by the `MemWriter` proc. +Once the decoding process is completed and the decoded frame is written back to the memory, the decoder sends the `Notify` signal and transitions back to the `IDLE` state. + +### Internal modules + +#### FrameHeaderDecoder +This proc receives requests with the address of the beginning of the ZSTD frame. +It then reads the frame data from the memory and starts parsing the frame header. +If the magic number is not detected or the frame header is invalid, the proc will send a response with an error code. +Otherwise, it will put the frame header into internal DSLX representation, calculate the length of the header and send those as a response with `OKAY` status. + +#### BlockHeaderDecoder +ZSTD block header size is always 3 bytes. +BlockHeaderDecoder always reads 4 bytes of data. +It extracts the information on block type, size and whether the block is the last one in the ZSTD frame and puts that data in the response. +The additional byte is also placed in the response as an optimization for the RleBlockDecoder. + +#### RawBlockDecoder +This proc passes the data read from the memory directly to its output channel. It preserves the block ID and attaches a tag, stating that the data contains literals and should be placed in the history buffer unchanged, to each data output. -### RLE decoder -This proc receives a tuple (s, N), where s is an 8 bit symbol and N is an accompanying `symbol_count`. +#### RleBlockDecoder +This proc receives a tuple (s, N), where s is an 8-bit symbol and N is an accompanying `symbol_count`. +It does not have to read the 8-bit symbol from the memory because `BlockHeaderDecoder` did that before and passed the symbol in the processing request to the `RleBlockDecoder`. The proc produces `N*s` repeats of the given symbol. This step preserves the block ID and attaches the literals tag to all its outputs. -### Compressed block decoder +#### CompressedBlockDecoder[^1] This part of the design is responsible for decoding the compressed data blocks. -It ingests the bytes stream, internally translates and interprets incoming data. +It ingests the bytes stream, and internally translates and interprets incoming data. Only this part of the design creates data chunks tagged both with `literals` and/or `copy`. This step preserves the block ID. -More in depth description can be found in [Compressed block decoder architecture](#compressed-block-decoder-architecture) paragraph of this doc. +More in-depth description can be found in [Compressed block decoder architecture](#compressed-block-decoder-architecture) paragraph of this doc. -### Commands aggregator (mux) -This stage takes the output from either RAW, RLE or Command constructor and sends it to the History buffer and command execution stage. -This stage orders streams based on the ID value assigned by the block dispatcher. +#### Commands aggregator (DecMux) +This stage takes the output from either RAW, RLE or CompressedBlockDecoder and sends it to the History buffer and command execution stage. +This stage orders streams based on the ID value assigned by the top level proc. It is expected that single base decoders (RAW, RLE, compressed block decoder) will be continuously transmitting a single ID to the point of sending the `last` signal which marks the last packet of currently decoded block. 
That ID can change only when mux receives the `last` signal or `last` and `last_block` signals. @@ -110,7 +209,7 @@ It continues to read that stream until the `last` signal is set, then it switche The command aggregator starts by waiting for `ID = 0`, after receiving the `last` signal it expects `ID = 1` and so on. Only when both `last` and `last_block` are set the command aggregator will wait for `ID = 0`. -### History buffer and command execution +#### History buffer and command execution (SequenceExecutor) This stage receives data which is tagged either `literals` or `copy`. This stage will show the following behavior, depending on the tag: * `literals` @@ -121,13 +220,13 @@ This stage will show the following behavior, depending on the tag: * Copy `copy_length` literals starting `offset _length` from the newest in history buffer to the decoder's output, * Copy `copy_length` literals starting `offset _length` from the newest in history buffer to the history buffer as the newest. -### Compressed block decoder architecture +### Compressed block decoder architecture[^1] This part of the design is responsible for processing the compressed blocks up to the `literals`/`copy` command sequence. -This sequence is then processed by the history buffer to generate expected data output. -Overview of the architecture is provided on the diagram below. -The architecture is split into 2 paths: literals path and sequence path. +This sequence is then processed by the history buffer to generate the expected data output. +An overview of the architecture is provided in the diagram below. +The architecture is split into 2 paths: the literals path and the sequence path. Architecture is split into 3 paths: literals path, FSE encoded Huffman trees and sequence path. -Literals path uses Hufman trees to decode some types of compressed blocks: Compressed and Treeless blocks. +Literals path uses Huffman trees to decode some types of compressed blocks: Compressed and Treeless blocks. ![](img/ZSTD_compressed_block_decoder.png) @@ -144,11 +243,11 @@ When `literals length` is greater than 0, it will send a request to the literals Then based on the offset and copy length it either creates a match command using the provided offset and match lengths, or uses repeated offset and updates the repeated offset memory. Formed commands are sent to the Commands aggregator (mux). -### Literals path architecture +#### Literals path architecture ![](img/ZSTD_compressed_block_literals_decoder.png) -#### Literals decoder dispatcher +##### Literals decoder dispatcher This proc parses and consumes the literals section header. Based on the received values it passes the remaining bytes to RAW/RLE/Huffman tree/Huffman code decoders. It also controls the 4 stream operation mode [4-stream mode in RFC](https://www.rfc-editor.org/rfc/rfc8878.html#name-jump_table). @@ -156,59 +255,59 @@ It also controls the 4 stream operation mode [4-stream mode in RFC](https://www. All packets sent to the Huffman bitstream buffer will be tagged either `in_progress` or `finished`. If the compressed literals use the 4 streams encoding, the dispatcher will send the `finished` tag 4 times, each time a fully compressed stream is sent to the bitstream buffer. -#### RAW Literals +##### RAW Literals This stage simply passes the incoming bytes as literals to the literals buffer. -#### RLE Literals +##### RLE Literals This stage works similarly to the [RLE stage](#rle-decoder) for RLE data blocks. 
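For reference, the expansion performed by the RLE stages (the `RleBlockDecoder` described earlier and the RLE Literals stage here) can be modeled in a few lines of software. The following is a minimal sketch of the behavior only; the function name and types are illustrative and do not correspond to the DSLX implementation.

```python
# Toy model of RLE expansion: given the 8-bit symbol `s` and the repeat
# count `N` taken from the header, emit N copies of `s` as literals.
def rle_expand(symbol: int, count: int) -> bytes:
    assert 0 <= symbol <= 0xFF, "RLE symbol is a single byte"
    return bytes([symbol]) * count

# Example: rle_expand(0xAB, 4) == b'\xab\xab\xab\xab'
```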
-#### Huffman bitstream buffer +##### Huffman bitstream buffer This stage takes data from the literals decoder dispatcher and stores it in the buffer memory. Once the data with the `finished` tag set is received, this stage sends a tuple containing (start, end) positions for the current bitstream to the Huffman codes decoder. This stage receives a response from the Huffman codes decoder when decoding is done and all bits got processed. Upon receiving this message, the buffer will reclaim free space. -#### Huffman codes decoder +##### Huffman codes decoder This stage receives bitstream pointers from the Huffman bitstream buffer and Huffman tree configuration from the Huffman tree builder. It accesses the bitstream buffers memory to retrieve bitstream data in reversed byte order and runs it through an array of comparators to decode Huffman code to correct literals values. -#### Literals buffer +##### Literals buffer This stage receives data either from RAW, RLE or Huffman decoder and stores it. Upon receiving the literals copy command from the Command Constructor for `N` number of bytes, it provides a reply with `N` literals. -### FSE Huffman decoder architecture +#### FSE Huffman decoder architecture ![](img/ZSTD_compressed_block_Huffman_decoder.png) -#### Huffman tree decoder dispatcher +##### Huffman tree decoder dispatcher This stage parses and consumes the Huffman tree description header. Based on the value of the Huffman descriptor header, it passes the tree description to the FSE decoder or to direct weight extraction. -#### FSE weight decoder +##### FSE weight decoder This stage performs multiple functions. 1. It decodes and builds the FSE distribution table. 2. It stores all remaining bitstream data. 3. After receiving the last byte, it translates the bitstream to Huffman weights using 2 interleaved FSE streams. -#### Direct weight decoder +##### Direct weight decoder This stage takes the incoming bytes and translates them to the stream of Huffman tree weights. The first byte of the transfer defines the number of symbols to be decoded. -#### Weight aggregator +##### Weight aggregator This stage receives tree weights either from the FSE decoder or the direct decoder and transfers them to Huffman tree builder. This stage also resolves the number of bits of the final weight and the max number of bits required in the tree representation. This stage will emit the weights and number of symbols of the same weight before the current symbol for all possible byte values. -#### Huffman tree builder +##### Huffman tree builder This stage takes `max_number_of_bits` (maximal length of Huffman code) as the first value, then the number of symbols with lower weight for each possible weight (11 bytes), followed by a tuple (number of preceding symbols with the same weight, symbol's_weight). It's expected to receive weights for all possible byte values in the correct order. Based on this information, this stage will configure the Huffman codes decoder. -### Sequence path architecture +#### Sequence path architecture ![](img/ZSTD_compressed_block_sequence_decoder.png) -#### Sequence Header parser and dispatcher +##### Sequence Header parser and dispatcher This stage parses and consumes `Sequences_Section_Header`. Based on the parsed data, it redirects FSE description to the FSE table decoder and triggers Literals FSE, Offset FSE or Match FSE decoder to reconfigure its values based on the FSE table decoder. 
After parsing the FSE tables, this stage buffers bitstream and starts sending bytes, starting from the last one received as per ZSTD format. @@ -216,37 +315,24 @@ Bytes are sent to all decoders at the same time. This stage monitors and triggers sequence decoding phases starting from initialization, followed by decode and state advance. FSE decoders send each other the number of bits they read. -#### Literals FSE decoder +##### Literals FSE decoder This stage reconfigures its FSE table when triggered from [sequence header parse and dispatcher](#sequence-header-parser-and-dispatcher). It initializes its state as the first FSE decoder. In the decode phase, this stage is the last one to decode extra raw bits from the bitstream, and the number of ingested bits is transmitted to all other decoders. This stage is the first stage to get a new FSE state from the bitstream, and it transmits the number of bits it used. -#### Offset FSE decoder +##### Offset FSE decoder This stage reconfigures its FSE table when triggered from [sequence header parse and dispatcher](#sequence-header-parser-and-dispatcher). It initializes its state as the second FSE decoder. In the decode phase, this stage is the first one to decode extra raw bits from bitstream, and the number of ingested bits is transmitted to all other decoders. This stage is the last decoder to update its FSE state after the decode phase, and it transmits the number of used bits to other decoders. -#### Match FSE decoder +##### Match FSE decoder This stage reconfigures its FSE table when triggered from [sequence header parse and dispatcher](#sequence-header-parser-and-dispatcher). It initializes its state as the last FSE decoder. In the decode phase, this stage is the second one to decode extra raw bits from the bitstream, and the number of ingested bits is transmitted to all other decoders. This stage is the second stage to update its state after the decode phase, and the number of used bits is sent to all other decoders. -### Repacketizer -This proc is used at the end of the processing flow in the ZSTD decoder. -It gathers the output of `SequenceExecutor` proc and processes it to form final output packets of the ZSTD decoder. -Input packets coming from the `SequenceExecutor` consist of: - -* data - bit vector of constant length -* length - field describing how many bits in bit vector are valid -* last - flag which marks the last packet in currently decoded ZSTD frame. - -It is not guaranteed that all bits in data bit vectors in packets received from `SequenceExecutor` are valid as those can include padding bits which were added in previous decoding steps and now have to be removed. -Repacketizer buffers input packets, removes the padding bits and forms new packets with all bits of the bit vector valid, meaning that all bits are decoded data. -Newly formed packets are then sent out to the output of the whole ZSTD decoder. - ## Testing methodology Testing of the `ZSTD decoder` is carried out on two levels: @@ -255,14 +341,36 @@ Testing of the `ZSTD decoder` is carried out on two levels: * Integrated decoder Each component of the decoder is tested individually in DSLX tests. -Testing on the DSLX level allows the creation of small test cases that test for both positive and negative outcomes of a given part of the design. +Testing on the DSLX level allows the creation of small test cases that test for positive outcomes of a given part of the design. 
When need be, those test cases can be also modified by the user to better understand how the component operates. -Tests of the integrated ZSTD decoder are written in C++. +Tests of the integrated ZSTD decoder are carried out on DSLX and Verilog levels. The objective of those is to verify the functionality of the decoder as a whole. Testing setup for the ZSTD decoder is based on comparing the simulated decoding results against the decoding of the reference library. Currently, due to the restrictions from the ZSTD frame generator, it is possible to test only the positive cases (decoding valid ZSTD frames). +Verilog tests are written in Python as [cocotb](https://github.com/cocotb/cocotb) testbench. + +ZstdDecoder's main communication interfaces are the AXI buses. +Due to the way XLS handles the codegen of DSLX channels that model the AXI channels, the particular ports of the AXI channels are not represented correctly. +This enforces the introduction of a Verilog wrapper that maps the ports generated by XLS into proper AXI ports (see AXI peripherals [README](memory/README.md) for more information). +Additionally, the wrapper is used to mux multiple AXI interfaces from `Memory Readers` and `Memory Writer` into a single outside-facing AXI interface (`Memory Interface`) that can be connected to the external memory. +The mux is implemented by a third-party [AXI Crossbar](https://github.com/alexforencich/verilog-axi). + +![](img/ZSTD_decoder_wrapper.png) + +Cocotb testbench interacts with the decoder with the help of a [cocotbext-axi](https://github.com/alexforencich/cocotbext-axi) extension that provides AXI bus models, drivers, monitors and RAM model accessible through AXI interface. +Cocotb AXI Master is connected to the decoder's `CSR Interface` and is used to simulate the software's interaction with the decoder. + +The Basic test case for the ZstdDecoder is composed of the following steps: + +1. The testbench generates a ZSTD frame using the [decodecorpus](https://github.com/facebook/zstd/blob/dev/tests/decodecorpus.c) utility from the [zstd reference library](https://github.com/facebook/zstd). +2. The encoded frame is placed in an AXI RAM model that is connected to the decoder's `Memory Interface`. +3. The encoded frame is decoded with the zstd reference library and the results are represented in the decoder's output format as the expected data from the simulation. +4. AXI Master performs a series of writes to the ZstdDecoder CSRs to configure it and start the decoding process. +5. Testbench waits for the signal on the `Notify` channel and checks the output of the decoder stored in the memory against the expected output data. +6. Test case succeeds once `Notify` is asserted, all expected data is received and the decoder lands in `IDLE` state with status `OKAY` in the `Status` CSR. + ### Failure points #### User-facing decoder errors @@ -274,19 +382,8 @@ The design will fail the tests under the following conditions: * Simulation encounters `assert!()` or `fail!()` statements * The decoding result from the simulation has a different size than the results from the reference library * The decoding result from the simulation has different contents than the results from the reference library -* Caveats: - * Timeout occurred while waiting for a valid `Magic Number` to start the decoding process - * Other timeouts occurring while waiting on channel operations (To be fixed) Currently, all mentioned conditions lead to an eventual test failure. 
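The steps of the basic test case and the checks listed above map onto a handful of CSR accesses and assertions in the testbench. The following condensed, cocotb-style sketch illustrates that flow; `csr_master`, `memory` and the `notify` handle are hypothetical placeholders rather than the actual objects used in `zstd_dec_cocotb_test.py`, and the register offsets follow the table in the [Registers description](#registers-description) section.

```python
# Hypothetical, condensed flow of the basic ZstdDecoder test case.
# `csr_master` stands for an AXI master driving the CSR Interface and
# `memory` for the AXI RAM model attached to the Memory Interface.
from cocotb.triggers import RisingEdge

STATUS, START, INPUT_BUFFER, OUTPUT_BUFFER = 0x0, 0x8, 0x18, 0x20
IDLE = 0

async def decode_and_check(dut, csr_master, memory, frame, expected):
    ibuf, obuf = 0x1000, 0x8000                 # example buffer addresses
    memory.write(ibuf, frame)                   # place the encoded frame in RAM
    await csr_master.write_dword(INPUT_BUFFER, ibuf)
    await csr_master.write_dword(OUTPUT_BUFFER, obuf)
    await csr_master.write_dword(START, 1)      # start the decoding process
    while not dut.notify.value:                 # wait for the Notify line
        await RisingEdge(dut.clk)
    assert await csr_master.read_dword(STATUS) == IDLE
    assert memory.read(obuf, len(expected)) == expected
```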
-Most of those cases are handled properly while some are yet to be reworked to finish faster or to provide more information about the error. -For example, in case of transitioning to the `ERROR` state, the test will timeout on channel operations waiting to read from the decoder output. -In case of waiting for a valid `Magic Number`, the decoder will transition to an `ERROR` state without registering the correct `Magic Number` on the input channel which will lead to a similar timeout. - -Those cases should be handled in a way that allows for early failure of the test. -It can be done through a Proc parameter enabled for tests that change the behavior of the logic, e.g. launching `assert!()` when the decoder enters the `ERROR` state. -Another idea is to use a special output channel for signaling internal states and errors to monitor the decoder for the errors encountered during decoding. -For example, in an invalid `Magic Number`, the test case should expect a certain type of error reported on this channel at the very beginning of the simulation. #### Failures in ZSTD Decoder components @@ -295,24 +392,20 @@ However, the majority of the errors require modification of the deeper parts of Because of that, it is better to rely on DSLX tests for the individual components where inputs for the test cases are smaller, easier to understand and modify when needed. The components of the ZSTD decoder can fail on `assert!()` and `fail!()` statements or propagate specific error states to the Top Level Proc and cause it to transition to the `ERROR` state. +Upon entering the `ERROR` state, the decoder will write a specific error code to the `Status` CSR and send a `Notify` signal to the output. +The interacting software can then read the code from the register and properly handle the error. + The following enumeration will describe how to trigger each possible ZSTD Decoder error. 
-The `ERROR` state can be encountered under the following conditions when running Top Level Proc C++ tests but also in DSLX tests for the specific components: -* Corrupted data on the `Magic Number` decoding stage +The `ERROR` state can be encountered under the following conditions when running Top Level Proc Verilog tests but also in DSLX tests for the specific components: +* Corrupted data on the frame header decoding stage * Provide data for the decoding with the first 4 bytes not being the valid `Magic Number` (0xFD2FB528) -* Corrupted data during frame header decoding * Set the `Reserved bit` in the frame header descriptor -* Unsupported Window Size during frame header decoding * Set `Window Size` in frame header to value greater than `max window size` calculated from current `WINDOW_LOG_MAX` (by default in Top Level Proc tests `Window Size` must be greater than `0x78000000` to trigger the error) * Corrupted data during Block Header decoding * Set the `Block Type` of any block in the ZSTD frame to `RESERVED` The `assert!()` or `fail!()` will occur in: -* Buffer - * Add data to the buffer with `buffer_append()` when it is already full or unable to fit the whole length of the data - * Fetch data from the buffer with `buffer_pop()` when it is empty or have not enough data -* DecoderDemux - * Receive more than one `raw` or `compressed` block in a single `BlockDataPacket` * RawBlockDecoder * Receive `BlockDataPacket` with `ID` different than the previous packet which did not have the `last` flag set * DecoderMux @@ -321,34 +414,26 @@ The `assert!()` or `fail!()` will occur in: * SequenceExecutor * Receive `SequenceExecutorPacket` with `msg_type==SEQUENCE` and `content` field with value: `0` -There are also several `impossible cases` covered by `assert!()` and `fail!()`: +There are also several `impossible cases` covered by `fail!()`. +Those are mostly enforced by the type checker for the `match` expressions to cover unreachable cases. +This is done for example in: * Frame header decoder - * `Window Descriptor` does not exist after checking that it is available in the frame header - * `Frame Content Size` does not exist after checking that it is available in the frame header - * `Dictionary ID Flag` has an illegal value - * `Frame Content Size Flag` has an illegal value -* DecoderDemux - * Data packet has a different `Block Type` than `RAW`, `RLE` or `COMPRESSED` * SequenceExecutor - * Proc transitions to `SEQUENCE_READ` state after receiving `SequenceExecutorPacket` with `msg_type` different than `SEQUENCE` or the message was invalid -* Top Level Proc - * Block header type is different than `RAW`, `RLE`, `COMPRESSED` - * There is not enough data to feed the `BlockDecoder`, even though the previous check indicated a valid amount of data in the buffer ### Testing against [libzstd](https://github.com/facebook/zstd) Design is verified by comparing decoding results to the reference library `libzstd`. ZSTD frames used for testing are generated with [decodecorpus](https://github.com/facebook/zstd/blob/dev/tests/decodecorpus.c) utility. -The generated frame is then decoded with `libzstd`. +The generated frame is then decoded with `libzstd` and with simulated `ZstdDecoder`. #### Positive test cases If the results of decoding with `libzstd` are valid, the test runs the same encoded frame through the simulation of DSLX design. The output of the simulation is gathered and compared with the results of `libzstd` in terms of its size and contents. 
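For reference, the expected data for this comparison can be reproduced in software with the `zstandard` Python bindings to libzstd. The snippet below is a minimal sketch; the helper name is illustrative, and the explicit output size bound is an assumption made because generated frames may not carry the content size in the frame header.

```python
# Minimal sketch: decode a frame with the zstandard bindings to libzstd to
# obtain the expected output for comparison with the simulated decoder.
import zstandard

def reference_decode(frame: bytes, max_output_size: int = 1 << 20) -> bytes:
    return zstandard.ZstdDecompressor().decompress(frame, max_output_size=max_output_size)
```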
-Encoded ZSTD frame is generated with the function `GenerateFrame(int seed, BlockType btype)` from [data_generator](https://github.com/antmicro/xls/blob/52186-zstd-top/xls/modules/zstd/data_generator.cc) library. -This function takes as arguments the seed for the generator and enum which codes the type of blocks that should be generated in a given frame. +Encoded ZSTD frame is generated with the function `GenerateFrame(seed, btype, output_path)` from [data_generator](https://github.com/antmicro/xls/blob/main/xls/modules/zstd/cocotb/data_generator.py) library. +This function takes as arguments the seed for the generator, an enum that codes the type of blocks that should be generated in a given frame and the output path to write the generated frame into a file. The available block types are: * RAW @@ -357,12 +442,12 @@ The available block types are: * RANDOM The function returns a vector of bytes representing a valid encoded ZSTD frame. -Such generated frame can be passed to `ParseAndCompareWithZstd(std::vector frame)` which is responsible for decoding the frame, running simulation and comparing the results. +Such generated frame can be passed to DSLX and cocotb testbenches to be decoded in the simulation and compared against the results from the reference library. -Tests are available in the `zstd_dec_test.cc` file and can be launched with the following Bazel command: +Verilog tests are available in the `zstd_dec_cocotb_test.py` file and can be launched with the following Bazel command: ``` -bazel test //xls/modules/zstd:zstd_dec_cc_test +bazel run -c opt -- //xls/modules/zstd:zstd_dec_cocotb_test --logtostderr ``` #### Negative test cases @@ -373,11 +458,6 @@ Because of that, it is not possible to efficiently provide valuable negative tes The alternatives for writing negative tests include: * Generating a well-known valid ZSTD frame from a specific generator seed and then tweaking the raw bits in this frame to trigger the error response from the decoder -* Using [FuzzTest](https://github.com/google/fuzztest) to create multiple randomized test cases for the decoder and then compare `libzstd` decoder failure with `ZSTD Decoder` failure. - -### Known Limitations - -* **[WIP]** Bugs in the current flow cause failures in some of the test cases of decoding ZSTD frame with RLE block types -* **[WIP]** Compressed block type is not supported -* Checksum is not being verified +[^1]: `CompressedBlockDecoder` is to be added in follow-up PRs. +[^2]: Checksum verification is currently unsupported. diff --git a/xls/modules/zstd/aligned_parallel_ram.x b/xls/modules/zstd/aligned_parallel_ram.x new file mode 100644 index 0000000000..88f45d34ba --- /dev/null +++ b/xls/modules/zstd/aligned_parallel_ram.x @@ -0,0 +1,988 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// this file contains implementation of parallel RAMs with aligned access +// write requests' address should be a multiple of data width +// read requests` address dont have to be a multiple of data width + + +import std; +import xls.modules.zstd.common as common; +import xls.examples.ram; + +// Configurable RAM parameters, RAM_NUM has to be a power of 2 +pub const RAM_NUM = u32:8; +pub const RAM_NUM_W = std::clog2(RAM_NUM); + +pub struct AlignedParallelRamReadReq { + addr: uN[ADDR_W], +} + +pub struct AlignedParallelRamReadResp { + data: uN[DATA_W], +} + +pub struct AlignedParallelRamWriteReq { + addr: uN[ADDR_W], + data: uN[DATA_W], +} + +pub struct AlignedParallelRamWriteResp {} + +enum AlignedParallelRamReadRespHandlerFSM : u1 { + IDLE = 0, + READ_RESP = 1, +} + +struct AlignedParallelRamReadRespHandlerState { + fsm: AlignedParallelRamReadRespHandlerFSM, + ram_offset: uN[RAM_NUM_W], + resp_recv: bool[RAM_NUM], + resp_data: uN[RAM_DATA_W][RAM_NUM], +} + +struct AlignedParallelRamReadRespHandlerCtrl { + ram_offset: uN[RAM_NUM_W], +} + +proc AlignedParallelRamReadRespHandler< + DATA_W: u32, + RAM_DATA_W: u32 = {DATA_W / RAM_NUM}, +> { + type ReadResp = AlignedParallelRamReadResp; + type RamReadResp = ram::ReadResp; + + type FSM = AlignedParallelRamReadRespHandlerFSM; + type Ctrl = AlignedParallelRamReadRespHandlerCtrl; + type State = AlignedParallelRamReadRespHandlerState; + + ctrl_r: chan in; + + read_resp_s: chan out; + + ram_read_resp_0_r: chan in; + ram_read_resp_1_r: chan in; + ram_read_resp_2_r: chan in; + ram_read_resp_3_r: chan in; + ram_read_resp_4_r: chan in; + ram_read_resp_5_r: chan in; + ram_read_resp_6_r: chan in; + ram_read_resp_7_r: chan in; + + config ( + ctrl_r: chan in, + read_resp_s: chan out, + ram_read_resp_0_r: chan in, + ram_read_resp_1_r: chan in, + ram_read_resp_2_r: chan in, + ram_read_resp_3_r: chan in, + ram_read_resp_4_r: chan in, + ram_read_resp_5_r: chan in, + ram_read_resp_6_r: chan in, + ram_read_resp_7_r: chan in, + ) { + ( + ctrl_r, + read_resp_s, + ram_read_resp_0_r, + ram_read_resp_1_r, + ram_read_resp_2_r, + ram_read_resp_3_r, + ram_read_resp_4_r, + ram_read_resp_5_r, + ram_read_resp_6_r, + ram_read_resp_7_r, + ) + } + + init { zero!() } + + next (state: State) { + // receive ctrl + let (_, ctrl, ctrl_valid) = recv_if_non_blocking(join(), ctrl_r, state.fsm == FSM::IDLE, zero!()); + + let state = if ctrl_valid { + State { + fsm: FSM::READ_RESP, + ram_offset: ctrl.ram_offset, + ..state + } + } else { + state + }; + + // receive response from each RAM + let (_, ram_read_resp_0, ram_read_resp_0_valid) = recv_if_non_blocking( + join(), ram_read_resp_0_r, !state.resp_recv[u32:0] && state.fsm == FSM::READ_RESP, zero!() + ); + let (_, ram_read_resp_1, ram_read_resp_1_valid) = recv_if_non_blocking( + join(), ram_read_resp_1_r, !state.resp_recv[u32:1] && state.fsm == FSM::READ_RESP, zero!() + ); + let (_, ram_read_resp_2, ram_read_resp_2_valid) = recv_if_non_blocking( + join(), ram_read_resp_2_r, !state.resp_recv[u32:2] && state.fsm == FSM::READ_RESP, zero!() + ); + let (_, ram_read_resp_3, ram_read_resp_3_valid) = recv_if_non_blocking( + join(), ram_read_resp_3_r, !state.resp_recv[u32:3] && state.fsm == FSM::READ_RESP, zero!() + ); + let (_, ram_read_resp_4, ram_read_resp_4_valid) = recv_if_non_blocking( + join(), ram_read_resp_4_r, !state.resp_recv[u32:4] && state.fsm == FSM::READ_RESP, zero!() + ); + let (_, ram_read_resp_5, ram_read_resp_5_valid) = recv_if_non_blocking( + join(), ram_read_resp_5_r, !state.resp_recv[u32:5] && state.fsm == FSM::READ_RESP, 
zero!() + ); + let (_, ram_read_resp_6, ram_read_resp_6_valid) = recv_if_non_blocking( + join(), ram_read_resp_6_r, !state.resp_recv[u32:6] && state.fsm == FSM::READ_RESP, zero!() + ); + let (_, ram_read_resp_7, ram_read_resp_7_valid) = recv_if_non_blocking( + join(), ram_read_resp_7_r, !state.resp_recv[u32:7] && state.fsm == FSM::READ_RESP, zero!() + ); + + let ram_read_resp_valid = [ + ram_read_resp_0_valid, + ram_read_resp_1_valid, + ram_read_resp_2_valid, + ram_read_resp_3_valid, + ram_read_resp_4_valid, + ram_read_resp_5_valid, + ram_read_resp_6_valid, + ram_read_resp_7_valid, + ]; + + let ram_read_resp = [ + ram_read_resp_0, + ram_read_resp_1, + ram_read_resp_2, + ram_read_resp_3, + ram_read_resp_4, + ram_read_resp_5, + ram_read_resp_6, + ram_read_resp_7, + ]; + + let state = for (i, state) in range(u32:0, RAM_NUM) { + if ram_read_resp_valid[i] { + State { + resp_recv: update(state.resp_recv, i, true), + resp_data: update(state.resp_data, i, ram_read_resp[i].data), + ..state + } + } else { + state + } + }(state); + + // check if all data is received + let all_received = for (i, all_received) in range(u32:0, RAM_NUM) { + all_received & state.resp_recv[i] + }(true); + + // concatenate data + let concat_data = ( + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:7] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:6] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:5] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:4] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:3] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:2] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:1] ++ + state.resp_data[state.ram_offset + uN[RAM_NUM_W]:0] + ); + + // send response + send_if(join(), read_resp_s, all_received, ReadResp { + data: concat_data + }); + + // reset state + let state = if all_received { + zero!() + } else { + state + }; + + state + } +} + +struct AlignedParallelRamWriteRespHandlerState { + resp_recv: bool[RAM_NUM], +} + +proc AlignedParallelRamWriteRespHandler { + type WriteResp = AlignedParallelRamWriteResp; + type RamWriteResp = ram::WriteResp; + + type State = AlignedParallelRamWriteRespHandlerState; + + write_resp_s: chan out; + + ram_write_resp_0_r: chan in; + ram_write_resp_1_r: chan in; + ram_write_resp_2_r: chan in; + ram_write_resp_3_r: chan in; + ram_write_resp_4_r: chan in; + ram_write_resp_5_r: chan in; + ram_write_resp_6_r: chan in; + ram_write_resp_7_r: chan in; + + config ( + write_resp_s: chan out, + ram_write_resp_0_r: chan in, + ram_write_resp_1_r: chan in, + ram_write_resp_2_r: chan in, + ram_write_resp_3_r: chan in, + ram_write_resp_4_r: chan in, + ram_write_resp_5_r: chan in, + ram_write_resp_6_r: chan in, + ram_write_resp_7_r: chan in, + ) { + ( + write_resp_s, + ram_write_resp_0_r, + ram_write_resp_1_r, + ram_write_resp_2_r, + ram_write_resp_3_r, + ram_write_resp_4_r, + ram_write_resp_5_r, + ram_write_resp_6_r, + ram_write_resp_7_r, + ) + } + + init { zero!() } + + next (state: State) { + // receive response from each RAM + let (_, _, ram_read_resp_0_valid) = recv_if_non_blocking( + join(), ram_write_resp_0_r, !state.resp_recv[u32:0], zero!() + ); + let (_, _, ram_read_resp_1_valid) = recv_if_non_blocking( + join(), ram_write_resp_1_r, !state.resp_recv[u32:1], zero!() + ); + let (_, _, ram_read_resp_2_valid) = recv_if_non_blocking( + join(), ram_write_resp_2_r, !state.resp_recv[u32:2], zero!() + ); + let (_, _, ram_read_resp_3_valid) = recv_if_non_blocking( + join(), ram_write_resp_3_r, !state.resp_recv[u32:3], zero!() + ); + let (_, _, 
ram_read_resp_4_valid) = recv_if_non_blocking( + join(), ram_write_resp_4_r, !state.resp_recv[u32:4], zero!() + ); + let (_, _, ram_read_resp_5_valid) = recv_if_non_blocking( + join(), ram_write_resp_5_r, !state.resp_recv[u32:5], zero!() + ); + let (_, _, ram_read_resp_6_valid) = recv_if_non_blocking( + join(), ram_write_resp_6_r, !state.resp_recv[u32:6], zero!() + ); + let (_, _, ram_read_resp_7_valid) = recv_if_non_blocking( + join(), ram_write_resp_7_r, !state.resp_recv[u32:7], zero!() + ); + + let ram_read_resp_valid = [ + ram_read_resp_0_valid, + ram_read_resp_1_valid, + ram_read_resp_2_valid, + ram_read_resp_3_valid, + ram_read_resp_4_valid, + ram_read_resp_5_valid, + ram_read_resp_6_valid, + ram_read_resp_7_valid, + ]; + + let state = for (i, state) in range(u32:0, RAM_NUM) { + if ram_read_resp_valid[i] { + State { + resp_recv: update(state.resp_recv, i, true), + } + } else { + state + } + }(state); + + // check if all data is received + let all_received = for (i, all_received) in range(u32:0, RAM_NUM) { + all_received & state.resp_recv[i] + }(true); + + // send response + send_if(join(), write_resp_s, all_received, WriteResp {}); + + // reset state + let state = if all_received { + zero!() + } else { + state + }; + + state + } + +} + + +pub proc AlignedParallelRam< + SIZE: u32, + DATA_W: u32, + ADDR_W: u32 = {std::clog2(SIZE)}, + RAM_SIZE: u32 = {SIZE / RAM_NUM}, + RAM_DATA_W: u32 = {DATA_W / RAM_NUM}, + RAM_ADDR_W: u32 = {std::clog2(RAM_SIZE)}, + RAM_PARTITION_SIZE: u32 = {RAM_DATA_W}, + RAM_NUM_PARTITIONS: u32 = {ram::num_partitions(RAM_PARTITION_SIZE, RAM_DATA_W)}, +> { + type ReadReq = AlignedParallelRamReadReq; + type ReadResp = AlignedParallelRamReadResp; + type WriteReq = AlignedParallelRamWriteReq; + type WriteResp = AlignedParallelRamWriteResp; + + type RamReadReq = ram::ReadReq; + type RamReadResp = ram::ReadResp; + type RamWriteReq = ram::WriteReq; + type RamWriteResp = ram::WriteResp; + + read_req_r: chan in; + write_req_r: chan in; + + read_resp_handler_ctrl_s: chan out; + + // RAMs read interfaces + ram_read_req_0_s: chan out; + ram_read_req_1_s: chan out; + ram_read_req_2_s: chan out; + ram_read_req_3_s: chan out; + ram_read_req_4_s: chan out; + ram_read_req_5_s: chan out; + ram_read_req_6_s: chan out; + ram_read_req_7_s: chan out; + + // RAMs write interfaces + ram_write_req_0_s: chan out; + ram_write_req_1_s: chan out; + ram_write_req_2_s: chan out; + ram_write_req_3_s: chan out; + ram_write_req_4_s: chan out; + ram_write_req_5_s: chan out; + ram_write_req_6_s: chan out; + ram_write_req_7_s: chan out; + + config ( + read_req_r: chan in, + read_resp_s: chan out, + write_req_r: chan in, + write_resp_s: chan out, + ram_read_req_s: chan[RAM_NUM] out, + ram_read_resp_r: chan[RAM_NUM] in, + ram_write_req_s: chan[RAM_NUM] out, + ram_write_resp_r: chan[RAM_NUM] in, + ) { + let (read_resp_handler_ctrl_s, read_resp_handler_ctrl_r) = + chan("read_resp_handler_ctrl"); + + spawn AlignedParallelRamReadRespHandler( + read_resp_handler_ctrl_r, + read_resp_s, + ram_read_resp_r[0], + ram_read_resp_r[1], + ram_read_resp_r[2], + ram_read_resp_r[3], + ram_read_resp_r[4], + ram_read_resp_r[5], + ram_read_resp_r[6], + ram_read_resp_r[7], + ); + + spawn AlignedParallelRamWriteRespHandler ( + write_resp_s, + ram_write_resp_r[0], + ram_write_resp_r[1], + ram_write_resp_r[2], + ram_write_resp_r[3], + ram_write_resp_r[4], + ram_write_resp_r[5], + ram_write_resp_r[6], + ram_write_resp_r[7], + ); + + ( + read_req_r, + write_req_r, + read_resp_handler_ctrl_s, + ram_read_req_s[0], + 
ram_read_req_s[1], + ram_read_req_s[2], + ram_read_req_s[3], + ram_read_req_s[4], + ram_read_req_s[5], + ram_read_req_s[6], + ram_read_req_s[7], + ram_write_req_s[0], + ram_write_req_s[1], + ram_write_req_s[2], + ram_write_req_s[3], + ram_write_req_s[4], + ram_write_req_s[5], + ram_write_req_s[6], + ram_write_req_s[7], + ) + } + + init { } + + next (state: ()) { + // handle read request + let (tok_read, read_req, read_req_valid) = recv_non_blocking(join(), read_req_r, zero!()); + + // send ctrl to read resp hanlder + let read_resp_handler_ctrl = AlignedParallelRamReadRespHandlerCtrl { + ram_offset: read_req.addr as uN[RAM_NUM_W], + }; + send_if(tok_read, read_resp_handler_ctrl_s, read_req_valid, read_resp_handler_ctrl); + + // send requests to each RAM + let ram_read_req = for (i, ram_read_req) in range(u32:0, RAM_NUM) { + let offset = if read_req.addr as uN[RAM_NUM_W] > i as uN[RAM_NUM_W] { + uN[RAM_ADDR_W]:1 + } else { + uN[RAM_ADDR_W]:0 + }; + update(ram_read_req, i, RamReadReq { + addr: (read_req.addr >> std::clog2(RAM_NUM)) as uN[RAM_ADDR_W] + offset, + mask: !uN[RAM_NUM_PARTITIONS]:0, + }) + }(zero!()); + send_if(tok_read, ram_read_req_0_s, read_req_valid, ram_read_req[0]); + send_if(tok_read, ram_read_req_1_s, read_req_valid, ram_read_req[1]); + send_if(tok_read, ram_read_req_2_s, read_req_valid, ram_read_req[2]); + send_if(tok_read, ram_read_req_3_s, read_req_valid, ram_read_req[3]); + send_if(tok_read, ram_read_req_4_s, read_req_valid, ram_read_req[4]); + send_if(tok_read, ram_read_req_5_s, read_req_valid, ram_read_req[5]); + send_if(tok_read, ram_read_req_6_s, read_req_valid, ram_read_req[6]); + send_if(tok_read, ram_read_req_7_s, read_req_valid, ram_read_req[7]); + + // handle write request + let (tok_write, write_req, write_req_valid) = recv_non_blocking(join(), write_req_r, zero!()); + + // send requests to each RAM + let ram_write_req = for (i, ram_write_req) in range(u32:0, RAM_NUM) { + update(ram_write_req, i, RamWriteReq { + addr: (write_req.addr >> std::clog2(RAM_NUM)) as uN[RAM_ADDR_W], + data: (write_req.data >> (RAM_DATA_W * i)) as uN[RAM_DATA_W], + mask: !uN[RAM_NUM_PARTITIONS]:0, + }) + }(zero!()); + send_if(tok_read, ram_write_req_0_s, write_req_valid, ram_write_req[0]); + send_if(tok_read, ram_write_req_1_s, write_req_valid, ram_write_req[1]); + send_if(tok_read, ram_write_req_2_s, write_req_valid, ram_write_req[2]); + send_if(tok_read, ram_write_req_3_s, write_req_valid, ram_write_req[3]); + send_if(tok_read, ram_write_req_4_s, write_req_valid, ram_write_req[4]); + send_if(tok_read, ram_write_req_5_s, write_req_valid, ram_write_req[5]); + send_if(tok_read, ram_write_req_6_s, write_req_valid, ram_write_req[6]); + send_if(tok_read, ram_write_req_7_s, write_req_valid, ram_write_req[7]); + } +} + + +const INST_SIZE = u32:1024; +const INST_DATA_W = u32:64; +const INST_ADDR_W = std::clog2(INST_SIZE); +const INST_RAM_SIZE = INST_SIZE / RAM_NUM; +const INST_RAM_DATA_W = {INST_DATA_W / RAM_NUM}; +const INST_RAM_ADDR_W = {std::clog2(INST_RAM_SIZE)}; +const INST_RAM_PARTITION_SIZE = {INST_RAM_DATA_W}; +const INST_RAM_NUM_PARTITIONS = {ram::num_partitions(INST_RAM_PARTITION_SIZE, INST_RAM_DATA_W)}; + +proc AlignedParallelRamInst { + type InstReadReq = AlignedParallelRamReadReq; + type InstReadResp = AlignedParallelRamReadResp; + type InstWriteReq = AlignedParallelRamWriteReq; + type InstWriteResp = AlignedParallelRamWriteResp; + + type InstRamReadReq = ram::ReadReq; + type InstRamReadResp = ram::ReadResp; + type InstRamWriteReq = ram::WriteReq; + type InstRamWriteResp = 
ram::WriteResp; + + config ( + read_req_r: chan in, + read_resp_s: chan out, + write_req_r: chan in, + write_resp_s: chan out, + ram_read_req_s: chan[RAM_NUM] out, + ram_read_resp_r: chan[RAM_NUM] in, + ram_write_req_s: chan[RAM_NUM] out, + ram_write_resp_r: chan[RAM_NUM] in, + ) { + spawn AlignedParallelRam( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r, + ); + } + + init { } + + next (state: ()) { } +} + + +const TEST_SIZE = u32:1024; +const TEST_DATA_W = u32:64; +const TEST_ADDR_W = std::clog2(TEST_SIZE); +const TEST_RAM_SIZE = TEST_SIZE / RAM_NUM; +const TEST_RAM_DATA_W = {TEST_DATA_W / RAM_NUM}; +const TEST_RAM_ADDR_W = {std::clog2(TEST_RAM_SIZE)}; +const TEST_RAM_PARTITION_SIZE = {TEST_RAM_DATA_W}; +const TEST_RAM_NUM_PARTITIONS = {ram::num_partitions(TEST_RAM_PARTITION_SIZE, TEST_RAM_DATA_W)}; + +const TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_RAM_INITIALIZED = true; + +type TestReadReq = AlignedParallelRamReadReq; +type TestReadResp = AlignedParallelRamReadResp; +type TestWriteReq = AlignedParallelRamWriteReq; +type TestWriteResp = AlignedParallelRamWriteResp; + +type TestRamReadReq = ram::ReadReq; +type TestRamReadResp = ram::ReadResp; +type TestRamWriteReq = ram::WriteReq; +type TestRamWriteResp = ram::WriteResp; + +struct TestData { + addr: uN[TEST_ADDR_W], + data: uN[TEST_DATA_W], +} + +const TEST_DATA = TestData[64]:[ + TestData {addr: uN[TEST_ADDR_W]:0x0c8, data: uN[TEST_DATA_W]:0x698dbd57f739d8ce}, + TestData {addr: uN[TEST_ADDR_W]:0x248, data: uN[TEST_DATA_W]:0x4cf6fc9b695676ad}, + TestData {addr: uN[TEST_ADDR_W]:0x3a0, data: uN[TEST_DATA_W]:0x5da52c3bd7b39603}, + TestData {addr: uN[TEST_ADDR_W]:0x208, data: uN[TEST_DATA_W]:0x5afa80c1c45a5bd2}, + TestData {addr: uN[TEST_ADDR_W]:0x068, data: uN[TEST_DATA_W]:0x27befcb367237e3f}, + TestData {addr: uN[TEST_ADDR_W]:0x358, data: uN[TEST_DATA_W]:0xa477d4887cec7fc2}, + TestData {addr: uN[TEST_ADDR_W]:0x328, data: uN[TEST_DATA_W]:0x38ecf19cf314ba5c}, + TestData {addr: uN[TEST_ADDR_W]:0x258, data: uN[TEST_DATA_W]:0x97a504cfa39e6750}, + TestData {addr: uN[TEST_ADDR_W]:0x1b8, data: uN[TEST_DATA_W]:0x2fa75c1effecf687}, + TestData {addr: uN[TEST_ADDR_W]:0x2e8, data: uN[TEST_DATA_W]:0xb1315d70b63629d8}, + TestData {addr: uN[TEST_ADDR_W]:0x2f0, data: uN[TEST_DATA_W]:0x44c025ebee513c44}, + TestData {addr: uN[TEST_ADDR_W]:0x250, data: uN[TEST_DATA_W]:0x295250fa0d795902}, + TestData {addr: uN[TEST_ADDR_W]:0x2a0, data: uN[TEST_DATA_W]:0x1f76bb3cf745235e}, + TestData {addr: uN[TEST_ADDR_W]:0x168, data: uN[TEST_DATA_W]:0x0d06b1d161037460}, + TestData {addr: uN[TEST_ADDR_W]:0x010, data: uN[TEST_DATA_W]:0x0c7b320db86382df}, + TestData {addr: uN[TEST_ADDR_W]:0x178, data: uN[TEST_DATA_W]:0x547e5874fdae8c09}, + TestData {addr: uN[TEST_ADDR_W]:0x0f8, data: uN[TEST_DATA_W]:0xc75ca52d83d65bba}, + TestData {addr: uN[TEST_ADDR_W]:0x0d0, data: uN[TEST_DATA_W]:0x3c10031e89ac070a}, + TestData {addr: uN[TEST_ADDR_W]:0x3f8, data: uN[TEST_DATA_W]:0xe881ce7c3e4515b4}, + TestData {addr: uN[TEST_ADDR_W]:0x378, data: uN[TEST_DATA_W]:0xa10c92b84419eb3d}, + TestData {addr: uN[TEST_ADDR_W]:0x018, data: uN[TEST_DATA_W]:0x7b9537f92c4958e0}, + TestData {addr: uN[TEST_ADDR_W]:0x350, data: uN[TEST_DATA_W]:0x38a1a5e8a7206e81}, + TestData {addr: uN[TEST_ADDR_W]:0x030, data: uN[TEST_DATA_W]:0xda2cf6b0b380862c}, + TestData {addr: uN[TEST_ADDR_W]:0x248, data: uN[TEST_DATA_W]:0xa56492b3fb19c8b8}, + TestData {addr: uN[TEST_ADDR_W]:0x258, 
data: uN[TEST_DATA_W]:0x9cbfccbf72c7948b}, + TestData {addr: uN[TEST_ADDR_W]:0x008, data: uN[TEST_DATA_W]:0x7fb6d361a608db56}, + TestData {addr: uN[TEST_ADDR_W]:0x108, data: uN[TEST_DATA_W]:0xba2aef614c7c5c1e}, + TestData {addr: uN[TEST_ADDR_W]:0x090, data: uN[TEST_DATA_W]:0xe7a5ab55633078fa}, + TestData {addr: uN[TEST_ADDR_W]:0x0c0, data: uN[TEST_DATA_W]:0xb5132e7e378f3f5b}, + TestData {addr: uN[TEST_ADDR_W]:0x198, data: uN[TEST_DATA_W]:0xeac9fe191bfd8b31}, + TestData {addr: uN[TEST_ADDR_W]:0x218, data: uN[TEST_DATA_W]:0x82ad45d959f8dbec}, + TestData {addr: uN[TEST_ADDR_W]:0x070, data: uN[TEST_DATA_W]:0x4d4e255058d00ccb}, + TestData {addr: uN[TEST_ADDR_W]:0x3a0, data: uN[TEST_DATA_W]:0x2a69306cf695b2f5}, + TestData {addr: uN[TEST_ADDR_W]:0x1e0, data: uN[TEST_DATA_W]:0x571a30f8cd940e39}, + TestData {addr: uN[TEST_ADDR_W]:0x300, data: uN[TEST_DATA_W]:0x7069a4c406076fd9}, + TestData {addr: uN[TEST_ADDR_W]:0x2a8, data: uN[TEST_DATA_W]:0x9af366c878230764}, + TestData {addr: uN[TEST_ADDR_W]:0x328, data: uN[TEST_DATA_W]:0x1e6bc1e2df3c8a7b}, + TestData {addr: uN[TEST_ADDR_W]:0x298, data: uN[TEST_DATA_W]:0x1ff9be4f810cd87a}, + TestData {addr: uN[TEST_ADDR_W]:0x250, data: uN[TEST_DATA_W]:0x9ad30cee350aebfa}, + TestData {addr: uN[TEST_ADDR_W]:0x090, data: uN[TEST_DATA_W]:0x31fca7f91dfcafb5}, + TestData {addr: uN[TEST_ADDR_W]:0x3b8, data: uN[TEST_DATA_W]:0xe434deef583c3cd1}, + TestData {addr: uN[TEST_ADDR_W]:0x3c0, data: uN[TEST_DATA_W]:0x4170c371a2025f27}, + TestData {addr: uN[TEST_ADDR_W]:0x0e8, data: uN[TEST_DATA_W]:0x616754a100d9decc}, + TestData {addr: uN[TEST_ADDR_W]:0x1f0, data: uN[TEST_DATA_W]:0x8d93fa35edab37b7}, + TestData {addr: uN[TEST_ADDR_W]:0x208, data: uN[TEST_DATA_W]:0x6582012a83ffcec3}, + TestData {addr: uN[TEST_ADDR_W]:0x3d0, data: uN[TEST_DATA_W]:0x6c66a69e87eac130}, + TestData {addr: uN[TEST_ADDR_W]:0x248, data: uN[TEST_DATA_W]:0xbfd5e4e261bbd7e3}, + TestData {addr: uN[TEST_ADDR_W]:0x058, data: uN[TEST_DATA_W]:0x2f8ba1fd6a8b6ee9}, + TestData {addr: uN[TEST_ADDR_W]:0x1a0, data: uN[TEST_DATA_W]:0xef9ab2936ef6833e}, + TestData {addr: uN[TEST_ADDR_W]:0x380, data: uN[TEST_DATA_W]:0x279130ba7b5ced6f}, + TestData {addr: uN[TEST_ADDR_W]:0x170, data: uN[TEST_DATA_W]:0xc1977f6a2153db09}, + TestData {addr: uN[TEST_ADDR_W]:0x3d8, data: uN[TEST_DATA_W]:0xd4ea85571e440cef}, + TestData {addr: uN[TEST_ADDR_W]:0x360, data: uN[TEST_DATA_W]:0x9bc5756ab3328603}, + TestData {addr: uN[TEST_ADDR_W]:0x2f8, data: uN[TEST_DATA_W]:0x14217d1804170f39}, + TestData {addr: uN[TEST_ADDR_W]:0x268, data: uN[TEST_DATA_W]:0x0098755165e9ae81}, + TestData {addr: uN[TEST_ADDR_W]:0x050, data: uN[TEST_DATA_W]:0x3ee0b48789cc94e0}, + TestData {addr: uN[TEST_ADDR_W]:0x398, data: uN[TEST_DATA_W]:0x9ff7fbc9906d3d63}, + TestData {addr: uN[TEST_ADDR_W]:0x068, data: uN[TEST_DATA_W]:0x507bc61f805b0e68}, + TestData {addr: uN[TEST_ADDR_W]:0x350, data: uN[TEST_DATA_W]:0x77802819dc14663a}, + TestData {addr: uN[TEST_ADDR_W]:0x168, data: uN[TEST_DATA_W]:0xd8ca0711ca37bfa9}, + TestData {addr: uN[TEST_ADDR_W]:0x068, data: uN[TEST_DATA_W]:0x30464e3d2630b6de}, + TestData {addr: uN[TEST_ADDR_W]:0x360, data: uN[TEST_DATA_W]:0xdbac58596c50f62f}, + TestData {addr: uN[TEST_ADDR_W]:0x2e0, data: uN[TEST_DATA_W]:0x9992cfd966824669}, + TestData {addr: uN[TEST_ADDR_W]:0x2e0, data: uN[TEST_DATA_W]:0x1a4a65b0257c223b}, +]; + +#[test_proc] +proc AlignedParallelRam_test_aligned_read { + terminator: chan out; + read_req_s: chan out; + read_resp_r: chan in; + write_req_s: chan out; + write_resp_r: chan in; + + config (terminator: chan out) { + let 
(read_req_s, read_req_r) = chan("read_req"); + let (read_resp_s, read_resp_r) = chan("read_resp"); + let (write_req_s, write_req_r) = chan("write_req"); + let (write_resp_s, write_resp_r) = chan("write_resp"); + + let (ram_read_req_s, ram_read_req_r) = chan[RAM_NUM]("ram_read_req"); + let (ram_read_resp_s, ram_read_resp_r) = chan[RAM_NUM]("ram_read_resp"); + let (ram_write_req_s, ram_write_req_r) = chan[RAM_NUM]("ram_write_req"); + let (ram_write_resp_s, ram_write_resp_r) = chan[RAM_NUM]("ram_write_resp"); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[0], ram_read_resp_s[0], ram_write_req_r[0], ram_write_resp_s[0], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[1], ram_read_resp_s[1], ram_write_req_r[1], ram_write_resp_s[1], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[2], ram_read_resp_s[2], ram_write_req_r[2], ram_write_resp_s[2], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[3], ram_read_resp_s[3], ram_write_req_r[3], ram_write_resp_s[3], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[4], ram_read_resp_s[4], ram_write_req_r[4], ram_write_resp_s[4], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[5], ram_read_resp_s[5], ram_write_req_r[5], ram_write_resp_s[5], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[6], ram_read_resp_s[6], ram_write_req_r[6], ram_write_resp_s[6], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[7], ram_read_resp_s[7], ram_write_req_r[7], ram_write_resp_s[7], + ); + + spawn AlignedParallelRam( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r, + ); + + ( + terminator, + read_req_s, + read_resp_r, + write_req_s, + write_resp_r, + ) + } + + init { } + + next (state: ()) { + let tok = join(); + + let tok = for (i, tok):(u32, token) in range(u32:0, array_size(TEST_DATA)) { + let test_data = TEST_DATA[i]; + + let write_req = TestWriteReq { + addr: test_data.addr, + data: test_data.data, + }; + let tok = send(tok, write_req_s, write_req); + trace_fmt!("Sent #{} write request {:#x}", i + u32:1, write_req); + + let (tok, write_resp) = recv(tok, write_resp_r); + trace_fmt!("Received #{} write response {:#x}", i + u32:1, write_resp); + + let read_req = TestReadReq { + addr: test_data.addr, + }; + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{} read request {:#x}", i + u32:1, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{} read response {:#x}", i + u32:1, read_resp); + + assert_eq(test_data.data, read_resp.data); + + tok + }(tok); + + send(tok, terminator, true); + } +} + +const 
TEST_RAM_CONTENT = uN[TEST_DATA_W][64]:[ + uN[TEST_DATA_W]:0x2122337ed367496b, uN[TEST_DATA_W]:0x33de22e4291ecb66, + uN[TEST_DATA_W]:0x62052eccbde0009d, uN[TEST_DATA_W]:0xfa179c402e7b5f47, + uN[TEST_DATA_W]:0x118fa2c81d1230e9, uN[TEST_DATA_W]:0xe48ee076b41120c0, + uN[TEST_DATA_W]:0xa33d467d80575e5b, uN[TEST_DATA_W]:0x61213ebe00890570, + uN[TEST_DATA_W]:0xe9210eae2507442f, uN[TEST_DATA_W]:0x4b8c721627c19c44, + uN[TEST_DATA_W]:0x55e768d2e4586bba, uN[TEST_DATA_W]:0x2fc234017ac1deb5, + uN[TEST_DATA_W]:0xdc6afd5db30446aa, uN[TEST_DATA_W]:0xe91512402a2f68ab, + uN[TEST_DATA_W]:0x13fd96b93aef2c85, uN[TEST_DATA_W]:0x980b4054b9f66fc2, + uN[TEST_DATA_W]:0xa8bf09e77757ca28, uN[TEST_DATA_W]:0x94a67ff04004de7b, + uN[TEST_DATA_W]:0x6d1f3071a446b0c3, uN[TEST_DATA_W]:0x01605527a50fdecf, + uN[TEST_DATA_W]:0xd839258508c3efd1, uN[TEST_DATA_W]:0x1207a4d0de5c9724, + uN[TEST_DATA_W]:0xef39682f0810f43c, uN[TEST_DATA_W]:0x4781977bc26ce834, + uN[TEST_DATA_W]:0x0805a350ed25812f, uN[TEST_DATA_W]:0x8ad82cb67bf49cef, + uN[TEST_DATA_W]:0x2fd11ff78f85f169, uN[TEST_DATA_W]:0xd624be58457eab2a, + uN[TEST_DATA_W]:0x873e4be71afa1355, uN[TEST_DATA_W]:0x6e0e9f264151b531, + uN[TEST_DATA_W]:0xa69015c537b4da78, uN[TEST_DATA_W]:0x0879638aa8045ad9, + uN[TEST_DATA_W]:0x30dd170b7bf89cbf, uN[TEST_DATA_W]:0xcbfeb8219960a267, + uN[TEST_DATA_W]:0x9f6fcd2d4a4ba9f2, uN[TEST_DATA_W]:0xdf0ead33b3e55ac3, + uN[TEST_DATA_W]:0x64a24c19037f850b, uN[TEST_DATA_W]:0x4dcbb4de2d3aba5a, + uN[TEST_DATA_W]:0xa40749b2be1450b6, uN[TEST_DATA_W]:0x99bed65d2d28d1f6, + uN[TEST_DATA_W]:0xbe8d35f27bb892b4, uN[TEST_DATA_W]:0x23315a3a70110048, + uN[TEST_DATA_W]:0x68b0e22cb8885787, uN[TEST_DATA_W]:0xcac1a152d43dae98, + uN[TEST_DATA_W]:0x1fb5cec8c64ad46a, uN[TEST_DATA_W]:0xcbe25f0d2b21e9a1, + uN[TEST_DATA_W]:0x46e161fd5f490ae7, uN[TEST_DATA_W]:0xff2dd0e7a120d222, + uN[TEST_DATA_W]:0xa764165b6d09fb90, uN[TEST_DATA_W]:0xee4d9484b63f6a66, + uN[TEST_DATA_W]:0x204d6d789e9fe377, uN[TEST_DATA_W]:0x9ad53311a1a95bcf, + uN[TEST_DATA_W]:0x63d497f105d8661f, uN[TEST_DATA_W]:0x40e7a242cc26483c, + uN[TEST_DATA_W]:0x5a82a7265627cab1, uN[TEST_DATA_W]:0x42de42323222a24b, + uN[TEST_DATA_W]:0xdede8c218f3ef36a, uN[TEST_DATA_W]:0xec86b8e8734da0c7, + uN[TEST_DATA_W]:0x9b209d6959c36b79, uN[TEST_DATA_W]:0x829c158fd6678675, + uN[TEST_DATA_W]:0x5c59a4845b68a509, uN[TEST_DATA_W]:0xcc9e851a38b01d04, + uN[TEST_DATA_W]:0x5e15f41bd09acd33, uN[TEST_DATA_W]:0x953425686ce51623, +]; + +const TEST_READ_ADDR = uN[TEST_ADDR_W][128]:[ + uN[TEST_ADDR_W]:0x0d0, uN[TEST_ADDR_W]:0x01c, uN[TEST_ADDR_W]:0x094, uN[TEST_ADDR_W]:0x153, + uN[TEST_ADDR_W]:0x1cb, uN[TEST_ADDR_W]:0x14f, uN[TEST_ADDR_W]:0x021, uN[TEST_ADDR_W]:0x1f5, + uN[TEST_ADDR_W]:0x155, uN[TEST_ADDR_W]:0x0db, uN[TEST_ADDR_W]:0x070, uN[TEST_ADDR_W]:0x13a, + uN[TEST_ADDR_W]:0x0bf, uN[TEST_ADDR_W]:0x16b, uN[TEST_ADDR_W]:0x143, uN[TEST_ADDR_W]:0x0b4, + uN[TEST_ADDR_W]:0x1f4, uN[TEST_ADDR_W]:0x17f, uN[TEST_ADDR_W]:0x096, uN[TEST_ADDR_W]:0x03a, + uN[TEST_ADDR_W]:0x0ec, uN[TEST_ADDR_W]:0x030, uN[TEST_ADDR_W]:0x0e1, uN[TEST_ADDR_W]:0x1e7, + uN[TEST_ADDR_W]:0x006, uN[TEST_ADDR_W]:0x088, uN[TEST_ADDR_W]:0x1e9, uN[TEST_ADDR_W]:0x16f, + uN[TEST_ADDR_W]:0x152, uN[TEST_ADDR_W]:0x1a2, uN[TEST_ADDR_W]:0x0ac, uN[TEST_ADDR_W]:0x0d3, + uN[TEST_ADDR_W]:0x0d5, uN[TEST_ADDR_W]:0x107, uN[TEST_ADDR_W]:0x121, uN[TEST_ADDR_W]:0x01a, + uN[TEST_ADDR_W]:0x1c2, uN[TEST_ADDR_W]:0x117, uN[TEST_ADDR_W]:0x0e9, uN[TEST_ADDR_W]:0x0ac, + uN[TEST_ADDR_W]:0x16e, uN[TEST_ADDR_W]:0x105, uN[TEST_ADDR_W]:0x01e, uN[TEST_ADDR_W]:0x186, + uN[TEST_ADDR_W]:0x1bb, uN[TEST_ADDR_W]:0x05b, 
uN[TEST_ADDR_W]:0x07a, uN[TEST_ADDR_W]:0x1d3, + uN[TEST_ADDR_W]:0x120, uN[TEST_ADDR_W]:0x142, uN[TEST_ADDR_W]:0x0ee, uN[TEST_ADDR_W]:0x083, + uN[TEST_ADDR_W]:0x1ce, uN[TEST_ADDR_W]:0x016, uN[TEST_ADDR_W]:0x041, uN[TEST_ADDR_W]:0x040, + uN[TEST_ADDR_W]:0x073, uN[TEST_ADDR_W]:0x197, uN[TEST_ADDR_W]:0x1d1, uN[TEST_ADDR_W]:0x074, + uN[TEST_ADDR_W]:0x087, uN[TEST_ADDR_W]:0x168, uN[TEST_ADDR_W]:0x1f7, uN[TEST_ADDR_W]:0x19e, + uN[TEST_ADDR_W]:0x06f, uN[TEST_ADDR_W]:0x0c9, uN[TEST_ADDR_W]:0x102, uN[TEST_ADDR_W]:0x077, + uN[TEST_ADDR_W]:0x0ff, uN[TEST_ADDR_W]:0x1ac, uN[TEST_ADDR_W]:0x02c, uN[TEST_ADDR_W]:0x116, + uN[TEST_ADDR_W]:0x04d, uN[TEST_ADDR_W]:0x16b, uN[TEST_ADDR_W]:0x14c, uN[TEST_ADDR_W]:0x173, + uN[TEST_ADDR_W]:0x055, uN[TEST_ADDR_W]:0x1e1, uN[TEST_ADDR_W]:0x028, uN[TEST_ADDR_W]:0x103, + uN[TEST_ADDR_W]:0x01c, uN[TEST_ADDR_W]:0x168, uN[TEST_ADDR_W]:0x096, uN[TEST_ADDR_W]:0x15b, + uN[TEST_ADDR_W]:0x1aa, uN[TEST_ADDR_W]:0x010, uN[TEST_ADDR_W]:0x08c, uN[TEST_ADDR_W]:0x083, + uN[TEST_ADDR_W]:0x014, uN[TEST_ADDR_W]:0x013, uN[TEST_ADDR_W]:0x00d, uN[TEST_ADDR_W]:0x1eb, + uN[TEST_ADDR_W]:0x09d, uN[TEST_ADDR_W]:0x079, uN[TEST_ADDR_W]:0x146, uN[TEST_ADDR_W]:0x191, + uN[TEST_ADDR_W]:0x070, uN[TEST_ADDR_W]:0x1bc, uN[TEST_ADDR_W]:0x037, uN[TEST_ADDR_W]:0x130, + uN[TEST_ADDR_W]:0x0d8, uN[TEST_ADDR_W]:0x0d1, uN[TEST_ADDR_W]:0x136, uN[TEST_ADDR_W]:0x05b, + uN[TEST_ADDR_W]:0x1f3, uN[TEST_ADDR_W]:0x036, uN[TEST_ADDR_W]:0x0db, uN[TEST_ADDR_W]:0x149, + uN[TEST_ADDR_W]:0x11e, uN[TEST_ADDR_W]:0x1c2, uN[TEST_ADDR_W]:0x0a3, uN[TEST_ADDR_W]:0x061, + uN[TEST_ADDR_W]:0x0eb, uN[TEST_ADDR_W]:0x131, uN[TEST_ADDR_W]:0x04a, uN[TEST_ADDR_W]:0x0ab, + uN[TEST_ADDR_W]:0x0d5, uN[TEST_ADDR_W]:0x083, uN[TEST_ADDR_W]:0x1cb, uN[TEST_ADDR_W]:0x03f, + uN[TEST_ADDR_W]:0x02d, uN[TEST_ADDR_W]:0x14d, uN[TEST_ADDR_W]:0x120, uN[TEST_ADDR_W]:0x194, + uN[TEST_ADDR_W]:0x062, uN[TEST_ADDR_W]:0x182, uN[TEST_ADDR_W]:0x124, uN[TEST_ADDR_W]:0x06d, +]; + +#[test_proc] +proc AlignedParallelRam_test_non_aligned_read { + terminator: chan out; + read_req_s: chan out; + read_resp_r: chan in; + write_req_s: chan out; + write_resp_r: chan in; + + config (terminator: chan out) { + let (read_req_s, read_req_r) = chan("read_req"); + let (read_resp_s, read_resp_r) = chan("read_resp"); + let (write_req_s, write_req_r) = chan("write_req"); + let (write_resp_s, write_resp_r) = chan("write_resp"); + + let (ram_read_req_s, ram_read_req_r) = chan[RAM_NUM]("ram_read_req"); + let (ram_read_resp_s, ram_read_resp_r) = chan[RAM_NUM]("ram_read_resp"); + let (ram_write_req_s, ram_write_req_r) = chan[RAM_NUM]("ram_write_req"); + let (ram_write_resp_s, ram_write_resp_r) = chan[RAM_NUM]("ram_write_resp"); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[0], ram_read_resp_s[0], ram_write_req_r[0], ram_write_resp_s[0], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[1], ram_read_resp_s[1], ram_write_req_r[1], ram_write_resp_s[1], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[2], ram_read_resp_s[2], ram_write_req_r[2], ram_write_resp_s[2], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[3], 
ram_read_resp_s[3], ram_write_req_r[3], ram_write_resp_s[3], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[4], ram_read_resp_s[4], ram_write_req_r[4], ram_write_resp_s[4], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[5], ram_read_resp_s[5], ram_write_req_r[5], ram_write_resp_s[5], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[6], ram_read_resp_s[6], ram_write_req_r[6], ram_write_resp_s[6], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[7], ram_read_resp_s[7], ram_write_req_r[7], ram_write_resp_s[7], + ); + + spawn AlignedParallelRam( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r, + ); + + ( + terminator, + read_req_s, + read_resp_r, + write_req_s, + write_resp_r, + ) + } + + init { } + + next (state: ()) { + let tok = join(); + + // write RAM content + let tok = for (i, tok):(u32, token) in range(u32:0, array_size(TEST_RAM_CONTENT)) { + let test_data = TEST_RAM_CONTENT[i]; + + let write_req = TestWriteReq { + addr: (i * TEST_RAM_DATA_W) as uN[TEST_ADDR_W], + data: test_data, + }; + let tok = send(tok, write_req_s, write_req); + trace_fmt!("Sent #{} write request {:#x}", i + u32:1, write_req); + + let (tok, write_resp) = recv(tok, write_resp_r); + trace_fmt!("Received #{} write response {:#x}", i + u32:1, write_resp); + + let read_req = TestReadReq { + addr: (i * TEST_RAM_DATA_W) as uN[TEST_ADDR_W], + }; + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{} read request {:#x}", i + u32:1, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{} read response {:#x}", i + u32:1, read_resp); + + assert_eq(test_data, read_resp.data); + + tok + }(tok); + + // read unaligned data + let tok = for (i, tok):(u32, token) in range(u32:0, array_size(TEST_READ_ADDR)) { + let test_read_addr = TEST_READ_ADDR[i]; + + let read_req = TestReadReq { + addr: test_read_addr, + }; + + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{} read request {:#x}", i + u32:1, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{} read response {:#x}", i + u32:1, read_resp); + + let ram_offset = test_read_addr as uN[RAM_NUM_W]; + let expected_data = if ram_offset == uN[RAM_NUM_W]:0 { + TEST_RAM_CONTENT[test_read_addr >> RAM_NUM_W] + } else { + let data_0 = TEST_RAM_CONTENT[test_read_addr >> RAM_NUM_W]; + let data_1 = TEST_RAM_CONTENT[(test_read_addr >> RAM_NUM_W) + uN[TEST_ADDR_W]:1]; + ( + (data_0 >> (TEST_RAM_DATA_W * ram_offset as u32)) | + (data_1 << (TEST_RAM_DATA_W * (RAM_NUM - ram_offset as u32))) + ) + }; + + assert_eq(expected_data, read_resp.data); + + tok + }(tok); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/axi_csr_accessor.x b/xls/modules/zstd/axi_csr_accessor.x new file mode 100644 index 0000000000..860b36fe36 --- /dev/null +++ b/xls/modules/zstd/axi_csr_accessor.x @@ -0,0 +1,385 @@ +// Copyright 2023-2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except 
in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains implementation of a proc that handles CSRs. It provides +// an AXI interface for reading and writing the values as well as separate +// request/response channels. Apart from that it has an output channel which +// notifies aboud changes made to CSRs. + +import std; +import xls.modules.zstd.memory.axi; +import xls.modules.zstd.csr_config; + + +struct AxiCsrAccessorState { + w_id: uN[ID_W], + w_addr: uN[ADDR_W], + r_id: uN[ID_W], + r_addr: uN[ADDR_W], +} + +pub proc AxiCsrAccessor< + ID_W: u32, ADDR_W: u32, DATA_W: u32, REGS_N: u32, + DATA_W_DIV8: u32 = { DATA_W / u32:8 }, + LOG2_REGS_N: u32 = { std::clog2(REGS_N) }, + LOG2_DATA_W_DIV8: u32 = { std::clog2(DATA_W / u32:8) }, +> { + type AxiAw = axi::AxiAw; + type AxiW = axi::AxiW; + type AxiB = axi::AxiB; + type AxiAr = axi::AxiAr; + type AxiR = axi::AxiR; + + type RdReq = csr_config::CsrRdReq; + type RdResp = csr_config::CsrRdResp; + type WrReq = csr_config::CsrWrReq; + type WrResp = csr_config::CsrWrResp; + + type State = AxiCsrAccessorState; + type Data = uN[DATA_W]; + type RegN = uN[LOG2_REGS_N]; + + axi_aw_r: chan in; + axi_w_r: chan in; + axi_b_s: chan out; + axi_ar_r: chan in; + axi_r_s: chan out; + + csr_rd_req_s: chan out; + csr_rd_resp_r: chan in; + csr_wr_req_s: chan out; + csr_wr_resp_r: chan in; + + config ( + axi_aw_r: chan in, + axi_w_r: chan in, + axi_b_s: chan out, + axi_ar_r: chan in, + axi_r_s: chan out, + + csr_rd_req_s: chan out, + csr_rd_resp_r: chan in, + csr_wr_req_s: chan out, + csr_wr_resp_r: chan in, + ) { + ( + axi_aw_r, axi_w_r, axi_b_s, + axi_ar_r, axi_r_s, + csr_rd_req_s, csr_rd_resp_r, + csr_wr_req_s, csr_wr_resp_r, + ) + } + + init { + zero!() + } + + next (state: State) { + let tok_0 = join(); + // write to CSR via AXI + let (tok_1_1, axi_aw, axi_aw_valid) = recv_non_blocking(tok_0, axi_aw_r, AxiAw {id: state.w_id, addr: state.w_addr, ..zero!()}); + // validate axi aw + assert!(!(axi_aw_valid && axi_aw.addr as u32 >= (REGS_N << LOG2_DATA_W_DIV8)), "invalid_aw_addr"); + assert!(!(axi_aw_valid && axi_aw.len != u8:0), "invalid_aw_len"); + + let (tok_1_2, axi_w, axi_w_valid) = recv_non_blocking(tok_1_1, axi_w_r, zero!()); + + // Send WriteRequest to CSRs + let data_w = if axi_w_valid { + trace_fmt!("[CSR ACCESSOR] received csr write at {:#x}", axi_w); + + let (w_data, _, _) = for (i, (w_data, strb, mask)): (u32, (uN[DATA_W], uN[DATA_W_DIV8], uN[DATA_W])) in range(u32:0, DATA_W_DIV8) { + let w_data = if axi_w.strb as u1 { + w_data | (axi_w.data & mask) + } else { + w_data + }; + ( + w_data, + strb >> u32:1, + mask << u32:8, + ) + }((uN[DATA_W]:0, axi_w.strb, uN[DATA_W]:0xFF)); + w_data + } else { + uN[DATA_W]:0 + }; + + let wr_req = WrReq { + csr: (axi_aw.addr >> LOG2_DATA_W_DIV8) as uN[LOG2_REGS_N], + value: data_w + }; + + let tok_1_3 = send_if(tok_1_2, csr_wr_req_s, axi_w_valid, wr_req); + + let (tok_2_1, csr_wr_resp, csr_wr_resp_valid) = recv_non_blocking(tok_0, csr_wr_resp_r, zero!()); + let axi_write_resp = AxiB { + resp: axi::AxiWriteResp::OKAY, + id: axi_aw.id, + }; + let tok_2_2 = send_if(tok_2_1, axi_b_s, csr_wr_resp_valid, 
axi_write_resp); + + + // Send ReadRequest to CSRs + let (tok_3_1, axi_ar, axi_ar_valid) = recv_non_blocking(tok_0, axi_ar_r, AxiAr {id: state.r_id, addr: state.r_addr, ..zero!()}); + // validate ar bundle + assert!(!(axi_ar_valid && axi_ar.addr as u32 >= (REGS_N << LOG2_DATA_W_DIV8)), "invalid_ar_addr"); + assert!(!(axi_ar_valid && axi_ar.len != u8:0), "invalid_ar_len"); + let rd_req = RdReq { + csr: (axi_ar.addr >> LOG2_DATA_W_DIV8) as uN[LOG2_REGS_N], + }; + let tok_3_2 = send_if(tok_3_1, csr_rd_req_s, axi_ar_valid, rd_req); + + let (tok_4_1, csr_rd_resp, csr_rd_resp_valid) = recv_non_blocking(tok_0, csr_rd_resp_r, zero!()); + + let axi_read_resp = AxiR { + id: axi_ar.id, + data: csr_rd_resp.value, + resp: axi::AxiReadResp::OKAY, + last: true, + }; + let tok_4_2 = send_if(tok_4_1, axi_r_s, csr_rd_resp_valid, axi_read_resp); + + State { + w_id: axi_aw.id, + w_addr: axi_aw.addr, + r_id: axi_ar.id, + r_addr: axi_ar.addr, + } + } +} + +const INST_ID_W = u32:4; +const INST_DATA_W = u32:32; +const INST_ADDR_W = u32:16; +const INST_REGS_N = u32:16; +const INST_DATA_W_DIV8 = INST_DATA_W / u32:8; +const INST_LOG2_REGS_N = std::clog2(INST_REGS_N); + +proc AxiCsrAccessorInst { + type InstAxiAw = axi::AxiAw; + type InstAxiW = axi::AxiW; + type InstAxiB = axi::AxiB; + type InstAxiAr = axi::AxiAr; + type InstAxiR = axi::AxiR; + + type InstCsrRdReq = csr_config::CsrRdReq; + type InstCsrRdResp = csr_config::CsrRdResp; + type InstCsrWrReq = csr_config::CsrWrReq; + type InstCsrWrResp = csr_config::CsrWrResp; + type InstCsrChange = csr_config::CsrChange; + + config( + axi_aw_r: chan in, + axi_w_r: chan in, + axi_b_s: chan out, + axi_ar_r: chan in, + axi_r_s: chan out, + + + csr_rd_req_s: chan out, + csr_rd_resp_r: chan in, + csr_wr_req_s: chan out, + csr_wr_resp_r: chan in, + ) { + spawn AxiCsrAccessor ( + axi_aw_r, axi_w_r, axi_b_s, + axi_ar_r, axi_r_s, + csr_rd_req_s, csr_rd_resp_r, + csr_wr_req_s, csr_wr_resp_r, + ); + } + + init { } + + next (state: ()) { } +} + +const TEST_ID_W = u32:4; +const TEST_DATA_W = u32:32; +const TEST_ADDR_W = u32:16; +const TEST_REGS_N = u32:4; +const TEST_DATA_W_DIV8 = TEST_DATA_W / u32:8; +const TEST_LOG2_REGS_N = std::clog2(TEST_REGS_N); +const TEST_LOG2_DATA_W_DIV8 = std::clog2(TEST_DATA_W_DIV8); + +type TestCsr = uN[TEST_LOG2_REGS_N]; +type TestValue = uN[TEST_DATA_W]; + +struct TestData { + csr: uN[TEST_LOG2_REGS_N], + value: uN[TEST_DATA_W], +} + +const TEST_DATA = TestData[20]:[ + TestData{ csr: TestCsr:0, value: TestValue:0xca32_9f4a }, + TestData{ csr: TestCsr:1, value: TestValue:0x0fb3_fa42 }, + TestData{ csr: TestCsr:2, value: TestValue:0xe7ee_da41 }, + TestData{ csr: TestCsr:3, value: TestValue:0xef51_f98c }, + TestData{ csr: TestCsr:0, value: TestValue:0x97a3_a2d2 }, + TestData{ csr: TestCsr:0, value: TestValue:0xea06_e94b }, + TestData{ csr: TestCsr:1, value: TestValue:0x5fac_17ce }, + TestData{ csr: TestCsr:3, value: TestValue:0xf9d8_9938 }, + TestData{ csr: TestCsr:2, value: TestValue:0xc262_2d2e }, + TestData{ csr: TestCsr:2, value: TestValue:0xb4dd_424e }, + TestData{ csr: TestCsr:1, value: TestValue:0x01f9_b9e4 }, + TestData{ csr: TestCsr:1, value: TestValue:0x3020_6eec }, + TestData{ csr: TestCsr:3, value: TestValue:0x3124_87b5 }, + TestData{ csr: TestCsr:0, value: TestValue:0x0a49_f5e3 }, + TestData{ csr: TestCsr:2, value: TestValue:0xde3b_5d0f }, + TestData{ csr: TestCsr:3, value: TestValue:0x5948_c1b3 }, + TestData{ csr: TestCsr:0, value: TestValue:0xa26d_851f }, + TestData{ csr: TestCsr:3, value: TestValue:0x3fa9_59c0 }, + TestData{ csr: TestCsr:1, 
value: TestValue:0x4efd_dd09 }, + TestData{ csr: TestCsr:1, value: TestValue:0x6d75_058a }, +]; + +#[test_proc] +proc AxiCsrAccessorTest { + type TestAxiAw = axi::AxiAw; + type TestAxiW = axi::AxiW; + type TestAxiB = axi::AxiB; + type TestAxiAr = axi::AxiAr; + type TestAxiR = axi::AxiR; + + + type TestCsrRdReq = csr_config::CsrRdReq; + type TestCsrRdResp = csr_config::CsrRdResp; + type TestCsrWrReq = csr_config::CsrWrReq; + type TestCsrWrResp = csr_config::CsrWrResp; + type TestCsrChange = csr_config::CsrChange; + + terminator: chan out; + + axi_aw_s: chan out; + axi_w_s: chan out; + axi_b_r: chan in; + axi_ar_s: chan out; + axi_r_r: chan in; + + csr_rd_req_r: chan in; + csr_rd_resp_s: chan out; + csr_wr_req_r: chan in; + csr_wr_resp_s: chan out; + + config (terminator: chan out) { + let (axi_aw_s, axi_aw_r) = chan("axi_aw"); + let (axi_w_s, axi_w_r) = chan("axi_w"); + let (axi_b_s, axi_b_r) = chan("axi_b"); + let (axi_ar_s, axi_ar_r) = chan("axi_ar"); + let (axi_r_s, axi_r_r) = chan("axi_r"); + + let (csr_rd_req_s, csr_rd_req_r) = chan("csr_rd_req"); + let (csr_rd_resp_s, csr_rd_resp_r) = chan("csr_rd_resp"); + + let (csr_wr_req_s, csr_wr_req_r) = chan("csr_wr_req"); + let (csr_wr_resp_s, csr_wr_resp_r) = chan("csr_wr_resp"); + + spawn AxiCsrAccessor ( + axi_aw_r, axi_w_r, axi_b_s, + axi_ar_r, axi_r_s, + csr_rd_req_s, csr_rd_resp_r, + csr_wr_req_s, csr_wr_resp_r, + ); + + ( + terminator, + axi_aw_s, axi_w_s, axi_b_r, + axi_ar_s, axi_r_r, + csr_rd_req_r, csr_rd_resp_s, + csr_wr_req_r, csr_wr_resp_s, + ) + } + + init { } + + next (state: ()) { + // test writing via AXI + let tok = for ((i, test_data), tok): ((u32, TestData), token) in enumerate(TEST_DATA) { + // write CSR via AXI + let axi_aw = TestAxiAw { + id: i as uN[TEST_ID_W], + addr: (test_data.csr as uN[TEST_ADDR_W]) << TEST_LOG2_DATA_W_DIV8, + size: axi::AxiAxSize::MAX_4B_TRANSFER, + len: u8:0, + burst: axi::AxiAxBurst::FIXED, + }; + let tok = send(tok, axi_aw_s, axi_aw); + trace_fmt!("Sent #{} AXI AW: {:#x}", i + u32:1, axi_aw); + + let axi_w = TestAxiW { + data: test_data.value, + strb: !uN[TEST_DATA_W_DIV8]:0, + last: true, + }; + let tok = send(tok, axi_w_s, axi_w); + trace_fmt!("Sent #{} AXI W: {:#x}", i + u32:1, axi_w); + + let expected_wr_req = TestCsrWrReq { + csr: test_data.csr, + value: test_data.value + }; + let (tok, wr_req) = recv(tok, csr_wr_req_r); + trace_fmt!("Received #{} CSR WriteRequest: {:#x}", i + u32:1, wr_req); + assert_eq(expected_wr_req, wr_req); + + let tok = send(tok, csr_wr_resp_s, TestCsrWrResp{}); + trace_fmt!("Sent #{} CsrWrResp", i + u32:1); + let (tok, axi_b) = recv(tok, axi_b_r); + trace_fmt!("Received #{} AXI B: {:#x}", i + u32:1, axi_b); + let expected_axi_resp = TestAxiB{ + resp: axi::AxiWriteResp::OKAY, + id: i as uN[TEST_ID_W], + }; + assert_eq(expected_axi_resp, axi_b); + + // read CSRs via AXI + let axi_ar = TestAxiAr { + id: i as uN[TEST_ID_W], + addr: (test_data.csr as uN[TEST_ADDR_W]) << TEST_LOG2_DATA_W_DIV8, + len: u8:0, + ..zero!() + }; + let tok = send(tok, axi_ar_s, axi_ar); + trace_fmt!("Sent #{} AXI AR: {:#x}", i + u32:1, axi_ar); + + let expected_rd_req = TestCsrRdReq { + csr: test_data.csr, + }; + let (tok, rd_req) = recv(tok, csr_rd_req_r); + trace_fmt!("Received #{} CSR ReadRequest: {:#x}", i + u32:1, rd_req); + assert_eq(expected_rd_req, rd_req); + let rd_resp = TestCsrRdResp { + csr: test_data.csr, + value: test_data.value + }; + let tok = send(tok, csr_rd_resp_s, rd_resp); + trace_fmt!("Sent #{} CsrRdResp: {:#x}", i + u32:1, rd_resp); + + let (tok, axi_r) = recv(tok, 
axi_r_r); + trace_fmt!("Received #{} AXI R: {:#x}", i + u32:1, axi_r); + let expected_axi_rd_resp = TestAxiR{ + id: i as uN[TEST_ID_W], + data: test_data.value, + resp: axi::AxiReadResp::OKAY, + last: true, + }; + assert_eq(expected_axi_rd_resp, axi_r); + + tok + }(join()); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/block_dec.x b/xls/modules/zstd/block_dec.x deleted file mode 100644 index f068a8e8b6..0000000000 --- a/xls/modules/zstd/block_dec.x +++ /dev/null @@ -1,170 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import xls.modules.zstd.common; -import xls.modules.zstd.dec_demux as demux; -import xls.modules.zstd.raw_block_dec as raw; -import xls.modules.zstd.rle_block_dec as rle; -import xls.modules.zstd.dec_mux as mux; - -type BlockDataPacket = common::BlockDataPacket; -type BlockData = common::BlockData; -type BlockPacketLength = common::BlockPacketLength; -type ExtendedBlockDataPacket = common::ExtendedBlockDataPacket; -type CopyOrMatchContent = common::CopyOrMatchContent; -type CopyOrMatchLength = common::CopyOrMatchLength; -type SequenceExecutorPacket = common::SequenceExecutorPacket; -type SequenceExecutorMessageType = common::SequenceExecutorMessageType; - -// Proc responsible for connecting internal procs used in Block data decoding. -// It handles incoming block data packets by redirecting those to demuxer which passes those to -// block decoder procs specific for given block type. Results are then gathered by mux which -// transfers decoded data further. 
The connections are visualised on the following diagram: -// -// Block Decoder -// ┌───────────────────────────────────────┐ -// │ Raw Block Decoder │ -// │ ┌───────────────────┐ │ -// │ ┌─► ├┐ │ -// │ Demux │ └───────────────────┘│ Mux │ -// │┌─────┐│ Rle Block Decoder │ ┌─────┐│ -// ││ ├┘ ┌───────────────────┐└─► ││ -// ──┼► ├──► ├──► ├┼─► -// ││ ├┐ └───────────────────┘┌─► ││ -// │└─────┘│ Cmp Block Decoder │ └─────┘│ -// │ │ ┌───────────────────┐│ │ -// │ └─► ├┘ │ -// │ └───────────────────┘ │ -// └───────────────────────────────────────┘ - -pub proc BlockDecoder { - input_r: chan in; - output_s: chan out; - - config (input_r: chan in, output_s: chan out) { - let (demux_raw_s, demux_raw_r) = chan("demux_raw"); - let (demux_rle_s, demux_rle_r) = chan("demux_rle"); - let (demux_cmp_s, demux_cmp_r) = chan("demux_cmp"); - let (mux_raw_s, mux_raw_r) = chan("mux_raw"); - let (mux_rle_s, mux_rle_r) = chan("mux_rle"); - let (mux_cmp_s, mux_cmp_r) = chan("mux_cmp"); - - spawn demux::DecoderDemux(input_r, demux_raw_s, demux_rle_s, demux_cmp_s); - spawn raw::RawBlockDecoder(demux_raw_r, mux_raw_s); - spawn rle::RleBlockDecoder(demux_rle_r, mux_rle_s); - // TODO(antmicro): 2023-11-28 change to compressed block decoder proc - spawn raw::RawBlockDecoder(demux_cmp_r, mux_cmp_s); - spawn mux::DecoderMux(mux_raw_r, mux_rle_r, mux_cmp_r, output_s); - - (input_r, output_s) - } - - init { } - - next(state: ()) { } -} - -#[test_proc] -proc BlockDecoderTest { - terminator: chan out; - input_s: chan out; - output_r: chan in; - - init {} - - config (terminator: chan out) { - let (input_s, input_r) = chan("input"); - let (output_s, output_r) = chan("output"); - - spawn BlockDecoder(input_r, output_s); - - (terminator, input_s, output_r) - } - - next(state: ()) { - let tok = join(); - let EncodedDataBlocksPackets: BlockDataPacket[13] = [ - // RAW Block 1 byte - BlockDataPacket { id: u32:0, last: true, last_block: false, data: BlockData:0xDE000008, length: BlockPacketLength:32 }, - // RAW Block 2 bytes - BlockDataPacket { id: u32:1, last: true, last_block: false, data: BlockData:0xDEAD000010, length: BlockPacketLength:40 }, - // RAW Block 4 bytes - BlockDataPacket { id: u32:2, last: true, last_block: false, data: BlockData:0xDEADBEEF000020, length: BlockPacketLength:56 }, - // RAW Block 5 bytes (block header takes one full packet) - BlockDataPacket { id: u32:3, last: true, last_block: false, data: BlockData:0xDEADBEEFEF000028, length: BlockPacketLength:64 }, - // RAW Block 24 bytes (multi-packet block header with unaligned data in the last packet) - BlockDataPacket { id: u32:4, last: false, last_block: false, data: BlockData:0x12345678900000C0, length: BlockPacketLength:64 }, - BlockDataPacket { id: u32:4, last: false, last_block: false, data: BlockData:0x1234567890ABCDEF, length: BlockPacketLength:64 }, - BlockDataPacket { id: u32:4, last: false, last_block: false, data: BlockData:0xFEDCBA0987654321, length: BlockPacketLength:64 }, - BlockDataPacket { id: u32:4, last: true, last_block: false, data: BlockData:0xF0F0F0, length: BlockPacketLength:24 }, - - // RLE Block 1 byte - BlockDataPacket { id: u32:5, last: true, last_block: false, data: BlockData:0x6700000a, length: BlockPacketLength:32 }, - // RLE Block 2 bytes - BlockDataPacket { id: u32:6, last: true, last_block: false, data: BlockData:0x45000012, length: BlockPacketLength:32 }, - // RLE Block 4 bytes - BlockDataPacket { id: u32:7, last: true, last_block: false, data: BlockData:0x23000022, length: BlockPacketLength:32 }, - // RLE Block 8 bytes (block 
takes one full packet) - BlockDataPacket { id: u32:8, last: true, last_block: false, data: BlockData:0x10000042, length: BlockPacketLength:32 }, - // RLE Block 26 bytes (multi-packet block header with unaligned data in the last packet) - BlockDataPacket { id: u32:9, last: true, last_block: true, data: BlockData:0xDE0000d2, length: BlockPacketLength:32 }, - ]; - - let tok = for ((counter, block_packet), tok): ((u32, BlockDataPacket), token) in enumerate(EncodedDataBlocksPackets) { - let tok = send(tok, input_s, block_packet); - trace_fmt!("Sent #{} encoded block packet, {:#x}", counter + u32:1, block_packet); - (tok) - }(tok); - - let DecodedDataBlocksPackets: SequenceExecutorPacket[16] = [ - // RAW Block 1 byte - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDE, length: CopyOrMatchLength:8 }, - // RAW Block 2 bytes - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEAD, length: CopyOrMatchLength:16 }, - // RAW Block 4 bytes - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEADBEEF, length: CopyOrMatchLength:32 }, - // RAW Block 5 bytes (block header takes one full packet) - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEADBEEFEF, length: CopyOrMatchLength:40 }, - // RAW Block 24 bytes (multi-packet block header with unaligned data in the last packet) - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x1234567890, length: CopyOrMatchLength:40 }, - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x1234567890ABCDEF, length: CopyOrMatchLength:64 }, - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xFEDCBA0987654321, length: CopyOrMatchLength:64 }, - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xF0F0F0, length: CopyOrMatchLength:24 }, - - // RLE Block 1 byte - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x67, length: CopyOrMatchLength:8 }, - // RLE Block 2 bytes - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x4545, length: CopyOrMatchLength:16 }, - // RLE Block 4 bytes - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x23232323, length: CopyOrMatchLength:32 }, - // RLE Block 8 bytes (block takes one full packet) - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0x1010101010101010, length: CopyOrMatchLength:64 }, - // RLE Block 26 bytes (multi-packet block header with unaligned data in the last packet) - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDEDEDEDEDEDEDE, length: CopyOrMatchLength:64 }, - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDEDEDEDEDEDEDE, length: CopyOrMatchLength:64 }, - SequenceExecutorPacket { last: false, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDEDEDEDEDEDEDE, length: 
CopyOrMatchLength:64 }, - SequenceExecutorPacket { last: true, msg_type: SequenceExecutorMessageType::LITERAL, content: CopyOrMatchContent:0xDEDE, length: CopyOrMatchLength:16 }, - ]; - - let tok = for ((counter, expected_block_packet), tok): ((u32, SequenceExecutorPacket), token) in enumerate(DecodedDataBlocksPackets) { - let (tok, decoded_block_packet) = recv(tok, output_r); - trace_fmt!("Received #{} decoded block packet, data: 0x{:x}", counter + u32:1, decoded_block_packet); - trace_fmt!("Expected #{} decoded block packet, data: 0x{:x}", counter + u32:1, expected_block_packet); - assert_eq(decoded_block_packet, expected_block_packet); - (tok) - }(tok); - - send(tok, terminator, true); - } -} diff --git a/xls/modules/zstd/block_header.x b/xls/modules/zstd/block_header.x index 455b3295e1..9e8679a72d 100644 --- a/xls/modules/zstd/block_header.x +++ b/xls/modules/zstd/block_header.x @@ -17,12 +17,10 @@ // https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.2 import std; -import xls.modules.zstd.buffer as buff; import xls.modules.zstd.common as common; -type Buffer = buff::Buffer; -type BufferStatus = buff::BufferStatus; type BlockType = common::BlockType; +type BlockSize = common::BlockSize; // Status values reported by the block header parsing function pub enum BlockHeaderStatus: u2 { @@ -35,14 +33,7 @@ pub enum BlockHeaderStatus: u2 { pub struct BlockHeader { last: bool, btype: BlockType, - size: u21, -} - -// Structure for returning results of block header parsing -pub struct BlockHeaderResult { - buffer: Buffer, - status: BlockHeaderStatus, - header: BlockHeader, + size: BlockSize, } // Auxiliary constant that can be used to initialize Proc's state @@ -58,51 +49,3 @@ pub fn extract_block_header(data:u24) -> BlockHeader { last: data[0:1], } } - -// Parses a Buffer and extracts information from a Block_Header. Returns BufferResult -// with outcome of operations on buffer and information extracted from the Block_Header. -pub fn parse_block_header(buffer: Buffer) -> BlockHeaderResult { - let (result, data) = buff::buffer_fixed_pop_checked(buffer); - - match result.status { - BufferStatus::OK => { - let block_header = extract_block_header(data); - if (block_header.btype != BlockType::RESERVED) { - BlockHeaderResult {status: BlockHeaderStatus::OK, header: block_header, buffer: result.buffer} - } else { - BlockHeaderResult {status: BlockHeaderStatus::CORRUPTED, header: zero!(), buffer: buffer} - } - }, - _ => { - trace_fmt!("parse_block_header: Not enough data to parse block header! 
{}", buffer.length); - BlockHeaderResult {status: BlockHeaderStatus::NO_ENOUGH_DATA, header: zero!(), buffer: buffer} - } - } -} - -#[test] -fn test_parse_block_header() { - let buffer = Buffer { content: u32:0x8001 , length: u32:24}; - let result = parse_block_header(buffer); - assert_eq(result, BlockHeaderResult { - status: BlockHeaderStatus::OK, - header: BlockHeader { last: u1:1, btype: BlockType::RAW, size: u21:0x1000 }, - buffer: Buffer { content: u32:0, length: u32:0 } - }); - - let buffer = Buffer { content: u32:0x91A2, length: u32:24}; - let result = parse_block_header(buffer); - assert_eq(result, BlockHeaderResult { - status: BlockHeaderStatus::OK, - header: BlockHeader { last: u1:0, btype: BlockType::RLE, size: u21:0x1234 }, - buffer: Buffer { content: u32:0, length: u32:0 } - }); - - let buffer = Buffer { content: u32:0x001, length: u32:16}; - let result = parse_block_header(buffer); - assert_eq(result, BlockHeaderResult { - status: BlockHeaderStatus::NO_ENOUGH_DATA, - header: zero!(), - buffer: Buffer { content: u32:0x001, length: u32:16 } - }); -} diff --git a/xls/modules/zstd/block_header_dec.x b/xls/modules/zstd/block_header_dec.x new file mode 100644 index 0000000000..45c69e921c --- /dev/null +++ b/xls/modules/zstd/block_header_dec.x @@ -0,0 +1,293 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +import xls.modules.zstd.block_header as block_header; +import xls.modules.zstd.common as common; +import xls.modules.zstd.memory.mem_reader as mem_reader; + +type BlockSize = common::BlockSize; +type BlockType = common::BlockType; +type BlockHeader = block_header::BlockHeader; + +pub struct BlockHeaderDecoderReq { + addr: uN[ADDR_W], +} + +pub enum BlockHeaderDecoderStatus: u2 { + OKAY = 0, + CORRUPTED = 1, + MEMORY_ACCESS_ERROR = 2, +} + +pub struct BlockHeaderDecoderResp { + status: BlockHeaderDecoderStatus, + header: BlockHeader, + rle_symbol: u8, +} + +pub proc BlockHeaderDecoder { + type Req = BlockHeaderDecoderReq; + type Resp = BlockHeaderDecoderResp; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + type MemReaderStatus = mem_reader::MemReaderStatus; + + type Status = BlockHeaderDecoderStatus; + type Length = uN[ADDR_W]; + type Addr = uN[ADDR_W]; + + req_r: chan in; + resp_s: chan out; + mem_req_s: chan out; + mem_resp_r: chan in; + + config ( + req_r: chan in, + resp_s: chan out, + mem_req_s: chan out, + mem_resp_r: chan in, + ) { + (req_r, resp_s, mem_req_s, mem_resp_r) + } + + init { } + + next (state: ()) { + let tok0 = join(); + + // receive request + let (tok1_0, req, req_valid) = recv_non_blocking(tok0, req_r, zero!()); + + // send memory read request + let mem_req = MemReaderReq {addr: req.addr, length: Length:4 }; + let tok2_0 = send_if(tok1_0, mem_req_s, req_valid, mem_req); + + // receive memory read response + let (tok1_1, mem_resp, mem_resp_valid) = recv_non_blocking(tok0, mem_resp_r, zero!()); + + let header = block_header::extract_block_header(mem_resp.data as u24); + let rle_symbol = mem_resp.data[u32:24 +: u8]; + let status = match ( mem_resp.status == MemReaderStatus::OKAY, header.btype != BlockType::RESERVED) { + (true, true) => Status::OKAY, + (true, false) => Status::CORRUPTED, + ( _, _) => Status::MEMORY_ACCESS_ERROR, + }; + + let resp = Resp { status, header, rle_symbol }; + let tok2_1 = send_if(tok1_1, resp_s, mem_resp_valid, resp); + } +} + +const INST_DATA_W = u32:64; +const INST_ADDR_W = u32:16; + +proc BlockHeaderDecoderInst { + type Req = BlockHeaderDecoderReq; + type Resp = BlockHeaderDecoderResp; + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + + config ( + req_r: chan in, + resp_s: chan out, + mem_req_s: chan out, + mem_resp_r: chan in, + ) { + spawn BlockHeaderDecoder( req_r, resp_s, mem_req_s, mem_resp_r); + } + + init { } + next (state: ()) { } +} + +const TEST_DATA_W = u32:32; +const TEST_ADDR_W = u32:32; + +fn header_to_raw(header: BlockHeader, rle_symbol: u8) -> u32 { + rle_symbol ++ header.size ++ (header.btype as u2) ++ header.last +} + + +#[test_proc] +proc BlockHeaderDecoderTest { + type Req = BlockHeaderDecoderReq; + type Resp = BlockHeaderDecoderResp; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + type MemReaderStatus = mem_reader::MemReaderStatus; + + type Data = uN[TEST_DATA_W]; + type Addr = uN[TEST_ADDR_W]; + type Length = uN[TEST_ADDR_W]; + + terminator: chan out; + + req_s: chan out; + resp_r: chan in; + + mem_req_r: chan in; + mem_resp_s: chan out; + + config (terminator: chan out) { + + let (req_s, req_r) = chan("req"); + let (resp_s, resp_r) = chan("resp"); + + let (mem_req_s, mem_req_r) = chan("mem_req"); + let (mem_resp_s, mem_resp_r) = chan("mem_resp"); + + spawn BlockHeaderDecoder ( + req_r, resp_s, mem_req_s, mem_resp_r + ); + + (terminator, req_s, resp_r, mem_req_r, 
mem_resp_s) + } + + init { } + + next (state: ()) { + const LENGTH = Length:4; + + let tok = join(); + + // Test Raw + let addr = Addr:0x1234; + let header = BlockHeader { size: BlockSize:0x100, btype: BlockType::RAW, last: true}; + let rle_symbol = u8:0; + + let req = Req { addr }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr, length: LENGTH }); + + let mem_resp = MemReaderResp { + status: MemReaderStatus::OKAY, + data: checked_cast(header_to_raw(header, rle_symbol)), + length: LENGTH, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { + status: BlockHeaderDecoderStatus::OKAY, + header: header, + rle_symbol: rle_symbol + }); + + // Test RLE + let addr = Addr:0x2000; + let header = BlockHeader { size: BlockSize:0x40, btype: BlockType::RLE, last: false}; + let rle_symbol = u8:123; + + let req = Req { addr }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr, length: LENGTH }); + + let mem_resp = MemReaderResp { + status: MemReaderStatus::OKAY, + data: checked_cast(header_to_raw(header, rle_symbol)), + length: LENGTH, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { + status: BlockHeaderDecoderStatus::OKAY, + header: header, + rle_symbol: rle_symbol + }); + + // Test COMPRESSED + let addr = Addr:0x2000; + let header = BlockHeader { size: BlockSize:0x40, btype: BlockType::COMPRESSED, last: true}; + let rle_symbol = u8:0; + + let req = Req { addr }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr, length: LENGTH }); + + let mem_resp = MemReaderResp { + status: MemReaderStatus::OKAY, + data: checked_cast(header_to_raw(header, rle_symbol)), + length: LENGTH, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { + status: BlockHeaderDecoderStatus::OKAY, + header: header, + rle_symbol: rle_symbol + }); + + // Test RESERVED + let addr = Addr:0x2000; + let header = BlockHeader { size: BlockSize:0x40, btype: BlockType::RESERVED, last: true}; + let rle_symbol = u8:0; + + let req = Req { addr }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr, length: LENGTH }); + + let mem_resp = MemReaderResp { + status: MemReaderStatus::OKAY, + data: checked_cast(header_to_raw(header, rle_symbol)), + length: LENGTH, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { + status: BlockHeaderDecoderStatus::CORRUPTED, + header: header, + rle_symbol: rle_symbol + }); + + // Test memory error + let addr = Addr:0x2000; + let header = BlockHeader { size: BlockSize:0x40, btype: BlockType::RESERVED, last: true}; + let rle_symbol = u8:0; + + let req = Req { addr }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr, length: LENGTH }); + + let mem_resp = MemReaderResp { + status: MemReaderStatus::ERROR, + data: checked_cast(header_to_raw(header, rle_symbol)), + length: LENGTH, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { + status: BlockHeaderDecoderStatus::MEMORY_ACCESS_ERROR, + 
header: header, + rle_symbol: rle_symbol + }); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/cocotb/BUILD b/xls/modules/zstd/cocotb/BUILD new file mode 100644 index 0000000000..cdb788d732 --- /dev/null +++ b/xls/modules/zstd/cocotb/BUILD @@ -0,0 +1,75 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +load("@xls_pip_deps//:requirements.bzl", "requirement") + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//xls:xls_users"], + licenses = ["notice"], +) + +py_library( + name = "channel", + srcs = ["channel.py"], + deps = [ + ":xlsstruct", + requirement("cocotb"), + requirement("cocotb_bus"), + ], +) + +py_library( + name = "memory", + srcs = ["memory.py"], + deps = [ + requirement("cocotbext-axi"), + ], +) + +py_library( + name = "scoreboard", + srcs = ["scoreboard.py"], + deps = [ + ":channel", + ":xlsstruct", + requirement("cocotb"), + ], +) + +py_library( + name = "utils", + srcs = ["utils.py"], + deps = [ + requirement("cocotb"), + "//xls/common:runfiles", + ], +) + +py_library( + name = "xlsstruct", + srcs = ["xlsstruct.py"], + deps = [ + requirement("cocotb"), + ], +) + +py_library( + name = "data_generator", + srcs = ["data_generator.py"], + deps = [ + "//xls/common:runfiles", + "@zstd//:decodecorpus", + ], +) diff --git a/xls/modules/zstd/cocotb/channel.py b/xls/modules/zstd/cocotb/channel.py new file mode 100644 index 0000000000..0970ab6e9b --- /dev/null +++ b/xls/modules/zstd/cocotb/channel.py @@ -0,0 +1,95 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
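The channel helpers defined below wrap the data/rdy/vld signal bundle that XLS-generated Verilog exposes for each channel: a transfer completes on a rising clock edge when both vld and rdy are high. A hedged usage sketch follows; the DUT handle, the "req"/"resp" channel prefixes and the struct field widths are assumptions for illustration only, with the field declaration style taken from the xlsstruct.py docstring.

import cocotb
from cocotb.clock import Clock

from xls.modules.zstd.cocotb.channel import XLSChannel, XLSChannelDriver, XLSChannelMonitor
from xls.modules.zstd.cocotb.xlsstruct import XLSStruct, xls_dataclass


@xls_dataclass
class ReqStruct(XLSStruct):   # assumed 16-bit request payload, declared with the name: width syntax
    addr: 16


@xls_dataclass
class RespStruct(XLSStruct):  # assumed 32-bit response payload
    data: 32


@cocotb.test()
async def smoke_test(dut):
    cocotb.start_soon(Clock(dut.clk, 10, units="ns").start())
    # Drives req_data/req_vld and holds each word until req_rdy is sampled high.
    driver = XLSChannelDriver(dut, "req", dut.clk)
    # Deserializes every observed resp transfer into RespStruct.
    monitor = XLSChannelMonitor(dut, "resp", dut.clk, RespStruct)
    monitor.add_callback(lambda resp: dut._log.info(f"resp: {resp}"))
    # Keeps resp_rdy asserted so the DUT never stalls on the response channel.
    XLSChannel(dut, "resp", dut.clk, start_now=True)
    await driver.send(ReqStruct(addr=0x0))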
+ +from typing import Any, Sequence, Type, Union + +import cocotb +from cocotb.handle import SimHandleBase +from cocotb.triggers import RisingEdge +from cocotb_bus.bus import Bus +from cocotb_bus.drivers import BusDriver +from cocotb_bus.monitors import BusMonitor + +from xls.modules.zstd.cocotb.xlsstruct import XLSStruct + +Transaction = Union[XLSStruct, Sequence[XLSStruct]] + +XLS_CHANNEL_SIGNALS = ["data", "rdy", "vld"] +XLS_CHANNEL_OPTIONAL_SIGNALS = [] + + +class XLSChannel(Bus): + _signals = XLS_CHANNEL_SIGNALS + _optional_signals = XLS_CHANNEL_OPTIONAL_SIGNALS + + def __init__(self, entity, name, clk, *, start_now=False, **kwargs: Any): + super().__init__(entity, name, self._signals, self._optional_signals, **kwargs) + self.clk = clk + if start_now: + self.start_recv_loop() + + @cocotb.coroutine + async def recv_channel(self): + """Cocotb coroutine that acts as a proc receiving data from a channel""" + self.rdy.setimmediatevalue(1) + while True: + await RisingEdge(self.clk) + + def start_recv_loop(self): + cocotb.start_soon(self.recv_channel()) + + +class XLSChannelDriver(BusDriver): + _signals = XLS_CHANNEL_SIGNALS + _optional_signals = XLS_CHANNEL_OPTIONAL_SIGNALS + + def __init__(self, entity: SimHandleBase, name: str, clock: SimHandleBase, **kwargs: Any): + BusDriver.__init__(self, entity, name, clock, **kwargs) + + self.bus.data.setimmediatevalue(0) + self.bus.vld.setimmediatevalue(0) + + async def _driver_send(self, transaction: Transaction, sync: bool = True, **kwargs: Any) -> None: + if sync: + await RisingEdge(self.clock) + + data_to_send = (transaction if isinstance(transaction, Sequence) else [transaction]) + + for word in data_to_send: + self.bus.vld.value = 1 + self.bus.data.value = word.binaryvalue + + while True: + await RisingEdge(self.clock) + if self.bus.rdy.value: + break + + self.bus.vld.value = 0 + + +class XLSChannelMonitor(BusMonitor): + _signals = XLS_CHANNEL_SIGNALS + _optional_signals = XLS_CHANNEL_OPTIONAL_SIGNALS + + def __init__(self, entity: SimHandleBase, name: str, clock: SimHandleBase, struct: Type[XLSStruct], **kwargs: Any): + BusMonitor.__init__(self, entity, name, clock, **kwargs) + self.struct = struct + + @cocotb.coroutine + async def _monitor_recv(self) -> None: + while True: + await RisingEdge(self.clock) + if self.bus.rdy.value and self.bus.vld.value: + vec = self.struct.from_int(self.bus.data.value.integer) + self._recv(vec) diff --git a/xls/modules/zstd/cocotb/data_generator.py b/xls/modules/zstd/cocotb/data_generator.py new file mode 100644 index 0000000000..72b60c5eee --- /dev/null +++ b/xls/modules/zstd/cocotb/data_generator.py @@ -0,0 +1,61 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
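The data generator defined below shells out to decodecorpus to produce small ZSTD frames for the cocotb tests and uses the zstandard package to obtain the reference decompressed payload. A possible round trip with these helpers is sketched here; the output path is illustrative.

from xls.modules.zstd.cocotb import data_generator

frame_path = "/tmp/frame.zst"  # illustrative location for the generated frame
# Generate a frame built only from RAW blocks, seeded for reproducibility.
data_generator.GenerateFrame(seed=1, btype=data_generator.BlockType.RAW, output_path=frame_path)

with open(frame_path, "rb") as f:
    encoded = f.read()

# Reference payload the hardware decoder is expected to reproduce.
expected = data_generator.DecompressFrame(encoded)
print(len(encoded), len(expected))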
+ +from pathlib import Path +from enum import Enum + +from xls.common import runfiles +import subprocess +import zstandard + +class BlockType(Enum): + RAW = 0 + RLE = 1 + COMPRESSED = 2 + RANDOM = 3 + + def __str__(self): + return self.name + + @staticmethod + def from_string(s): + try: + return BlockType[s] + except KeyError as e: + raise ValueError(str(e)) + +def CallDecodecorpus(args): + decodecorpus = Path(runfiles.get_path("decodecorpus", repository = "zstd")) + cmd = args + cmd.insert(0, str(decodecorpus)) + cmd_concat = " ".join(cmd) + subprocess.run(cmd_concat, shell=True, check=True) + +def DecompressFrame(data): + dctx = zstandard.ZstdDecompressor() + return dctx.decompress(data) + +def GenerateFrame(seed, btype, output_path): + args = [] + args.append("-s" + str(seed)) + if (btype != BlockType.RANDOM): + args.append("--block-type=" + str(btype.value)) + args.append("--content-size") + # Test payloads up to 16KB + args.append("--max-content-size-log=14") + args.append("-p" + output_path) + args.append("-vvvvvvv") + + CallDecodecorpus(args) + diff --git a/xls/modules/zstd/cocotb/memory.py b/xls/modules/zstd/cocotb/memory.py new file mode 100644 index 0000000000..52e512e053 --- /dev/null +++ b/xls/modules/zstd/cocotb/memory.py @@ -0,0 +1,43 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os + +from cocotbext.axi.axi_ram import AxiRam, AxiRamRead, AxiRamWrite +from cocotbext.axi.sparse_memory import SparseMemory + + +def init_axi_mem(path: os.PathLike, kwargs): + with open(path, "rb") as f: + sparse_mem = SparseMemory(size=kwargs["size"]) + sparse_mem.write(0x0, f.read()) + kwargs["mem"] = sparse_mem + + +class AxiRamReadFromFile(AxiRamRead): + def __init__(self, *args, path: os.PathLike, **kwargs): + init_axi_mem(path, kwargs) + super().__init__(*args, **kwargs) + + +class AxiRamFromFile(AxiRam): + def __init__(self, *args, path: os.PathLike, **kwargs): + init_axi_mem(path, kwargs) + super().__init__(*args, **kwargs) + + +class AxiRamWriteFromFile(AxiRamWrite): + def __init__(self, *args, path: os.PathLike, **kwargs): + init_axi_mem(path, kwargs) + super().__init__(*args, **kwargs) diff --git a/xls/modules/zstd/cocotb/scoreboard.py b/xls/modules/zstd/cocotb/scoreboard.py new file mode 100644 index 0000000000..b9b64ca6e2 --- /dev/null +++ b/xls/modules/zstd/cocotb/scoreboard.py @@ -0,0 +1,69 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
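The scoreboard defined below timestamps each request seen on one XLSChannelMonitor, pairs it with the next response seen on another, and collects the per-transaction latency in clock cycles. A hedged wiring sketch inside a cocotb test follows; the DUT handle and channel prefixes are assumptions, and ReqStruct/RespStruct are assumed XLSStruct subclasses as in the earlier channel sketch.

import cocotb
from cocotb.clock import Clock

from xls.modules.zstd.cocotb.channel import XLSChannelMonitor
from xls.modules.zstd.cocotb.scoreboard import LatencyScoreboard


@cocotb.test()
async def latency_test(dut):
    clock = Clock(dut.clk, 10, units="ns")
    cocotb.start_soon(clock.start())

    req_monitor = XLSChannelMonitor(dut, "req", dut.clk, ReqStruct)
    resp_monitor = XLSChannelMonitor(dut, "resp", dut.clk, RespStruct)
    scoreboard = LatencyScoreboard(dut, clock, req_monitor, resp_monitor)

    # ... drive stimulus and wait for all responses here ...

    scoreboard.report_result()  # logs the first, second and average of the remaining latencies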
+ +from dataclasses import dataclass +from queue import Queue + +from cocotb.clock import Clock +from cocotb.log import SimLog +from cocotb.utils import get_sim_time + +from xls.modules.zstd.cocotb.channel import XLSChannelMonitor +from xls.modules.zstd.cocotb.xlsstruct import XLSStruct + + +@dataclass +class LatencyQueueItem: + transaction: XLSStruct + timestamp: int + + +class LatencyScoreboard: + def __init__(self, dut, clock: Clock, req_monitor: XLSChannelMonitor, resp_monitor: XLSChannelMonitor): + self.dut = dut + self.log = SimLog(f"zstd.cocotb.scoreboard.{self.dut._name}") + self.clock = clock + self.req_monitor = req_monitor + self.resp_monitor = resp_monitor + self.pending_req = Queue() + self.results = [] + + self.req_monitor.add_callback(self._req_callback) + self.resp_monitor.add_callback(self._resp_callback) + + def _current_cycle(self): + return get_sim_time(units='step') / self.clock.period + + def _req_callback(self, transaction: XLSStruct): + self.pending_req.put(LatencyQueueItem(transaction, self._current_cycle())) + + def _resp_callback(self, transaction: XLSStruct): + latency_item = self.pending_req.get() + self.results.append(self._current_cycle() - latency_item.timestamp) + + def average_latency(self): + return sum(self.results)/len(self.results) + + def report_result(self): + if not self.pending_req.empty(): + self.log.warning(f"There are unfulfilled requests from channel {self.req_monitor.name}") + while not self.pending_req.empty(): + self.log.warning(f"Unfulfilled request: {self.pending_req.get()}") + if len(self.results) > 0: + self.log.info(f"Latency report - 1st latency: {self.results[0]}") + if len(self.results) > 1: + self.log.info(f"Latency report - 2nd latency: {self.results[1]}") + if len(self.results) > 2: + avg = sum(self.results[2:])/len(self.results[2:]) + self.log.info(f"Latency report - rest of the latencies (average): {avg}") diff --git a/xls/modules/zstd/cocotb/utils.py b/xls/modules/zstd/cocotb/utils.py new file mode 100644 index 0000000000..0930a92932 --- /dev/null +++ b/xls/modules/zstd/cocotb/utils.py @@ -0,0 +1,57 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
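Editor's note: the scoreboard above pairs request and response transactions by arrival order and records per-request latency in clock cycles (it divides simulation time by the clock period, so the Clock object itself is passed in). A minimal wiring sketch follows; the "req"/"resp" prefixes and struct layouts are the same hypothetical ones used in the earlier channel example.

    # Sketch only: channel prefixes and struct layouts are assumptions.
    import cocotb
    from cocotb.clock import Clock
    from cocotb.triggers import ClockCycles

    from xls.modules.zstd.cocotb.channel import XLSChannel, XLSChannelMonitor
    from xls.modules.zstd.cocotb.scoreboard import LatencyScoreboard
    from xls.modules.zstd.cocotb.xlsstruct import XLSStruct, xls_dataclass


    @xls_dataclass
    class ReqStruct(XLSStruct):
        addr: 32


    @xls_dataclass
    class RespStruct(XLSStruct):
        data: 32


    @cocotb.test()
    async def latency_test(dut):
        clock = Clock(dut.clk, 10, units="ns")
        cocotb.start_soon(clock.start())

        req_monitor = XLSChannelMonitor(dut, "req", dut.clk, ReqStruct)
        resp_monitor = XLSChannelMonitor(dut, "resp", dut.clk, RespStruct)
        # Keep the response channel drained so transactions complete.
        XLSChannel(dut, "resp", dut.clk, start_now=True)

        scoreboard = LatencyScoreboard(dut, clock, req_monitor, resp_monitor)

        # ... drive stimulus on the request channel here ...
        await ClockCycles(dut.clk, 1000)

        scoreboard.report_result()
        if scoreboard.results:
            scoreboard.log.info(
                "average latency: %.1f cycles", scoreboard.average_latency())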
+ +import os +from pathlib import Path + +import cocotb +from cocotb.runner import check_results_file, get_runner +from cocotb.triggers import ClockCycles + +from xls.common import runfiles + + +def setup_com_iverilog(): + iverilog_path = Path(runfiles.get_path("iverilog", repository = "com_icarus_iverilog")) + vvp_path = Path(runfiles.get_path("vvp", repository = "com_icarus_iverilog")) + os.environ["PATH"] += os.pathsep + str(iverilog_path.parent) + os.environ["PATH"] += os.pathsep + str(vvp_path.parent) + build_dir = Path(os.environ['BUILD_WORKING_DIRECTORY'], "sim_build") + return build_dir + +def run_test(toplevel, test_module, verilog_sources): + build_dir = setup_com_iverilog() + runner = get_runner("icarus") + runner.build( + verilog_sources=verilog_sources, + hdl_toplevel=toplevel, + timescale=("1ns", "1ps"), + build_dir=build_dir, + defines={"SIMULATION": "1"}, + waves=True, + ) + + results_xml = runner.test( + hdl_toplevel=toplevel, + test_module=test_module, + waves=True, + ) + check_results_file(results_xml) + +@cocotb.coroutine +async def reset(clk, rst, cycles=1): + """Cocotb coroutine that performs the reset""" + rst.value = 1 + await ClockCycles(clk, cycles) + rst.value = 0 diff --git a/xls/modules/zstd/cocotb/xlsstruct.py b/xls/modules/zstd/cocotb/xlsstruct.py new file mode 100644 index 0000000000..a2d686a8af --- /dev/null +++ b/xls/modules/zstd/cocotb/xlsstruct.py @@ -0,0 +1,175 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import random +from dataclasses import asdict, astuple, dataclass, fields + +from cocotb.binary import BinaryValue + + +class TruncationError(Exception): + pass + +def xls_dataclass(cls): + """ + Class decorator for XLS structs. + Usage: + + @xls_dataclass + class MyStruct(XLSStruct): + ... + """ + return dataclass(cls, repr=False) + +@dataclass +class XLSStruct: + """ + Represents XLS struct on the Python side, allowing serialization/deserialization + to/from common formats and usage with XLS{Driver, Monitor}. + + The intended way to use this class is to inherit from it, specify the fields with + : [= ] syntax and decorate the inheriting class with + @XLSDataclass. Objects of this class can be instantiated and used like usual + dataclass objects, with a few extra methods and properties available. They can also + be passed as arguments to XLSChannelDriver.send and will be serialized to expected + bit vector. Class can be passed to XLSChannelMonitor ``struct`` constructor argument + to automatically deserialize all transfers to the provided struct. 
+ + Example: + + from xlsstruct import XLSDataclass, XLSStruct + + @XLSDataclass + class MyStruct(XLSStruct): + data: 32 + ok: 1 + id: 4 = 0 + + monitor = XLSChannelMonitor(dut, CHANNEL_PREFIX, dut.clk, MyStruct) + + driver = XLSChannelDriver(dut, CHANNEL_PREFIX, dut.clk) + driver.send(MyStruct( + data = 0xdeadbeef, + ok = 1, + id = 3, + )) + # struct fields can also be randomized + driver.send(MyStruct.randomize()) + """ + + @classmethod + def _masks(cls): + """ + Returns a list of field-sized bitmasks. + + For example for fields of widths 2, 3, 4 + returns [2'b11, 3'b111, 4'b1111]. + """ + masks = [] + for field in fields(cls): + width = field.type + masks += [(1 << width) - 1] + return masks + + @classmethod + def _positions(cls): + """ + Returns a list of start positions in a bit vector for + struct's fields. + + For example for fields of widths 1, 2, 3, 4, 5, 6 + returns [20, 18, 15, 11, 6, 0] + """ + positions = [] + for i, field in enumerate(fields(cls)): + width = field.type + if i == 0: + positions += [cls.total_width - width] + else: + positions += [positions[i-1] - width] + return positions + + @classmethod + @property + def total_width(cls): + """ + Returns total bit width of the struct + """ + return sum(field.type for field in fields(cls)) + + @property + def value(self): + """ + Returns struct's value as a Python integer + """ + value = 0 + masks = self._masks() + positions = self._positions() + for field_val, mask, pos in zip(astuple(self), masks, positions): + if field_val > mask: + raise TruncationError(f"Signal value is wider than its bit width") + value |= (field_val & mask) << pos + return value + + @property + def binaryvalue(self): + """ + Returns struct's value as a cocotb.binary.BinaryValue + """ + return BinaryValue(self.binstr) + + @property + def binstr(self): + """ + Returns struct's value as a string with its binary representation + """ + return f"{self.value:>0{self.total_width}b}" + + @property + def hexstr(self): + """ + Returns struct's value as a string with its hex representation + (without leading "0x") + """ + return f"{self.value:>0{self.total_width // 4}x}" + + @classmethod + def from_int(cls, value): + """ + Returns an instance of the struct from Python integer + """ + instance = {} + masks = cls._masks() + positions = cls._positions() + for field, mask, pos in zip(fields(cls), masks, positions): + instance[field.name] = (value >> pos) & mask + return cls(**instance) + + @classmethod + def randomize(cls): + """ + Returns an instance of the struct with all fields' values randomized + """ + instance = {} + for field in fields(cls): + instance[field.name] = random.randrange(0, 2**field.type) + return cls(**instance) + + def __str__(self): + return self.__repr__() + + def __repr__(self): + classname = self.__class__.__name__ + fields = [f"{name}={hex(value)}" for name, value in asdict(self).items()] + return f"{classname}({', '.join(fields)})" diff --git a/xls/modules/zstd/csr_config.x b/xls/modules/zstd/csr_config.x new file mode 100644 index 0000000000..a792757cfa --- /dev/null +++ b/xls/modules/zstd/csr_config.x @@ -0,0 +1,397 @@ +// Copyright 2023-2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains implementation of a proc that handles CSRs. It provides +// an AXI interface for reading and writing the values as well as separate +// request/response channels. Apart from that it has an output channel which +// notifies aboud changes made to CSRs. + +import std; +import xls.modules.zstd.memory.axi; + +pub struct CsrRdReq { + csr: uN[LOG2_REGS_N], +} + +pub struct CsrRdResp { + csr: uN[LOG2_REGS_N], + value: uN[DATA_W], +} + +pub struct CsrWrReq { + csr: uN[LOG2_REGS_N], + value: uN[DATA_W], +} + +pub struct CsrWrResp { } + +pub struct CsrChange { + csr: uN[LOG2_REGS_N], +} + +struct CsrConfigState { + register_file: uN[DATA_W][REGS_N], +} + +pub proc CsrConfig< + ID_W: u32, ADDR_W: u32, DATA_W: u32, REGS_N: u32, + //REGS_INIT: u64[64] = {u64[64]:[u64:0, ...]}, + DATA_W_DIV8: u32 = { DATA_W / u32:8 }, + LOG2_REGS_N: u32 = { std::clog2(REGS_N) }, +> { + + type RdReq = CsrRdReq; + type RdResp = CsrRdResp; + type WrReq = CsrWrReq; + type WrResp = CsrWrResp; + type Change = CsrChange; + + type State = CsrConfigState; + type Data = uN[DATA_W]; + type RegN = uN[LOG2_REGS_N]; + + ext_csr_rd_req_r: chan in; + ext_csr_rd_resp_s: chan out; + ext_csr_wr_req_r: chan in; + ext_csr_wr_resp_s: chan out; + + csr_rd_req_r: chan in; + csr_rd_resp_s: chan out; + csr_wr_req_r: chan in; + csr_wr_resp_s: chan out; + + csr_change_s: chan out; + + config ( + ext_csr_rd_req_r: chan in, + ext_csr_rd_resp_s: chan out, + ext_csr_wr_req_r: chan in, + ext_csr_wr_resp_s: chan out, + + csr_rd_req_r: chan in, + csr_rd_resp_s: chan out, + csr_wr_req_r: chan in, + csr_wr_resp_s: chan out, + csr_change_s: chan out, + ) { + ( + ext_csr_rd_req_r, ext_csr_rd_resp_s, + ext_csr_wr_req_r, ext_csr_wr_resp_s, + csr_rd_req_r, csr_rd_resp_s, + csr_wr_req_r, csr_wr_resp_s, + csr_change_s, + ) + } + + init { + zero!() + } + + next (state: State) { + let register_file = state.register_file; + + let tok_0 = join(); + + // write to CSR + let (tok_1_1_1, ext_csr_wr_req, ext_csr_wr_req_valid) = recv_non_blocking(tok_0, ext_csr_wr_req_r, zero!()); + let (tok_1_1_2, csr_wr_req, csr_wr_req_valid) = recv_non_blocking(tok_0, csr_wr_req_r, zero!()); + + // Mux the Write Requests from External and Internal sources + // Write requests from external source take precedence before internal writes + let wr_req = if (ext_csr_wr_req_valid) { + ext_csr_wr_req + } else if {csr_wr_req_valid} { + csr_wr_req + } else { + zero!() + }; + + let wr_req_valid = ext_csr_wr_req_valid | csr_wr_req_valid; + + let register_file = if wr_req_valid { + update(register_file, wr_req.csr as u32, wr_req.value) + } else { + register_file + }; + + // Send Write Response + let tok_1_1 = join(tok_1_1_1, tok_1_1_2); + let tok_1_2_1 = send_if(tok_1_1, ext_csr_wr_resp_s, ext_csr_wr_req_valid, WrResp {}); + let tok_1_2_2 = send_if(tok_1_1, csr_wr_resp_s, csr_wr_req_valid, WrResp {}); + + // Send change notification + let tok_1_2 = join(tok_1_2_1, tok_1_2_2); + let tok_1_3 = send_if(tok_1_2, csr_change_s, wr_req_valid, Change { csr: wr_req.csr }); + + + // Read from CSRs + let (tok_2_1, ext_csr_rd_req, ext_csr_req_valid) = 
recv_non_blocking(tok_0, ext_csr_rd_req_r, zero!()); + + send_if(tok_2_1, ext_csr_rd_resp_s, ext_csr_req_valid, RdResp { + csr: ext_csr_rd_req.csr, + value: register_file[ext_csr_rd_req.csr as u32], + }); + + let (tok_3_1, csr_rd_req, csr_req_valid) = recv_non_blocking(tok_0, csr_rd_req_r, zero!()); + send_if(tok_3_1, csr_rd_resp_s, csr_req_valid, RdResp { + csr: csr_rd_req.csr, + value: register_file[csr_rd_req.csr as u32], + }); + + State { + register_file: register_file, + } + } +} + +const INST_ID_W = u32:32; +const INST_DATA_W = u32:32; +const INST_ADDR_W = u32:2; +const INST_REGS_N = u32:4; +const INST_DATA_W_DIV8 = INST_DATA_W / u32:8; +const INST_LOG2_REGS_N = std::clog2(INST_REGS_N); + +proc CsrConfigInst { + type InstCsrRdReq = CsrRdReq; + type InstCsrRdResp = CsrRdResp; + type InstCsrWrReq = CsrWrReq; + type InstCsrWrResp = CsrWrResp; + type InstCsrChange = CsrChange; + + config( + ext_csr_rd_req_r: chan in, + ext_csr_rd_resp_s: chan out, + ext_csr_wr_req_r: chan in, + ext_csr_wr_resp_s: chan out, + + csr_rd_req_r: chan in, + csr_rd_resp_s: chan out, + csr_wr_req_r: chan in, + csr_wr_resp_s: chan out, + csr_change_s: chan out, + ) { + spawn CsrConfig ( + ext_csr_rd_req_r, ext_csr_rd_resp_s, + ext_csr_wr_req_r, ext_csr_wr_resp_s, + csr_rd_req_r, csr_rd_resp_s, + csr_wr_req_r, csr_wr_resp_s, + csr_change_s, + ); + } + + init { } + + next (state: ()) { } +} + +const TEST_ID_W = u32:32; +const TEST_DATA_W = u32:32; +const TEST_ADDR_W = u32:2; +const TEST_REGS_N = u32:4; +const TEST_DATA_W_DIV8 = TEST_DATA_W / u32:8; +const TEST_LOG2_REGS_N = std::clog2(TEST_REGS_N); + +type TestCsr = uN[TEST_LOG2_REGS_N]; +type TestValue = uN[TEST_DATA_W]; + +struct TestData { + csr: uN[TEST_LOG2_REGS_N], + value: uN[TEST_DATA_W], +} + +const TEST_DATA = TestData[20]:[ + TestData{ csr: TestCsr:0, value: TestValue:0xca32_9f4a }, + TestData{ csr: TestCsr:1, value: TestValue:0x0fb3_fa42 }, + TestData{ csr: TestCsr:2, value: TestValue:0xe7ee_da41 }, + TestData{ csr: TestCsr:3, value: TestValue:0xef51_f98c }, + TestData{ csr: TestCsr:0, value: TestValue:0x97a3_a2d2 }, + TestData{ csr: TestCsr:0, value: TestValue:0xea06_e94b }, + TestData{ csr: TestCsr:1, value: TestValue:0x5fac_17ce }, + TestData{ csr: TestCsr:3, value: TestValue:0xf9d8_9938 }, + TestData{ csr: TestCsr:2, value: TestValue:0xc262_2d2e }, + TestData{ csr: TestCsr:2, value: TestValue:0xb4dd_424e }, + TestData{ csr: TestCsr:1, value: TestValue:0x01f9_b9e4 }, + TestData{ csr: TestCsr:1, value: TestValue:0x3020_6eec }, + TestData{ csr: TestCsr:3, value: TestValue:0x3124_87b5 }, + TestData{ csr: TestCsr:0, value: TestValue:0x0a49_f5e3 }, + TestData{ csr: TestCsr:2, value: TestValue:0xde3b_5d0f }, + TestData{ csr: TestCsr:3, value: TestValue:0x5948_c1b3 }, + TestData{ csr: TestCsr:0, value: TestValue:0xa26d_851f }, + TestData{ csr: TestCsr:3, value: TestValue:0x3fa9_59c0 }, + TestData{ csr: TestCsr:1, value: TestValue:0x4efd_dd09 }, + TestData{ csr: TestCsr:1, value: TestValue:0x6d75_058a }, +]; + +#[test_proc] +proc CsrConfig_test { + type TestCsrRdReq = CsrRdReq; + type TestCsrRdResp = CsrRdResp; + type TestCsrWrReq = CsrWrReq; + type TestCsrWrResp = CsrWrResp; + type TestCsrChange = CsrChange; + + terminator: chan out; + + ext_csr_rd_req_s: chan out; + ext_csr_rd_resp_r: chan in; + ext_csr_wr_req_s: chan out; + ext_csr_wr_resp_r: chan in; + + csr_rd_req_s: chan out; + csr_rd_resp_r: chan in; + csr_wr_req_s: chan out; + csr_wr_resp_r: chan in; + + csr_change_r: chan in; + + config (terminator: chan out) { + let (ext_csr_rd_req_s, 
ext_csr_rd_req_r) = chan("ext_csr_rd_req"); + let (ext_csr_rd_resp_s, ext_csr_rd_resp_r) = chan("ext_csr_rd_resp"); + + let (ext_csr_wr_req_s, ext_csr_wr_req_r) = chan("ext_csr_wr_req"); + let (ext_csr_wr_resp_s, ext_csr_wr_resp_r) = chan("ext_csr_wr_resp"); + + let (csr_rd_req_s, csr_rd_req_r) = chan("csr_rd_req"); + let (csr_rd_resp_s, csr_rd_resp_r) = chan("csr_rd_resp"); + + let (csr_wr_req_s, csr_wr_req_r) = chan("csr_wr_req"); + let (csr_wr_resp_s, csr_wr_resp_r) = chan("csr_wr_resp"); + + let (csr_change_s, csr_change_r) = chan("csr_change"); + + spawn CsrConfig ( + ext_csr_rd_req_r, ext_csr_rd_resp_s, + ext_csr_wr_req_r, ext_csr_wr_resp_s, + csr_rd_req_r, csr_rd_resp_s, + csr_wr_req_r, csr_wr_resp_s, + csr_change_s, + ); + + ( + terminator, + ext_csr_rd_req_s, ext_csr_rd_resp_r, + ext_csr_wr_req_s, ext_csr_wr_resp_r, + csr_rd_req_s, csr_rd_resp_r, + csr_wr_req_s, csr_wr_resp_r, + csr_change_r, + ) + } + + init { } + + next (state: ()) { + let expected_values = zero!(); + + // Test Writes through external interface + let (tok, expected_values) = for ((i, test_data), (tok, expected_values)): ((u32, TestData), (token, uN[TEST_DATA_W][TEST_REGS_N])) in enumerate(TEST_DATA) { + // write CSR via external interface + let wr_req = TestCsrWrReq { + csr: test_data.csr, + value: test_data.value, + }; + let tok = send(tok, ext_csr_wr_req_s, wr_req); + trace_fmt!("Sent #{} WrReq through external interface: {:#x}", i + u32:1, wr_req); + + let (tok, wr_resp) = recv(tok, ext_csr_wr_resp_r); + trace_fmt!("Received #{} WrResp through external interface: {:#x}", i + u32:1, wr_resp); + + // read CSR change + let (tok, csr_change) = recv(tok, csr_change_r); + trace_fmt!("Received #{} CSR change {:#x}", i + u32:1, csr_change); + + assert_eq(test_data.csr, csr_change.csr); + + // update expected values + let expected_values = update(expected_values, test_data.csr as u32, test_data.value); + + let tok = for (test_csr, tok): (u32, token) in u32:0..u32:4 { + let rd_req = TestCsrRdReq { + csr: test_csr as TestCsr, + }; + let expected_rd_resp = TestCsrRdResp{ + csr: test_csr as TestCsr, + value: expected_values[test_csr as u32] + }; + + // Read CSR via external interface + let tok = send(tok, ext_csr_rd_req_s, rd_req); + trace_fmt!("Sent #{} RdReq through external interface: {:#x}", i + u32:1, rd_req); + let (tok, rd_resp) = recv(tok, ext_csr_rd_resp_r); + trace_fmt!("Received #{} RdResp through external interface: {:#x}", i + u32:1, rd_resp); + assert_eq(expected_rd_resp, rd_resp); + + // Read CSR via internal interface + let tok = send(tok, csr_rd_req_s, rd_req); + trace_fmt!("Sent #{} RdReq through internal interface: {:#x}", i + u32:1, rd_req); + let (tok, csr_rd_resp) = recv(tok, csr_rd_resp_r); + trace_fmt!("Received #{} RdResp through internal interface: {:#x}", i + u32:1, csr_rd_resp); + assert_eq(expected_rd_resp, csr_rd_resp); + tok + }(tok); + + (tok, expected_values) + }((join(), expected_values)); + + // Test writes via internal interface + let (tok, _) = for ((i, test_data), (tok, expected_values)): ((u32, TestData), (token, uN[TEST_DATA_W][TEST_REGS_N])) in enumerate(TEST_DATA) { + // write CSR via request channel + let csr_wr_req = TestCsrWrReq { + csr: test_data.csr, + value: test_data.value, + }; + let tok = send(tok, csr_wr_req_s, csr_wr_req); + trace_fmt!("Sent #{} WrReq through internal interface: {:#x}", i + u32:1, csr_wr_req); + + let (tok, csr_wr_resp) = recv(tok, csr_wr_resp_r); + trace_fmt!("Received #{} WrResp through internal interface {:#x}", i + u32:1, csr_wr_resp); + + // read 
CSR change + let (tok, csr_change) = recv(tok, csr_change_r); + trace_fmt!("Received #{} CSR change {:#x}", i + u32:1, csr_change); + assert_eq(test_data.csr, csr_change.csr); + + // update expected values + let expected_values = update(expected_values, test_data.csr as u32, test_data.value); + + let tok = for (test_csr, tok): (u32, token) in u32:0..u32:4 { + let rd_req = TestCsrRdReq { + csr: test_csr as TestCsr, + }; + let expected_rd_resp = TestCsrRdResp{ + csr: test_csr as TestCsr, + value: expected_values[test_csr as u32] + }; + + // Read CSR via external interface + let tok = send(tok, ext_csr_rd_req_s, rd_req); + trace_fmt!("Sent #{} RdReq through external interface: {:#x}", i + u32:1, rd_req); + let (tok, rd_resp) = recv(tok, ext_csr_rd_resp_r); + trace_fmt!("Received #{} RdResp through external interface: {:#x}", i + u32:1, rd_resp); + assert_eq(expected_rd_resp, rd_resp); + + // Read CSR via internal interface + let tok = send(tok, csr_rd_req_s, rd_req); + trace_fmt!("Sent #{} RdReq through internal interface: {:#x}", i + u32:1, rd_req); + let (tok, csr_rd_resp) = recv(tok, csr_rd_resp_r); + trace_fmt!("Received #{} RdResp through internal interface: {:#x}", i + u32:1, csr_rd_resp); + assert_eq(expected_rd_resp, csr_rd_resp); + tok + }(tok); + + (tok, expected_values) + }((join(), expected_values)); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/data_generator.cc b/xls/modules/zstd/data_generator.cc index 81ffe95ed9..98b1eb4dba 100644 --- a/xls/modules/zstd/data_generator.cc +++ b/xls/modules/zstd/data_generator.cc @@ -60,9 +60,8 @@ static absl::StatusOr CallDecodecorpus( absl::Span args, const std::optional& cwd = std::nullopt, std::optional timeout = std::nullopt) { - XLS_ASSIGN_OR_RETURN( - std::filesystem::path path, - xls::GetXlsRunfilePath("external/zstd/decodecorpus")); + XLS_ASSIGN_OR_RETURN(std::filesystem::path path, + xls::GetXlsRunfilePath("external/zstd/decodecorpus")); std::vector cmd = {path}; cmd.insert(cmd.end(), args.begin(), args.end()); diff --git a/xls/modules/zstd/dec_demux.x b/xls/modules/zstd/dec_demux.x deleted file mode 100644 index 5bcd380f91..0000000000 --- a/xls/modules/zstd/dec_demux.x +++ /dev/null @@ -1,273 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file contains DecoderDemux Proc, which is responsible for -// parsing Block_Header and sending the obtained data to the Raw, RLE, -// or Compressed Block decoders. - -import std; -import xls.modules.zstd.common as common; -import xls.modules.zstd.block_header as block_header; - -type BlockDataPacket = common::BlockDataPacket; - -const DATA_WIDTH = common::DATA_WIDTH; - -enum DecoderDemuxStatus : u2 { - IDLE = 0, - PASS_RAW = 1, - PASS_RLE = 2, - PASS_COMPRESSED = 3, -} - -struct DecoderDemuxState { - status: DecoderDemuxStatus, - byte_to_pass: u21, - send_data: u21, - id: u32, - last_packet: BlockDataPacket, -} - -// It's safe to assume that data contains full header and some extra data. 
-// Previous stage aligns block header and data, it also guarantees -// new block headers in new packets. -fn handle_idle_state(data: BlockDataPacket, state: DecoderDemuxState) - -> DecoderDemuxState { - let header = block_header::extract_block_header(data.data[0:24] as u24); - let data = BlockDataPacket { - data: data.data[24:] as bits[DATA_WIDTH], - length: data.length - u32:24, - id: state.id, - ..data - }; - match header.btype { - common::BlockType::RAW => { - DecoderDemuxState { - status: DecoderDemuxStatus::PASS_RAW, - byte_to_pass: header.size, - send_data: u21:0, - last_packet: data, - ..state - } - }, - common::BlockType::RLE => { - DecoderDemuxState { - status: DecoderDemuxStatus::PASS_RLE, - byte_to_pass: header.size, - send_data: u21:0, - last_packet: data, - ..state - } - }, - common::BlockType::COMPRESSED => { - DecoderDemuxState { - status: DecoderDemuxStatus::PASS_COMPRESSED, - byte_to_pass: header.size, - send_data: u21:0, - last_packet: data, - ..state - } - }, - _ => { - fail!("Should_never_happen", state) - } - } -} - -const ZERO_DECODER_DEMUX_STATE = zero!(); -const ZERO_DATA = zero!(); - -pub proc DecoderDemux { - input_r: chan in; - raw_s: chan out; - rle_s: chan out; - cmp_s: chan out; - - init {(ZERO_DECODER_DEMUX_STATE)} - - config ( - input_r: chan in, - raw_s: chan out, - rle_s: chan out, - cmp_s: chan out, - ) {( - input_r, - raw_s, - rle_s, - cmp_s - )} - - next (state: DecoderDemuxState) { - let tok = join(); - let (tok, data) = recv_if(tok, input_r, !state.last_packet.last, ZERO_DATA); - if (!state.last_packet.last) { - trace_fmt!("DecoderDemux: recv: {:#x}", data); - } else {}; - let (send_raw, send_rle, send_cmp, new_state) = match state.status { - DecoderDemuxStatus::IDLE => - (false, false, false, handle_idle_state(data, state)), - DecoderDemuxStatus::PASS_RAW => { - let new_state = DecoderDemuxState { - send_data: state.send_data + (state.last_packet.length >> 3) as u21, - last_packet: data, - ..state - }; - (true, false, false, new_state) - }, - DecoderDemuxStatus::PASS_RLE => { - let new_state = DecoderDemuxState { - send_data: state.send_data + state.byte_to_pass, - last_packet: data, - ..state - }; - (false, true, false, new_state) - }, - DecoderDemuxStatus::PASS_COMPRESSED => { - let new_state = DecoderDemuxState { - send_data: state.send_data +(state.last_packet.length >> 3) as u21, - last_packet: data, - ..state - }; - (false, false, true, new_state) - }, - _ => fail!("IDLE_STATE_IMPOSSIBLE", (false, false, false, state)) - }; - - let end_state = if (send_raw || send_rle || send_cmp) { - let max_packet_width = DATA_WIDTH; - let block_size_bits = u32:24 + (state.byte_to_pass as u32 << 3); - if (!send_rle) && ((block_size_bits <= max_packet_width) && - ((block_size_bits) != state.last_packet.length) && !state.last_packet.last) { - // Demuxer expect that blocks would be received in a separate packets, - // even if 2 block would fit entirely or even partially in a single packet. - // It is the job of top-level ZSTD decoder to split each block into at least one - // BlockDataPacket. - // For Raw and Compressed blocks it is illegal to have block of size smaller than - // max size of packet and have packet length greater than this size. 
- fail!("Should_never_happen", state) - } else { - state - }; - let data_to_send = BlockDataPacket {id: state.id, ..state.last_packet}; - let tok = send_if(tok, raw_s, send_raw, data_to_send); - if (send_raw) { - trace_fmt!("DecoderDemux: send_raw: {:#x}", data_to_send); - } else {}; - // RLE module expects single byte in data field - // and block length in length field. This is different from - // Raw and Compressed modules. - let rle_data = BlockDataPacket{ - data: state.last_packet.data[0:8] as bits[DATA_WIDTH], - length: state.byte_to_pass as u32, - id: state.id, - ..state.last_packet - }; - let tok = send_if(tok, rle_s, send_rle, rle_data); - if (send_rle) { - trace_fmt!("DecoderDemux: send_rle: {:#x}", rle_data); - } else {}; - let tok = send_if(tok, cmp_s, send_cmp, data_to_send); - if (send_cmp) { - trace_fmt!("DecoderDemux: send_cmp: {:#x}", data_to_send); - } else {}; - let end_state = if (new_state.send_data == new_state.byte_to_pass) { - let next_id = if (state.last_packet.last && state.last_packet.last_block) { - u32: 0 - } else { - state.id + u32:1 - }; - DecoderDemuxState { - status: DecoderDemuxStatus::IDLE, - byte_to_pass: u21:0, - send_data: u21:0, - id: next_id, - last_packet: ZERO_DATA, - } - } else { - new_state - }; - end_state - } else { - new_state - }; - - end_state - } -} - -#[test_proc] -proc DecoderDemuxTest { - terminator: chan out; - input_s: chan out; - raw_r: chan in; - rle_r: chan in; - cmp_r: chan in; - - init {} - - config (terminator: chan out) { - let (raw_s, raw_r) = chan("raw"); - let (rle_s, rle_r) = chan("rle"); - let (cmp_s, cmp_r) = chan("cmp"); - let (input_s, input_r) = chan("input"); - - spawn DecoderDemux(input_r, raw_s, rle_s, cmp_s); - (terminator, input_s, raw_r, rle_r, cmp_r) - } - - next(state: ()) { - let tok = join(); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x11111111110000c0, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x2222222222111111, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x3333333333222222, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000333333, length: u32:24 }); - - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xAAAAAAAAAA000100, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xBBBBBBBBBBAAAAAA, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xCCCCCCCCCCBBBBBB, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000CCCCCC, length: u32:24 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0xDDDDDDDDDDDDDDDD, length: u32:64 }); - - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000FF000102, length: u32:32 }); - - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: 
bool: true, data: bits[DATA_WIDTH]:0x4444444444000145, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x5555555555444444, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x6666666666555555, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x7777777777666666, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x8888888888777777, length: u32:64 }); - let tok = send(tok, input_s, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: true, data: bits[DATA_WIDTH]:0x0000000000888888, length: u32:24 }); - - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000001111111111, length: u32:40 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x2222222222111111, length: u32:64 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x3333333333222222, length: u32:64 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:0, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000333333, length: u32:24 }); - - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x000000AAAAAAAAAA, length: u32:40 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xBBBBBBBBBBAAAAAA, length: u32:64 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0xCCCCCCCCCCBBBBBB, length: u32:64 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: false, last_block: bool: false, data: bits[DATA_WIDTH]:0x0000000000CCCCCC, length: u32:24 }); - let (tok, data) = recv(tok, raw_r); assert_eq(data, BlockDataPacket { id: u32:1, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0xDDDDDDDDDDDDDDDD, length: u32:64 }); - - let (tok, data) = recv(tok, rle_r); assert_eq(data, BlockDataPacket { id: u32:2, last: bool: true, last_block: bool: false, data: bits[DATA_WIDTH]:0xFF, length: u32:32 }); - - let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x0000004444444444, length: u32:40 }); - let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x5555555555444444, length: u32:64 }); - let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x6666666666555555, length: u32:64 }); - let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x7777777777666666, length: u32:64 }); - let (tok, data) = 
recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: false, last_block: bool: true, data: bits[DATA_WIDTH]:0x8888888888777777, length: u32:64 }); - let (tok, data) = recv(tok, cmp_r); assert_eq(data, BlockDataPacket { id: u32:3, last: bool: true, last_block: bool: true, data: bits[DATA_WIDTH]:0x0000000000888888, length: u32:24 }); - - send(tok, terminator, true); - } -} diff --git a/xls/modules/zstd/external/BUILD b/xls/modules/zstd/external/BUILD new file mode 100644 index 0000000000..f24cb69fe0 --- /dev/null +++ b/xls/modules/zstd/external/BUILD @@ -0,0 +1,33 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +package( + default_applicable_licenses = ["//:license"], + default_visibility = ["//xls:xls_users"], + licenses = ["notice"], +) + +exports_files( + [ + "arbiter.v", + "axi_crossbar.v", + "axi_crossbar_addr.v", + "axi_crossbar_rd.v", + "axi_crossbar_wr.v", + "axi_crossbar_wrapper.v", + "axi_register_rd.v", + "axi_register_wr.v", + "priority_encoder.v", + ], +) diff --git a/xls/modules/zstd/external/arbiter.v b/xls/modules/zstd/external/arbiter.v new file mode 100644 index 0000000000..cfac70d1c6 --- /dev/null +++ b/xls/modules/zstd/external/arbiter.v @@ -0,0 +1,159 @@ +/* + +Copyright (c) 2014-2021 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Arbiter module + */ +module arbiter # +( + parameter PORTS = 4, + // select round robin arbitration + parameter ARB_TYPE_ROUND_ROBIN = 0, + // blocking arbiter enable + parameter ARB_BLOCK = 0, + // block on acknowledge assert when nonzero, request deassert when 0 + parameter ARB_BLOCK_ACK = 1, + // LSB priority selection + parameter ARB_LSB_HIGH_PRIORITY = 0 +) +( + input wire clk, + input wire rst, + + input wire [PORTS-1:0] request, + input wire [PORTS-1:0] acknowledge, + + output wire [PORTS-1:0] grant, + output wire grant_valid, + output wire [$clog2(PORTS)-1:0] grant_encoded +); + +reg [PORTS-1:0] grant_reg = 0, grant_next; +reg grant_valid_reg = 0, grant_valid_next; +reg [$clog2(PORTS)-1:0] grant_encoded_reg = 0, grant_encoded_next; + +assign grant_valid = grant_valid_reg; +assign grant = grant_reg; +assign grant_encoded = grant_encoded_reg; + +wire request_valid; +wire [$clog2(PORTS)-1:0] request_index; +wire [PORTS-1:0] request_mask; + +priority_encoder #( + .WIDTH(PORTS), + .LSB_HIGH_PRIORITY(ARB_LSB_HIGH_PRIORITY) +) +priority_encoder_inst ( + .input_unencoded(request), + .output_valid(request_valid), + .output_encoded(request_index), + .output_unencoded(request_mask) +); + +reg [PORTS-1:0] mask_reg = 0, mask_next; + +wire masked_request_valid; +wire [$clog2(PORTS)-1:0] masked_request_index; +wire [PORTS-1:0] masked_request_mask; + +priority_encoder #( + .WIDTH(PORTS), + .LSB_HIGH_PRIORITY(ARB_LSB_HIGH_PRIORITY) +) +priority_encoder_masked ( + .input_unencoded(request & mask_reg), + .output_valid(masked_request_valid), + .output_encoded(masked_request_index), + .output_unencoded(masked_request_mask) +); + +always @* begin + grant_next = 0; + grant_valid_next = 0; + grant_encoded_next = 0; + mask_next = mask_reg; + + if (ARB_BLOCK && !ARB_BLOCK_ACK && grant_reg & request) begin + // granted request still asserted; hold it + grant_valid_next = grant_valid_reg; + grant_next = grant_reg; + grant_encoded_next = grant_encoded_reg; + end else if (ARB_BLOCK && ARB_BLOCK_ACK && grant_valid && !(grant_reg & acknowledge)) begin + // granted request not yet acknowledged; hold it + grant_valid_next = grant_valid_reg; + grant_next = grant_reg; + grant_encoded_next = grant_encoded_reg; + end else if (request_valid) begin + if (ARB_TYPE_ROUND_ROBIN) begin + if (masked_request_valid) begin + grant_valid_next = 1; + grant_next = masked_request_mask; + grant_encoded_next = masked_request_index; + if (ARB_LSB_HIGH_PRIORITY) begin + mask_next = {PORTS{1'b1}} << (masked_request_index + 1); + end else begin + mask_next = {PORTS{1'b1}} >> (PORTS - masked_request_index); + end + end else begin + grant_valid_next = 1; + grant_next = request_mask; + grant_encoded_next = request_index; + if (ARB_LSB_HIGH_PRIORITY) begin + mask_next = {PORTS{1'b1}} << (request_index + 1); + end else begin + mask_next = {PORTS{1'b1}} >> (PORTS - request_index); + end + end + end else begin + grant_valid_next = 1; + grant_next = request_mask; + grant_encoded_next = request_index; + end + end +end + +always @(posedge clk) begin + if (rst) begin + grant_reg <= 0; + grant_valid_reg <= 0; + grant_encoded_reg <= 0; + mask_reg <= 0; + end else begin + grant_reg <= grant_next; + grant_valid_reg <= grant_valid_next; + grant_encoded_reg <= grant_encoded_next; + mask_reg <= mask_next; + end +end + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_crossbar.v b/xls/modules/zstd/external/axi_crossbar.v new file mode 
100644 index 0000000000..991d45403a --- /dev/null +++ b/xls/modules/zstd/external/axi_crossbar.v @@ -0,0 +1,391 @@ +/* + +Copyright (c) 2018 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 crossbar + */ +module axi_crossbar # +( + // Number of AXI inputs (slave interfaces) + parameter S_COUNT = 4, + // Number of AXI outputs (master interfaces) + parameter M_COUNT = 4, + // Width of data bus in bits + parameter DATA_WIDTH = 32, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // Width of wstrb (width of data bus in words) + parameter STRB_WIDTH = (DATA_WIDTH/8), + // Input ID field width (from AXI masters) + parameter S_ID_WIDTH = 8, + // Output ID field width (towards AXI slaves) + // Additional bits required for response routing + parameter M_ID_WIDTH = S_ID_WIDTH+$clog2(S_COUNT), + // Propagate awuser signal + parameter AWUSER_ENABLE = 0, + // Width of awuser signal + parameter AWUSER_WIDTH = 1, + // Propagate wuser signal + parameter WUSER_ENABLE = 0, + // Width of wuser signal + parameter WUSER_WIDTH = 1, + // Propagate buser signal + parameter BUSER_ENABLE = 0, + // Width of buser signal + parameter BUSER_WIDTH = 1, + // Propagate aruser signal + parameter ARUSER_ENABLE = 0, + // Width of aruser signal + parameter ARUSER_WIDTH = 1, + // Propagate ruser signal + parameter RUSER_ENABLE = 0, + // Width of ruser signal + parameter RUSER_WIDTH = 1, + // Number of concurrent unique IDs for each slave interface + // S_COUNT concatenated fields of 32 bits + parameter S_THREADS = {S_COUNT{32'd2}}, + // Number of concurrent operations for each slave interface + // S_COUNT concatenated fields of 32 bits + parameter S_ACCEPT = {S_COUNT{32'd16}}, + // Number of regions per master interface + parameter M_REGIONS = 1, + // Master interface base addresses + // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_WIDTH bits + // set to zero for default addressing based on M_ADDR_WIDTH + parameter M_BASE_ADDR = 0, + // Master interface address widths + // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits + parameter M_ADDR_WIDTH = {M_COUNT{{M_REGIONS{32'd24}}}}, + // Read connections between interfaces + // M_COUNT concatenated fields of S_COUNT bits + parameter M_CONNECT_READ = {M_COUNT{{S_COUNT{1'b1}}}}, + // Write connections between interfaces + // M_COUNT concatenated fields of S_COUNT bits + parameter M_CONNECT_WRITE = {M_COUNT{{S_COUNT{1'b1}}}}, + 
// Number of concurrent operations for each master interface + // M_COUNT concatenated fields of 32 bits + parameter M_ISSUE = {M_COUNT{32'd4}}, + // Secure master (fail operations based on awprot/arprot) + // M_COUNT bits + parameter M_SECURE = {M_COUNT{1'b0}}, + // Slave interface AW channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_AW_REG_TYPE = {S_COUNT{2'd0}}, + // Slave interface W channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_W_REG_TYPE = {S_COUNT{2'd0}}, + // Slave interface B channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_B_REG_TYPE = {S_COUNT{2'd1}}, + // Slave interface AR channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_AR_REG_TYPE = {S_COUNT{2'd0}}, + // Slave interface R channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_R_REG_TYPE = {S_COUNT{2'd2}}, + // Master interface AW channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_AW_REG_TYPE = {M_COUNT{2'd1}}, + // Master interface W channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_W_REG_TYPE = {M_COUNT{2'd2}}, + // Master interface B channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_B_REG_TYPE = {M_COUNT{2'd0}}, + // Master interface AR channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_AR_REG_TYPE = {M_COUNT{2'd1}}, + // Master interface R channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_R_REG_TYPE = {M_COUNT{2'd0}} +) +( + input wire clk, + input wire rst, + + /* + * AXI slave interfaces + */ + input wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_awid, + input wire [S_COUNT*ADDR_WIDTH-1:0] s_axi_awaddr, + input wire [S_COUNT*8-1:0] s_axi_awlen, + input wire [S_COUNT*3-1:0] s_axi_awsize, + input wire [S_COUNT*2-1:0] s_axi_awburst, + input wire [S_COUNT-1:0] s_axi_awlock, + input wire [S_COUNT*4-1:0] s_axi_awcache, + input wire [S_COUNT*3-1:0] s_axi_awprot, + input wire [S_COUNT*4-1:0] s_axi_awqos, + input wire [S_COUNT*AWUSER_WIDTH-1:0] s_axi_awuser, + input wire [S_COUNT-1:0] s_axi_awvalid, + output wire [S_COUNT-1:0] s_axi_awready, + input wire [S_COUNT*DATA_WIDTH-1:0] s_axi_wdata, + input wire [S_COUNT*STRB_WIDTH-1:0] s_axi_wstrb, + input wire [S_COUNT-1:0] s_axi_wlast, + input wire [S_COUNT*WUSER_WIDTH-1:0] s_axi_wuser, + input wire [S_COUNT-1:0] s_axi_wvalid, + output wire [S_COUNT-1:0] s_axi_wready, + output wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_bid, + output wire [S_COUNT*2-1:0] s_axi_bresp, + output wire [S_COUNT*BUSER_WIDTH-1:0] s_axi_buser, + output wire [S_COUNT-1:0] s_axi_bvalid, + input wire [S_COUNT-1:0] s_axi_bready, + input wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_arid, + input wire [S_COUNT*ADDR_WIDTH-1:0] s_axi_araddr, + input wire [S_COUNT*8-1:0] s_axi_arlen, + input wire [S_COUNT*3-1:0] s_axi_arsize, + input wire [S_COUNT*2-1:0] s_axi_arburst, + input wire [S_COUNT-1:0] s_axi_arlock, + input wire [S_COUNT*4-1:0] s_axi_arcache, + input wire [S_COUNT*3-1:0] s_axi_arprot, + input wire [S_COUNT*4-1:0] s_axi_arqos, + input wire [S_COUNT*ARUSER_WIDTH-1:0] s_axi_aruser, + input wire [S_COUNT-1:0] s_axi_arvalid, + output wire [S_COUNT-1:0] s_axi_arready, + output wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_rid, + output wire 
[S_COUNT*DATA_WIDTH-1:0] s_axi_rdata, + output wire [S_COUNT*2-1:0] s_axi_rresp, + output wire [S_COUNT-1:0] s_axi_rlast, + output wire [S_COUNT*RUSER_WIDTH-1:0] s_axi_ruser, + output wire [S_COUNT-1:0] s_axi_rvalid, + input wire [S_COUNT-1:0] s_axi_rready, + + /* + * AXI master interfaces + */ + output wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_awid, + output wire [M_COUNT*ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [M_COUNT*8-1:0] m_axi_awlen, + output wire [M_COUNT*3-1:0] m_axi_awsize, + output wire [M_COUNT*2-1:0] m_axi_awburst, + output wire [M_COUNT-1:0] m_axi_awlock, + output wire [M_COUNT*4-1:0] m_axi_awcache, + output wire [M_COUNT*3-1:0] m_axi_awprot, + output wire [M_COUNT*4-1:0] m_axi_awqos, + output wire [M_COUNT*4-1:0] m_axi_awregion, + output wire [M_COUNT*AWUSER_WIDTH-1:0] m_axi_awuser, + output wire [M_COUNT-1:0] m_axi_awvalid, + input wire [M_COUNT-1:0] m_axi_awready, + output wire [M_COUNT*DATA_WIDTH-1:0] m_axi_wdata, + output wire [M_COUNT*STRB_WIDTH-1:0] m_axi_wstrb, + output wire [M_COUNT-1:0] m_axi_wlast, + output wire [M_COUNT*WUSER_WIDTH-1:0] m_axi_wuser, + output wire [M_COUNT-1:0] m_axi_wvalid, + input wire [M_COUNT-1:0] m_axi_wready, + input wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_bid, + input wire [M_COUNT*2-1:0] m_axi_bresp, + input wire [M_COUNT*BUSER_WIDTH-1:0] m_axi_buser, + input wire [M_COUNT-1:0] m_axi_bvalid, + output wire [M_COUNT-1:0] m_axi_bready, + output wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_arid, + output wire [M_COUNT*ADDR_WIDTH-1:0] m_axi_araddr, + output wire [M_COUNT*8-1:0] m_axi_arlen, + output wire [M_COUNT*3-1:0] m_axi_arsize, + output wire [M_COUNT*2-1:0] m_axi_arburst, + output wire [M_COUNT-1:0] m_axi_arlock, + output wire [M_COUNT*4-1:0] m_axi_arcache, + output wire [M_COUNT*3-1:0] m_axi_arprot, + output wire [M_COUNT*4-1:0] m_axi_arqos, + output wire [M_COUNT*4-1:0] m_axi_arregion, + output wire [M_COUNT*ARUSER_WIDTH-1:0] m_axi_aruser, + output wire [M_COUNT-1:0] m_axi_arvalid, + input wire [M_COUNT-1:0] m_axi_arready, + input wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_rid, + input wire [M_COUNT*DATA_WIDTH-1:0] m_axi_rdata, + input wire [M_COUNT*2-1:0] m_axi_rresp, + input wire [M_COUNT-1:0] m_axi_rlast, + input wire [M_COUNT*RUSER_WIDTH-1:0] m_axi_ruser, + input wire [M_COUNT-1:0] m_axi_rvalid, + output wire [M_COUNT-1:0] m_axi_rready +); + +axi_crossbar_wr #( + .S_COUNT(S_COUNT), + .M_COUNT(M_COUNT), + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .S_ID_WIDTH(S_ID_WIDTH), + .M_ID_WIDTH(M_ID_WIDTH), + .AWUSER_ENABLE(AWUSER_ENABLE), + .AWUSER_WIDTH(AWUSER_WIDTH), + .WUSER_ENABLE(WUSER_ENABLE), + .WUSER_WIDTH(WUSER_WIDTH), + .BUSER_ENABLE(BUSER_ENABLE), + .BUSER_WIDTH(BUSER_WIDTH), + .S_THREADS(S_THREADS), + .S_ACCEPT(S_ACCEPT), + .M_REGIONS(M_REGIONS), + .M_BASE_ADDR(M_BASE_ADDR), + .M_ADDR_WIDTH(M_ADDR_WIDTH), + .M_CONNECT(M_CONNECT_WRITE), + .M_ISSUE(M_ISSUE), + .M_SECURE(M_SECURE), + .S_AW_REG_TYPE(S_AW_REG_TYPE), + .S_W_REG_TYPE (S_W_REG_TYPE), + .S_B_REG_TYPE (S_B_REG_TYPE) +) +axi_crossbar_wr_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI slave interfaces + */ + .s_axi_awid(s_axi_awid), + .s_axi_awaddr(s_axi_awaddr), + .s_axi_awlen(s_axi_awlen), + .s_axi_awsize(s_axi_awsize), + .s_axi_awburst(s_axi_awburst), + .s_axi_awlock(s_axi_awlock), + .s_axi_awcache(s_axi_awcache), + .s_axi_awprot(s_axi_awprot), + .s_axi_awqos(s_axi_awqos), + .s_axi_awuser(s_axi_awuser), + .s_axi_awvalid(s_axi_awvalid), + .s_axi_awready(s_axi_awready), + .s_axi_wdata(s_axi_wdata), + .s_axi_wstrb(s_axi_wstrb), + .s_axi_wlast(s_axi_wlast), + 
.s_axi_wuser(s_axi_wuser), + .s_axi_wvalid(s_axi_wvalid), + .s_axi_wready(s_axi_wready), + .s_axi_bid(s_axi_bid), + .s_axi_bresp(s_axi_bresp), + .s_axi_buser(s_axi_buser), + .s_axi_bvalid(s_axi_bvalid), + .s_axi_bready(s_axi_bready), + + /* + * AXI master interfaces + */ + .m_axi_awid(m_axi_awid), + .m_axi_awaddr(m_axi_awaddr), + .m_axi_awlen(m_axi_awlen), + .m_axi_awsize(m_axi_awsize), + .m_axi_awburst(m_axi_awburst), + .m_axi_awlock(m_axi_awlock), + .m_axi_awcache(m_axi_awcache), + .m_axi_awprot(m_axi_awprot), + .m_axi_awqos(m_axi_awqos), + .m_axi_awregion(m_axi_awregion), + .m_axi_awuser(m_axi_awuser), + .m_axi_awvalid(m_axi_awvalid), + .m_axi_awready(m_axi_awready), + .m_axi_wdata(m_axi_wdata), + .m_axi_wstrb(m_axi_wstrb), + .m_axi_wlast(m_axi_wlast), + .m_axi_wuser(m_axi_wuser), + .m_axi_wvalid(m_axi_wvalid), + .m_axi_wready(m_axi_wready), + .m_axi_bid(m_axi_bid), + .m_axi_bresp(m_axi_bresp), + .m_axi_buser(m_axi_buser), + .m_axi_bvalid(m_axi_bvalid), + .m_axi_bready(m_axi_bready) +); + +axi_crossbar_rd #( + .S_COUNT(S_COUNT), + .M_COUNT(M_COUNT), + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .S_ID_WIDTH(S_ID_WIDTH), + .M_ID_WIDTH(M_ID_WIDTH), + .ARUSER_ENABLE(ARUSER_ENABLE), + .ARUSER_WIDTH(ARUSER_WIDTH), + .RUSER_ENABLE(RUSER_ENABLE), + .RUSER_WIDTH(RUSER_WIDTH), + .S_THREADS(S_THREADS), + .S_ACCEPT(S_ACCEPT), + .M_REGIONS(M_REGIONS), + .M_BASE_ADDR(M_BASE_ADDR), + .M_ADDR_WIDTH(M_ADDR_WIDTH), + .M_CONNECT(M_CONNECT_READ), + .M_ISSUE(M_ISSUE), + .M_SECURE(M_SECURE), + .S_AR_REG_TYPE(S_AR_REG_TYPE), + .S_R_REG_TYPE (S_R_REG_TYPE) +) +axi_crossbar_rd_inst ( + .clk(clk), + .rst(rst), + + /* + * AXI slave interfaces + */ + .s_axi_arid(s_axi_arid), + .s_axi_araddr(s_axi_araddr), + .s_axi_arlen(s_axi_arlen), + .s_axi_arsize(s_axi_arsize), + .s_axi_arburst(s_axi_arburst), + .s_axi_arlock(s_axi_arlock), + .s_axi_arcache(s_axi_arcache), + .s_axi_arprot(s_axi_arprot), + .s_axi_arqos(s_axi_arqos), + .s_axi_aruser(s_axi_aruser), + .s_axi_arvalid(s_axi_arvalid), + .s_axi_arready(s_axi_arready), + .s_axi_rid(s_axi_rid), + .s_axi_rdata(s_axi_rdata), + .s_axi_rresp(s_axi_rresp), + .s_axi_rlast(s_axi_rlast), + .s_axi_ruser(s_axi_ruser), + .s_axi_rvalid(s_axi_rvalid), + .s_axi_rready(s_axi_rready), + + /* + * AXI master interfaces + */ + .m_axi_arid(m_axi_arid), + .m_axi_araddr(m_axi_araddr), + .m_axi_arlen(m_axi_arlen), + .m_axi_arsize(m_axi_arsize), + .m_axi_arburst(m_axi_arburst), + .m_axi_arlock(m_axi_arlock), + .m_axi_arcache(m_axi_arcache), + .m_axi_arprot(m_axi_arprot), + .m_axi_arqos(m_axi_arqos), + .m_axi_arregion(m_axi_arregion), + .m_axi_aruser(m_axi_aruser), + .m_axi_arvalid(m_axi_arvalid), + .m_axi_arready(m_axi_arready), + .m_axi_rid(m_axi_rid), + .m_axi_rdata(m_axi_rdata), + .m_axi_rresp(m_axi_rresp), + .m_axi_rlast(m_axi_rlast), + .m_axi_ruser(m_axi_ruser), + .m_axi_rvalid(m_axi_rvalid), + .m_axi_rready(m_axi_rready) +); + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_crossbar_addr.v b/xls/modules/zstd/external/axi_crossbar_addr.v new file mode 100644 index 0000000000..7b7846526b --- /dev/null +++ b/xls/modules/zstd/external/axi_crossbar_addr.v @@ -0,0 +1,418 @@ +/* + +Copyright (c) 2018 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the 
Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 crossbar address decode and admission control + */ +module axi_crossbar_addr # +( + // Slave interface index + parameter S = 0, + // Number of AXI inputs (slave interfaces) + parameter S_COUNT = 4, + // Number of AXI outputs (master interfaces) + parameter M_COUNT = 4, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // ID field width + parameter ID_WIDTH = 8, + // Number of concurrent unique IDs + parameter S_THREADS = 32'd2, + // Number of concurrent operations + parameter S_ACCEPT = 32'd16, + // Number of regions per master interface + parameter M_REGIONS = 1, + // Master interface base addresses + // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_WIDTH bits + // set to zero for default addressing based on M_ADDR_WIDTH + parameter M_BASE_ADDR = 0, + // Master interface address widths + // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits + parameter M_ADDR_WIDTH = {M_COUNT{{M_REGIONS{32'd24}}}}, + // Connections between interfaces + // M_COUNT concatenated fields of S_COUNT bits + parameter M_CONNECT = {M_COUNT{{S_COUNT{1'b1}}}}, + // Secure master (fail operations based on awprot/arprot) + // M_COUNT bits + parameter M_SECURE = {M_COUNT{1'b0}}, + // Enable write command output + parameter WC_OUTPUT = 0 +) +( + input wire clk, + input wire rst, + + /* + * Address input + */ + input wire [ID_WIDTH-1:0] s_axi_aid, + input wire [ADDR_WIDTH-1:0] s_axi_aaddr, + input wire [2:0] s_axi_aprot, + input wire [3:0] s_axi_aqos, + input wire s_axi_avalid, + output wire s_axi_aready, + + /* + * Address output + */ + output wire [3:0] m_axi_aregion, + output wire [$clog2(M_COUNT)-1:0] m_select, + output wire m_axi_avalid, + input wire m_axi_aready, + + /* + * Write command output + */ + output wire [$clog2(M_COUNT)-1:0] m_wc_select, + output wire m_wc_decerr, + output wire m_wc_valid, + input wire m_wc_ready, + + /* + * Reply command output + */ + output wire m_rc_decerr, + output wire m_rc_valid, + input wire m_rc_ready, + + /* + * Completion input + */ + input wire [ID_WIDTH-1:0] s_cpl_id, + input wire s_cpl_valid +); + +parameter CL_S_COUNT = $clog2(S_COUNT); +parameter CL_M_COUNT = $clog2(M_COUNT); + +parameter S_INT_THREADS = S_THREADS > S_ACCEPT ? 
S_ACCEPT : S_THREADS; +parameter CL_S_INT_THREADS = $clog2(S_INT_THREADS); +parameter CL_S_ACCEPT = $clog2(S_ACCEPT); + +// default address computation +function [M_COUNT*M_REGIONS*ADDR_WIDTH-1:0] calcBaseAddrs(input [31:0] dummy); + integer i; + reg [ADDR_WIDTH-1:0] base; + reg [ADDR_WIDTH-1:0] width; + reg [ADDR_WIDTH-1:0] size; + reg [ADDR_WIDTH-1:0] mask; + begin + calcBaseAddrs = {M_COUNT*M_REGIONS*ADDR_WIDTH{1'b0}}; + base = 0; + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + width = M_ADDR_WIDTH[i*32 +: 32]; + mask = {ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - width); + size = mask + 1; + if (width > 0) begin + if ((base & mask) != 0) begin + base = base + size - (base & mask); // align + end + calcBaseAddrs[i * ADDR_WIDTH +: ADDR_WIDTH] = base; + base = base + size; // increment + end + end + end +endfunction + +parameter M_BASE_ADDR_INT = M_BASE_ADDR ? M_BASE_ADDR : calcBaseAddrs(0); + +integer i, j; + +// check configuration +initial begin + if (S_ACCEPT < 1) begin + $error("Error: need at least 1 accept (instance %m)"); + $finish; + end + + if (S_THREADS < 1) begin + $error("Error: need at least 1 thread (instance %m)"); + $finish; + end + + if (S_THREADS > S_ACCEPT) begin + $warning("Warning: requested thread count larger than accept count; limiting thread count to accept count (instance %m)"); + end + + if (M_REGIONS < 1) begin + $error("Error: need at least 1 region (instance %m)"); + $finish; + end + + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + if (M_ADDR_WIDTH[i*32 +: 32] && (M_ADDR_WIDTH[i*32 +: 32] < 12 || M_ADDR_WIDTH[i*32 +: 32] > ADDR_WIDTH)) begin + $error("Error: address width out of range (instance %m)"); + $finish; + end + end + + $display("Addressing configuration for axi_crossbar_addr instance %m"); + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + if (M_ADDR_WIDTH[i*32 +: 32]) begin + $display("%2d (%2d): %x / %02d -- %x-%x", + i/M_REGIONS, i%M_REGIONS, + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH], + M_ADDR_WIDTH[i*32 +: 32], + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] & ({ADDR_WIDTH{1'b1}} << M_ADDR_WIDTH[i*32 +: 32]), + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] | ({ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - M_ADDR_WIDTH[i*32 +: 32])) + ); + end + end + + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + if ((M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] & (2**M_ADDR_WIDTH[i*32 +: 32]-1)) != 0) begin + $display("Region not aligned:"); + $display("%2d (%2d): %x / %2d -- %x-%x", + i/M_REGIONS, i%M_REGIONS, + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH], + M_ADDR_WIDTH[i*32 +: 32], + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] & ({ADDR_WIDTH{1'b1}} << M_ADDR_WIDTH[i*32 +: 32]), + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] | ({ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - M_ADDR_WIDTH[i*32 +: 32])) + ); + $error("Error: address range not aligned (instance %m)"); + $finish; + end + end + + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + for (j = i+1; j < M_COUNT*M_REGIONS; j = j + 1) begin + if (M_ADDR_WIDTH[i*32 +: 32] && M_ADDR_WIDTH[j*32 +: 32]) begin + if (((M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] & ({ADDR_WIDTH{1'b1}} << M_ADDR_WIDTH[i*32 +: 32])) <= (M_BASE_ADDR_INT[j*ADDR_WIDTH +: ADDR_WIDTH] | ({ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - M_ADDR_WIDTH[j*32 +: 32])))) + && ((M_BASE_ADDR_INT[j*ADDR_WIDTH +: ADDR_WIDTH] & ({ADDR_WIDTH{1'b1}} << M_ADDR_WIDTH[j*32 +: 32])) <= (M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] | ({ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - M_ADDR_WIDTH[i*32 +: 32]))))) begin + $display("Overlapping regions:"); + $display("%2d (%2d): %x / 
%2d -- %x-%x", + i/M_REGIONS, i%M_REGIONS, + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH], + M_ADDR_WIDTH[i*32 +: 32], + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] & ({ADDR_WIDTH{1'b1}} << M_ADDR_WIDTH[i*32 +: 32]), + M_BASE_ADDR_INT[i*ADDR_WIDTH +: ADDR_WIDTH] | ({ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - M_ADDR_WIDTH[i*32 +: 32])) + ); + $display("%2d (%2d): %x / %2d -- %x-%x", + j/M_REGIONS, j%M_REGIONS, + M_BASE_ADDR_INT[j*ADDR_WIDTH +: ADDR_WIDTH], + M_ADDR_WIDTH[j*32 +: 32], + M_BASE_ADDR_INT[j*ADDR_WIDTH +: ADDR_WIDTH] & ({ADDR_WIDTH{1'b1}} << M_ADDR_WIDTH[j*32 +: 32]), + M_BASE_ADDR_INT[j*ADDR_WIDTH +: ADDR_WIDTH] | ({ADDR_WIDTH{1'b1}} >> (ADDR_WIDTH - M_ADDR_WIDTH[j*32 +: 32])) + ); + $error("Error: address ranges overlap (instance %m)"); + $finish; + end + end + end + end +end + +localparam [2:0] + STATE_IDLE = 3'd0, + STATE_DECODE = 3'd1; + +reg [2:0] state_reg = STATE_IDLE, state_next; + +reg s_axi_aready_reg = 0, s_axi_aready_next; + +reg [3:0] m_axi_aregion_reg = 4'd0, m_axi_aregion_next; +reg [CL_M_COUNT-1:0] m_select_reg = 0, m_select_next; +reg m_axi_avalid_reg = 1'b0, m_axi_avalid_next; +reg m_decerr_reg = 1'b0, m_decerr_next; +reg m_wc_valid_reg = 1'b0, m_wc_valid_next; +reg m_rc_valid_reg = 1'b0, m_rc_valid_next; + +assign s_axi_aready = s_axi_aready_reg; + +assign m_axi_aregion = m_axi_aregion_reg; +assign m_select = m_select_reg; +assign m_axi_avalid = m_axi_avalid_reg; + +assign m_wc_select = m_select_reg; +assign m_wc_decerr = m_decerr_reg; +assign m_wc_valid = m_wc_valid_reg; + +assign m_rc_decerr = m_decerr_reg; +assign m_rc_valid = m_rc_valid_reg; + +reg match; +reg trans_start; +reg trans_complete; + +reg [$clog2(S_ACCEPT+1)-1:0] trans_count_reg = 0; +wire trans_limit = trans_count_reg >= S_ACCEPT && !trans_complete; + +// transfer ID thread tracking +reg [ID_WIDTH-1:0] thread_id_reg[S_INT_THREADS-1:0]; +reg [CL_M_COUNT-1:0] thread_m_reg[S_INT_THREADS-1:0]; +reg [3:0] thread_region_reg[S_INT_THREADS-1:0]; +reg [$clog2(S_ACCEPT+1)-1:0] thread_count_reg[S_INT_THREADS-1:0]; + +wire [S_INT_THREADS-1:0] thread_active; +wire [S_INT_THREADS-1:0] thread_match; +wire [S_INT_THREADS-1:0] thread_match_dest; +wire [S_INT_THREADS-1:0] thread_cpl_match; +wire [S_INT_THREADS-1:0] thread_trans_start; +wire [S_INT_THREADS-1:0] thread_trans_complete; + +generate + genvar n; + + for (n = 0; n < S_INT_THREADS; n = n + 1) begin + initial begin + thread_count_reg[n] <= 0; + end + + assign thread_active[n] = thread_count_reg[n] != 0; + assign thread_match[n] = thread_active[n] && thread_id_reg[n] == s_axi_aid; + assign thread_match_dest[n] = thread_match[n] && thread_m_reg[n] == m_select_next && (M_REGIONS < 2 || thread_region_reg[n] == m_axi_aregion_next); + assign thread_cpl_match[n] = thread_active[n] && thread_id_reg[n] == s_cpl_id; + assign thread_trans_start[n] = (thread_match[n] || (!thread_active[n] && !thread_match && !(thread_trans_start & ({S_INT_THREADS{1'b1}} >> (S_INT_THREADS-n))))) && trans_start; + assign thread_trans_complete[n] = thread_cpl_match[n] && trans_complete; + + always @(posedge clk) begin + if (rst) begin + thread_count_reg[n] <= 0; + end else begin + if (thread_trans_start[n] && !thread_trans_complete[n]) begin + thread_count_reg[n] <= thread_count_reg[n] + 1; + end else if (!thread_trans_start[n] && thread_trans_complete[n]) begin + thread_count_reg[n] <= thread_count_reg[n] - 1; + end + end + + if (thread_trans_start[n]) begin + thread_id_reg[n] <= s_axi_aid; + thread_m_reg[n] <= m_select_next; + thread_region_reg[n] <= m_axi_aregion_next; + end + end + end 
+endgenerate + +always @* begin + state_next = STATE_IDLE; + + match = 1'b0; + trans_start = 1'b0; + trans_complete = 1'b0; + + s_axi_aready_next = 1'b0; + + m_axi_aregion_next = m_axi_aregion_reg; + m_select_next = m_select_reg; + m_axi_avalid_next = m_axi_avalid_reg && !m_axi_aready; + m_decerr_next = m_decerr_reg; + m_wc_valid_next = m_wc_valid_reg && !m_wc_ready; + m_rc_valid_next = m_rc_valid_reg && !m_rc_ready; + + case (state_reg) + STATE_IDLE: begin + // idle state, store values + s_axi_aready_next = 1'b0; + + if (s_axi_avalid && !s_axi_aready) begin + match = 1'b0; + for (i = 0; i < M_COUNT; i = i + 1) begin + for (j = 0; j < M_REGIONS; j = j + 1) begin + if (M_ADDR_WIDTH[(i*M_REGIONS+j)*32 +: 32] && (!M_SECURE[i] || !s_axi_aprot[1]) && (M_CONNECT & (1 << (S+i*S_COUNT))) && (s_axi_aaddr >> M_ADDR_WIDTH[(i*M_REGIONS+j)*32 +: 32]) == (M_BASE_ADDR_INT[(i*M_REGIONS+j)*ADDR_WIDTH +: ADDR_WIDTH] >> M_ADDR_WIDTH[(i*M_REGIONS+j)*32 +: 32])) begin + m_select_next = i; + m_axi_aregion_next = j; + match = 1'b1; + end + end + end + + if (match) begin + // address decode successful + if (!trans_limit && (thread_match_dest || (!(&thread_active) && !thread_match))) begin + // transaction limit not reached + m_axi_avalid_next = 1'b1; + m_decerr_next = 1'b0; + m_wc_valid_next = WC_OUTPUT; + m_rc_valid_next = 1'b0; + trans_start = 1'b1; + state_next = STATE_DECODE; + end else begin + // transaction limit reached; block in idle + state_next = STATE_IDLE; + end + end else begin + // decode error + m_axi_avalid_next = 1'b0; + m_decerr_next = 1'b1; + m_wc_valid_next = WC_OUTPUT; + m_rc_valid_next = 1'b1; + state_next = STATE_DECODE; + end + end else begin + state_next = STATE_IDLE; + end + end + STATE_DECODE: begin + if (!m_axi_avalid_next && (!m_wc_valid_next || !WC_OUTPUT) && !m_rc_valid_next) begin + s_axi_aready_next = 1'b1; + state_next = STATE_IDLE; + end else begin + state_next = STATE_DECODE; + end + end + endcase + + // manage completions + trans_complete = s_cpl_valid; +end + +always @(posedge clk) begin + if (rst) begin + state_reg <= STATE_IDLE; + s_axi_aready_reg <= 1'b0; + m_axi_avalid_reg <= 1'b0; + m_wc_valid_reg <= 1'b0; + m_rc_valid_reg <= 1'b0; + + trans_count_reg <= 0; + end else begin + state_reg <= state_next; + s_axi_aready_reg <= s_axi_aready_next; + m_axi_avalid_reg <= m_axi_avalid_next; + m_wc_valid_reg <= m_wc_valid_next; + m_rc_valid_reg <= m_rc_valid_next; + + if (trans_start && !trans_complete) begin + trans_count_reg <= trans_count_reg + 1; + end else if (!trans_start && trans_complete) begin + trans_count_reg <= trans_count_reg - 1; + end + end + + m_axi_aregion_reg <= m_axi_aregion_next; + m_select_reg <= m_select_next; + m_decerr_reg <= m_decerr_next; +end + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_crossbar_rd.v b/xls/modules/zstd/external/axi_crossbar_rd.v new file mode 100644 index 0000000000..2b1410ac62 --- /dev/null +++ b/xls/modules/zstd/external/axi_crossbar_rd.v @@ -0,0 +1,569 @@ +/* + +Copyright (c) 2018 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all 
copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 crossbar (read) + */ +module axi_crossbar_rd # +( + // Number of AXI inputs (slave interfaces) + parameter S_COUNT = 4, + // Number of AXI outputs (master interfaces) + parameter M_COUNT = 4, + // Width of data bus in bits + parameter DATA_WIDTH = 32, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // Width of wstrb (width of data bus in words) + parameter STRB_WIDTH = (DATA_WIDTH/8), + // Input ID field width (from AXI masters) + parameter S_ID_WIDTH = 8, + // Output ID field width (towards AXI slaves) + // Additional bits required for response routing + parameter M_ID_WIDTH = S_ID_WIDTH+$clog2(S_COUNT), + // Propagate aruser signal + parameter ARUSER_ENABLE = 0, + // Width of aruser signal + parameter ARUSER_WIDTH = 1, + // Propagate ruser signal + parameter RUSER_ENABLE = 0, + // Width of ruser signal + parameter RUSER_WIDTH = 1, + // Number of concurrent unique IDs for each slave interface + // S_COUNT concatenated fields of 32 bits + parameter S_THREADS = {S_COUNT{32'd2}}, + // Number of concurrent operations for each slave interface + // S_COUNT concatenated fields of 32 bits + parameter S_ACCEPT = {S_COUNT{32'd16}}, + // Number of regions per master interface + parameter M_REGIONS = 1, + // Master interface base addresses + // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_WIDTH bits + // set to zero for default addressing based on M_ADDR_WIDTH + parameter M_BASE_ADDR = 0, + // Master interface address widths + // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits + parameter M_ADDR_WIDTH = {M_COUNT{{M_REGIONS{32'd24}}}}, + // Read connections between interfaces + // M_COUNT concatenated fields of S_COUNT bits + parameter M_CONNECT = {M_COUNT{{S_COUNT{1'b1}}}}, + // Number of concurrent operations for each master interface + // M_COUNT concatenated fields of 32 bits + parameter M_ISSUE = {M_COUNT{32'd4}}, + // Secure master (fail operations based on awprot/arprot) + // M_COUNT bits + parameter M_SECURE = {M_COUNT{1'b0}}, + // Slave interface AR channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_AR_REG_TYPE = {S_COUNT{2'd0}}, + // Slave interface R channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_R_REG_TYPE = {S_COUNT{2'd2}}, + // Master interface AR channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_AR_REG_TYPE = {M_COUNT{2'd1}}, + // Master interface R channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_R_REG_TYPE = {M_COUNT{2'd0}} +) +( + input wire clk, + input wire rst, + + /* + * AXI slave interfaces + */ + input wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_arid, + input wire [S_COUNT*ADDR_WIDTH-1:0] s_axi_araddr, + input wire [S_COUNT*8-1:0] s_axi_arlen, + input wire [S_COUNT*3-1:0] s_axi_arsize, + input wire 
[S_COUNT*2-1:0] s_axi_arburst, + input wire [S_COUNT-1:0] s_axi_arlock, + input wire [S_COUNT*4-1:0] s_axi_arcache, + input wire [S_COUNT*3-1:0] s_axi_arprot, + input wire [S_COUNT*4-1:0] s_axi_arqos, + input wire [S_COUNT*ARUSER_WIDTH-1:0] s_axi_aruser, + input wire [S_COUNT-1:0] s_axi_arvalid, + output wire [S_COUNT-1:0] s_axi_arready, + output wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_rid, + output wire [S_COUNT*DATA_WIDTH-1:0] s_axi_rdata, + output wire [S_COUNT*2-1:0] s_axi_rresp, + output wire [S_COUNT-1:0] s_axi_rlast, + output wire [S_COUNT*RUSER_WIDTH-1:0] s_axi_ruser, + output wire [S_COUNT-1:0] s_axi_rvalid, + input wire [S_COUNT-1:0] s_axi_rready, + + /* + * AXI master interfaces + */ + output wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_arid, + output wire [M_COUNT*ADDR_WIDTH-1:0] m_axi_araddr, + output wire [M_COUNT*8-1:0] m_axi_arlen, + output wire [M_COUNT*3-1:0] m_axi_arsize, + output wire [M_COUNT*2-1:0] m_axi_arburst, + output wire [M_COUNT-1:0] m_axi_arlock, + output wire [M_COUNT*4-1:0] m_axi_arcache, + output wire [M_COUNT*3-1:0] m_axi_arprot, + output wire [M_COUNT*4-1:0] m_axi_arqos, + output wire [M_COUNT*4-1:0] m_axi_arregion, + output wire [M_COUNT*ARUSER_WIDTH-1:0] m_axi_aruser, + output wire [M_COUNT-1:0] m_axi_arvalid, + input wire [M_COUNT-1:0] m_axi_arready, + input wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_rid, + input wire [M_COUNT*DATA_WIDTH-1:0] m_axi_rdata, + input wire [M_COUNT*2-1:0] m_axi_rresp, + input wire [M_COUNT-1:0] m_axi_rlast, + input wire [M_COUNT*RUSER_WIDTH-1:0] m_axi_ruser, + input wire [M_COUNT-1:0] m_axi_rvalid, + output wire [M_COUNT-1:0] m_axi_rready +); + +parameter CL_S_COUNT = $clog2(S_COUNT); +parameter CL_M_COUNT = $clog2(M_COUNT); +parameter M_COUNT_P1 = M_COUNT+1; +parameter CL_M_COUNT_P1 = $clog2(M_COUNT_P1); + +integer i; + +// check configuration +initial begin + if (M_ID_WIDTH < S_ID_WIDTH+$clog2(S_COUNT)) begin + $error("Error: M_ID_WIDTH must be at least $clog2(S_COUNT) larger than S_ID_WIDTH (instance %m)"); + $finish; + end + + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + if (M_ADDR_WIDTH[i*32 +: 32] && (M_ADDR_WIDTH[i*32 +: 32] < 12 || M_ADDR_WIDTH[i*32 +: 32] > ADDR_WIDTH)) begin + $error("Error: value out of range (instance %m)"); + $finish; + end + end +end + +wire [S_COUNT*S_ID_WIDTH-1:0] int_s_axi_arid; +wire [S_COUNT*ADDR_WIDTH-1:0] int_s_axi_araddr; +wire [S_COUNT*8-1:0] int_s_axi_arlen; +wire [S_COUNT*3-1:0] int_s_axi_arsize; +wire [S_COUNT*2-1:0] int_s_axi_arburst; +wire [S_COUNT-1:0] int_s_axi_arlock; +wire [S_COUNT*4-1:0] int_s_axi_arcache; +wire [S_COUNT*3-1:0] int_s_axi_arprot; +wire [S_COUNT*4-1:0] int_s_axi_arqos; +wire [S_COUNT*4-1:0] int_s_axi_arregion; +wire [S_COUNT*ARUSER_WIDTH-1:0] int_s_axi_aruser; +wire [S_COUNT-1:0] int_s_axi_arvalid; +wire [S_COUNT-1:0] int_s_axi_arready; + +wire [S_COUNT*M_COUNT-1:0] int_axi_arvalid; +wire [M_COUNT*S_COUNT-1:0] int_axi_arready; + +wire [M_COUNT*M_ID_WIDTH-1:0] int_m_axi_rid; +wire [M_COUNT*DATA_WIDTH-1:0] int_m_axi_rdata; +wire [M_COUNT*2-1:0] int_m_axi_rresp; +wire [M_COUNT-1:0] int_m_axi_rlast; +wire [M_COUNT*RUSER_WIDTH-1:0] int_m_axi_ruser; +wire [M_COUNT-1:0] int_m_axi_rvalid; +wire [M_COUNT-1:0] int_m_axi_rready; + +wire [M_COUNT*S_COUNT-1:0] int_axi_rvalid; +wire [S_COUNT*M_COUNT-1:0] int_axi_rready; + +generate + + genvar m, n; + + for (m = 0; m < S_COUNT; m = m + 1) begin : s_ifaces + // address decode and admission control + wire [CL_M_COUNT-1:0] a_select; + + wire m_axi_avalid; + wire m_axi_aready; + + wire m_rc_decerr; + wire m_rc_valid; + wire m_rc_ready; + + 
wire [S_ID_WIDTH-1:0] s_cpl_id; + wire s_cpl_valid; + + axi_crossbar_addr #( + .S(m), + .S_COUNT(S_COUNT), + .M_COUNT(M_COUNT), + .ADDR_WIDTH(ADDR_WIDTH), + .ID_WIDTH(S_ID_WIDTH), + .S_THREADS(S_THREADS[m*32 +: 32]), + .S_ACCEPT(S_ACCEPT[m*32 +: 32]), + .M_REGIONS(M_REGIONS), + .M_BASE_ADDR(M_BASE_ADDR), + .M_ADDR_WIDTH(M_ADDR_WIDTH), + .M_CONNECT(M_CONNECT), + .M_SECURE(M_SECURE), + .WC_OUTPUT(0) + ) + addr_inst ( + .clk(clk), + .rst(rst), + + /* + * Address input + */ + .s_axi_aid(int_s_axi_arid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .s_axi_aaddr(int_s_axi_araddr[m*ADDR_WIDTH +: ADDR_WIDTH]), + .s_axi_aprot(int_s_axi_arprot[m*3 +: 3]), + .s_axi_aqos(int_s_axi_arqos[m*4 +: 4]), + .s_axi_avalid(int_s_axi_arvalid[m]), + .s_axi_aready(int_s_axi_arready[m]), + + /* + * Address output + */ + .m_axi_aregion(int_s_axi_arregion[m*4 +: 4]), + .m_select(a_select), + .m_axi_avalid(m_axi_avalid), + .m_axi_aready(m_axi_aready), + + /* + * Write command output + */ + .m_wc_select(), + .m_wc_decerr(), + .m_wc_valid(), + .m_wc_ready(1'b1), + + /* + * Response command output + */ + .m_rc_decerr(m_rc_decerr), + .m_rc_valid(m_rc_valid), + .m_rc_ready(m_rc_ready), + + /* + * Completion input + */ + .s_cpl_id(s_cpl_id), + .s_cpl_valid(s_cpl_valid) + ); + + assign int_axi_arvalid[m*M_COUNT +: M_COUNT] = m_axi_avalid << a_select; + assign m_axi_aready = int_axi_arready[a_select*S_COUNT+m]; + + // decode error handling + reg [S_ID_WIDTH-1:0] decerr_m_axi_rid_reg = {S_ID_WIDTH{1'b0}}, decerr_m_axi_rid_next; + reg decerr_m_axi_rlast_reg = 1'b0, decerr_m_axi_rlast_next; + reg decerr_m_axi_rvalid_reg = 1'b0, decerr_m_axi_rvalid_next; + wire decerr_m_axi_rready; + + reg [7:0] decerr_len_reg = 8'd0, decerr_len_next; + + assign m_rc_ready = !decerr_m_axi_rvalid_reg; + + always @* begin + decerr_len_next = decerr_len_reg; + decerr_m_axi_rid_next = decerr_m_axi_rid_reg; + decerr_m_axi_rlast_next = decerr_m_axi_rlast_reg; + decerr_m_axi_rvalid_next = decerr_m_axi_rvalid_reg; + + if (decerr_m_axi_rvalid_reg) begin + if (decerr_m_axi_rready) begin + if (decerr_len_reg > 0) begin + decerr_len_next = decerr_len_reg-1; + decerr_m_axi_rlast_next = (decerr_len_next == 0); + decerr_m_axi_rvalid_next = 1'b1; + end else begin + decerr_m_axi_rvalid_next = 1'b0; + end + end + end else if (m_rc_valid && m_rc_ready) begin + decerr_len_next = int_s_axi_arlen[m*8 +: 8]; + decerr_m_axi_rid_next = int_s_axi_arid[m*S_ID_WIDTH +: S_ID_WIDTH]; + decerr_m_axi_rlast_next = (decerr_len_next == 0); + decerr_m_axi_rvalid_next = 1'b1; + end + end + + always @(posedge clk) begin + if (rst) begin + decerr_m_axi_rvalid_reg <= 1'b0; + end else begin + decerr_m_axi_rvalid_reg <= decerr_m_axi_rvalid_next; + end + + decerr_m_axi_rid_reg <= decerr_m_axi_rid_next; + decerr_m_axi_rlast_reg <= decerr_m_axi_rlast_next; + decerr_len_reg <= decerr_len_next; + end + + // read response arbitration + wire [M_COUNT_P1-1:0] r_request; + wire [M_COUNT_P1-1:0] r_acknowledge; + wire [M_COUNT_P1-1:0] r_grant; + wire r_grant_valid; + wire [CL_M_COUNT_P1-1:0] r_grant_encoded; + + arbiter #( + .PORTS(M_COUNT_P1), + .ARB_TYPE_ROUND_ROBIN(1), + .ARB_BLOCK(1), + .ARB_BLOCK_ACK(1), + .ARB_LSB_HIGH_PRIORITY(1) + ) + r_arb_inst ( + .clk(clk), + .rst(rst), + .request(r_request), + .acknowledge(r_acknowledge), + .grant(r_grant), + .grant_valid(r_grant_valid), + .grant_encoded(r_grant_encoded) + ); + + // read response mux + wire [S_ID_WIDTH-1:0] m_axi_rid_mux = {decerr_m_axi_rid_reg, int_m_axi_rid} >> r_grant_encoded*M_ID_WIDTH; + wire [DATA_WIDTH-1:0] m_axi_rdata_mux = 
{{DATA_WIDTH{1'b0}}, int_m_axi_rdata} >> r_grant_encoded*DATA_WIDTH; + wire [1:0] m_axi_rresp_mux = {2'b11, int_m_axi_rresp} >> r_grant_encoded*2; + wire m_axi_rlast_mux = {decerr_m_axi_rlast_reg, int_m_axi_rlast} >> r_grant_encoded; + wire [RUSER_WIDTH-1:0] m_axi_ruser_mux = {{RUSER_WIDTH{1'b0}}, int_m_axi_ruser} >> r_grant_encoded*RUSER_WIDTH; + wire m_axi_rvalid_mux = ({decerr_m_axi_rvalid_reg, int_m_axi_rvalid} >> r_grant_encoded) & r_grant_valid; + wire m_axi_rready_mux; + + assign int_axi_rready[m*M_COUNT +: M_COUNT] = (r_grant_valid && m_axi_rready_mux) << r_grant_encoded; + assign decerr_m_axi_rready = (r_grant_valid && m_axi_rready_mux) && (r_grant_encoded == M_COUNT_P1-1); + + for (n = 0; n < M_COUNT; n = n + 1) begin + assign r_request[n] = int_axi_rvalid[n*S_COUNT+m] && !r_grant[n]; + assign r_acknowledge[n] = r_grant[n] && int_axi_rvalid[n*S_COUNT+m] && m_axi_rlast_mux && m_axi_rready_mux; + end + + assign r_request[M_COUNT_P1-1] = decerr_m_axi_rvalid_reg && !r_grant[M_COUNT_P1-1]; + assign r_acknowledge[M_COUNT_P1-1] = r_grant[M_COUNT_P1-1] && decerr_m_axi_rvalid_reg && decerr_m_axi_rlast_reg && m_axi_rready_mux; + + assign s_cpl_id = m_axi_rid_mux; + assign s_cpl_valid = m_axi_rvalid_mux && m_axi_rready_mux && m_axi_rlast_mux; + + // S side register + axi_register_rd #( + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .ID_WIDTH(S_ID_WIDTH), + .ARUSER_ENABLE(ARUSER_ENABLE), + .ARUSER_WIDTH(ARUSER_WIDTH), + .RUSER_ENABLE(RUSER_ENABLE), + .RUSER_WIDTH(RUSER_WIDTH), + .AR_REG_TYPE(S_AR_REG_TYPE[m*2 +: 2]), + .R_REG_TYPE(S_R_REG_TYPE[m*2 +: 2]) + ) + reg_inst ( + .clk(clk), + .rst(rst), + .s_axi_arid(s_axi_arid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .s_axi_araddr(s_axi_araddr[m*ADDR_WIDTH +: ADDR_WIDTH]), + .s_axi_arlen(s_axi_arlen[m*8 +: 8]), + .s_axi_arsize(s_axi_arsize[m*3 +: 3]), + .s_axi_arburst(s_axi_arburst[m*2 +: 2]), + .s_axi_arlock(s_axi_arlock[m]), + .s_axi_arcache(s_axi_arcache[m*4 +: 4]), + .s_axi_arprot(s_axi_arprot[m*3 +: 3]), + .s_axi_arqos(s_axi_arqos[m*4 +: 4]), + .s_axi_arregion(4'd0), + .s_axi_aruser(s_axi_aruser[m*ARUSER_WIDTH +: ARUSER_WIDTH]), + .s_axi_arvalid(s_axi_arvalid[m]), + .s_axi_arready(s_axi_arready[m]), + .s_axi_rid(s_axi_rid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .s_axi_rdata(s_axi_rdata[m*DATA_WIDTH +: DATA_WIDTH]), + .s_axi_rresp(s_axi_rresp[m*2 +: 2]), + .s_axi_rlast(s_axi_rlast[m]), + .s_axi_ruser(s_axi_ruser[m*RUSER_WIDTH +: RUSER_WIDTH]), + .s_axi_rvalid(s_axi_rvalid[m]), + .s_axi_rready(s_axi_rready[m]), + .m_axi_arid(int_s_axi_arid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .m_axi_araddr(int_s_axi_araddr[m*ADDR_WIDTH +: ADDR_WIDTH]), + .m_axi_arlen(int_s_axi_arlen[m*8 +: 8]), + .m_axi_arsize(int_s_axi_arsize[m*3 +: 3]), + .m_axi_arburst(int_s_axi_arburst[m*2 +: 2]), + .m_axi_arlock(int_s_axi_arlock[m]), + .m_axi_arcache(int_s_axi_arcache[m*4 +: 4]), + .m_axi_arprot(int_s_axi_arprot[m*3 +: 3]), + .m_axi_arqos(int_s_axi_arqos[m*4 +: 4]), + .m_axi_arregion(), + .m_axi_aruser(int_s_axi_aruser[m*ARUSER_WIDTH +: ARUSER_WIDTH]), + .m_axi_arvalid(int_s_axi_arvalid[m]), + .m_axi_arready(int_s_axi_arready[m]), + .m_axi_rid(m_axi_rid_mux), + .m_axi_rdata(m_axi_rdata_mux), + .m_axi_rresp(m_axi_rresp_mux), + .m_axi_rlast(m_axi_rlast_mux), + .m_axi_ruser(m_axi_ruser_mux), + .m_axi_rvalid(m_axi_rvalid_mux), + .m_axi_rready(m_axi_rready_mux) + ); + end // s_ifaces + + for (n = 0; n < M_COUNT; n = n + 1) begin : m_ifaces + // in-flight transaction count + wire trans_start; + wire trans_complete; + reg [$clog2(M_ISSUE[n*32 +: 32]+1)-1:0] 
trans_count_reg = 0; + + wire trans_limit = trans_count_reg >= M_ISSUE[n*32 +: 32] && !trans_complete; + + always @(posedge clk) begin + if (rst) begin + trans_count_reg <= 0; + end else begin + if (trans_start && !trans_complete) begin + trans_count_reg <= trans_count_reg + 1; + end else if (!trans_start && trans_complete) begin + trans_count_reg <= trans_count_reg - 1; + end + end + end + + // address arbitration + wire [S_COUNT-1:0] a_request; + wire [S_COUNT-1:0] a_acknowledge; + wire [S_COUNT-1:0] a_grant; + wire a_grant_valid; + wire [CL_S_COUNT-1:0] a_grant_encoded; + + arbiter #( + .PORTS(S_COUNT), + .ARB_TYPE_ROUND_ROBIN(1), + .ARB_BLOCK(1), + .ARB_BLOCK_ACK(1), + .ARB_LSB_HIGH_PRIORITY(1) + ) + a_arb_inst ( + .clk(clk), + .rst(rst), + .request(a_request), + .acknowledge(a_acknowledge), + .grant(a_grant), + .grant_valid(a_grant_valid), + .grant_encoded(a_grant_encoded) + ); + + // address mux + wire [M_ID_WIDTH-1:0] s_axi_arid_mux = int_s_axi_arid[a_grant_encoded*S_ID_WIDTH +: S_ID_WIDTH] | (a_grant_encoded << S_ID_WIDTH); + wire [ADDR_WIDTH-1:0] s_axi_araddr_mux = int_s_axi_araddr[a_grant_encoded*ADDR_WIDTH +: ADDR_WIDTH]; + wire [7:0] s_axi_arlen_mux = int_s_axi_arlen[a_grant_encoded*8 +: 8]; + wire [2:0] s_axi_arsize_mux = int_s_axi_arsize[a_grant_encoded*3 +: 3]; + wire [1:0] s_axi_arburst_mux = int_s_axi_arburst[a_grant_encoded*2 +: 2]; + wire s_axi_arlock_mux = int_s_axi_arlock[a_grant_encoded]; + wire [3:0] s_axi_arcache_mux = int_s_axi_arcache[a_grant_encoded*4 +: 4]; + wire [2:0] s_axi_arprot_mux = int_s_axi_arprot[a_grant_encoded*3 +: 3]; + wire [3:0] s_axi_arqos_mux = int_s_axi_arqos[a_grant_encoded*4 +: 4]; + wire [3:0] s_axi_arregion_mux = int_s_axi_arregion[a_grant_encoded*4 +: 4]; + wire [ARUSER_WIDTH-1:0] s_axi_aruser_mux = int_s_axi_aruser[a_grant_encoded*ARUSER_WIDTH +: ARUSER_WIDTH]; + wire s_axi_arvalid_mux = int_axi_arvalid[a_grant_encoded*M_COUNT+n] && a_grant_valid; + wire s_axi_arready_mux; + + assign int_axi_arready[n*S_COUNT +: S_COUNT] = (a_grant_valid && s_axi_arready_mux) << a_grant_encoded; + + for (m = 0; m < S_COUNT; m = m + 1) begin + assign a_request[m] = int_axi_arvalid[m*M_COUNT+n] && !a_grant[m] && !trans_limit; + assign a_acknowledge[m] = a_grant[m] && int_axi_arvalid[m*M_COUNT+n] && s_axi_arready_mux; + end + + assign trans_start = s_axi_arvalid_mux && s_axi_arready_mux && a_grant_valid; + + // read response forwarding + wire [CL_S_COUNT-1:0] r_select = m_axi_rid[n*M_ID_WIDTH +: M_ID_WIDTH] >> S_ID_WIDTH; + + assign int_axi_rvalid[n*S_COUNT +: S_COUNT] = int_m_axi_rvalid[n] << r_select; + assign int_m_axi_rready[n] = int_axi_rready[r_select*M_COUNT+n]; + + assign trans_complete = int_m_axi_rvalid[n] && int_m_axi_rready[n] && int_m_axi_rlast[n]; + + // M side register + axi_register_rd #( + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .ID_WIDTH(M_ID_WIDTH), + .ARUSER_ENABLE(ARUSER_ENABLE), + .ARUSER_WIDTH(ARUSER_WIDTH), + .RUSER_ENABLE(RUSER_ENABLE), + .RUSER_WIDTH(RUSER_WIDTH), + .AR_REG_TYPE(M_AR_REG_TYPE[n*2 +: 2]), + .R_REG_TYPE(M_R_REG_TYPE[n*2 +: 2]) + ) + reg_inst ( + .clk(clk), + .rst(rst), + .s_axi_arid(s_axi_arid_mux), + .s_axi_araddr(s_axi_araddr_mux), + .s_axi_arlen(s_axi_arlen_mux), + .s_axi_arsize(s_axi_arsize_mux), + .s_axi_arburst(s_axi_arburst_mux), + .s_axi_arlock(s_axi_arlock_mux), + .s_axi_arcache(s_axi_arcache_mux), + .s_axi_arprot(s_axi_arprot_mux), + .s_axi_arqos(s_axi_arqos_mux), + .s_axi_arregion(s_axi_arregion_mux), + .s_axi_aruser(s_axi_aruser_mux), + 
.s_axi_arvalid(s_axi_arvalid_mux), + .s_axi_arready(s_axi_arready_mux), + .s_axi_rid(int_m_axi_rid[n*M_ID_WIDTH +: M_ID_WIDTH]), + .s_axi_rdata(int_m_axi_rdata[n*DATA_WIDTH +: DATA_WIDTH]), + .s_axi_rresp(int_m_axi_rresp[n*2 +: 2]), + .s_axi_rlast(int_m_axi_rlast[n]), + .s_axi_ruser(int_m_axi_ruser[n*RUSER_WIDTH +: RUSER_WIDTH]), + .s_axi_rvalid(int_m_axi_rvalid[n]), + .s_axi_rready(int_m_axi_rready[n]), + .m_axi_arid(m_axi_arid[n*M_ID_WIDTH +: M_ID_WIDTH]), + .m_axi_araddr(m_axi_araddr[n*ADDR_WIDTH +: ADDR_WIDTH]), + .m_axi_arlen(m_axi_arlen[n*8 +: 8]), + .m_axi_arsize(m_axi_arsize[n*3 +: 3]), + .m_axi_arburst(m_axi_arburst[n*2 +: 2]), + .m_axi_arlock(m_axi_arlock[n]), + .m_axi_arcache(m_axi_arcache[n*4 +: 4]), + .m_axi_arprot(m_axi_arprot[n*3 +: 3]), + .m_axi_arqos(m_axi_arqos[n*4 +: 4]), + .m_axi_arregion(m_axi_arregion[n*4 +: 4]), + .m_axi_aruser(m_axi_aruser[n*ARUSER_WIDTH +: ARUSER_WIDTH]), + .m_axi_arvalid(m_axi_arvalid[n]), + .m_axi_arready(m_axi_arready[n]), + .m_axi_rid(m_axi_rid[n*M_ID_WIDTH +: M_ID_WIDTH]), + .m_axi_rdata(m_axi_rdata[n*DATA_WIDTH +: DATA_WIDTH]), + .m_axi_rresp(m_axi_rresp[n*2 +: 2]), + .m_axi_rlast(m_axi_rlast[n]), + .m_axi_ruser(m_axi_ruser[n*RUSER_WIDTH +: RUSER_WIDTH]), + .m_axi_rvalid(m_axi_rvalid[n]), + .m_axi_rready(m_axi_rready[n]) + ); + end // m_ifaces + +endgenerate + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_crossbar_wr.v b/xls/modules/zstd/external/axi_crossbar_wr.v new file mode 100644 index 0000000000..5f55665351 --- /dev/null +++ b/xls/modules/zstd/external/axi_crossbar_wr.v @@ -0,0 +1,678 @@ +/* + +Copyright (c) 2018 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 crossbar (write) + */ +module axi_crossbar_wr # +( + // Number of AXI inputs (slave interfaces) + parameter S_COUNT = 4, + // Number of AXI outputs (master interfaces) + parameter M_COUNT = 4, + // Width of data bus in bits + parameter DATA_WIDTH = 32, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // Width of wstrb (width of data bus in words) + parameter STRB_WIDTH = (DATA_WIDTH/8), + // Input ID field width (from AXI masters) + parameter S_ID_WIDTH = 8, + // Output ID field width (towards AXI slaves) + // Additional bits required for response routing + parameter M_ID_WIDTH = S_ID_WIDTH+$clog2(S_COUNT), + // Propagate awuser signal + parameter AWUSER_ENABLE = 0, + // Width of awuser signal + parameter AWUSER_WIDTH = 1, + // Propagate wuser signal + parameter WUSER_ENABLE = 0, + // Width of wuser signal + parameter WUSER_WIDTH = 1, + // Propagate buser signal + parameter BUSER_ENABLE = 0, + // Width of buser signal + parameter BUSER_WIDTH = 1, + // Number of concurrent unique IDs for each slave interface + // S_COUNT concatenated fields of 32 bits + parameter S_THREADS = {S_COUNT{32'd2}}, + // Number of concurrent operations for each slave interface + // S_COUNT concatenated fields of 32 bits + parameter S_ACCEPT = {S_COUNT{32'd16}}, + // Number of regions per master interface + parameter M_REGIONS = 1, + // Master interface base addresses + // M_COUNT concatenated fields of M_REGIONS concatenated fields of ADDR_WIDTH bits + // set to zero for default addressing based on M_ADDR_WIDTH + parameter M_BASE_ADDR = 0, + // Master interface address widths + // M_COUNT concatenated fields of M_REGIONS concatenated fields of 32 bits + parameter M_ADDR_WIDTH = {M_COUNT{{M_REGIONS{32'd24}}}}, + // Write connections between interfaces + // M_COUNT concatenated fields of S_COUNT bits + parameter M_CONNECT = {M_COUNT{{S_COUNT{1'b1}}}}, + // Number of concurrent operations for each master interface + // M_COUNT concatenated fields of 32 bits + parameter M_ISSUE = {M_COUNT{32'd4}}, + // Secure master (fail operations based on awprot/arprot) + // M_COUNT bits + parameter M_SECURE = {M_COUNT{1'b0}}, + // Slave interface AW channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_AW_REG_TYPE = {S_COUNT{2'd0}}, + // Slave interface W channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_W_REG_TYPE = {S_COUNT{2'd0}}, + // Slave interface B channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S_B_REG_TYPE = {S_COUNT{2'd1}}, + // Master interface AW channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_AW_REG_TYPE = {M_COUNT{2'd1}}, + // Master interface W channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_W_REG_TYPE = {M_COUNT{2'd2}}, + // Master interface B channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M_B_REG_TYPE = {M_COUNT{2'd0}} +) +( + input wire clk, + input wire rst, + + /* + * AXI slave interfaces + */ + input wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_awid, + input wire [S_COUNT*ADDR_WIDTH-1:0] s_axi_awaddr, + input wire [S_COUNT*8-1:0] s_axi_awlen, + input wire [S_COUNT*3-1:0] s_axi_awsize, + input wire [S_COUNT*2-1:0] s_axi_awburst, + input wire [S_COUNT-1:0] s_axi_awlock, + input wire 
[S_COUNT*4-1:0] s_axi_awcache, + input wire [S_COUNT*3-1:0] s_axi_awprot, + input wire [S_COUNT*4-1:0] s_axi_awqos, + input wire [S_COUNT*AWUSER_WIDTH-1:0] s_axi_awuser, + input wire [S_COUNT-1:0] s_axi_awvalid, + output wire [S_COUNT-1:0] s_axi_awready, + input wire [S_COUNT*DATA_WIDTH-1:0] s_axi_wdata, + input wire [S_COUNT*STRB_WIDTH-1:0] s_axi_wstrb, + input wire [S_COUNT-1:0] s_axi_wlast, + input wire [S_COUNT*WUSER_WIDTH-1:0] s_axi_wuser, + input wire [S_COUNT-1:0] s_axi_wvalid, + output wire [S_COUNT-1:0] s_axi_wready, + output wire [S_COUNT*S_ID_WIDTH-1:0] s_axi_bid, + output wire [S_COUNT*2-1:0] s_axi_bresp, + output wire [S_COUNT*BUSER_WIDTH-1:0] s_axi_buser, + output wire [S_COUNT-1:0] s_axi_bvalid, + input wire [S_COUNT-1:0] s_axi_bready, + + /* + * AXI master interfaces + */ + output wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_awid, + output wire [M_COUNT*ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [M_COUNT*8-1:0] m_axi_awlen, + output wire [M_COUNT*3-1:0] m_axi_awsize, + output wire [M_COUNT*2-1:0] m_axi_awburst, + output wire [M_COUNT-1:0] m_axi_awlock, + output wire [M_COUNT*4-1:0] m_axi_awcache, + output wire [M_COUNT*3-1:0] m_axi_awprot, + output wire [M_COUNT*4-1:0] m_axi_awqos, + output wire [M_COUNT*4-1:0] m_axi_awregion, + output wire [M_COUNT*AWUSER_WIDTH-1:0] m_axi_awuser, + output wire [M_COUNT-1:0] m_axi_awvalid, + input wire [M_COUNT-1:0] m_axi_awready, + output wire [M_COUNT*DATA_WIDTH-1:0] m_axi_wdata, + output wire [M_COUNT*STRB_WIDTH-1:0] m_axi_wstrb, + output wire [M_COUNT-1:0] m_axi_wlast, + output wire [M_COUNT*WUSER_WIDTH-1:0] m_axi_wuser, + output wire [M_COUNT-1:0] m_axi_wvalid, + input wire [M_COUNT-1:0] m_axi_wready, + input wire [M_COUNT*M_ID_WIDTH-1:0] m_axi_bid, + input wire [M_COUNT*2-1:0] m_axi_bresp, + input wire [M_COUNT*BUSER_WIDTH-1:0] m_axi_buser, + input wire [M_COUNT-1:0] m_axi_bvalid, + output wire [M_COUNT-1:0] m_axi_bready +); + +parameter CL_S_COUNT = $clog2(S_COUNT); +parameter CL_M_COUNT = $clog2(M_COUNT); +parameter M_COUNT_P1 = M_COUNT+1; +parameter CL_M_COUNT_P1 = $clog2(M_COUNT_P1); + +integer i; + +// check configuration +initial begin + if (M_ID_WIDTH < S_ID_WIDTH+$clog2(S_COUNT)) begin + $error("Error: M_ID_WIDTH must be at least $clog2(S_COUNT) larger than S_ID_WIDTH (instance %m)"); + $finish; + end + + for (i = 0; i < M_COUNT*M_REGIONS; i = i + 1) begin + if (M_ADDR_WIDTH[i*32 +: 32] && (M_ADDR_WIDTH[i*32 +: 32] < 12 || M_ADDR_WIDTH[i*32 +: 32] > ADDR_WIDTH)) begin + $error("Error: value out of range (instance %m)"); + $finish; + end + end +end + +wire [S_COUNT*S_ID_WIDTH-1:0] int_s_axi_awid; +wire [S_COUNT*ADDR_WIDTH-1:0] int_s_axi_awaddr; +wire [S_COUNT*8-1:0] int_s_axi_awlen; +wire [S_COUNT*3-1:0] int_s_axi_awsize; +wire [S_COUNT*2-1:0] int_s_axi_awburst; +wire [S_COUNT-1:0] int_s_axi_awlock; +wire [S_COUNT*4-1:0] int_s_axi_awcache; +wire [S_COUNT*3-1:0] int_s_axi_awprot; +wire [S_COUNT*4-1:0] int_s_axi_awqos; +wire [S_COUNT*4-1:0] int_s_axi_awregion; +wire [S_COUNT*AWUSER_WIDTH-1:0] int_s_axi_awuser; +wire [S_COUNT-1:0] int_s_axi_awvalid; +wire [S_COUNT-1:0] int_s_axi_awready; + +wire [S_COUNT*M_COUNT-1:0] int_axi_awvalid; +wire [M_COUNT*S_COUNT-1:0] int_axi_awready; + +wire [S_COUNT*DATA_WIDTH-1:0] int_s_axi_wdata; +wire [S_COUNT*STRB_WIDTH-1:0] int_s_axi_wstrb; +wire [S_COUNT-1:0] int_s_axi_wlast; +wire [S_COUNT*WUSER_WIDTH-1:0] int_s_axi_wuser; +wire [S_COUNT-1:0] int_s_axi_wvalid; +wire [S_COUNT-1:0] int_s_axi_wready; + +wire [S_COUNT*M_COUNT-1:0] int_axi_wvalid; +wire [M_COUNT*S_COUNT-1:0] int_axi_wready; + +wire 
[M_COUNT*M_ID_WIDTH-1:0] int_m_axi_bid; +wire [M_COUNT*2-1:0] int_m_axi_bresp; +wire [M_COUNT*BUSER_WIDTH-1:0] int_m_axi_buser; +wire [M_COUNT-1:0] int_m_axi_bvalid; +wire [M_COUNT-1:0] int_m_axi_bready; + +wire [M_COUNT*S_COUNT-1:0] int_axi_bvalid; +wire [S_COUNT*M_COUNT-1:0] int_axi_bready; + +generate + + genvar m, n; + + for (m = 0; m < S_COUNT; m = m + 1) begin : s_ifaces + // address decode and admission control + wire [CL_M_COUNT-1:0] a_select; + + wire m_axi_avalid; + wire m_axi_aready; + + wire [CL_M_COUNT-1:0] m_wc_select; + wire m_wc_decerr; + wire m_wc_valid; + wire m_wc_ready; + + wire m_rc_decerr; + wire m_rc_valid; + wire m_rc_ready; + + wire [S_ID_WIDTH-1:0] s_cpl_id; + wire s_cpl_valid; + + axi_crossbar_addr #( + .S(m), + .S_COUNT(S_COUNT), + .M_COUNT(M_COUNT), + .ADDR_WIDTH(ADDR_WIDTH), + .ID_WIDTH(S_ID_WIDTH), + .S_THREADS(S_THREADS[m*32 +: 32]), + .S_ACCEPT(S_ACCEPT[m*32 +: 32]), + .M_REGIONS(M_REGIONS), + .M_BASE_ADDR(M_BASE_ADDR), + .M_ADDR_WIDTH(M_ADDR_WIDTH), + .M_CONNECT(M_CONNECT), + .M_SECURE(M_SECURE), + .WC_OUTPUT(1) + ) + addr_inst ( + .clk(clk), + .rst(rst), + + /* + * Address input + */ + .s_axi_aid(int_s_axi_awid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .s_axi_aaddr(int_s_axi_awaddr[m*ADDR_WIDTH +: ADDR_WIDTH]), + .s_axi_aprot(int_s_axi_awprot[m*3 +: 3]), + .s_axi_aqos(int_s_axi_awqos[m*4 +: 4]), + .s_axi_avalid(int_s_axi_awvalid[m]), + .s_axi_aready(int_s_axi_awready[m]), + + /* + * Address output + */ + .m_axi_aregion(int_s_axi_awregion[m*4 +: 4]), + .m_select(a_select), + .m_axi_avalid(m_axi_avalid), + .m_axi_aready(m_axi_aready), + + /* + * Write command output + */ + .m_wc_select(m_wc_select), + .m_wc_decerr(m_wc_decerr), + .m_wc_valid(m_wc_valid), + .m_wc_ready(m_wc_ready), + + /* + * Response command output + */ + .m_rc_decerr(m_rc_decerr), + .m_rc_valid(m_rc_valid), + .m_rc_ready(m_rc_ready), + + /* + * Completion input + */ + .s_cpl_id(s_cpl_id), + .s_cpl_valid(s_cpl_valid) + ); + + assign int_axi_awvalid[m*M_COUNT +: M_COUNT] = m_axi_avalid << a_select; + assign m_axi_aready = int_axi_awready[a_select*S_COUNT+m]; + + // write command handling + reg [CL_M_COUNT-1:0] w_select_reg = 0, w_select_next; + reg w_drop_reg = 1'b0, w_drop_next; + reg w_select_valid_reg = 1'b0, w_select_valid_next; + + assign m_wc_ready = !w_select_valid_reg; + + always @* begin + w_select_next = w_select_reg; + w_drop_next = w_drop_reg && !(int_s_axi_wvalid[m] && int_s_axi_wready[m] && int_s_axi_wlast[m]); + w_select_valid_next = w_select_valid_reg && !(int_s_axi_wvalid[m] && int_s_axi_wready[m] && int_s_axi_wlast[m]); + + if (m_wc_valid && !w_select_valid_reg) begin + w_select_next = m_wc_select; + w_drop_next = m_wc_decerr; + w_select_valid_next = m_wc_valid; + end + end + + always @(posedge clk) begin + if (rst) begin + w_select_valid_reg <= 1'b0; + end else begin + w_select_valid_reg <= w_select_valid_next; + end + + w_select_reg <= w_select_next; + w_drop_reg <= w_drop_next; + end + + // write data forwarding + assign int_axi_wvalid[m*M_COUNT +: M_COUNT] = (int_s_axi_wvalid[m] && w_select_valid_reg && !w_drop_reg) << w_select_reg; + assign int_s_axi_wready[m] = int_axi_wready[w_select_reg*S_COUNT+m] || w_drop_reg; + + // decode error handling + reg [S_ID_WIDTH-1:0] decerr_m_axi_bid_reg = {S_ID_WIDTH{1'b0}}, decerr_m_axi_bid_next; + reg decerr_m_axi_bvalid_reg = 1'b0, decerr_m_axi_bvalid_next; + wire decerr_m_axi_bready; + + assign m_rc_ready = !decerr_m_axi_bvalid_reg; + + always @* begin + decerr_m_axi_bid_next = decerr_m_axi_bid_reg; + decerr_m_axi_bvalid_next = 
decerr_m_axi_bvalid_reg; + + if (decerr_m_axi_bvalid_reg) begin + if (decerr_m_axi_bready) begin + decerr_m_axi_bvalid_next = 1'b0; + end + end else if (m_rc_valid && m_rc_ready) begin + decerr_m_axi_bid_next = int_s_axi_awid[m*S_ID_WIDTH +: S_ID_WIDTH]; + decerr_m_axi_bvalid_next = 1'b1; + end + end + + always @(posedge clk) begin + if (rst) begin + decerr_m_axi_bvalid_reg <= 1'b0; + end else begin + decerr_m_axi_bvalid_reg <= decerr_m_axi_bvalid_next; + end + + decerr_m_axi_bid_reg <= decerr_m_axi_bid_next; + end + + // write response arbitration + wire [M_COUNT_P1-1:0] b_request; + wire [M_COUNT_P1-1:0] b_acknowledge; + wire [M_COUNT_P1-1:0] b_grant; + wire b_grant_valid; + wire [CL_M_COUNT_P1-1:0] b_grant_encoded; + + arbiter #( + .PORTS(M_COUNT_P1), + .ARB_TYPE_ROUND_ROBIN(1), + .ARB_BLOCK(1), + .ARB_BLOCK_ACK(1), + .ARB_LSB_HIGH_PRIORITY(1) + ) + b_arb_inst ( + .clk(clk), + .rst(rst), + .request(b_request), + .acknowledge(b_acknowledge), + .grant(b_grant), + .grant_valid(b_grant_valid), + .grant_encoded(b_grant_encoded) + ); + + // write response mux + wire [S_ID_WIDTH-1:0] m_axi_bid_mux = {decerr_m_axi_bid_reg, int_m_axi_bid} >> b_grant_encoded*M_ID_WIDTH; + wire [1:0] m_axi_bresp_mux = {2'b11, int_m_axi_bresp} >> b_grant_encoded*2; + wire [BUSER_WIDTH-1:0] m_axi_buser_mux = {{BUSER_WIDTH{1'b0}}, int_m_axi_buser} >> b_grant_encoded*BUSER_WIDTH; + wire m_axi_bvalid_mux = ({decerr_m_axi_bvalid_reg, int_m_axi_bvalid} >> b_grant_encoded) & b_grant_valid; + wire m_axi_bready_mux; + + assign int_axi_bready[m*M_COUNT +: M_COUNT] = (b_grant_valid && m_axi_bready_mux) << b_grant_encoded; + assign decerr_m_axi_bready = (b_grant_valid && m_axi_bready_mux) && (b_grant_encoded == M_COUNT_P1-1); + + for (n = 0; n < M_COUNT; n = n + 1) begin + assign b_request[n] = int_axi_bvalid[n*S_COUNT+m] && !b_grant[n]; + assign b_acknowledge[n] = b_grant[n] && int_axi_bvalid[n*S_COUNT+m] && m_axi_bready_mux; + end + + assign b_request[M_COUNT_P1-1] = decerr_m_axi_bvalid_reg && !b_grant[M_COUNT_P1-1]; + assign b_acknowledge[M_COUNT_P1-1] = b_grant[M_COUNT_P1-1] && decerr_m_axi_bvalid_reg && m_axi_bready_mux; + + assign s_cpl_id = m_axi_bid_mux; + assign s_cpl_valid = m_axi_bvalid_mux && m_axi_bready_mux; + + // S side register + axi_register_wr #( + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .ID_WIDTH(S_ID_WIDTH), + .AWUSER_ENABLE(AWUSER_ENABLE), + .AWUSER_WIDTH(AWUSER_WIDTH), + .WUSER_ENABLE(WUSER_ENABLE), + .WUSER_WIDTH(WUSER_WIDTH), + .BUSER_ENABLE(BUSER_ENABLE), + .BUSER_WIDTH(BUSER_WIDTH), + .AW_REG_TYPE(S_AW_REG_TYPE[m*2 +: 2]), + .W_REG_TYPE(S_W_REG_TYPE[m*2 +: 2]), + .B_REG_TYPE(S_B_REG_TYPE[m*2 +: 2]) + ) + reg_inst ( + .clk(clk), + .rst(rst), + .s_axi_awid(s_axi_awid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .s_axi_awaddr(s_axi_awaddr[m*ADDR_WIDTH +: ADDR_WIDTH]), + .s_axi_awlen(s_axi_awlen[m*8 +: 8]), + .s_axi_awsize(s_axi_awsize[m*3 +: 3]), + .s_axi_awburst(s_axi_awburst[m*2 +: 2]), + .s_axi_awlock(s_axi_awlock[m]), + .s_axi_awcache(s_axi_awcache[m*4 +: 4]), + .s_axi_awprot(s_axi_awprot[m*3 +: 3]), + .s_axi_awqos(s_axi_awqos[m*4 +: 4]), + .s_axi_awregion(4'd0), + .s_axi_awuser(s_axi_awuser[m*AWUSER_WIDTH +: AWUSER_WIDTH]), + .s_axi_awvalid(s_axi_awvalid[m]), + .s_axi_awready(s_axi_awready[m]), + .s_axi_wdata(s_axi_wdata[m*DATA_WIDTH +: DATA_WIDTH]), + .s_axi_wstrb(s_axi_wstrb[m*STRB_WIDTH +: STRB_WIDTH]), + .s_axi_wlast(s_axi_wlast[m]), + .s_axi_wuser(s_axi_wuser[m*WUSER_WIDTH +: WUSER_WIDTH]), + .s_axi_wvalid(s_axi_wvalid[m]), + .s_axi_wready(s_axi_wready[m]), + 
.s_axi_bid(s_axi_bid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .s_axi_bresp(s_axi_bresp[m*2 +: 2]), + .s_axi_buser(s_axi_buser[m*BUSER_WIDTH +: BUSER_WIDTH]), + .s_axi_bvalid(s_axi_bvalid[m]), + .s_axi_bready(s_axi_bready[m]), + .m_axi_awid(int_s_axi_awid[m*S_ID_WIDTH +: S_ID_WIDTH]), + .m_axi_awaddr(int_s_axi_awaddr[m*ADDR_WIDTH +: ADDR_WIDTH]), + .m_axi_awlen(int_s_axi_awlen[m*8 +: 8]), + .m_axi_awsize(int_s_axi_awsize[m*3 +: 3]), + .m_axi_awburst(int_s_axi_awburst[m*2 +: 2]), + .m_axi_awlock(int_s_axi_awlock[m]), + .m_axi_awcache(int_s_axi_awcache[m*4 +: 4]), + .m_axi_awprot(int_s_axi_awprot[m*3 +: 3]), + .m_axi_awqos(int_s_axi_awqos[m*4 +: 4]), + .m_axi_awregion(), + .m_axi_awuser(int_s_axi_awuser[m*AWUSER_WIDTH +: AWUSER_WIDTH]), + .m_axi_awvalid(int_s_axi_awvalid[m]), + .m_axi_awready(int_s_axi_awready[m]), + .m_axi_wdata(int_s_axi_wdata[m*DATA_WIDTH +: DATA_WIDTH]), + .m_axi_wstrb(int_s_axi_wstrb[m*STRB_WIDTH +: STRB_WIDTH]), + .m_axi_wlast(int_s_axi_wlast[m]), + .m_axi_wuser(int_s_axi_wuser[m*WUSER_WIDTH +: WUSER_WIDTH]), + .m_axi_wvalid(int_s_axi_wvalid[m]), + .m_axi_wready(int_s_axi_wready[m]), + .m_axi_bid(m_axi_bid_mux), + .m_axi_bresp(m_axi_bresp_mux), + .m_axi_buser(m_axi_buser_mux), + .m_axi_bvalid(m_axi_bvalid_mux), + .m_axi_bready(m_axi_bready_mux) + ); + end // s_ifaces + + for (n = 0; n < M_COUNT; n = n + 1) begin : m_ifaces + // in-flight transaction count + wire trans_start; + wire trans_complete; + reg [$clog2(M_ISSUE[n*32 +: 32]+1)-1:0] trans_count_reg = 0; + + wire trans_limit = trans_count_reg >= M_ISSUE[n*32 +: 32] && !trans_complete; + + always @(posedge clk) begin + if (rst) begin + trans_count_reg <= 0; + end else begin + if (trans_start && !trans_complete) begin + trans_count_reg <= trans_count_reg + 1; + end else if (!trans_start && trans_complete) begin + trans_count_reg <= trans_count_reg - 1; + end + end + end + + // address arbitration + reg [CL_S_COUNT-1:0] w_select_reg = 0, w_select_next; + reg w_select_valid_reg = 1'b0, w_select_valid_next; + reg w_select_new_reg = 1'b0, w_select_new_next; + + wire [S_COUNT-1:0] a_request; + wire [S_COUNT-1:0] a_acknowledge; + wire [S_COUNT-1:0] a_grant; + wire a_grant_valid; + wire [CL_S_COUNT-1:0] a_grant_encoded; + + arbiter #( + .PORTS(S_COUNT), + .ARB_TYPE_ROUND_ROBIN(1), + .ARB_BLOCK(1), + .ARB_BLOCK_ACK(1), + .ARB_LSB_HIGH_PRIORITY(1) + ) + a_arb_inst ( + .clk(clk), + .rst(rst), + .request(a_request), + .acknowledge(a_acknowledge), + .grant(a_grant), + .grant_valid(a_grant_valid), + .grant_encoded(a_grant_encoded) + ); + + // address mux + wire [M_ID_WIDTH-1:0] s_axi_awid_mux = int_s_axi_awid[a_grant_encoded*S_ID_WIDTH +: S_ID_WIDTH] | (a_grant_encoded << S_ID_WIDTH); + wire [ADDR_WIDTH-1:0] s_axi_awaddr_mux = int_s_axi_awaddr[a_grant_encoded*ADDR_WIDTH +: ADDR_WIDTH]; + wire [7:0] s_axi_awlen_mux = int_s_axi_awlen[a_grant_encoded*8 +: 8]; + wire [2:0] s_axi_awsize_mux = int_s_axi_awsize[a_grant_encoded*3 +: 3]; + wire [1:0] s_axi_awburst_mux = int_s_axi_awburst[a_grant_encoded*2 +: 2]; + wire s_axi_awlock_mux = int_s_axi_awlock[a_grant_encoded]; + wire [3:0] s_axi_awcache_mux = int_s_axi_awcache[a_grant_encoded*4 +: 4]; + wire [2:0] s_axi_awprot_mux = int_s_axi_awprot[a_grant_encoded*3 +: 3]; + wire [3:0] s_axi_awqos_mux = int_s_axi_awqos[a_grant_encoded*4 +: 4]; + wire [3:0] s_axi_awregion_mux = int_s_axi_awregion[a_grant_encoded*4 +: 4]; + wire [AWUSER_WIDTH-1:0] s_axi_awuser_mux = int_s_axi_awuser[a_grant_encoded*AWUSER_WIDTH +: AWUSER_WIDTH]; + wire s_axi_awvalid_mux = int_axi_awvalid[a_grant_encoded*M_COUNT+n] && 
a_grant_valid; + wire s_axi_awready_mux; + + assign int_axi_awready[n*S_COUNT +: S_COUNT] = (a_grant_valid && s_axi_awready_mux) << a_grant_encoded; + + for (m = 0; m < S_COUNT; m = m + 1) begin + assign a_request[m] = int_axi_awvalid[m*M_COUNT+n] && !a_grant[m] && !trans_limit && !w_select_valid_next; + assign a_acknowledge[m] = a_grant[m] && int_axi_awvalid[m*M_COUNT+n] && s_axi_awready_mux; + end + + assign trans_start = s_axi_awvalid_mux && s_axi_awready_mux && a_grant_valid; + + // write data mux + wire [DATA_WIDTH-1:0] s_axi_wdata_mux = int_s_axi_wdata[w_select_reg*DATA_WIDTH +: DATA_WIDTH]; + wire [STRB_WIDTH-1:0] s_axi_wstrb_mux = int_s_axi_wstrb[w_select_reg*STRB_WIDTH +: STRB_WIDTH]; + wire s_axi_wlast_mux = int_s_axi_wlast[w_select_reg]; + wire [WUSER_WIDTH-1:0] s_axi_wuser_mux = int_s_axi_wuser[w_select_reg*WUSER_WIDTH +: WUSER_WIDTH]; + wire s_axi_wvalid_mux = int_axi_wvalid[w_select_reg*M_COUNT+n] && w_select_valid_reg; + wire s_axi_wready_mux; + + assign int_axi_wready[n*S_COUNT +: S_COUNT] = (w_select_valid_reg && s_axi_wready_mux) << w_select_reg; + + // write data routing + always @* begin + w_select_next = w_select_reg; + w_select_valid_next = w_select_valid_reg && !(s_axi_wvalid_mux && s_axi_wready_mux && s_axi_wlast_mux); + w_select_new_next = w_select_new_reg || !a_grant_valid || a_acknowledge; + + if (a_grant_valid && !w_select_valid_reg && w_select_new_reg) begin + w_select_next = a_grant_encoded; + w_select_valid_next = a_grant_valid; + w_select_new_next = 1'b0; + end + end + + always @(posedge clk) begin + if (rst) begin + w_select_valid_reg <= 1'b0; + w_select_new_reg <= 1'b1; + end else begin + w_select_valid_reg <= w_select_valid_next; + w_select_new_reg <= w_select_new_next; + end + + w_select_reg <= w_select_next; + end + + // write response forwarding + wire [CL_S_COUNT-1:0] b_select = m_axi_bid[n*M_ID_WIDTH +: M_ID_WIDTH] >> S_ID_WIDTH; + + assign int_axi_bvalid[n*S_COUNT +: S_COUNT] = int_m_axi_bvalid[n] << b_select; + assign int_m_axi_bready[n] = int_axi_bready[b_select*M_COUNT+n]; + + assign trans_complete = int_m_axi_bvalid[n] && int_m_axi_bready[n]; + + // M side register + axi_register_wr #( + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .ID_WIDTH(M_ID_WIDTH), + .AWUSER_ENABLE(AWUSER_ENABLE), + .AWUSER_WIDTH(AWUSER_WIDTH), + .WUSER_ENABLE(WUSER_ENABLE), + .WUSER_WIDTH(WUSER_WIDTH), + .BUSER_ENABLE(BUSER_ENABLE), + .BUSER_WIDTH(BUSER_WIDTH), + .AW_REG_TYPE(M_AW_REG_TYPE[n*2 +: 2]), + .W_REG_TYPE(M_W_REG_TYPE[n*2 +: 2]), + .B_REG_TYPE(M_B_REG_TYPE[n*2 +: 2]) + ) + reg_inst ( + .clk(clk), + .rst(rst), + .s_axi_awid(s_axi_awid_mux), + .s_axi_awaddr(s_axi_awaddr_mux), + .s_axi_awlen(s_axi_awlen_mux), + .s_axi_awsize(s_axi_awsize_mux), + .s_axi_awburst(s_axi_awburst_mux), + .s_axi_awlock(s_axi_awlock_mux), + .s_axi_awcache(s_axi_awcache_mux), + .s_axi_awprot(s_axi_awprot_mux), + .s_axi_awqos(s_axi_awqos_mux), + .s_axi_awregion(s_axi_awregion_mux), + .s_axi_awuser(s_axi_awuser_mux), + .s_axi_awvalid(s_axi_awvalid_mux), + .s_axi_awready(s_axi_awready_mux), + .s_axi_wdata(s_axi_wdata_mux), + .s_axi_wstrb(s_axi_wstrb_mux), + .s_axi_wlast(s_axi_wlast_mux), + .s_axi_wuser(s_axi_wuser_mux), + .s_axi_wvalid(s_axi_wvalid_mux), + .s_axi_wready(s_axi_wready_mux), + .s_axi_bid(int_m_axi_bid[n*M_ID_WIDTH +: M_ID_WIDTH]), + .s_axi_bresp(int_m_axi_bresp[n*2 +: 2]), + .s_axi_buser(int_m_axi_buser[n*BUSER_WIDTH +: BUSER_WIDTH]), + .s_axi_bvalid(int_m_axi_bvalid[n]), + .s_axi_bready(int_m_axi_bready[n]), + 
.m_axi_awid(m_axi_awid[n*M_ID_WIDTH +: M_ID_WIDTH]), + .m_axi_awaddr(m_axi_awaddr[n*ADDR_WIDTH +: ADDR_WIDTH]), + .m_axi_awlen(m_axi_awlen[n*8 +: 8]), + .m_axi_awsize(m_axi_awsize[n*3 +: 3]), + .m_axi_awburst(m_axi_awburst[n*2 +: 2]), + .m_axi_awlock(m_axi_awlock[n]), + .m_axi_awcache(m_axi_awcache[n*4 +: 4]), + .m_axi_awprot(m_axi_awprot[n*3 +: 3]), + .m_axi_awqos(m_axi_awqos[n*4 +: 4]), + .m_axi_awregion(m_axi_awregion[n*4 +: 4]), + .m_axi_awuser(m_axi_awuser[n*AWUSER_WIDTH +: AWUSER_WIDTH]), + .m_axi_awvalid(m_axi_awvalid[n]), + .m_axi_awready(m_axi_awready[n]), + .m_axi_wdata(m_axi_wdata[n*DATA_WIDTH +: DATA_WIDTH]), + .m_axi_wstrb(m_axi_wstrb[n*STRB_WIDTH +: STRB_WIDTH]), + .m_axi_wlast(m_axi_wlast[n]), + .m_axi_wuser(m_axi_wuser[n*WUSER_WIDTH +: WUSER_WIDTH]), + .m_axi_wvalid(m_axi_wvalid[n]), + .m_axi_wready(m_axi_wready[n]), + .m_axi_bid(m_axi_bid[n*M_ID_WIDTH +: M_ID_WIDTH]), + .m_axi_bresp(m_axi_bresp[n*2 +: 2]), + .m_axi_buser(m_axi_buser[n*BUSER_WIDTH +: BUSER_WIDTH]), + .m_axi_bvalid(m_axi_bvalid[n]), + .m_axi_bready(m_axi_bready[n]) + ); + end // m_ifaces + +endgenerate + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_crossbar_wrapper.v b/xls/modules/zstd/external/axi_crossbar_wrapper.v new file mode 100644 index 0000000000..c244575e98 --- /dev/null +++ b/xls/modules/zstd/external/axi_crossbar_wrapper.v @@ -0,0 +1,564 @@ +/* + +Copyright (c) 2020 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 4x1 crossbar (wrapper) + */ +module axi_crossbar_wrapper # +( + // Width of data bus in bits + parameter DATA_WIDTH = 32, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // Width of wstrb (width of data bus in words) + parameter STRB_WIDTH = (DATA_WIDTH/8), + // Input ID field width (from AXI masters) + parameter S_ID_WIDTH = 8, + // Output ID field width (towards AXI slaves) + // Additional bits required for response routing + parameter M_ID_WIDTH = S_ID_WIDTH+$clog2(S_COUNT), + // Propagate awuser signal + parameter AWUSER_ENABLE = 0, + // Width of awuser signal + parameter AWUSER_WIDTH = 1, + // Propagate wuser signal + parameter WUSER_ENABLE = 0, + // Width of wuser signal + parameter WUSER_WIDTH = 1, + // Propagate buser signal + parameter BUSER_ENABLE = 0, + // Width of buser signal + parameter BUSER_WIDTH = 1, + // Propagate aruser signal + parameter ARUSER_ENABLE = 0, + // Width of aruser signal + parameter ARUSER_WIDTH = 1, + // Propagate ruser signal + parameter RUSER_ENABLE = 0, + // Width of ruser signal + parameter RUSER_WIDTH = 1, + // Number of concurrent unique IDs + parameter S00_THREADS = 2, + // Number of concurrent operations + parameter S00_ACCEPT = 16, + // Number of concurrent unique IDs + parameter S01_THREADS = 2, + // Number of concurrent operations + parameter S01_ACCEPT = 16, + // Number of concurrent unique IDs + parameter S02_THREADS = 2, + // Number of concurrent operations + parameter S02_ACCEPT = 16, + // Number of concurrent unique IDs + parameter S03_THREADS = 2, + // Number of concurrent operations + parameter S03_ACCEPT = 16, + // Number of regions per master interface + parameter M_REGIONS = 1, + // Master interface base addresses + // M_REGIONS concatenated fields of ADDR_WIDTH bits + parameter M00_BASE_ADDR = 0, + // Master interface address widths + // M_REGIONS concatenated fields of 32 bits + parameter M00_ADDR_WIDTH = {M_REGIONS{32'd24}}, + // Read connections between interfaces + // S_COUNT bits + parameter M00_CONNECT_READ = 4'b1111, + // Write connections between interfaces + // S_COUNT bits + parameter M00_CONNECT_WRITE = 4'b1111, + // Number of concurrent operations for each master interface + parameter M00_ISSUE = 4, + // Secure master (fail operations based on awprot/arprot) + parameter M00_SECURE = 0, + // Slave interface AW channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S00_AW_REG_TYPE = 0, + // Slave interface W channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S00_W_REG_TYPE = 0, + // Slave interface B channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S00_B_REG_TYPE = 1, + // Slave interface AR channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S00_AR_REG_TYPE = 0, + // Slave interface R channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S00_R_REG_TYPE = 2, + // Slave interface AW channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S01_AW_REG_TYPE = 0, + // Slave interface W channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S01_W_REG_TYPE = 0, + // Slave interface B channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter 
S01_B_REG_TYPE = 1, + // Slave interface AR channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S01_AR_REG_TYPE = 0, + // Slave interface R channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S01_R_REG_TYPE = 2, + // Slave interface AW channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S02_AW_REG_TYPE = 0, + // Slave interface W channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S02_W_REG_TYPE = 0, + // Slave interface B channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S02_B_REG_TYPE = 1, + // Slave interface AR channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S02_AR_REG_TYPE = 0, + // Slave interface R channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S02_R_REG_TYPE = 2, + // Slave interface AW channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S03_AW_REG_TYPE = 0, + // Slave interface W channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S03_W_REG_TYPE = 0, + // Slave interface B channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S03_B_REG_TYPE = 1, + // Slave interface AR channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S03_AR_REG_TYPE = 0, + // Slave interface R channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter S03_R_REG_TYPE = 2, + // Master interface AW channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M00_AW_REG_TYPE = 1, + // Master interface W channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M00_W_REG_TYPE = 2, + // Master interface B channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M00_B_REG_TYPE = 0, + // Master interface AR channel register type (output) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M00_AR_REG_TYPE = 1, + // Master interface R channel register type (input) + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter M00_R_REG_TYPE = 0 +) +( + input wire clk, + input wire rst, + + /* + * AXI slave interface + */ + input wire [S_ID_WIDTH-1:0] s00_axi_awid, + input wire [ADDR_WIDTH-1:0] s00_axi_awaddr, + input wire [7:0] s00_axi_awlen, + input wire [2:0] s00_axi_awsize, + input wire [1:0] s00_axi_awburst, + input wire s00_axi_awlock, + input wire [3:0] s00_axi_awcache, + input wire [2:0] s00_axi_awprot, + input wire [3:0] s00_axi_awqos, + input wire [AWUSER_WIDTH-1:0] s00_axi_awuser, + input wire s00_axi_awvalid, + output wire s00_axi_awready, + input wire [DATA_WIDTH-1:0] s00_axi_wdata, + input wire [STRB_WIDTH-1:0] s00_axi_wstrb, + input wire s00_axi_wlast, + input wire [WUSER_WIDTH-1:0] s00_axi_wuser, + input wire s00_axi_wvalid, + output wire s00_axi_wready, + output wire [S_ID_WIDTH-1:0] s00_axi_bid, + output wire [1:0] s00_axi_bresp, + output wire [BUSER_WIDTH-1:0] s00_axi_buser, + output wire s00_axi_bvalid, + input wire s00_axi_bready, + input wire [S_ID_WIDTH-1:0] s00_axi_arid, + input wire [ADDR_WIDTH-1:0] s00_axi_araddr, + input wire [7:0] s00_axi_arlen, + input wire [2:0] s00_axi_arsize, + input 
wire [1:0] s00_axi_arburst, + input wire s00_axi_arlock, + input wire [3:0] s00_axi_arcache, + input wire [2:0] s00_axi_arprot, + input wire [3:0] s00_axi_arqos, + input wire [ARUSER_WIDTH-1:0] s00_axi_aruser, + input wire s00_axi_arvalid, + output wire s00_axi_arready, + output wire [S_ID_WIDTH-1:0] s00_axi_rid, + output wire [DATA_WIDTH-1:0] s00_axi_rdata, + output wire [1:0] s00_axi_rresp, + output wire s00_axi_rlast, + output wire [RUSER_WIDTH-1:0] s00_axi_ruser, + output wire s00_axi_rvalid, + input wire s00_axi_rready, + + input wire [S_ID_WIDTH-1:0] s01_axi_awid, + input wire [ADDR_WIDTH-1:0] s01_axi_awaddr, + input wire [7:0] s01_axi_awlen, + input wire [2:0] s01_axi_awsize, + input wire [1:0] s01_axi_awburst, + input wire s01_axi_awlock, + input wire [3:0] s01_axi_awcache, + input wire [2:0] s01_axi_awprot, + input wire [3:0] s01_axi_awqos, + input wire [AWUSER_WIDTH-1:0] s01_axi_awuser, + input wire s01_axi_awvalid, + output wire s01_axi_awready, + input wire [DATA_WIDTH-1:0] s01_axi_wdata, + input wire [STRB_WIDTH-1:0] s01_axi_wstrb, + input wire s01_axi_wlast, + input wire [WUSER_WIDTH-1:0] s01_axi_wuser, + input wire s01_axi_wvalid, + output wire s01_axi_wready, + output wire [S_ID_WIDTH-1:0] s01_axi_bid, + output wire [1:0] s01_axi_bresp, + output wire [BUSER_WIDTH-1:0] s01_axi_buser, + output wire s01_axi_bvalid, + input wire s01_axi_bready, + input wire [S_ID_WIDTH-1:0] s01_axi_arid, + input wire [ADDR_WIDTH-1:0] s01_axi_araddr, + input wire [7:0] s01_axi_arlen, + input wire [2:0] s01_axi_arsize, + input wire [1:0] s01_axi_arburst, + input wire s01_axi_arlock, + input wire [3:0] s01_axi_arcache, + input wire [2:0] s01_axi_arprot, + input wire [3:0] s01_axi_arqos, + input wire [ARUSER_WIDTH-1:0] s01_axi_aruser, + input wire s01_axi_arvalid, + output wire s01_axi_arready, + output wire [S_ID_WIDTH-1:0] s01_axi_rid, + output wire [DATA_WIDTH-1:0] s01_axi_rdata, + output wire [1:0] s01_axi_rresp, + output wire s01_axi_rlast, + output wire [RUSER_WIDTH-1:0] s01_axi_ruser, + output wire s01_axi_rvalid, + input wire s01_axi_rready, + + input wire [S_ID_WIDTH-1:0] s02_axi_awid, + input wire [ADDR_WIDTH-1:0] s02_axi_awaddr, + input wire [7:0] s02_axi_awlen, + input wire [2:0] s02_axi_awsize, + input wire [1:0] s02_axi_awburst, + input wire s02_axi_awlock, + input wire [3:0] s02_axi_awcache, + input wire [2:0] s02_axi_awprot, + input wire [3:0] s02_axi_awqos, + input wire [AWUSER_WIDTH-1:0] s02_axi_awuser, + input wire s02_axi_awvalid, + output wire s02_axi_awready, + input wire [DATA_WIDTH-1:0] s02_axi_wdata, + input wire [STRB_WIDTH-1:0] s02_axi_wstrb, + input wire s02_axi_wlast, + input wire [WUSER_WIDTH-1:0] s02_axi_wuser, + input wire s02_axi_wvalid, + output wire s02_axi_wready, + output wire [S_ID_WIDTH-1:0] s02_axi_bid, + output wire [1:0] s02_axi_bresp, + output wire [BUSER_WIDTH-1:0] s02_axi_buser, + output wire s02_axi_bvalid, + input wire s02_axi_bready, + input wire [S_ID_WIDTH-1:0] s02_axi_arid, + input wire [ADDR_WIDTH-1:0] s02_axi_araddr, + input wire [7:0] s02_axi_arlen, + input wire [2:0] s02_axi_arsize, + input wire [1:0] s02_axi_arburst, + input wire s02_axi_arlock, + input wire [3:0] s02_axi_arcache, + input wire [2:0] s02_axi_arprot, + input wire [3:0] s02_axi_arqos, + input wire [ARUSER_WIDTH-1:0] s02_axi_aruser, + input wire s02_axi_arvalid, + output wire s02_axi_arready, + output wire [S_ID_WIDTH-1:0] s02_axi_rid, + output wire [DATA_WIDTH-1:0] s02_axi_rdata, + output wire [1:0] s02_axi_rresp, + output wire s02_axi_rlast, + output wire [RUSER_WIDTH-1:0] 
s02_axi_ruser, + output wire s02_axi_rvalid, + input wire s02_axi_rready, + + input wire [S_ID_WIDTH-1:0] s03_axi_awid, + input wire [ADDR_WIDTH-1:0] s03_axi_awaddr, + input wire [7:0] s03_axi_awlen, + input wire [2:0] s03_axi_awsize, + input wire [1:0] s03_axi_awburst, + input wire s03_axi_awlock, + input wire [3:0] s03_axi_awcache, + input wire [2:0] s03_axi_awprot, + input wire [3:0] s03_axi_awqos, + input wire [AWUSER_WIDTH-1:0] s03_axi_awuser, + input wire s03_axi_awvalid, + output wire s03_axi_awready, + input wire [DATA_WIDTH-1:0] s03_axi_wdata, + input wire [STRB_WIDTH-1:0] s03_axi_wstrb, + input wire s03_axi_wlast, + input wire [WUSER_WIDTH-1:0] s03_axi_wuser, + input wire s03_axi_wvalid, + output wire s03_axi_wready, + output wire [S_ID_WIDTH-1:0] s03_axi_bid, + output wire [1:0] s03_axi_bresp, + output wire [BUSER_WIDTH-1:0] s03_axi_buser, + output wire s03_axi_bvalid, + input wire s03_axi_bready, + input wire [S_ID_WIDTH-1:0] s03_axi_arid, + input wire [ADDR_WIDTH-1:0] s03_axi_araddr, + input wire [7:0] s03_axi_arlen, + input wire [2:0] s03_axi_arsize, + input wire [1:0] s03_axi_arburst, + input wire s03_axi_arlock, + input wire [3:0] s03_axi_arcache, + input wire [2:0] s03_axi_arprot, + input wire [3:0] s03_axi_arqos, + input wire [ARUSER_WIDTH-1:0] s03_axi_aruser, + input wire s03_axi_arvalid, + output wire s03_axi_arready, + output wire [S_ID_WIDTH-1:0] s03_axi_rid, + output wire [DATA_WIDTH-1:0] s03_axi_rdata, + output wire [1:0] s03_axi_rresp, + output wire s03_axi_rlast, + output wire [RUSER_WIDTH-1:0] s03_axi_ruser, + output wire s03_axi_rvalid, + input wire s03_axi_rready, + + /* + * AXI master interface + */ + output wire [M_ID_WIDTH-1:0] m00_axi_awid, + output wire [ADDR_WIDTH-1:0] m00_axi_awaddr, + output wire [7:0] m00_axi_awlen, + output wire [2:0] m00_axi_awsize, + output wire [1:0] m00_axi_awburst, + output wire m00_axi_awlock, + output wire [3:0] m00_axi_awcache, + output wire [2:0] m00_axi_awprot, + output wire [3:0] m00_axi_awqos, + output wire [3:0] m00_axi_awregion, + output wire [AWUSER_WIDTH-1:0] m00_axi_awuser, + output wire m00_axi_awvalid, + input wire m00_axi_awready, + output wire [DATA_WIDTH-1:0] m00_axi_wdata, + output wire [STRB_WIDTH-1:0] m00_axi_wstrb, + output wire m00_axi_wlast, + output wire [WUSER_WIDTH-1:0] m00_axi_wuser, + output wire m00_axi_wvalid, + input wire m00_axi_wready, + input wire [M_ID_WIDTH-1:0] m00_axi_bid, + input wire [1:0] m00_axi_bresp, + input wire [BUSER_WIDTH-1:0] m00_axi_buser, + input wire m00_axi_bvalid, + output wire m00_axi_bready, + output wire [M_ID_WIDTH-1:0] m00_axi_arid, + output wire [ADDR_WIDTH-1:0] m00_axi_araddr, + output wire [7:0] m00_axi_arlen, + output wire [2:0] m00_axi_arsize, + output wire [1:0] m00_axi_arburst, + output wire m00_axi_arlock, + output wire [3:0] m00_axi_arcache, + output wire [2:0] m00_axi_arprot, + output wire [3:0] m00_axi_arqos, + output wire [3:0] m00_axi_arregion, + output wire [ARUSER_WIDTH-1:0] m00_axi_aruser, + output wire m00_axi_arvalid, + input wire m00_axi_arready, + input wire [M_ID_WIDTH-1:0] m00_axi_rid, + input wire [DATA_WIDTH-1:0] m00_axi_rdata, + input wire [1:0] m00_axi_rresp, + input wire m00_axi_rlast, + input wire [RUSER_WIDTH-1:0] m00_axi_ruser, + input wire m00_axi_rvalid, + output wire m00_axi_rready +); + +localparam S_COUNT = 4; +localparam M_COUNT = 1; + +// parameter sizing helpers +function [ADDR_WIDTH*M_REGIONS-1:0] w_a_r(input [ADDR_WIDTH*M_REGIONS-1:0] val); + w_a_r = val; +endfunction + +function [32*M_REGIONS-1:0] w_32_r(input [32*M_REGIONS-1:0] 
val); + w_32_r = val; +endfunction + +function [S_COUNT-1:0] w_s(input [S_COUNT-1:0] val); + w_s = val; +endfunction + +function [31:0] w_32(input [31:0] val); + w_32 = val; +endfunction + +function [1:0] w_2(input [1:0] val); + w_2 = val; +endfunction + +function w_1(input val); + w_1 = val; +endfunction + +axi_crossbar #( + .S_COUNT(S_COUNT), + .M_COUNT(M_COUNT), + .DATA_WIDTH(DATA_WIDTH), + .ADDR_WIDTH(ADDR_WIDTH), + .STRB_WIDTH(STRB_WIDTH), + .S_ID_WIDTH(S_ID_WIDTH), + .M_ID_WIDTH(M_ID_WIDTH), + .AWUSER_ENABLE(AWUSER_ENABLE), + .AWUSER_WIDTH(AWUSER_WIDTH), + .WUSER_ENABLE(WUSER_ENABLE), + .WUSER_WIDTH(WUSER_WIDTH), + .BUSER_ENABLE(BUSER_ENABLE), + .BUSER_WIDTH(BUSER_WIDTH), + .ARUSER_ENABLE(ARUSER_ENABLE), + .ARUSER_WIDTH(ARUSER_WIDTH), + .RUSER_ENABLE(RUSER_ENABLE), + .RUSER_WIDTH(RUSER_WIDTH), + .S_THREADS({ w_32(S03_THREADS), w_32(S02_THREADS), w_32(S01_THREADS), w_32(S00_THREADS) }), + .S_ACCEPT({ w_32(S03_ACCEPT), w_32(S02_ACCEPT), w_32(S01_ACCEPT), w_32(S00_ACCEPT) }), + .M_REGIONS(M_REGIONS), + .M_BASE_ADDR({ w_a_r(M00_BASE_ADDR) }), + .M_ADDR_WIDTH({ w_32_r(M00_ADDR_WIDTH) }), + .M_CONNECT_READ({ w_s(M00_CONNECT_READ) }), + .M_CONNECT_WRITE({ w_s(M00_CONNECT_WRITE) }), + .M_ISSUE({ w_32(M00_ISSUE) }), + .M_SECURE({ w_1(M00_SECURE) }), + .S_AR_REG_TYPE({ w_2(S03_AR_REG_TYPE), w_2(S02_AR_REG_TYPE), w_2(S01_AR_REG_TYPE), w_2(S00_AR_REG_TYPE) }), + .S_R_REG_TYPE({ w_2(S03_R_REG_TYPE), w_2(S02_R_REG_TYPE), w_2(S01_R_REG_TYPE), w_2(S00_R_REG_TYPE) }), + .S_AW_REG_TYPE({ w_2(S03_AW_REG_TYPE), w_2(S02_AW_REG_TYPE), w_2(S01_AW_REG_TYPE), w_2(S00_AW_REG_TYPE) }), + .S_W_REG_TYPE({ w_2(S03_W_REG_TYPE), w_2(S02_W_REG_TYPE), w_2(S01_W_REG_TYPE), w_2(S00_W_REG_TYPE) }), + .S_B_REG_TYPE({ w_2(S03_B_REG_TYPE), w_2(S02_B_REG_TYPE), w_2(S01_B_REG_TYPE), w_2(S00_B_REG_TYPE) }), + .M_AR_REG_TYPE({ w_2(M00_AR_REG_TYPE) }), + .M_R_REG_TYPE({ w_2(M00_R_REG_TYPE) }), + .M_AW_REG_TYPE({ w_2(M00_AW_REG_TYPE) }), + .M_W_REG_TYPE({ w_2(M00_W_REG_TYPE) }), + .M_B_REG_TYPE({ w_2(M00_B_REG_TYPE) }) +) +axi_crossbar_inst ( + .clk(clk), + .rst(rst), + .s_axi_awid({ s03_axi_awid, s02_axi_awid, s01_axi_awid, s00_axi_awid }), + .s_axi_awaddr({ s03_axi_awaddr, s02_axi_awaddr, s01_axi_awaddr, s00_axi_awaddr }), + .s_axi_awlen({ s03_axi_awlen, s02_axi_awlen, s01_axi_awlen, s00_axi_awlen }), + .s_axi_awsize({ s03_axi_awsize, s02_axi_awsize, s01_axi_awsize, s00_axi_awsize }), + .s_axi_awburst({ s03_axi_awburst, s02_axi_awburst, s01_axi_awburst, s00_axi_awburst }), + .s_axi_awlock({ s03_axi_awlock, s02_axi_awlock, s01_axi_awlock, s00_axi_awlock }), + .s_axi_awcache({ s03_axi_awcache, s02_axi_awcache, s01_axi_awcache, s00_axi_awcache }), + .s_axi_awprot({ s03_axi_awprot, s02_axi_awprot, s01_axi_awprot, s00_axi_awprot }), + .s_axi_awqos({ s03_axi_awqos, s02_axi_awqos, s01_axi_awqos, s00_axi_awqos }), + .s_axi_awuser({ s03_axi_awuser, s02_axi_awuser, s01_axi_awuser, s00_axi_awuser }), + .s_axi_awvalid({ s03_axi_awvalid, s02_axi_awvalid, s01_axi_awvalid, s00_axi_awvalid }), + .s_axi_awready({ s03_axi_awready, s02_axi_awready, s01_axi_awready, s00_axi_awready }), + .s_axi_wdata({ s03_axi_wdata, s02_axi_wdata, s01_axi_wdata, s00_axi_wdata }), + .s_axi_wstrb({ s03_axi_wstrb, s02_axi_wstrb, s01_axi_wstrb, s00_axi_wstrb }), + .s_axi_wlast({ s03_axi_wlast, s02_axi_wlast, s01_axi_wlast, s00_axi_wlast }), + .s_axi_wuser({ s03_axi_wuser, s02_axi_wuser, s01_axi_wuser, s00_axi_wuser }), + .s_axi_wvalid({ s03_axi_wvalid, s02_axi_wvalid, s01_axi_wvalid, s00_axi_wvalid }), + .s_axi_wready({ s03_axi_wready, s02_axi_wready, 
s01_axi_wready, s00_axi_wready }), + .s_axi_bid({ s03_axi_bid, s02_axi_bid, s01_axi_bid, s00_axi_bid }), + .s_axi_bresp({ s03_axi_bresp, s02_axi_bresp, s01_axi_bresp, s00_axi_bresp }), + .s_axi_buser({ s03_axi_buser, s02_axi_buser, s01_axi_buser, s00_axi_buser }), + .s_axi_bvalid({ s03_axi_bvalid, s02_axi_bvalid, s01_axi_bvalid, s00_axi_bvalid }), + .s_axi_bready({ s03_axi_bready, s02_axi_bready, s01_axi_bready, s00_axi_bready }), + .s_axi_arid({ s03_axi_arid, s02_axi_arid, s01_axi_arid, s00_axi_arid }), + .s_axi_araddr({ s03_axi_araddr, s02_axi_araddr, s01_axi_araddr, s00_axi_araddr }), + .s_axi_arlen({ s03_axi_arlen, s02_axi_arlen, s01_axi_arlen, s00_axi_arlen }), + .s_axi_arsize({ s03_axi_arsize, s02_axi_arsize, s01_axi_arsize, s00_axi_arsize }), + .s_axi_arburst({ s03_axi_arburst, s02_axi_arburst, s01_axi_arburst, s00_axi_arburst }), + .s_axi_arlock({ s03_axi_arlock, s02_axi_arlock, s01_axi_arlock, s00_axi_arlock }), + .s_axi_arcache({ s03_axi_arcache, s02_axi_arcache, s01_axi_arcache, s00_axi_arcache }), + .s_axi_arprot({ s03_axi_arprot, s02_axi_arprot, s01_axi_arprot, s00_axi_arprot }), + .s_axi_arqos({ s03_axi_arqos, s02_axi_arqos, s01_axi_arqos, s00_axi_arqos }), + .s_axi_aruser({ s03_axi_aruser, s02_axi_aruser, s01_axi_aruser, s00_axi_aruser }), + .s_axi_arvalid({ s03_axi_arvalid, s02_axi_arvalid, s01_axi_arvalid, s00_axi_arvalid }), + .s_axi_arready({ s03_axi_arready, s02_axi_arready, s01_axi_arready, s00_axi_arready }), + .s_axi_rid({ s03_axi_rid, s02_axi_rid, s01_axi_rid, s00_axi_rid }), + .s_axi_rdata({ s03_axi_rdata, s02_axi_rdata, s01_axi_rdata, s00_axi_rdata }), + .s_axi_rresp({ s03_axi_rresp, s02_axi_rresp, s01_axi_rresp, s00_axi_rresp }), + .s_axi_rlast({ s03_axi_rlast, s02_axi_rlast, s01_axi_rlast, s00_axi_rlast }), + .s_axi_ruser({ s03_axi_ruser, s02_axi_ruser, s01_axi_ruser, s00_axi_ruser }), + .s_axi_rvalid({ s03_axi_rvalid, s02_axi_rvalid, s01_axi_rvalid, s00_axi_rvalid }), + .s_axi_rready({ s03_axi_rready, s02_axi_rready, s01_axi_rready, s00_axi_rready }), + .m_axi_awid({ m00_axi_awid }), + .m_axi_awaddr({ m00_axi_awaddr }), + .m_axi_awlen({ m00_axi_awlen }), + .m_axi_awsize({ m00_axi_awsize }), + .m_axi_awburst({ m00_axi_awburst }), + .m_axi_awlock({ m00_axi_awlock }), + .m_axi_awcache({ m00_axi_awcache }), + .m_axi_awprot({ m00_axi_awprot }), + .m_axi_awqos({ m00_axi_awqos }), + .m_axi_awregion({ m00_axi_awregion }), + .m_axi_awuser({ m00_axi_awuser }), + .m_axi_awvalid({ m00_axi_awvalid }), + .m_axi_awready({ m00_axi_awready }), + .m_axi_wdata({ m00_axi_wdata }), + .m_axi_wstrb({ m00_axi_wstrb }), + .m_axi_wlast({ m00_axi_wlast }), + .m_axi_wuser({ m00_axi_wuser }), + .m_axi_wvalid({ m00_axi_wvalid }), + .m_axi_wready({ m00_axi_wready }), + .m_axi_bid({ m00_axi_bid }), + .m_axi_bresp({ m00_axi_bresp }), + .m_axi_buser({ m00_axi_buser }), + .m_axi_bvalid({ m00_axi_bvalid }), + .m_axi_bready({ m00_axi_bready }), + .m_axi_arid({ m00_axi_arid }), + .m_axi_araddr({ m00_axi_araddr }), + .m_axi_arlen({ m00_axi_arlen }), + .m_axi_arsize({ m00_axi_arsize }), + .m_axi_arburst({ m00_axi_arburst }), + .m_axi_arlock({ m00_axi_arlock }), + .m_axi_arcache({ m00_axi_arcache }), + .m_axi_arprot({ m00_axi_arprot }), + .m_axi_arqos({ m00_axi_arqos }), + .m_axi_arregion({ m00_axi_arregion }), + .m_axi_aruser({ m00_axi_aruser }), + .m_axi_arvalid({ m00_axi_arvalid }), + .m_axi_arready({ m00_axi_arready }), + .m_axi_rid({ m00_axi_rid }), + .m_axi_rdata({ m00_axi_rdata }), + .m_axi_rresp({ m00_axi_rresp }), + .m_axi_rlast({ m00_axi_rlast }), + .m_axi_ruser({ m00_axi_ruser }), + 
.m_axi_rvalid({ m00_axi_rvalid }), + .m_axi_rready({ m00_axi_rready }) +); + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_register_rd.v b/xls/modules/zstd/external/axi_register_rd.v new file mode 100644 index 0000000000..c0df03a03f --- /dev/null +++ b/xls/modules/zstd/external/axi_register_rd.v @@ -0,0 +1,530 @@ +/* + +Copyright (c) 2018 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 register (read) + */ +module axi_register_rd # +( + // Width of data bus in bits + parameter DATA_WIDTH = 32, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // Width of wstrb (width of data bus in words) + parameter STRB_WIDTH = (DATA_WIDTH/8), + // Width of ID signal + parameter ID_WIDTH = 8, + // Propagate aruser signal + parameter ARUSER_ENABLE = 0, + // Width of aruser signal + parameter ARUSER_WIDTH = 1, + // Propagate ruser signal + parameter RUSER_ENABLE = 0, + // Width of ruser signal + parameter RUSER_WIDTH = 1, + // AR channel register type + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter AR_REG_TYPE = 1, + // R channel register type + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter R_REG_TYPE = 2 +) +( + input wire clk, + input wire rst, + + /* + * AXI slave interface + */ + input wire [ID_WIDTH-1:0] s_axi_arid, + input wire [ADDR_WIDTH-1:0] s_axi_araddr, + input wire [7:0] s_axi_arlen, + input wire [2:0] s_axi_arsize, + input wire [1:0] s_axi_arburst, + input wire s_axi_arlock, + input wire [3:0] s_axi_arcache, + input wire [2:0] s_axi_arprot, + input wire [3:0] s_axi_arqos, + input wire [3:0] s_axi_arregion, + input wire [ARUSER_WIDTH-1:0] s_axi_aruser, + input wire s_axi_arvalid, + output wire s_axi_arready, + output wire [ID_WIDTH-1:0] s_axi_rid, + output wire [DATA_WIDTH-1:0] s_axi_rdata, + output wire [1:0] s_axi_rresp, + output wire s_axi_rlast, + output wire [RUSER_WIDTH-1:0] s_axi_ruser, + output wire s_axi_rvalid, + input wire s_axi_rready, + + /* + * AXI master interface + */ + output wire [ID_WIDTH-1:0] m_axi_arid, + output wire [ADDR_WIDTH-1:0] m_axi_araddr, + output wire [7:0] m_axi_arlen, + output wire [2:0] m_axi_arsize, + output wire [1:0] m_axi_arburst, + output wire m_axi_arlock, + output wire [3:0] m_axi_arcache, + output wire [2:0] m_axi_arprot, + output wire [3:0] m_axi_arqos, + output wire [3:0] m_axi_arregion, + output wire [ARUSER_WIDTH-1:0] m_axi_aruser, + output wire m_axi_arvalid, + input wire 
m_axi_arready, + input wire [ID_WIDTH-1:0] m_axi_rid, + input wire [DATA_WIDTH-1:0] m_axi_rdata, + input wire [1:0] m_axi_rresp, + input wire m_axi_rlast, + input wire [RUSER_WIDTH-1:0] m_axi_ruser, + input wire m_axi_rvalid, + output wire m_axi_rready +); + +generate + +// AR channel + +if (AR_REG_TYPE > 1) begin +// skid buffer, no bubble cycles + +// datapath registers +reg s_axi_arready_reg = 1'b0; + +reg [ID_WIDTH-1:0] m_axi_arid_reg = {ID_WIDTH{1'b0}}; +reg [ADDR_WIDTH-1:0] m_axi_araddr_reg = {ADDR_WIDTH{1'b0}}; +reg [7:0] m_axi_arlen_reg = 8'd0; +reg [2:0] m_axi_arsize_reg = 3'd0; +reg [1:0] m_axi_arburst_reg = 2'd0; +reg m_axi_arlock_reg = 1'b0; +reg [3:0] m_axi_arcache_reg = 4'd0; +reg [2:0] m_axi_arprot_reg = 3'd0; +reg [3:0] m_axi_arqos_reg = 4'd0; +reg [3:0] m_axi_arregion_reg = 4'd0; +reg [ARUSER_WIDTH-1:0] m_axi_aruser_reg = {ARUSER_WIDTH{1'b0}}; +reg m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next; + +reg [ID_WIDTH-1:0] temp_m_axi_arid_reg = {ID_WIDTH{1'b0}}; +reg [ADDR_WIDTH-1:0] temp_m_axi_araddr_reg = {ADDR_WIDTH{1'b0}}; +reg [7:0] temp_m_axi_arlen_reg = 8'd0; +reg [2:0] temp_m_axi_arsize_reg = 3'd0; +reg [1:0] temp_m_axi_arburst_reg = 2'd0; +reg temp_m_axi_arlock_reg = 1'b0; +reg [3:0] temp_m_axi_arcache_reg = 4'd0; +reg [2:0] temp_m_axi_arprot_reg = 3'd0; +reg [3:0] temp_m_axi_arqos_reg = 4'd0; +reg [3:0] temp_m_axi_arregion_reg = 4'd0; +reg [ARUSER_WIDTH-1:0] temp_m_axi_aruser_reg = {ARUSER_WIDTH{1'b0}}; +reg temp_m_axi_arvalid_reg = 1'b0, temp_m_axi_arvalid_next; + +// datapath control +reg store_axi_ar_input_to_output; +reg store_axi_ar_input_to_temp; +reg store_axi_ar_temp_to_output; + +assign s_axi_arready = s_axi_arready_reg; + +assign m_axi_arid = m_axi_arid_reg; +assign m_axi_araddr = m_axi_araddr_reg; +assign m_axi_arlen = m_axi_arlen_reg; +assign m_axi_arsize = m_axi_arsize_reg; +assign m_axi_arburst = m_axi_arburst_reg; +assign m_axi_arlock = m_axi_arlock_reg; +assign m_axi_arcache = m_axi_arcache_reg; +assign m_axi_arprot = m_axi_arprot_reg; +assign m_axi_arqos = m_axi_arqos_reg; +assign m_axi_arregion = m_axi_arregion_reg; +assign m_axi_aruser = ARUSER_ENABLE ? 
m_axi_aruser_reg : {ARUSER_WIDTH{1'b0}}; +assign m_axi_arvalid = m_axi_arvalid_reg; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +wire s_axi_arready_early = m_axi_arready | (~temp_m_axi_arvalid_reg & (~m_axi_arvalid_reg | ~s_axi_arvalid)); + +always @* begin + // transfer sink ready state to source + m_axi_arvalid_next = m_axi_arvalid_reg; + temp_m_axi_arvalid_next = temp_m_axi_arvalid_reg; + + store_axi_ar_input_to_output = 1'b0; + store_axi_ar_input_to_temp = 1'b0; + store_axi_ar_temp_to_output = 1'b0; + + if (s_axi_arready_reg) begin + // input is ready + if (m_axi_arready | ~m_axi_arvalid_reg) begin + // output is ready or currently not valid, transfer data to output + m_axi_arvalid_next = s_axi_arvalid; + store_axi_ar_input_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_m_axi_arvalid_next = s_axi_arvalid; + store_axi_ar_input_to_temp = 1'b1; + end + end else if (m_axi_arready) begin + // input is not ready, but output is ready + m_axi_arvalid_next = temp_m_axi_arvalid_reg; + temp_m_axi_arvalid_next = 1'b0; + store_axi_ar_temp_to_output = 1'b1; + end +end + +always @(posedge clk) begin + if (rst) begin + s_axi_arready_reg <= 1'b0; + m_axi_arvalid_reg <= 1'b0; + temp_m_axi_arvalid_reg <= 1'b0; + end else begin + s_axi_arready_reg <= s_axi_arready_early; + m_axi_arvalid_reg <= m_axi_arvalid_next; + temp_m_axi_arvalid_reg <= temp_m_axi_arvalid_next; + end + + // datapath + if (store_axi_ar_input_to_output) begin + m_axi_arid_reg <= s_axi_arid; + m_axi_araddr_reg <= s_axi_araddr; + m_axi_arlen_reg <= s_axi_arlen; + m_axi_arsize_reg <= s_axi_arsize; + m_axi_arburst_reg <= s_axi_arburst; + m_axi_arlock_reg <= s_axi_arlock; + m_axi_arcache_reg <= s_axi_arcache; + m_axi_arprot_reg <= s_axi_arprot; + m_axi_arqos_reg <= s_axi_arqos; + m_axi_arregion_reg <= s_axi_arregion; + m_axi_aruser_reg <= s_axi_aruser; + end else if (store_axi_ar_temp_to_output) begin + m_axi_arid_reg <= temp_m_axi_arid_reg; + m_axi_araddr_reg <= temp_m_axi_araddr_reg; + m_axi_arlen_reg <= temp_m_axi_arlen_reg; + m_axi_arsize_reg <= temp_m_axi_arsize_reg; + m_axi_arburst_reg <= temp_m_axi_arburst_reg; + m_axi_arlock_reg <= temp_m_axi_arlock_reg; + m_axi_arcache_reg <= temp_m_axi_arcache_reg; + m_axi_arprot_reg <= temp_m_axi_arprot_reg; + m_axi_arqos_reg <= temp_m_axi_arqos_reg; + m_axi_arregion_reg <= temp_m_axi_arregion_reg; + m_axi_aruser_reg <= temp_m_axi_aruser_reg; + end + + if (store_axi_ar_input_to_temp) begin + temp_m_axi_arid_reg <= s_axi_arid; + temp_m_axi_araddr_reg <= s_axi_araddr; + temp_m_axi_arlen_reg <= s_axi_arlen; + temp_m_axi_arsize_reg <= s_axi_arsize; + temp_m_axi_arburst_reg <= s_axi_arburst; + temp_m_axi_arlock_reg <= s_axi_arlock; + temp_m_axi_arcache_reg <= s_axi_arcache; + temp_m_axi_arprot_reg <= s_axi_arprot; + temp_m_axi_arqos_reg <= s_axi_arqos; + temp_m_axi_arregion_reg <= s_axi_arregion; + temp_m_axi_aruser_reg <= s_axi_aruser; + end +end + +end else if (AR_REG_TYPE == 1) begin +// simple register, inserts bubble cycles + +// datapath registers +reg s_axi_arready_reg = 1'b0; + +reg [ID_WIDTH-1:0] m_axi_arid_reg = {ID_WIDTH{1'b0}}; +reg [ADDR_WIDTH-1:0] m_axi_araddr_reg = {ADDR_WIDTH{1'b0}}; +reg [7:0] m_axi_arlen_reg = 8'd0; +reg [2:0] m_axi_arsize_reg = 3'd0; +reg [1:0] m_axi_arburst_reg = 2'd0; +reg m_axi_arlock_reg = 1'b0; +reg [3:0] m_axi_arcache_reg = 4'd0; +reg [2:0] m_axi_arprot_reg = 3'd0; +reg [3:0] m_axi_arqos_reg = 4'd0; +reg [3:0] 
m_axi_arregion_reg = 4'd0; +reg [ARUSER_WIDTH-1:0] m_axi_aruser_reg = {ARUSER_WIDTH{1'b0}}; +reg m_axi_arvalid_reg = 1'b0, m_axi_arvalid_next; + +// datapath control +reg store_axi_ar_input_to_output; + +assign s_axi_arready = s_axi_arready_reg; + +assign m_axi_arid = m_axi_arid_reg; +assign m_axi_araddr = m_axi_araddr_reg; +assign m_axi_arlen = m_axi_arlen_reg; +assign m_axi_arsize = m_axi_arsize_reg; +assign m_axi_arburst = m_axi_arburst_reg; +assign m_axi_arlock = m_axi_arlock_reg; +assign m_axi_arcache = m_axi_arcache_reg; +assign m_axi_arprot = m_axi_arprot_reg; +assign m_axi_arqos = m_axi_arqos_reg; +assign m_axi_arregion = m_axi_arregion_reg; +assign m_axi_aruser = ARUSER_ENABLE ? m_axi_aruser_reg : {ARUSER_WIDTH{1'b0}}; +assign m_axi_arvalid = m_axi_arvalid_reg; + +// enable ready input next cycle if output buffer will be empty +wire s_axi_arready_early = !m_axi_arvalid_next; + +always @* begin + // transfer sink ready state to source + m_axi_arvalid_next = m_axi_arvalid_reg; + + store_axi_ar_input_to_output = 1'b0; + + if (s_axi_arready_reg) begin + m_axi_arvalid_next = s_axi_arvalid; + store_axi_ar_input_to_output = 1'b1; + end else if (m_axi_arready) begin + m_axi_arvalid_next = 1'b0; + end +end + +always @(posedge clk) begin + if (rst) begin + s_axi_arready_reg <= 1'b0; + m_axi_arvalid_reg <= 1'b0; + end else begin + s_axi_arready_reg <= s_axi_arready_early; + m_axi_arvalid_reg <= m_axi_arvalid_next; + end + + // datapath + if (store_axi_ar_input_to_output) begin + m_axi_arid_reg <= s_axi_arid; + m_axi_araddr_reg <= s_axi_araddr; + m_axi_arlen_reg <= s_axi_arlen; + m_axi_arsize_reg <= s_axi_arsize; + m_axi_arburst_reg <= s_axi_arburst; + m_axi_arlock_reg <= s_axi_arlock; + m_axi_arcache_reg <= s_axi_arcache; + m_axi_arprot_reg <= s_axi_arprot; + m_axi_arqos_reg <= s_axi_arqos; + m_axi_arregion_reg <= s_axi_arregion; + m_axi_aruser_reg <= s_axi_aruser; + end +end + +end else begin + + // bypass AR channel + assign m_axi_arid = s_axi_arid; + assign m_axi_araddr = s_axi_araddr; + assign m_axi_arlen = s_axi_arlen; + assign m_axi_arsize = s_axi_arsize; + assign m_axi_arburst = s_axi_arburst; + assign m_axi_arlock = s_axi_arlock; + assign m_axi_arcache = s_axi_arcache; + assign m_axi_arprot = s_axi_arprot; + assign m_axi_arqos = s_axi_arqos; + assign m_axi_arregion = s_axi_arregion; + assign m_axi_aruser = ARUSER_ENABLE ? 
s_axi_aruser : {ARUSER_WIDTH{1'b0}}; + assign m_axi_arvalid = s_axi_arvalid; + assign s_axi_arready = m_axi_arready; + +end + +// R channel + +if (R_REG_TYPE > 1) begin +// skid buffer, no bubble cycles + +// datapath registers +reg m_axi_rready_reg = 1'b0; + +reg [ID_WIDTH-1:0] s_axi_rid_reg = {ID_WIDTH{1'b0}}; +reg [DATA_WIDTH-1:0] s_axi_rdata_reg = {DATA_WIDTH{1'b0}}; +reg [1:0] s_axi_rresp_reg = 2'b0; +reg s_axi_rlast_reg = 1'b0; +reg [RUSER_WIDTH-1:0] s_axi_ruser_reg = {RUSER_WIDTH{1'b0}}; +reg s_axi_rvalid_reg = 1'b0, s_axi_rvalid_next; + +reg [ID_WIDTH-1:0] temp_s_axi_rid_reg = {ID_WIDTH{1'b0}}; +reg [DATA_WIDTH-1:0] temp_s_axi_rdata_reg = {DATA_WIDTH{1'b0}}; +reg [1:0] temp_s_axi_rresp_reg = 2'b0; +reg temp_s_axi_rlast_reg = 1'b0; +reg [RUSER_WIDTH-1:0] temp_s_axi_ruser_reg = {RUSER_WIDTH{1'b0}}; +reg temp_s_axi_rvalid_reg = 1'b0, temp_s_axi_rvalid_next; + +// datapath control +reg store_axi_r_input_to_output; +reg store_axi_r_input_to_temp; +reg store_axi_r_temp_to_output; + +assign m_axi_rready = m_axi_rready_reg; + +assign s_axi_rid = s_axi_rid_reg; +assign s_axi_rdata = s_axi_rdata_reg; +assign s_axi_rresp = s_axi_rresp_reg; +assign s_axi_rlast = s_axi_rlast_reg; +assign s_axi_ruser = RUSER_ENABLE ? s_axi_ruser_reg : {RUSER_WIDTH{1'b0}}; +assign s_axi_rvalid = s_axi_rvalid_reg; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +wire m_axi_rready_early = s_axi_rready | (~temp_s_axi_rvalid_reg & (~s_axi_rvalid_reg | ~m_axi_rvalid)); + +always @* begin + // transfer sink ready state to source + s_axi_rvalid_next = s_axi_rvalid_reg; + temp_s_axi_rvalid_next = temp_s_axi_rvalid_reg; + + store_axi_r_input_to_output = 1'b0; + store_axi_r_input_to_temp = 1'b0; + store_axi_r_temp_to_output = 1'b0; + + if (m_axi_rready_reg) begin + // input is ready + if (s_axi_rready | ~s_axi_rvalid_reg) begin + // output is ready or currently not valid, transfer data to output + s_axi_rvalid_next = m_axi_rvalid; + store_axi_r_input_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_s_axi_rvalid_next = m_axi_rvalid; + store_axi_r_input_to_temp = 1'b1; + end + end else if (s_axi_rready) begin + // input is not ready, but output is ready + s_axi_rvalid_next = temp_s_axi_rvalid_reg; + temp_s_axi_rvalid_next = 1'b0; + store_axi_r_temp_to_output = 1'b1; + end +end + +always @(posedge clk) begin + if (rst) begin + m_axi_rready_reg <= 1'b0; + s_axi_rvalid_reg <= 1'b0; + temp_s_axi_rvalid_reg <= 1'b0; + end else begin + m_axi_rready_reg <= m_axi_rready_early; + s_axi_rvalid_reg <= s_axi_rvalid_next; + temp_s_axi_rvalid_reg <= temp_s_axi_rvalid_next; + end + + // datapath + if (store_axi_r_input_to_output) begin + s_axi_rid_reg <= m_axi_rid; + s_axi_rdata_reg <= m_axi_rdata; + s_axi_rresp_reg <= m_axi_rresp; + s_axi_rlast_reg <= m_axi_rlast; + s_axi_ruser_reg <= m_axi_ruser; + end else if (store_axi_r_temp_to_output) begin + s_axi_rid_reg <= temp_s_axi_rid_reg; + s_axi_rdata_reg <= temp_s_axi_rdata_reg; + s_axi_rresp_reg <= temp_s_axi_rresp_reg; + s_axi_rlast_reg <= temp_s_axi_rlast_reg; + s_axi_ruser_reg <= temp_s_axi_ruser_reg; + end + + if (store_axi_r_input_to_temp) begin + temp_s_axi_rid_reg <= m_axi_rid; + temp_s_axi_rdata_reg <= m_axi_rdata; + temp_s_axi_rresp_reg <= m_axi_rresp; + temp_s_axi_rlast_reg <= m_axi_rlast; + temp_s_axi_ruser_reg <= m_axi_ruser; + end +end + +end else if (R_REG_TYPE == 1) begin +// simple register, inserts bubble cycles + +// datapath registers +reg 
m_axi_rready_reg = 1'b0; + +reg [ID_WIDTH-1:0] s_axi_rid_reg = {ID_WIDTH{1'b0}}; +reg [DATA_WIDTH-1:0] s_axi_rdata_reg = {DATA_WIDTH{1'b0}}; +reg [1:0] s_axi_rresp_reg = 2'b0; +reg s_axi_rlast_reg = 1'b0; +reg [RUSER_WIDTH-1:0] s_axi_ruser_reg = {RUSER_WIDTH{1'b0}}; +reg s_axi_rvalid_reg = 1'b0, s_axi_rvalid_next; + +// datapath control +reg store_axi_r_input_to_output; + +assign m_axi_rready = m_axi_rready_reg; + +assign s_axi_rid = s_axi_rid_reg; +assign s_axi_rdata = s_axi_rdata_reg; +assign s_axi_rresp = s_axi_rresp_reg; +assign s_axi_rlast = s_axi_rlast_reg; +assign s_axi_ruser = RUSER_ENABLE ? s_axi_ruser_reg : {RUSER_WIDTH{1'b0}}; +assign s_axi_rvalid = s_axi_rvalid_reg; + +// enable ready input next cycle if output buffer will be empty +wire m_axi_rready_early = !s_axi_rvalid_next; + +always @* begin + // transfer sink ready state to source + s_axi_rvalid_next = s_axi_rvalid_reg; + + store_axi_r_input_to_output = 1'b0; + + if (m_axi_rready_reg) begin + s_axi_rvalid_next = m_axi_rvalid; + store_axi_r_input_to_output = 1'b1; + end else if (s_axi_rready) begin + s_axi_rvalid_next = 1'b0; + end +end + +always @(posedge clk) begin + if (rst) begin + m_axi_rready_reg <= 1'b0; + s_axi_rvalid_reg <= 1'b0; + end else begin + m_axi_rready_reg <= m_axi_rready_early; + s_axi_rvalid_reg <= s_axi_rvalid_next; + end + + // datapath + if (store_axi_r_input_to_output) begin + s_axi_rid_reg <= m_axi_rid; + s_axi_rdata_reg <= m_axi_rdata; + s_axi_rresp_reg <= m_axi_rresp; + s_axi_rlast_reg <= m_axi_rlast; + s_axi_ruser_reg <= m_axi_ruser; + end +end + +end else begin + + // bypass R channel + assign s_axi_rid = m_axi_rid; + assign s_axi_rdata = m_axi_rdata; + assign s_axi_rresp = m_axi_rresp; + assign s_axi_rlast = m_axi_rlast; + assign s_axi_ruser = RUSER_ENABLE ? m_axi_ruser : {RUSER_WIDTH{1'b0}}; + assign s_axi_rvalid = m_axi_rvalid; + assign m_axi_rready = s_axi_rready; + +end + +endgenerate + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/axi_register_wr.v b/xls/modules/zstd/external/axi_register_wr.v new file mode 100644 index 0000000000..9176d6ba95 --- /dev/null +++ b/xls/modules/zstd/external/axi_register_wr.v @@ -0,0 +1,691 @@ +/* + +Copyright (c) 2018 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * AXI4 register (write) + */ +module axi_register_wr # +( + // Width of data bus in bits + parameter DATA_WIDTH = 32, + // Width of address bus in bits + parameter ADDR_WIDTH = 32, + // Width of wstrb (width of data bus in words) + parameter STRB_WIDTH = (DATA_WIDTH/8), + // Width of ID signal + parameter ID_WIDTH = 8, + // Propagate awuser signal + parameter AWUSER_ENABLE = 0, + // Width of awuser signal + parameter AWUSER_WIDTH = 1, + // Propagate wuser signal + parameter WUSER_ENABLE = 0, + // Width of wuser signal + parameter WUSER_WIDTH = 1, + // Propagate buser signal + parameter BUSER_ENABLE = 0, + // Width of buser signal + parameter BUSER_WIDTH = 1, + // AW channel register type + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter AW_REG_TYPE = 1, + // W channel register type + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter W_REG_TYPE = 2, + // B channel register type + // 0 to bypass, 1 for simple buffer, 2 for skid buffer + parameter B_REG_TYPE = 1 +) +( + input wire clk, + input wire rst, + + /* + * AXI slave interface + */ + input wire [ID_WIDTH-1:0] s_axi_awid, + input wire [ADDR_WIDTH-1:0] s_axi_awaddr, + input wire [7:0] s_axi_awlen, + input wire [2:0] s_axi_awsize, + input wire [1:0] s_axi_awburst, + input wire s_axi_awlock, + input wire [3:0] s_axi_awcache, + input wire [2:0] s_axi_awprot, + input wire [3:0] s_axi_awqos, + input wire [3:0] s_axi_awregion, + input wire [AWUSER_WIDTH-1:0] s_axi_awuser, + input wire s_axi_awvalid, + output wire s_axi_awready, + input wire [DATA_WIDTH-1:0] s_axi_wdata, + input wire [STRB_WIDTH-1:0] s_axi_wstrb, + input wire s_axi_wlast, + input wire [WUSER_WIDTH-1:0] s_axi_wuser, + input wire s_axi_wvalid, + output wire s_axi_wready, + output wire [ID_WIDTH-1:0] s_axi_bid, + output wire [1:0] s_axi_bresp, + output wire [BUSER_WIDTH-1:0] s_axi_buser, + output wire s_axi_bvalid, + input wire s_axi_bready, + + /* + * AXI master interface + */ + output wire [ID_WIDTH-1:0] m_axi_awid, + output wire [ADDR_WIDTH-1:0] m_axi_awaddr, + output wire [7:0] m_axi_awlen, + output wire [2:0] m_axi_awsize, + output wire [1:0] m_axi_awburst, + output wire m_axi_awlock, + output wire [3:0] m_axi_awcache, + output wire [2:0] m_axi_awprot, + output wire [3:0] m_axi_awqos, + output wire [3:0] m_axi_awregion, + output wire [AWUSER_WIDTH-1:0] m_axi_awuser, + output wire m_axi_awvalid, + input wire m_axi_awready, + output wire [DATA_WIDTH-1:0] m_axi_wdata, + output wire [STRB_WIDTH-1:0] m_axi_wstrb, + output wire m_axi_wlast, + output wire [WUSER_WIDTH-1:0] m_axi_wuser, + output wire m_axi_wvalid, + input wire m_axi_wready, + input wire [ID_WIDTH-1:0] m_axi_bid, + input wire [1:0] m_axi_bresp, + input wire [BUSER_WIDTH-1:0] m_axi_buser, + input wire m_axi_bvalid, + output wire m_axi_bready +); + +generate + +// AW channel + +if (AW_REG_TYPE > 1) begin +// skid buffer, no bubble cycles + +// datapath registers +reg s_axi_awready_reg = 1'b0; + +reg [ID_WIDTH-1:0] m_axi_awid_reg = {ID_WIDTH{1'b0}}; +reg [ADDR_WIDTH-1:0] m_axi_awaddr_reg = {ADDR_WIDTH{1'b0}}; +reg [7:0] m_axi_awlen_reg = 8'd0; +reg [2:0] m_axi_awsize_reg = 3'd0; +reg [1:0] m_axi_awburst_reg = 2'd0; +reg m_axi_awlock_reg = 1'b0; +reg [3:0] m_axi_awcache_reg = 4'd0; +reg [2:0] m_axi_awprot_reg = 3'd0; +reg [3:0] m_axi_awqos_reg = 4'd0; +reg [3:0] m_axi_awregion_reg = 4'd0; +reg [AWUSER_WIDTH-1:0] m_axi_awuser_reg = {AWUSER_WIDTH{1'b0}}; +reg m_axi_awvalid_reg = 1'b0, 
m_axi_awvalid_next; + +reg [ID_WIDTH-1:0] temp_m_axi_awid_reg = {ID_WIDTH{1'b0}}; +reg [ADDR_WIDTH-1:0] temp_m_axi_awaddr_reg = {ADDR_WIDTH{1'b0}}; +reg [7:0] temp_m_axi_awlen_reg = 8'd0; +reg [2:0] temp_m_axi_awsize_reg = 3'd0; +reg [1:0] temp_m_axi_awburst_reg = 2'd0; +reg temp_m_axi_awlock_reg = 1'b0; +reg [3:0] temp_m_axi_awcache_reg = 4'd0; +reg [2:0] temp_m_axi_awprot_reg = 3'd0; +reg [3:0] temp_m_axi_awqos_reg = 4'd0; +reg [3:0] temp_m_axi_awregion_reg = 4'd0; +reg [AWUSER_WIDTH-1:0] temp_m_axi_awuser_reg = {AWUSER_WIDTH{1'b0}}; +reg temp_m_axi_awvalid_reg = 1'b0, temp_m_axi_awvalid_next; + +// datapath control +reg store_axi_aw_input_to_output; +reg store_axi_aw_input_to_temp; +reg store_axi_aw_temp_to_output; + +assign s_axi_awready = s_axi_awready_reg; + +assign m_axi_awid = m_axi_awid_reg; +assign m_axi_awaddr = m_axi_awaddr_reg; +assign m_axi_awlen = m_axi_awlen_reg; +assign m_axi_awsize = m_axi_awsize_reg; +assign m_axi_awburst = m_axi_awburst_reg; +assign m_axi_awlock = m_axi_awlock_reg; +assign m_axi_awcache = m_axi_awcache_reg; +assign m_axi_awprot = m_axi_awprot_reg; +assign m_axi_awqos = m_axi_awqos_reg; +assign m_axi_awregion = m_axi_awregion_reg; +assign m_axi_awuser = AWUSER_ENABLE ? m_axi_awuser_reg : {AWUSER_WIDTH{1'b0}}; +assign m_axi_awvalid = m_axi_awvalid_reg; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +wire s_axi_awready_early = m_axi_awready | (~temp_m_axi_awvalid_reg & (~m_axi_awvalid_reg | ~s_axi_awvalid)); + +always @* begin + // transfer sink ready state to source + m_axi_awvalid_next = m_axi_awvalid_reg; + temp_m_axi_awvalid_next = temp_m_axi_awvalid_reg; + + store_axi_aw_input_to_output = 1'b0; + store_axi_aw_input_to_temp = 1'b0; + store_axi_aw_temp_to_output = 1'b0; + + if (s_axi_awready_reg) begin + // input is ready + if (m_axi_awready | ~m_axi_awvalid_reg) begin + // output is ready or currently not valid, transfer data to output + m_axi_awvalid_next = s_axi_awvalid; + store_axi_aw_input_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_m_axi_awvalid_next = s_axi_awvalid; + store_axi_aw_input_to_temp = 1'b1; + end + end else if (m_axi_awready) begin + // input is not ready, but output is ready + m_axi_awvalid_next = temp_m_axi_awvalid_reg; + temp_m_axi_awvalid_next = 1'b0; + store_axi_aw_temp_to_output = 1'b1; + end +end + +always @(posedge clk) begin + if (rst) begin + s_axi_awready_reg <= 1'b0; + m_axi_awvalid_reg <= 1'b0; + temp_m_axi_awvalid_reg <= 1'b0; + end else begin + s_axi_awready_reg <= s_axi_awready_early; + m_axi_awvalid_reg <= m_axi_awvalid_next; + temp_m_axi_awvalid_reg <= temp_m_axi_awvalid_next; + end + + // datapath + if (store_axi_aw_input_to_output) begin + m_axi_awid_reg <= s_axi_awid; + m_axi_awaddr_reg <= s_axi_awaddr; + m_axi_awlen_reg <= s_axi_awlen; + m_axi_awsize_reg <= s_axi_awsize; + m_axi_awburst_reg <= s_axi_awburst; + m_axi_awlock_reg <= s_axi_awlock; + m_axi_awcache_reg <= s_axi_awcache; + m_axi_awprot_reg <= s_axi_awprot; + m_axi_awqos_reg <= s_axi_awqos; + m_axi_awregion_reg <= s_axi_awregion; + m_axi_awuser_reg <= s_axi_awuser; + end else if (store_axi_aw_temp_to_output) begin + m_axi_awid_reg <= temp_m_axi_awid_reg; + m_axi_awaddr_reg <= temp_m_axi_awaddr_reg; + m_axi_awlen_reg <= temp_m_axi_awlen_reg; + m_axi_awsize_reg <= temp_m_axi_awsize_reg; + m_axi_awburst_reg <= temp_m_axi_awburst_reg; + m_axi_awlock_reg <= temp_m_axi_awlock_reg; + m_axi_awcache_reg <= temp_m_axi_awcache_reg; 
+ m_axi_awprot_reg <= temp_m_axi_awprot_reg; + m_axi_awqos_reg <= temp_m_axi_awqos_reg; + m_axi_awregion_reg <= temp_m_axi_awregion_reg; + m_axi_awuser_reg <= temp_m_axi_awuser_reg; + end + + if (store_axi_aw_input_to_temp) begin + temp_m_axi_awid_reg <= s_axi_awid; + temp_m_axi_awaddr_reg <= s_axi_awaddr; + temp_m_axi_awlen_reg <= s_axi_awlen; + temp_m_axi_awsize_reg <= s_axi_awsize; + temp_m_axi_awburst_reg <= s_axi_awburst; + temp_m_axi_awlock_reg <= s_axi_awlock; + temp_m_axi_awcache_reg <= s_axi_awcache; + temp_m_axi_awprot_reg <= s_axi_awprot; + temp_m_axi_awqos_reg <= s_axi_awqos; + temp_m_axi_awregion_reg <= s_axi_awregion; + temp_m_axi_awuser_reg <= s_axi_awuser; + end +end + +end else if (AW_REG_TYPE == 1) begin +// simple register, inserts bubble cycles + +// datapath registers +reg s_axi_awready_reg = 1'b0; + +reg [ID_WIDTH-1:0] m_axi_awid_reg = {ID_WIDTH{1'b0}}; +reg [ADDR_WIDTH-1:0] m_axi_awaddr_reg = {ADDR_WIDTH{1'b0}}; +reg [7:0] m_axi_awlen_reg = 8'd0; +reg [2:0] m_axi_awsize_reg = 3'd0; +reg [1:0] m_axi_awburst_reg = 2'd0; +reg m_axi_awlock_reg = 1'b0; +reg [3:0] m_axi_awcache_reg = 4'd0; +reg [2:0] m_axi_awprot_reg = 3'd0; +reg [3:0] m_axi_awqos_reg = 4'd0; +reg [3:0] m_axi_awregion_reg = 4'd0; +reg [AWUSER_WIDTH-1:0] m_axi_awuser_reg = {AWUSER_WIDTH{1'b0}}; +reg m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next; + +// datapath control +reg store_axi_aw_input_to_output; + +assign s_axi_awready = s_axi_awready_reg; + +assign m_axi_awid = m_axi_awid_reg; +assign m_axi_awaddr = m_axi_awaddr_reg; +assign m_axi_awlen = m_axi_awlen_reg; +assign m_axi_awsize = m_axi_awsize_reg; +assign m_axi_awburst = m_axi_awburst_reg; +assign m_axi_awlock = m_axi_awlock_reg; +assign m_axi_awcache = m_axi_awcache_reg; +assign m_axi_awprot = m_axi_awprot_reg; +assign m_axi_awqos = m_axi_awqos_reg; +assign m_axi_awregion = m_axi_awregion_reg; +assign m_axi_awuser = AWUSER_ENABLE ? 
m_axi_awuser_reg : {AWUSER_WIDTH{1'b0}}; +assign m_axi_awvalid = m_axi_awvalid_reg; + +// enable ready input next cycle if output buffer will be empty +wire s_axi_awready_eawly = !m_axi_awvalid_next; + +always @* begin + // transfer sink ready state to source + m_axi_awvalid_next = m_axi_awvalid_reg; + + store_axi_aw_input_to_output = 1'b0; + + if (s_axi_awready_reg) begin + m_axi_awvalid_next = s_axi_awvalid; + store_axi_aw_input_to_output = 1'b1; + end else if (m_axi_awready) begin + m_axi_awvalid_next = 1'b0; + end +end + +always @(posedge clk) begin + if (rst) begin + s_axi_awready_reg <= 1'b0; + m_axi_awvalid_reg <= 1'b0; + end else begin + s_axi_awready_reg <= s_axi_awready_eawly; + m_axi_awvalid_reg <= m_axi_awvalid_next; + end + + // datapath + if (store_axi_aw_input_to_output) begin + m_axi_awid_reg <= s_axi_awid; + m_axi_awaddr_reg <= s_axi_awaddr; + m_axi_awlen_reg <= s_axi_awlen; + m_axi_awsize_reg <= s_axi_awsize; + m_axi_awburst_reg <= s_axi_awburst; + m_axi_awlock_reg <= s_axi_awlock; + m_axi_awcache_reg <= s_axi_awcache; + m_axi_awprot_reg <= s_axi_awprot; + m_axi_awqos_reg <= s_axi_awqos; + m_axi_awregion_reg <= s_axi_awregion; + m_axi_awuser_reg <= s_axi_awuser; + end +end + +end else begin + + // bypass AW channel + assign m_axi_awid = s_axi_awid; + assign m_axi_awaddr = s_axi_awaddr; + assign m_axi_awlen = s_axi_awlen; + assign m_axi_awsize = s_axi_awsize; + assign m_axi_awburst = s_axi_awburst; + assign m_axi_awlock = s_axi_awlock; + assign m_axi_awcache = s_axi_awcache; + assign m_axi_awprot = s_axi_awprot; + assign m_axi_awqos = s_axi_awqos; + assign m_axi_awregion = s_axi_awregion; + assign m_axi_awuser = AWUSER_ENABLE ? s_axi_awuser : {AWUSER_WIDTH{1'b0}}; + assign m_axi_awvalid = s_axi_awvalid; + assign s_axi_awready = m_axi_awready; + +end + +// W channel + +if (W_REG_TYPE > 1) begin +// skid buffer, no bubble cycles + +// datapath registers +reg s_axi_wready_reg = 1'b0; + +reg [DATA_WIDTH-1:0] m_axi_wdata_reg = {DATA_WIDTH{1'b0}}; +reg [STRB_WIDTH-1:0] m_axi_wstrb_reg = {STRB_WIDTH{1'b0}}; +reg m_axi_wlast_reg = 1'b0; +reg [WUSER_WIDTH-1:0] m_axi_wuser_reg = {WUSER_WIDTH{1'b0}}; +reg m_axi_wvalid_reg = 1'b0, m_axi_wvalid_next; + +reg [DATA_WIDTH-1:0] temp_m_axi_wdata_reg = {DATA_WIDTH{1'b0}}; +reg [STRB_WIDTH-1:0] temp_m_axi_wstrb_reg = {STRB_WIDTH{1'b0}}; +reg temp_m_axi_wlast_reg = 1'b0; +reg [WUSER_WIDTH-1:0] temp_m_axi_wuser_reg = {WUSER_WIDTH{1'b0}}; +reg temp_m_axi_wvalid_reg = 1'b0, temp_m_axi_wvalid_next; + +// datapath control +reg store_axi_w_input_to_output; +reg store_axi_w_input_to_temp; +reg store_axi_w_temp_to_output; + +assign s_axi_wready = s_axi_wready_reg; + +assign m_axi_wdata = m_axi_wdata_reg; +assign m_axi_wstrb = m_axi_wstrb_reg; +assign m_axi_wlast = m_axi_wlast_reg; +assign m_axi_wuser = WUSER_ENABLE ? 
m_axi_wuser_reg : {WUSER_WIDTH{1'b0}}; +assign m_axi_wvalid = m_axi_wvalid_reg; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +wire s_axi_wready_early = m_axi_wready | (~temp_m_axi_wvalid_reg & (~m_axi_wvalid_reg | ~s_axi_wvalid)); + +always @* begin + // transfer sink ready state to source + m_axi_wvalid_next = m_axi_wvalid_reg; + temp_m_axi_wvalid_next = temp_m_axi_wvalid_reg; + + store_axi_w_input_to_output = 1'b0; + store_axi_w_input_to_temp = 1'b0; + store_axi_w_temp_to_output = 1'b0; + + if (s_axi_wready_reg) begin + // input is ready + if (m_axi_wready | ~m_axi_wvalid_reg) begin + // output is ready or currently not valid, transfer data to output + m_axi_wvalid_next = s_axi_wvalid; + store_axi_w_input_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_m_axi_wvalid_next = s_axi_wvalid; + store_axi_w_input_to_temp = 1'b1; + end + end else if (m_axi_wready) begin + // input is not ready, but output is ready + m_axi_wvalid_next = temp_m_axi_wvalid_reg; + temp_m_axi_wvalid_next = 1'b0; + store_axi_w_temp_to_output = 1'b1; + end +end + +always @(posedge clk) begin + if (rst) begin + s_axi_wready_reg <= 1'b0; + m_axi_wvalid_reg <= 1'b0; + temp_m_axi_wvalid_reg <= 1'b0; + end else begin + s_axi_wready_reg <= s_axi_wready_early; + m_axi_wvalid_reg <= m_axi_wvalid_next; + temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; + end + + // datapath + if (store_axi_w_input_to_output) begin + m_axi_wdata_reg <= s_axi_wdata; + m_axi_wstrb_reg <= s_axi_wstrb; + m_axi_wlast_reg <= s_axi_wlast; + m_axi_wuser_reg <= s_axi_wuser; + end else if (store_axi_w_temp_to_output) begin + m_axi_wdata_reg <= temp_m_axi_wdata_reg; + m_axi_wstrb_reg <= temp_m_axi_wstrb_reg; + m_axi_wlast_reg <= temp_m_axi_wlast_reg; + m_axi_wuser_reg <= temp_m_axi_wuser_reg; + end + + if (store_axi_w_input_to_temp) begin + temp_m_axi_wdata_reg <= s_axi_wdata; + temp_m_axi_wstrb_reg <= s_axi_wstrb; + temp_m_axi_wlast_reg <= s_axi_wlast; + temp_m_axi_wuser_reg <= s_axi_wuser; + end +end + +end else if (W_REG_TYPE == 1) begin +// simple register, inserts bubble cycles + +// datapath registers +reg s_axi_wready_reg = 1'b0; + +reg [DATA_WIDTH-1:0] m_axi_wdata_reg = {DATA_WIDTH{1'b0}}; +reg [STRB_WIDTH-1:0] m_axi_wstrb_reg = {STRB_WIDTH{1'b0}}; +reg m_axi_wlast_reg = 1'b0; +reg [WUSER_WIDTH-1:0] m_axi_wuser_reg = {WUSER_WIDTH{1'b0}}; +reg m_axi_wvalid_reg = 1'b0, m_axi_wvalid_next; + +// datapath control +reg store_axi_w_input_to_output; + +assign s_axi_wready = s_axi_wready_reg; + +assign m_axi_wdata = m_axi_wdata_reg; +assign m_axi_wstrb = m_axi_wstrb_reg; +assign m_axi_wlast = m_axi_wlast_reg; +assign m_axi_wuser = WUSER_ENABLE ? 
m_axi_wuser_reg : {WUSER_WIDTH{1'b0}}; +assign m_axi_wvalid = m_axi_wvalid_reg; + +// enable ready input next cycle if output buffer will be empty +wire s_axi_wready_ewly = !m_axi_wvalid_next; + +always @* begin + // transfer sink ready state to source + m_axi_wvalid_next = m_axi_wvalid_reg; + + store_axi_w_input_to_output = 1'b0; + + if (s_axi_wready_reg) begin + m_axi_wvalid_next = s_axi_wvalid; + store_axi_w_input_to_output = 1'b1; + end else if (m_axi_wready) begin + m_axi_wvalid_next = 1'b0; + end +end + +always @(posedge clk) begin + if (rst) begin + s_axi_wready_reg <= 1'b0; + m_axi_wvalid_reg <= 1'b0; + end else begin + s_axi_wready_reg <= s_axi_wready_ewly; + m_axi_wvalid_reg <= m_axi_wvalid_next; + end + + // datapath + if (store_axi_w_input_to_output) begin + m_axi_wdata_reg <= s_axi_wdata; + m_axi_wstrb_reg <= s_axi_wstrb; + m_axi_wlast_reg <= s_axi_wlast; + m_axi_wuser_reg <= s_axi_wuser; + end +end + +end else begin + + // bypass W channel + assign m_axi_wdata = s_axi_wdata; + assign m_axi_wstrb = s_axi_wstrb; + assign m_axi_wlast = s_axi_wlast; + assign m_axi_wuser = WUSER_ENABLE ? s_axi_wuser : {WUSER_WIDTH{1'b0}}; + assign m_axi_wvalid = s_axi_wvalid; + assign s_axi_wready = m_axi_wready; + +end + +// B channel + +if (B_REG_TYPE > 1) begin +// skid buffer, no bubble cycles + +// datapath registers +reg m_axi_bready_reg = 1'b0; + +reg [ID_WIDTH-1:0] s_axi_bid_reg = {ID_WIDTH{1'b0}}; +reg [1:0] s_axi_bresp_reg = 2'b0; +reg [BUSER_WIDTH-1:0] s_axi_buser_reg = {BUSER_WIDTH{1'b0}}; +reg s_axi_bvalid_reg = 1'b0, s_axi_bvalid_next; + +reg [ID_WIDTH-1:0] temp_s_axi_bid_reg = {ID_WIDTH{1'b0}}; +reg [1:0] temp_s_axi_bresp_reg = 2'b0; +reg [BUSER_WIDTH-1:0] temp_s_axi_buser_reg = {BUSER_WIDTH{1'b0}}; +reg temp_s_axi_bvalid_reg = 1'b0, temp_s_axi_bvalid_next; + +// datapath control +reg store_axi_b_input_to_output; +reg store_axi_b_input_to_temp; +reg store_axi_b_temp_to_output; + +assign m_axi_bready = m_axi_bready_reg; + +assign s_axi_bid = s_axi_bid_reg; +assign s_axi_bresp = s_axi_bresp_reg; +assign s_axi_buser = BUSER_ENABLE ? 
s_axi_buser_reg : {BUSER_WIDTH{1'b0}}; +assign s_axi_bvalid = s_axi_bvalid_reg; + +// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) +wire m_axi_bready_early = s_axi_bready | (~temp_s_axi_bvalid_reg & (~s_axi_bvalid_reg | ~m_axi_bvalid)); + +always @* begin + // transfer sink ready state to source + s_axi_bvalid_next = s_axi_bvalid_reg; + temp_s_axi_bvalid_next = temp_s_axi_bvalid_reg; + + store_axi_b_input_to_output = 1'b0; + store_axi_b_input_to_temp = 1'b0; + store_axi_b_temp_to_output = 1'b0; + + if (m_axi_bready_reg) begin + // input is ready + if (s_axi_bready | ~s_axi_bvalid_reg) begin + // output is ready or currently not valid, transfer data to output + s_axi_bvalid_next = m_axi_bvalid; + store_axi_b_input_to_output = 1'b1; + end else begin + // output is not ready, store input in temp + temp_s_axi_bvalid_next = m_axi_bvalid; + store_axi_b_input_to_temp = 1'b1; + end + end else if (s_axi_bready) begin + // input is not ready, but output is ready + s_axi_bvalid_next = temp_s_axi_bvalid_reg; + temp_s_axi_bvalid_next = 1'b0; + store_axi_b_temp_to_output = 1'b1; + end +end + +always @(posedge clk) begin + if (rst) begin + m_axi_bready_reg <= 1'b0; + s_axi_bvalid_reg <= 1'b0; + temp_s_axi_bvalid_reg <= 1'b0; + end else begin + m_axi_bready_reg <= m_axi_bready_early; + s_axi_bvalid_reg <= s_axi_bvalid_next; + temp_s_axi_bvalid_reg <= temp_s_axi_bvalid_next; + end + + // datapath + if (store_axi_b_input_to_output) begin + s_axi_bid_reg <= m_axi_bid; + s_axi_bresp_reg <= m_axi_bresp; + s_axi_buser_reg <= m_axi_buser; + end else if (store_axi_b_temp_to_output) begin + s_axi_bid_reg <= temp_s_axi_bid_reg; + s_axi_bresp_reg <= temp_s_axi_bresp_reg; + s_axi_buser_reg <= temp_s_axi_buser_reg; + end + + if (store_axi_b_input_to_temp) begin + temp_s_axi_bid_reg <= m_axi_bid; + temp_s_axi_bresp_reg <= m_axi_bresp; + temp_s_axi_buser_reg <= m_axi_buser; + end +end + +end else if (B_REG_TYPE == 1) begin +// simple register, inserts bubble cycles + +// datapath registers +reg m_axi_bready_reg = 1'b0; + +reg [ID_WIDTH-1:0] s_axi_bid_reg = {ID_WIDTH{1'b0}}; +reg [1:0] s_axi_bresp_reg = 2'b0; +reg [BUSER_WIDTH-1:0] s_axi_buser_reg = {BUSER_WIDTH{1'b0}}; +reg s_axi_bvalid_reg = 1'b0, s_axi_bvalid_next; + +// datapath control +reg store_axi_b_input_to_output; + +assign m_axi_bready = m_axi_bready_reg; + +assign s_axi_bid = s_axi_bid_reg; +assign s_axi_bresp = s_axi_bresp_reg; +assign s_axi_buser = BUSER_ENABLE ? 
s_axi_buser_reg : {BUSER_WIDTH{1'b0}}; +assign s_axi_bvalid = s_axi_bvalid_reg; + +// enable ready input next cycle if output buffer will be empty +wire m_axi_bready_early = !s_axi_bvalid_next; + +always @* begin + // transfer sink ready state to source + s_axi_bvalid_next = s_axi_bvalid_reg; + + store_axi_b_input_to_output = 1'b0; + + if (m_axi_bready_reg) begin + s_axi_bvalid_next = m_axi_bvalid; + store_axi_b_input_to_output = 1'b1; + end else if (s_axi_bready) begin + s_axi_bvalid_next = 1'b0; + end +end + +always @(posedge clk) begin + if (rst) begin + m_axi_bready_reg <= 1'b0; + s_axi_bvalid_reg <= 1'b0; + end else begin + m_axi_bready_reg <= m_axi_bready_early; + s_axi_bvalid_reg <= s_axi_bvalid_next; + end + + // datapath + if (store_axi_b_input_to_output) begin + s_axi_bid_reg <= m_axi_bid; + s_axi_bresp_reg <= m_axi_bresp; + s_axi_buser_reg <= m_axi_buser; + end +end + +end else begin + + // bypass B channel + assign s_axi_bid = m_axi_bid; + assign s_axi_bresp = m_axi_bresp; + assign s_axi_buser = BUSER_ENABLE ? m_axi_buser : {BUSER_WIDTH{1'b0}}; + assign s_axi_bvalid = m_axi_bvalid; + assign m_axi_bready = s_axi_bready; + +end + +endgenerate + +endmodule + +`resetall diff --git a/xls/modules/zstd/external/priority_encoder.v b/xls/modules/zstd/external/priority_encoder.v new file mode 100644 index 0000000000..cf82512ba8 --- /dev/null +++ b/xls/modules/zstd/external/priority_encoder.v @@ -0,0 +1,92 @@ +/* + +Copyright (c) 2014-2021 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Priority encoder module + */ +module priority_encoder # +( + parameter WIDTH = 4, + // LSB priority selection + parameter LSB_HIGH_PRIORITY = 0 +) +( + input wire [WIDTH-1:0] input_unencoded, + output wire output_valid, + output wire [$clog2(WIDTH)-1:0] output_encoded, + output wire [WIDTH-1:0] output_unencoded +); + +parameter LEVELS = WIDTH > 2 ? 
$clog2(WIDTH) : 1; +parameter W = 2**LEVELS; + +// pad input to even power of two +wire [W-1:0] input_padded = {{W-WIDTH{1'b0}}, input_unencoded}; + +wire [W/2-1:0] stage_valid[LEVELS-1:0]; +wire [W/2-1:0] stage_enc[LEVELS-1:0]; + +generate + genvar l, n; + + // process input bits; generate valid bit and encoded bit for each pair + for (n = 0; n < W/2; n = n + 1) begin : loop_in + assign stage_valid[0][n] = |input_padded[n*2+1:n*2]; + if (LSB_HIGH_PRIORITY) begin + // bit 0 is highest priority + assign stage_enc[0][n] = !input_padded[n*2+0]; + end else begin + // bit 0 is lowest priority + assign stage_enc[0][n] = input_padded[n*2+1]; + end + end + + // compress down to single valid bit and encoded bus + for (l = 1; l < LEVELS; l = l + 1) begin : loop_levels + for (n = 0; n < W/(2*2**l); n = n + 1) begin : loop_compress + assign stage_valid[l][n] = |stage_valid[l-1][n*2+1:n*2]; + if (LSB_HIGH_PRIORITY) begin + // bit 0 is highest priority + assign stage_enc[l][(n+1)*(l+1)-1:n*(l+1)] = stage_valid[l-1][n*2+0] ? {1'b0, stage_enc[l-1][(n*2+1)*l-1:(n*2+0)*l]} : {1'b1, stage_enc[l-1][(n*2+2)*l-1:(n*2+1)*l]}; + end else begin + // bit 0 is lowest priority + assign stage_enc[l][(n+1)*(l+1)-1:n*(l+1)] = stage_valid[l-1][n*2+1] ? {1'b1, stage_enc[l-1][(n*2+2)*l-1:(n*2+1)*l]} : {1'b0, stage_enc[l-1][(n*2+1)*l-1:(n*2+0)*l]}; + end + end + end +endgenerate + +assign output_valid = stage_valid[LEVELS-1]; +assign output_encoded = stage_enc[LEVELS-1]; +assign output_unencoded = 1 << output_encoded; + +endmodule + +`resetall diff --git a/xls/modules/zstd/frame_header.x b/xls/modules/zstd/frame_header.x deleted file mode 100644 index 858d64ac53..0000000000 --- a/xls/modules/zstd/frame_header.x +++ /dev/null @@ -1,692 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file contains utilities related to ZSTD Frame Header parsing. 
-// More information about the ZSTD Frame Header can be found in: -// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1 - -import std; -import xls.modules.zstd.buffer as buff; - -type Buffer = buff::Buffer; -type BufferStatus = buff::BufferStatus; -type BufferResult = buff::BufferResult; - -pub type WindowSize = u64; -type FrameContentSize = u64; -type DictionaryId = u32; - -// Maximal mantissa value for calculating maximal accepted window_size -// as per https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor -const MAX_MANTISSA = WindowSize:0b111; - -// Structure for holding ZSTD Frame_Header_Descriptor data, as in: -// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.1 -pub struct FrameHeaderDescriptor { - frame_content_size_flag: u2, - single_segment_flag: u1, - unused: u1, - reserved: u1, - content_checksum_flag: u1, - dictionary_id_flag: u2, -} - -// Structure for data obtained from decoding the Frame_Header_Descriptor -pub struct FrameHeader { - window_size: WindowSize, - frame_content_size: FrameContentSize, - dictionary_id: DictionaryId, - content_checksum_flag: u1, -} - -// Status values reported by the frame header parsing function -pub enum FrameHeaderStatus: u2 { - OK = 0, - CORRUPTED = 1, - NO_ENOUGH_DATA = 2, - UNSUPPORTED_WINDOW_SIZE = 3, -} - -// structure for returning results of parsing a frame header -pub struct FrameHeaderResult { - status: FrameHeaderStatus, - header: FrameHeader, - buffer: Buffer, -} - -// Auxiliary constant that can be used to initialize Proc's state -// with empty FrameHeader, because `zero!` cannot be used in that context -pub const ZERO_FRAME_HEADER = zero!(); -pub const FRAME_CONTENT_SIZE_NOT_PROVIDED_VALUE = FrameContentSize::MAX; - -// Extracts Frame_Header_Descriptor fields from 8-bit chunk of data -// that is assumed to be a valid Frame_Header_Descriptor -fn extract_frame_header_descriptor(data:u8) -> FrameHeaderDescriptor { - FrameHeaderDescriptor { - frame_content_size_flag: data[6:8], - single_segment_flag: data[5:6], - unused: data[4:5], - reserved: data[3:4], - content_checksum_flag: data[2:3], - dictionary_id_flag: data[0:2], - } -} - -#[test] -fn test_extract_frame_header_descriptor() { - assert_eq( - extract_frame_header_descriptor(u8:0xA4), - FrameHeaderDescriptor { - frame_content_size_flag: u2:0x2, - single_segment_flag: u1:0x1, - unused: u1:0x0, - reserved: u1:0x0, - content_checksum_flag: u1:0x1, - dictionary_id_flag: u2:0x0 - } - ); - - assert_eq( - extract_frame_header_descriptor(u8:0x0), - FrameHeaderDescriptor { - frame_content_size_flag: u2:0x0, - single_segment_flag: u1:0x0, - unused: u1:0x0, - reserved: u1:0x0, - content_checksum_flag: u1:0x0, - dictionary_id_flag: u2:0x0 - } - ); -} - -// Parses a Buffer and extracts information from the Frame_Header_Descriptor. -// The Buffer is assumed to contain a valid Frame_Header_Descriptor. 
The function -// returns BufferResult with the outcome of the operations on the buffer and -// information extracted from the Frame_Header_Descriptor -fn parse_frame_header_descriptor(buffer: Buffer) -> (BufferResult, FrameHeaderDescriptor) { - let (result, data) = buff::buffer_fixed_pop_checked(buffer); - match result.status { - BufferStatus::OK => { - let frame_header_desc = extract_frame_header_descriptor(data); - (result, frame_header_desc) - }, - _ => (result, zero!()) - } -} - -#[test] -fn test_parse_frame_header_descriptor() { - let buffer = Buffer { content: u32:0xA4, length: u32:8 }; - let (result, header) = parse_frame_header_descriptor(buffer); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0, length: u32:0 }, - }); - assert_eq(header, FrameHeaderDescriptor { - frame_content_size_flag: u2:0x2, - single_segment_flag: u1:0x1, - unused: u1:0x0, - reserved: u1:0x0, - content_checksum_flag: u1:0x1, - dictionary_id_flag: u2:0x0 - }); - - let buffer = Buffer { content: u32:0x0, length: u32:8 }; - let (result, header) = parse_frame_header_descriptor(buffer); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0, length: u32:0 }, - }); - assert_eq(header, FrameHeaderDescriptor { - frame_content_size_flag: u2:0x0, - single_segment_flag: u1:0x0, - unused: u1:0x0, - reserved: u1:0x0, - content_checksum_flag: u1:0x0, - dictionary_id_flag: u2:0x0 - }); - - let buffer = Buffer { content: u32:0x0, length: u32:0 }; - let (result, header) = parse_frame_header_descriptor(buffer); - assert_eq(result, BufferResult { - status: BufferStatus::NO_ENOUGH_DATA, - buffer: Buffer { content: u32:0, length: u32:0 }, - }); - assert_eq(header, zero!()); -} - -// Returns a boolean showing if the Window_Descriptor section exists -// for the frame with the given FrameHeaderDescriptor -fn window_descriptor_exists(desc: FrameHeaderDescriptor) -> bool { - desc.single_segment_flag == u1:0 -} - -#[test] -fn test_window_descriptor_exists() { - let zero_desc = zero!(); - - let desc_with_ss = FrameHeaderDescriptor {single_segment_flag: u1:1, ..zero_desc}; - assert_eq(window_descriptor_exists(desc_with_ss), false); - - let desc_without_ss = FrameHeaderDescriptor {single_segment_flag: u1:0, ..zero_desc}; - assert_eq(window_descriptor_exists(desc_without_ss), true); -} - -// Extracts window size from 8-bit chunk of data -// that is assumed to be a valid Window_Descriptor -fn extract_window_size_from_window_descriptor(data: u8) -> u64 { - let exponent = data >> u8:3; - let mantissa = data & u8:7; - - let window_base = u64:1 << (u64:10 + exponent as u64); - let window_add = (window_base >> u64:3) * (mantissa as u64); - - window_base + window_add -} - -#[test] -fn test_extract_window_size_from_window_descriptor() { - assert_eq(extract_window_size_from_window_descriptor(u8:0x0), u64:0x400); - assert_eq(extract_window_size_from_window_descriptor(u8:0x9), u64:0x900); - assert_eq(extract_window_size_from_window_descriptor(u8:0xFF), u64:0x3c000000000); -} - -// Parses a Buffer with data and extracts information from the Window_Descriptor -// The buffer is assumed to contain a valid Window_Descriptor that is related to -// the same frame as the provided FrameHeaderDescriptor. The function returns -// BufferResult with the outcome of the operations on the buffer and window size. 
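(Editor's note) Both this removed helper and extract_window_size_from_window_descriptor in the new frame_header_dec.x added later in this patch decode the Window_Descriptor byte with the exponent/mantissa scheme of RFC 8878. A minimal Python sketch of that formula, checked against the test values used in this patch; the function name is the editor's and is not part of the patch:

    def window_size_from_descriptor(byte: int) -> int:
        """RFC 8878 Window_Descriptor: upper 5 bits are the exponent, lower 3 the mantissa."""
        exponent = byte >> 3
        mantissa = byte & 0x7
        window_base = 1 << (10 + exponent)
        window_add = (window_base >> 3) * mantissa
        return window_base + window_add

    assert window_size_from_descriptor(0x00) == 0x400
    assert window_size_from_descriptor(0x09) == 0x900
    assert window_size_from_descriptor(0xFF) == 0x3C000000000
    # With WINDOW_LOG_MAX = 22 (TEST_WINDOW_LOG_MAX), window_size_valid() accepts
    # sizes up to (1 << 22) + ((1 << 22) >> 3) * 7 = 0x780000.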
-fn parse_window_descriptor(buffer: Buffer, desc: FrameHeaderDescriptor) -> (BufferResult, WindowSize) { - assert!(window_descriptor_exists(desc), "window_descriptor_does_not_exist"); - - let (result, data) = buff::buffer_fixed_pop_checked(buffer); - match result.status { - BufferStatus::OK => { - let window_size = extract_window_size_from_window_descriptor(data); - (result, window_size) - }, - _ => (result, u64:0) - } -} - -#[test] -fn test_parse_window_descriptor() { - let zero_desc = zero!(); - let desc_without_ss = FrameHeaderDescriptor {single_segment_flag: u1:0, ..zero_desc}; - - let buffer = Buffer { content: u32:0xF, length: u32:0x4 }; - let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); - assert_eq(result, BufferResult { - status: BufferStatus::NO_ENOUGH_DATA, - buffer: Buffer { content: u32:0xF, length: u32:0x4 }, - }); - assert_eq(window_size, u64:0); - - let buffer = Buffer { content: u32:0x0, length: u32:0x8 }; - let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0 }, - }); - assert_eq(window_size, u64:0x400); - - let buffer = Buffer { content: u32:0x9, length: u32:0x8 }; - let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0 }, - }); - assert_eq(window_size, u64:0x900); - - let buffer = Buffer { content: u32:0xFF, length: u32:0x8 }; - let (result, window_size) = parse_window_descriptor(buffer, desc_without_ss); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0 }, - }); - assert_eq(window_size, u64:0x3c000000000); -} - -// Parses a Buffer with data and extracts information from the Dictionary_ID -// The buffer is assumed to contain a valid Dictionary_ID that is related to -// the same frame as the provided FrameHeaderDescriptor. 
The function returns -// BufferResult with the outcome of the operations on the buffer and dictionary ID -fn parse_dictionary_id(buffer: Buffer, desc: FrameHeaderDescriptor) -> (BufferResult, DictionaryId) { - let bytes = match desc.dictionary_id_flag { - u2:0 => u32:0, - u2:1 => u32:1, - u2:2 => u32:2, - u2:3 => u32:4, - _ => fail!("not_possible", u32:0) - }; - - let (result, data) = buff::buffer_pop_checked(buffer, bytes * u32:8); - match result.status { - BufferStatus::OK => (result, data as u32), - _ => (result, u32:0) - } -} - -#[test] -fn test_parse_dictionary_id() { - let zero_desc = zero!(); - - let buffer = Buffer { content: u32:0x0, length: u32:0x0 }; - let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0, ..zero_desc}; - let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0x0 }, - }); - assert_eq(dictionary_id, u32:0); - - let buffer = Buffer { content: u32:0x12, length: u32:0x8 }; - let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x1, ..zero_desc}; - let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0 }, - }); - assert_eq(dictionary_id, u32:0x12); - - let buffer = Buffer { content: u32:0x1234, length: u32:0x10 }; - let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x2, ..zero_desc}; - let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0 }, - }); - assert_eq(dictionary_id, u32:0x1234); - - let buffer = Buffer { content: u32:0x12345678, length: u32:0x20 }; - let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x3, ..zero_desc}; - let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u32:0x0, length: u32:0 }, - }); - assert_eq(dictionary_id, u32:0x12345678); - - let buffer = Buffer { content: u32:0x1234, length: u32:0x10 }; - let frame_header_desc = FrameHeaderDescriptor { dictionary_id_flag: u2:0x3, ..zero_desc}; - let (result, dictionary_id) = parse_dictionary_id(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::NO_ENOUGH_DATA, - buffer: Buffer { content: u32:0x1234, length: u32:0x10 }, - }); - assert_eq(dictionary_id, u32:0x0); -} - -// Returns boolean showing if the Frame_Content_Size section exists for -// the frame with the given FrameHeaderDescriptor. 
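(Editor's note) The FCS_Field_Size selection used by the removed parse_frame_content_size below and by parse_frame_header in the new frame_header_dec.x follows RFC 8878: with Frame_Content_Size_Flag 0 the field is 1 byte if Single_Segment_Flag is set and absent otherwise; flags 1/2/3 give 2/4/8 bytes, and a 2-byte value carries an extra offset of 256. A small Python sketch of that selection and of the resulting header length, with names chosen by the editor and values cross-checked against test vectors from this patch:

    def fcs_field_size(fcs_flag: int, single_segment: bool) -> int:
        # RFC 8878 section 3.1.1.1.1.1; a 2-byte field additionally gets a +256 offset.
        if fcs_flag == 0:
            return 1 if single_segment else 0
        return {1: 2, 2: 4, 3: 8}[fcs_flag]

    def header_length(fhd: int) -> int:
        """Frame_Header length in bytes, derived from the Frame_Header_Descriptor (magic number excluded)."""
        fcs_flag = (fhd >> 6) & 0x3
        single_segment = bool((fhd >> 5) & 0x1)
        dict_flag = fhd & 0x3
        window_descriptor_len = 0 if single_segment else 1
        dictionary_id_len = {0: 0, 1: 1, 2: 2, 3: 4}[dict_flag]
        return 1 + window_descriptor_len + dictionary_id_len + fcs_field_size(fcs_flag, single_segment)

    assert header_length(0xC2) == 12  # plus the 4-byte magic number -> 16, as in the FrameHeaderDecoder test
    assert header_length(0xE2) == 11
    assert header_length(0x20) == 2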
-fn frame_content_size_exists(desc: FrameHeaderDescriptor) -> bool { - desc.single_segment_flag != u1:0 || desc.frame_content_size_flag != u2:0 -} - -#[test] -fn test_frame_content_size_exists() { - let zero_desc = zero!(); - - let desc = FrameHeaderDescriptor {single_segment_flag: u1:0, frame_content_size_flag: u2:0, ..zero_desc}; - assert_eq(frame_content_size_exists(desc), false); - - let desc = FrameHeaderDescriptor {single_segment_flag: u1:0, frame_content_size_flag: u2:2, ..zero_desc}; - assert_eq(frame_content_size_exists(desc), true); - - let desc = FrameHeaderDescriptor {single_segment_flag: u1:1, frame_content_size_flag: u2:0, ..zero_desc}; - assert_eq(frame_content_size_exists(desc), true); - - let desc = FrameHeaderDescriptor {single_segment_flag: u1:1, frame_content_size_flag: u2:3, ..zero_desc}; - assert_eq(frame_content_size_exists(desc), true); -} - -// Parses a Buffer with data and extracts information from the Frame_Content_Size -// The buffer is assumed to contain a valid Frame_Content_Size that is related to -// the same frame as the provided FrameHeaderDescriptor. The function returns -// BufferResult with the outcome of the operations on the buffer and frame content size. -fn parse_frame_content_size(buffer: Buffer, desc: FrameHeaderDescriptor) -> (BufferResult, FrameContentSize) { - assert!(frame_content_size_exists(desc), "frame_content_size_does_not_exist"); - - let bytes = match desc.frame_content_size_flag { - u2:0 => u32:1, - u2:1 => u32:2, - u2:2 => u32:4, - u2:3 => u32:8, - _ => fail!("not_possible", u32:0) - }; - - let (result, data) = buff::buffer_pop_checked(buffer, bytes * u32:8); - match (result.status, bytes) { - (BufferStatus::OK, u32:2) => (result, data as u64 + u64:256), - (BufferStatus::OK, _) => (result, data as u64), - (_, _) => (result, u64:0) - } -} - -#[test] -fn test_parse_frame_content_size() { - let zero_desc = zero!(); - - let buffer = Buffer { content: u64:0x12, length: u32:8 }; - let frame_header_desc = FrameHeaderDescriptor { - frame_content_size_flag: u2:0, - single_segment_flag: u1:1, - ..zero_desc - }; - let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u64:0x0, length: u32:0x0 }, - }); - assert_eq(frame_content_size, u64:0x12); - - let buffer = Buffer { content: u64:0x1234, length: u32:0x10 }; - let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:1, ..zero_desc}; - let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u64:0x0, length: u32:0x0 }, - }); - assert_eq(frame_content_size, u64:0x1234 + u64:256); - - let buffer = Buffer { content: u64:0x12345678, length: u32:0x20 }; - let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:2, ..zero_desc}; - let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: Buffer { content: u64:0x0, length: u32:0x0 }, - }); - assert_eq(frame_content_size, u64:0x12345678); - - let buffer = Buffer { content: u64:0x1234567890ABCDEF, length: u32:0x40 }; - let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:3, ..zero_desc}; - let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::OK, - buffer: 
Buffer { content: u64:0x0, length: u32:0x0 }, - }); - assert_eq(frame_content_size, u64:0x1234567890ABCDEF); - - let buffer = Buffer { content: u32:0x12345678, length: u32:0x20 }; - let frame_header_desc = FrameHeaderDescriptor { frame_content_size_flag: u2:0x3, ..zero_desc}; - let (result, frame_content_size) = parse_frame_content_size(buffer, frame_header_desc); - assert_eq(result, BufferResult { - status: BufferStatus::NO_ENOUGH_DATA, - buffer: Buffer { content: u32:0x12345678, length: u32:0x20 }, - }); - assert_eq(frame_content_size, u64:0x0); -} - -// Calculate maximal accepted window_size for given WINDOW_LOG_MAX and return whether given -// window_size should be accepted or discarded. -// Based on window_size calculation from: RFC 8878 -// https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor -fn window_size_valid(window_size: WindowSize) -> bool { - let max_window_size = (WindowSize:1 << WINDOW_LOG_MAX) + (((WindowSize:1 << WINDOW_LOG_MAX) >> WindowSize:3) * MAX_MANTISSA); - - window_size <= max_window_size -} - -// Parses a Buffer with data and extracts Frame_Header information. The buffer -// is assumed to contain a valid Frame_Header The function returns FrameHeaderResult -// with BufferResult that contains outcome of the operations on the Buffer, -// FrameHeader with the extracted frame header if the parsing was successful, -// and the status of the operation in FrameHeaderStatus. On failure, the returned -// buffer is the same as the input buffer. -// WINDOW_LOG_MAX is the base 2 logarithm used for calculating the maximal allowed -// window_size. Frame header parsing function must discard all frames that -// have window_size above the maximal allowed window_size. -// CAPACITY is the buffer capacity -pub fn parse_frame_header(buffer: Buffer) -> FrameHeaderResult { - trace_fmt!("parse_frame_header: ==== Parsing ==== \n"); - trace_fmt!("parse_frame_header: initial buffer: {:#x}", buffer); - - let (result, desc) = parse_frame_header_descriptor(buffer); - trace_fmt!("parse_frame_header: buffer after parsing header descriptor: {:#x}", result.buffer); - - let (result, header) = match result.status { - BufferStatus::OK => { - let (result, window_size) = if window_descriptor_exists(desc) { - trace_fmt!("parse_frame_header: window_descriptor exists, parse it"); - parse_window_descriptor(result.buffer, desc) - } else { - trace_fmt!("parse_frame_header: window_descriptor does not exist, skip parsing it"); - (result, u64:0) - }; - trace_fmt!("parse_frame_header: buffer after parsing window_descriptor: {:#x}", result.buffer); - - match result.status { - BufferStatus::OK => { - trace_fmt!("parse_frame_header: parse dictionary_id"); - let (result, dictionary_id) = parse_dictionary_id(result.buffer, desc); - trace_fmt!("parse_frame_header: buffer after parsing dictionary_id: {:#x}", result.buffer); - - match result.status { - BufferStatus::OK => { - let (result, frame_content_size) = if frame_content_size_exists(desc) { - trace_fmt!("parse_frame_header: frame_content_size exists, parse it"); - parse_frame_content_size(result.buffer, desc) - } else { - trace_fmt!("parse_frame_header: frame_content_size does not exist, skip parsing it"); - (result, FRAME_CONTENT_SIZE_NOT_PROVIDED_VALUE) - }; - trace_fmt!("parse_frame_header: buffer after parsing frame_content_size: {:#x}", result.buffer); - - match result.status { - BufferStatus::OK => { - trace_fmt!("parse_frame_header: calculate frame header!"); - let window_size = match window_descriptor_exists(desc) { - true => window_size, - 
_ => frame_content_size, - }; - - ( - result, - FrameHeader { - window_size: window_size, - frame_content_size: frame_content_size, - dictionary_id: dictionary_id, - content_checksum_flag: desc.content_checksum_flag, - } - ) - }, - _ => { - trace_fmt!("parse_frame_header: Not enough data to parse frame_content_size!"); - (result, zero!()) - } - } - }, - _ => { - trace_fmt!("parse_frame_header: Not enough data to parse dictionary_id!"); - (result, zero!()) - } - } - }, - _ => { - trace_fmt!("parse_frame_header: Not enough data to parse window_descriptor!"); - (result, zero!()) - } - } - }, - _ => { - trace_fmt!("parse_frame_header: Not enough data to parse frame_header_descriptor!"); - (result, zero!()) - } - }; - - let (status, buffer) = match result.status { - BufferStatus::OK => (FrameHeaderStatus::OK, result.buffer), - _ => (FrameHeaderStatus::NO_ENOUGH_DATA, buffer) - }; - - let frame_header_result = FrameHeaderResult { status: status, header: header, buffer: buffer }; - - // libzstd always reports NO_ENOUGH_DATA errors before CORRUPTED caused by - // reserved bit being set - if (desc.reserved == u1:1 && frame_header_result.status != FrameHeaderStatus::NO_ENOUGH_DATA) { - trace_fmt!("parse_frame_header: frame descriptor corrupted!"); - // Critical failure - requires resetting the whole decoder - FrameHeaderResult { - status: FrameHeaderStatus::CORRUPTED, - buffer: zero!(), - header: zero!(), - } - } else if (!window_size_valid(header.window_size)) { - trace_fmt!("parse_frame_header: frame discarded: window_size to big: {}", header.window_size); - FrameHeaderResult { - status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, - buffer: zero!(), - header: zero!(), - } - } else { - frame_header_result - } -} - -// The largest allowed WindowLog for DSLX tests -pub const TEST_WINDOW_LOG_MAX = WindowSize:22; - -#[test] -fn test_parse_frame_header() { - // normal cases - let buffer = Buffer { content: bits[128]:0x1234567890ABCDEF_CAFE_09_C2, length: u32:96 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::OK, - buffer: Buffer { - content: bits[128]:0x0, - length: u32:0, - }, - header: FrameHeader { - window_size: u64:0x900, - frame_content_size: u64:0x1234567890ABCDEF, - dictionary_id: u32:0xCAFE, - content_checksum_flag: u1:0, - } - }); - - // SingleSegmentFlag is set and FrameContentSize is bigger than accepted window_size - let buffer = Buffer { content: bits[128]:0x1234567890ABCDEF_CAFE_E2, length: u32:88 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, - buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, - header: zero!() - }); - - let buffer = Buffer { content: bits[128]:0xaa20, length: u32:16 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::OK, - buffer: Buffer { - content: bits[128]:0x0, - length: u32:0, - }, - header: FrameHeader { - window_size: u64:0xaa, - frame_content_size: u64:0xaa, - dictionary_id: u32:0x0, - content_checksum_flag: u1:0, - }, - }); - - // when buffer is too short - let buffer = Buffer { content: bits[128]:0x0, length: u32:0 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::NO_ENOUGH_DATA, - buffer: buffer, - header: zero!() - }); - - let buffer = Buffer { content: 
bits[128]:0xC2, length: u32:8 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::NO_ENOUGH_DATA, - buffer: buffer, - header: zero!() - }); - - let buffer = Buffer { content: bits[128]:0x09_C2, length: u32:16 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::NO_ENOUGH_DATA, - buffer: buffer, - header: zero!() - }); - - let buffer = Buffer { content: bits[128]:0x1234_09_C2, length: u32:32 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::NO_ENOUGH_DATA, - buffer: buffer, - header: zero!() - }); - - let buffer = Buffer { content: bits[128]:0x1234_09_C2, length: u32:32 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::NO_ENOUGH_DATA, - buffer: buffer, - header: zero!() - }); - - // when frame header descriptor is corrupted - let buffer = Buffer { content: bits[128]:0x1234567890ABCDEF_1234_09_CA, length: u32:96 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::CORRUPTED, - buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, - header: zero!() - }); - - // Frame Header is discarded because Window size required by frame is too big for given decoder - // configuration - let buffer = Buffer { content: bits[128]:0xd310, length: u32:16 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, - buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, - header: zero!() - }); - - // Frame Header is discarded because Frame Content Size required by frame is too big for given decoder - // configuration - let buffer = Buffer { content: bits[128]:0xf45b5b5b0db1, length: u32:48 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, - buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, - header: FrameHeader { - window_size: u64:0x0, - frame_content_size: u64:0x0, - dictionary_id: u32:0x0, - content_checksum_flag: u1:0, - }, - }); - - // Frame Header is discarded because Frame Content Size required by frame is too big (above 64bits) for given decoder - // configuration - let buffer = Buffer { content: bits[128]:0xc0659db6813a16b33f3da53a79e4, length: u32:112 }; - let frame_header_result = parse_frame_header(buffer); - assert_eq(frame_header_result, FrameHeaderResult { - status: FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE, - buffer: Buffer { content: bits[128]:0x0, length: u32:0 }, - header: FrameHeader { - window_size: u64:0x0, - frame_content_size: u64:0x0, - dictionary_id: u32:0x0, - content_checksum_flag: u1:0, - }, - }); -} diff --git a/xls/modules/zstd/frame_header_dec.x b/xls/modules/zstd/frame_header_dec.x new file mode 100644 index 0000000000..8647435996 --- /dev/null +++ b/xls/modules/zstd/frame_header_dec.x @@ -0,0 +1,670 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains utilities related to ZSTD Frame Header parsing. +// More information about the ZSTD Frame Header can be found in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1 + +import std; +import xls.modules.zstd.memory.mem_reader; + +pub type WindowSize = u64; +pub type FrameContentSize = u64; +pub type DictionaryId = u32; + +// Structure for data obtained from decoding the Frame_Header_Descriptor +pub struct FrameHeader { + window_size: WindowSize, + frame_content_size: FrameContentSize, + dictionary_id: DictionaryId, + content_checksum_flag: u1, +} + +// Status values reported by the frame header parsing function +pub enum FrameHeaderDecoderStatus: u2 { + OKAY = 0, + CORRUPTED = 1, + UNSUPPORTED_WINDOW_SIZE = 2, +} + +pub struct FrameHeaderDecoderReq { + addr: uN[ADDR_W], +} + +pub struct FrameHeaderDecoderResp { + status: FrameHeaderDecoderStatus, + header: FrameHeader, + length: u5, +} + +// Maximal mantissa value for calculating maximal accepted window_size +// as per https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor +const MAX_MANTISSA = WindowSize:0b111; + +// Structure for holding ZSTD Frame_Header_Descriptor data, as in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.1.1 +pub struct FrameHeaderDescriptor { + frame_content_size_flag: u2, + single_segment_flag: u1, + unused: u1, + reserved: u1, + content_checksum_flag: u1, + dictionary_id_flag: u2, +} + +// Auxiliary constant that can be used to initialize Proc's state +// with empty FrameHeader, because `zero!` cannot be used in that context +pub const ZERO_FRAME_HEADER = zero!(); +pub const FRAME_CONTENT_SIZE_NOT_PROVIDED_VALUE = FrameContentSize::MAX; + +// Extracts Frame_Header_Descriptor fields from 8-bit chunk of data +// that is assumed to be a valid Frame_Header_Descriptor +fn extract_frame_header_descriptor(data:u8) -> FrameHeaderDescriptor { + FrameHeaderDescriptor { + frame_content_size_flag: data[6:8], + single_segment_flag: data[5:6], + unused: data[4:5], + reserved: data[3:4], + content_checksum_flag: data[2:3], + dictionary_id_flag: data[0:2], + } +} + +#[test] +fn test_extract_frame_header_descriptor() { + assert_eq( + extract_frame_header_descriptor(u8:0xA4), + FrameHeaderDescriptor { + frame_content_size_flag: u2:0x2, + single_segment_flag: u1:0x1, + unused: u1:0x0, + reserved: u1:0x0, + content_checksum_flag: u1:0x1, + dictionary_id_flag: u2:0x0 + } + ); + + assert_eq( + extract_frame_header_descriptor(u8:0x0), + FrameHeaderDescriptor { + frame_content_size_flag: u2:0x0, + single_segment_flag: u1:0x0, + unused: u1:0x0, + reserved: u1:0x0, + content_checksum_flag: u1:0x0, + dictionary_id_flag: u2:0x0 + } + ); +} + +// Returns a boolean showing if the Window_Descriptor section exists +// for the frame with the given FrameHeaderDescriptor +fn window_descriptor_exists(desc: FrameHeaderDescriptor) -> bool { + desc.single_segment_flag == u1:0 +} + +#[test] +fn test_window_descriptor_exists() { + let zero_desc = zero!(); + + let desc_with_ss = FrameHeaderDescriptor {single_segment_flag: u1:1, ..zero_desc}; + 
assert_eq(window_descriptor_exists(desc_with_ss), false); + + let desc_without_ss = FrameHeaderDescriptor {single_segment_flag: u1:0, ..zero_desc}; + assert_eq(window_descriptor_exists(desc_without_ss), true); +} + +// Extracts window size from 8-bit chunk of data +// that is assumed to be a valid Window_Descriptor +fn extract_window_size_from_window_descriptor(data: u8) -> u64 { + let exponent = data[3:8]; + let mantissa = data[0:3]; + + let window_base = (u42:1 << (u6:10 + exponent as u6)); + let window_base_add = (window_base >> u2:3) as u42; + // optimization: perform multiplication by a 3-bit value with adds and shifts + // because XLS only allows multiplying operands of the same width + let window_add = match mantissa { + u3:0 => u42:0, + u3:1 => window_base_add, // u39 + u3:2 => window_base_add + window_base_add, // u39 + u39 = u40 + u3:3 => (window_base_add << u1:1) + window_base_add, // u40 + u39 = u41 + u3:4 => (window_base_add << u1:1) + (window_base_add << u1:1), // u40 + u40 = u41 + u3:5 => (window_base_add << u2:2) + window_base_add, // u41 + u39 = u42 + u3:6 => (window_base_add << u2:2) + (window_base_add << u2:1), // u41 + u40 = u42 + u3:7 => (window_base_add << u2:3) - window_base_add, // u42 - u39 = u42 + _ => fail!("extract_window_size_from_window_descriptor_unreachable", u42:0), + }; + + window_base as u64 + window_add as u64 +} + +#[test] +fn test_extract_window_size_from_window_descriptor() { + assert_eq(extract_window_size_from_window_descriptor(u8:0x0), u64:0x400); + assert_eq(extract_window_size_from_window_descriptor(u8:0x9), u64:0x900); + assert_eq(extract_window_size_from_window_descriptor(u8:0xFF), u64:0x3c000000000); +} + +// Returns boolean showing if the Frame_Content_Size section exists for +// the frame with the given FrameHeaderDescriptor. +fn frame_content_size_exists(desc: FrameHeaderDescriptor) -> bool { + desc.single_segment_flag != u1:0 || desc.frame_content_size_flag != u2:0 +} + +#[test] +fn test_frame_content_size_exists() { + let zero_desc = zero!(); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:0, frame_content_size_flag: u2:0, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), false); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:0, frame_content_size_flag: u2:2, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), true); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:1, frame_content_size_flag: u2:0, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), true); + + let desc = FrameHeaderDescriptor {single_segment_flag: u1:1, frame_content_size_flag: u2:3, ..zero_desc}; + assert_eq(frame_content_size_exists(desc), true); +} + + +// Calculate maximal accepted window_size for given WINDOW_LOG_MAX and return whether given +// window_size should be accepted or discarded. +// Based on window_size calculation from: RFC 8878 +// https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor +fn window_size_valid(window_size: WindowSize) -> bool { + let max_window_size = (WindowSize:1 << WINDOW_LOG_MAX) + (((WindowSize:1 << WINDOW_LOG_MAX) >> WindowSize:3) * MAX_MANTISSA); + + window_size <= max_window_size +} + + +pub fn parse_frame_header(header_raw: uN[112]) -> (FrameHeader, u4, u1) { + let fhd_raw = header_raw[0:8]; + let fhd = extract_frame_header_descriptor(fhd_raw); + // RFC8878 Section 3.1.1.1.1.4 + // "This [reserved] bit is reserved for some future feature. Its value + // must be zero. A decoder compliant with this specification version must + // ensure it is not set." 
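+    // (Editor's note, not part of the original patch) Worked example of the field
+    // walk below, using the first test vector (0x1234567890ABCDEF_CAFE_09_C2):
+    // FHD = 0xC2 -> frame_content_size_flag = 3, single_segment_flag = 0,
+    // dictionary_id_flag = 2, content_checksum_flag = 0. The header is then
+    // FHD (1 byte), Window_Descriptor (1 byte, 0x09 -> window_size 0x900),
+    // Dictionary_ID (2 bytes, 0xCAFE) and Frame_Content_Size (8 bytes,
+    // 0x1234567890ABCDEF), i.e. 12 bytes in total.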
+ let header_ok = !fhd.reserved; + + let window_descriptor_start = u32:1; + // RFC8878 Section 3.1.1.1.2 + // "When Single_Segment_Flag is set, Window_Descriptor is not present." + let window_descriptor_len = match fhd.single_segment_flag { + u1:0 => u1:1, + u1:1 => u1:0, + _ => fail!("window_descriptor_len_unreachable", u1:0), + }; + let window_descriptor_raw = header_raw[u32:8*window_descriptor_start+:u8]; + let window_size = extract_window_size_from_window_descriptor(window_descriptor_raw); + + let dictionary_id_start = window_descriptor_start + window_descriptor_len as u32; + let dictionary_id_len = match fhd.dictionary_id_flag { + u2:0 => u32:0, + u2:1 => u32:1, + u2:2 => u32:2, + u2:3 => u32:4, + _ => fail!("dictionary_id_len_unreachable", u32:0), + }; + let dictionary_id_raw = header_raw[u32:8*dictionary_id_start+:u32]; + let dictionary_id = dictionary_id_raw & match fhd.dictionary_id_flag { + u2:0 => u32:0x0000_0000, + u2:1 => u32:0x0000_00ff, + u2:2 => u32:0x0000_ffff, + u2:3 => u32:0xffff_ffff, + _ => fail!("dictionary_id_unreachable", u32:0), + }; + + let frame_content_size_start = dictionary_id_start + dictionary_id_len; + // RFC8878 Section 3.1.1.1.1.1 + // "When Frame_Content_Size_Flag is 0, FCS_Field_Size depends on + // Single_Segment_Flag: If Single_Segment_Flag is set, FCS_Field_Size + // is 1. Otherwise, FCS_Field_Size is 0;" + let frame_content_size_len = match (fhd.frame_content_size_flag, fhd.single_segment_flag) { + (u2:0, u1:0) => u32:0, + (u2:0, u1:1) => u32:1, + (u2:1, _) => u32:2, + (u2:2, _) => u32:4, + (u2:3, _) => u32:8, + _ => fail!("frame_content_size_len_unreachable", u32:0), + }; + + let frame_content_size_raw = header_raw[u32:8*frame_content_size_start+:u64]; + let frame_content_size_masked = frame_content_size_raw & match frame_content_size_len { + u32:0 => u64:0x0000_0000_0000_0000, + u32:1 => u64:0x0000_0000_0000_00ff, + u32:2 => u64:0x0000_0000_0000_ffff, + u32:4 => u64:0x0000_0000_ffff_ffff, + u32:8 => u64:0xffff_ffff_ffff_ffff, + _ => fail!("frame_content_size_masked_unreachable", u64:0), + }; + + // RFC8878 Section 3.1.1.1.4 + // "When FCS_Field_Size is 2, the offset of 256 is added." + let frame_content_size = frame_content_size_masked + match frame_content_size_len { + u32:2 => u64:256, + _ => u64:0, + }; + + // RFC8878 Section 3.1.1.1.2 + // "When Single_Segment_Flag is set, Window_Descriptor is not present.
+ // In this case, Window_Size is Frame_Content_Size [...]" + let window_size = if (window_descriptor_exists(fhd)) { + window_size + } else if (frame_content_size_exists(fhd)) { + frame_content_size + } else { + WindowSize:0 + }; + + let total_header_len = (frame_content_size_start + frame_content_size_len) as u4; + + (FrameHeader { + window_size: window_size, + frame_content_size: if frame_content_size_len != u32:0 { frame_content_size } else { FrameContentSize:0 }, + dictionary_id: if dictionary_id_len != u32:0 { dictionary_id } else { DictionaryId:0 }, + content_checksum_flag: fhd.content_checksum_flag, + }, total_header_len, header_ok) +} + + +#[test] +fn test_parse_frame_header() { + // normal case + let test_vec = uN[112]:0x1234567890ABCDEF_CAFE_09_C2; + let (frame_header_result, len, ok) = parse_frame_header(test_vec); + assert_eq(frame_header_result, FrameHeader { + window_size: u64:0x900, + frame_content_size: u64:0x1234567890ABCDEF, + dictionary_id: u32:0xCAFE, + content_checksum_flag: u1:0, + }); + assert_eq(len, u4:12); + assert_eq(ok, u1:1); + + // SingleSegmentFlag is set + let test_vec = uN[112]:0xaa20; + let (frame_header_result, len, ok) = parse_frame_header(test_vec); + assert_eq(frame_header_result, FrameHeader { + window_size: u64:0xaa, + frame_content_size: u64:0xaa, + dictionary_id: u32:0x0, + content_checksum_flag: u1:0, + }); + assert_eq(len, u4:2); + assert_eq(ok, u1:1); + + // SingleSegmentFlag is set and FrameContentSize is bigger than accepted window_size + let test_vec = uN[112]:0x1234567890ABCDEF_CAFE_E2; + let (frame_header_result, len, ok) = parse_frame_header(test_vec); + assert_eq(frame_header_result, FrameHeader { + window_size: u64:0x1234567890ABCDEF, + frame_content_size: u64:0x1234567890ABCDEF, + dictionary_id: u32:0xCAFE, + content_checksum_flag: u1:0, + }); + assert_eq(len, u4:11); + assert_eq(ok, u1:1); + + // Frame header descriptor is corrupted (we don't check frame header and length) + let test_vec = uN[112]:0x1234567890ABCDEF_1234_09_CA; + let (_, _, ok) = parse_frame_header(test_vec); + assert_eq(ok, u1:0); + + // Large window size + let test_vec = uN[112]:0xd310; + let (frame_header_result, len, ok) = parse_frame_header(test_vec); + assert_eq(frame_header_result, FrameHeader { + window_size: u64:0x1600000000, + ..zero!() + }); + assert_eq(len, u4:2); + assert_eq(ok, u1:1); + + // Large window size + let test_vec = uN[112]:0xf45b5b5b0db1; + let (frame_header_result, len, ok) = parse_frame_header(test_vec); + assert_eq(frame_header_result, FrameHeader { + window_size: u64:0xf45b5b5b, + frame_content_size: u64:0xf45b5b5b, + dictionary_id: u32:0xD, + content_checksum_flag: u1:0, + }); + assert_eq(len, u4:6); + assert_eq(ok, u1:1); + + // Large window size + let test_vec = uN[112]:0xc0659db6813a16b33f3da53a79e4; + let (frame_header_result, len, ok) = parse_frame_header(test_vec); + assert_eq(frame_header_result, FrameHeader { + window_size: u64:0x3a16b33f3da53a79, + frame_content_size: u64:0x3a16b33f3da53a79, + dictionary_id: u32:0, + content_checksum_flag: u1:1, + }); + assert_eq(len, u4:9); + assert_eq(ok, u1:1); +} + + +enum FrameHeaderDecoderFsm: u1 { + RECV = 0, + RESP = 1 +} + +// Magic number value, as in: +// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1 +const MAGIC_NUMBER = u32:0xFD2FB528; +const MAGIC_NUMBER_LEN = u32:4; + +const MAX_HEADER_LEN = u32:14; +const MAX_MAGIC_PLUS_HEADER_LEN = MAGIC_NUMBER_LEN + MAX_HEADER_LEN; + +struct FrameHeaderDecoderState { + fsm: FrameHeaderDecoderFsm, + xfers: u32, + raw_header: 
uN[XFER_SIZE][XFER_COUNT], +} + +pub proc FrameHeaderDecoder< + WINDOW_LOG_MAX: u32, + DATA_W: u32, + ADDR_W: u32, + XFERS_FOR_HEADER: u32 = {((MAX_MAGIC_PLUS_HEADER_LEN * u32:8) / DATA_W) + u32:1}, +> { + type State = FrameHeaderDecoderState; + type Fsm = FrameHeaderDecoderFsm; + type Req = FrameHeaderDecoderReq; + type Resp = FrameHeaderDecoderResp; + type ReaderReq = mem_reader::MemReaderReq; + type ReaderResp = mem_reader::MemReaderResp; + + reader_req_s: chan out; + reader_resp_r: chan in; + + decode_req_r: chan in; + decode_resp_s: chan out; + + config( + reader_req_s: chan out, + reader_resp_r: chan in, + decode_req_r: chan in, + decode_resp_s: chan out + ) { + (reader_req_s, reader_resp_r, decode_req_r, decode_resp_s) + } + + init { zero!() } + + next(state: State) { + type ReaderReq = mem_reader::MemReaderReq; + type State = FrameHeaderDecoderState; + + let tok0 = join(); + let (tok_req, req, do_req) = recv_non_blocking(tok0, decode_req_r, zero!()); + send_if(tok_req, reader_req_s, do_req, ReaderReq { addr: req.addr, length: MAX_MAGIC_PLUS_HEADER_LEN as uN[ADDR_W] }); + + let do_recv = (state.fsm == Fsm::RECV); + let (tok, resp, recvd) = recv_if_non_blocking(tok0, reader_resp_r, do_recv, zero!()); + + let do_resp = (state.fsm == Fsm::RESP); + let raw_header_bits = state.raw_header as uN[DATA_W * XFERS_FOR_HEADER]; + let raw_magic_number = raw_header_bits[:s32:8 * MAGIC_NUMBER_LEN as s32]; + let raw_header = raw_header_bits[s32:8 * MAGIC_NUMBER_LEN as s32 : s32:8 * MAX_MAGIC_PLUS_HEADER_LEN as s32]; + let magic_number_ok = raw_magic_number == MAGIC_NUMBER; + let (decoded_header, header_len, header_ok) = parse_frame_header(raw_header); + + let status = if (!header_ok || !magic_number_ok) { + FrameHeaderDecoderStatus::CORRUPTED + } else if (!window_size_valid(decoded_header.window_size)) { + FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE + } else { + FrameHeaderDecoderStatus::OKAY + }; + + let header_result = FrameHeaderDecoderResp { + status: status, + header: decoded_header, + length: header_len as u5 + MAGIC_NUMBER_LEN as u5, + }; + + send_if(tok0, decode_resp_s, do_resp, header_result); + + let next_state = match (state.fsm) { + Fsm::RECV => { + if (recvd) { + // raw_header is updated from the highest to lowest index because + // highest index in an array contains least significant bytes when + // casting to a bit vector + let update_idx = XFERS_FOR_HEADER - state.xfers - u32:1; + let next_raw_header = update(state.raw_header, update_idx, resp.data); + if (resp.last) { + State { raw_header: next_raw_header, fsm: Fsm::RESP, ..state } + } else { + State { raw_header: next_raw_header, xfers: state.xfers + u32:1, ..state } + } + } else { + state + } + }, + Fsm::RESP => { + State { fsm: Fsm::RECV, xfers: u32:0, ..state } + }, + _ => fail!("FrameHeaderDecoder_fsm_unreachable", zero!()) + }; + + next_state + } +} + +// The largest allowed WindowLog for DSLX tests +pub const TEST_WINDOW_LOG_MAX = u32:22; +pub const TEST_DATA_W = u32:32; +pub const TEST_ADDR_W = u32:16; +pub const TEST_XFERS_FOR_HEADER = ((MAX_MAGIC_PLUS_HEADER_LEN * u32:8) / TEST_DATA_W) + u32:1; + +#[test_proc] +proc FrameHeaderDecoderTest { + type Req = FrameHeaderDecoderReq; + type Resp = FrameHeaderDecoderResp; + type ReaderReq = mem_reader::MemReaderReq; + type ReaderResp = mem_reader::MemReaderResp; + + terminator: chan out; + + reader_req_r: chan in; + reader_resp_s: chan out; + + decode_req_s: chan out; + decode_resp_r: chan in; + + config(terminator: chan out) { + let (reader_req_s, reader_req_r) = 
chan("reader_req"); + let (reader_resp_s, reader_resp_r) = chan("reader_resp"); + let (decode_req_s, decode_req_r) = chan("decode_req"); + let (decode_resp_s, decode_resp_r) = chan("decode_resp"); + spawn FrameHeaderDecoder( + reader_req_s, + reader_resp_r, + decode_req_r, + decode_resp_s + ); + (terminator, reader_req_r, reader_resp_s, decode_req_s, decode_resp_r) + } + + init {} + + next(state: ()) { + let tok = join(); + let tests: (u32[TEST_XFERS_FOR_HEADER], FrameHeaderDecoderResp)[7] = [ + ( + // normal case + [u32:0xFD2FB528, u32:0xCAFE_09_C2, u32:0x90ABCDEF, u32:0x12345678, u32:0x0], + FrameHeaderDecoderResp { + header: FrameHeader { + window_size: u64:0x900, + frame_content_size: u64:0x1234567890ABCDEF, + dictionary_id: u32:0xCAFE, + content_checksum_flag: u1:0, + }, + status: FrameHeaderDecoderStatus::OKAY, + length: u5:16 + }, + ), ( + // SingleSegmentFlag is set + [u32:0xFD2FB528, u32:0xAA20, u32:0x0, u32:0x0, u32:0x0], + FrameHeaderDecoderResp { + header: FrameHeader { + window_size: u64:0xaa, + frame_content_size: u64:0xaa, + dictionary_id: u32:0x0, + content_checksum_flag: u1:0, + }, + status: FrameHeaderDecoderStatus::OKAY, + length: u5:6 + }, + ), ( + // SingleSegmentFlag is set and FrameContentSize is bigger than accepted window_size + [u32:0xFD2FB528, u32:0xEF_CAFE_E2, u32:0x7890ABCD, u32:0x123456, u32:0x0], + FrameHeaderDecoderResp { + header: FrameHeader { + window_size: u64:0x1234567890ABCDEF, + frame_content_size: u64:0x1234567890ABCDEF, + dictionary_id: u32:0xCAFE, + content_checksum_flag: u1:0, + }, + status: FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE, + length: u5:15 + }, + ), ( + // Frame header descriptor is corrupted (we don't check 'header' and 'length' fields) + [u32:0xFD2FB528, u32:0x1234_09_CA, u32:0x90ABCDEF, u32:0x12345678, u32:0x0], + FrameHeaderDecoderResp { + header: zero!(), + status: FrameHeaderDecoderStatus::CORRUPTED, + length: u5:0 + }, + ), ( + // Window size required by frame is too big for given decoder configuration + [u32:0xFD2FB528, u32:0xD310, u32:0x0, u32:0x0, u32:0x0], + FrameHeaderDecoderResp { + header: FrameHeader { + window_size: u64:0x1600000000, + ..zero!() + }, + status: FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE, + length: u5:6 + }, + ), ( + // Window size required by frame is too big for given decoder configuration + [u32:0xFD2FB528, u32:0x5B5B0DB1, u32:0xF45B, u32:0x0, u32:0x0], + FrameHeaderDecoderResp { + header: FrameHeader { + window_size: u64:0xf45b5b5b, + frame_content_size: u64:0xf45b5b5b, + dictionary_id: u32:0xD, + content_checksum_flag: u1:0, + }, + status: FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE, + length: u5:10 + }, + ), ( + // Window size required by frame is too big for given decoder configuration + [u32:0xFD2FB528, u32:0xA53A79E4, u32:0x16B33F3D, u32:0x9DB6813A, u32:0xC065], + FrameHeaderDecoderResp { + header: FrameHeader { + window_size: u64:0x3a16b33f3da53a79, + frame_content_size: u64:0x3a16b33f3da53a79, + dictionary_id: u32:0, + content_checksum_flag: u1:1, + }, + status: FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE, + length: u5:13 + } + ) + ]; + + const ADDR = u16:0x1234; + let tok = for ((_, (test_vec, expected)), tok): ((u32, (u32[TEST_XFERS_FOR_HEADER], FrameHeaderDecoderResp)), token) in enumerate(tests) { + let tok = send(tok, decode_req_s, FrameHeaderDecoderReq { addr: ADDR }); + let (tok, recv_data) = recv(tok, reader_req_r); + + assert_eq(recv_data, ReaderReq { addr: ADDR, length: MAX_MAGIC_PLUS_HEADER_LEN as u16 }); + + let tok = for ((j, word), tok): ((u32, u32), 
token) in enumerate(test_vec) { + let last = j + u32:1 == array_size(test_vec); + send(tok, reader_resp_s, ReaderResp { + status: mem_reader::MemReaderStatus::OKAY, + data: word, + length: if !last { (TEST_DATA_W / u32:8) as u16 } else { (MAX_MAGIC_PLUS_HEADER_LEN % TEST_XFERS_FOR_HEADER) as u16 }, + last: last, + }) + }(tok); + + let (tok, recv_data) = recv(tok, decode_resp_r); + if (recv_data.status == FrameHeaderDecoderStatus::OKAY || recv_data.status == FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE) { + assert_eq(recv_data, expected); + } else { + // if the header is corrupted we don't offer any guarantees + // about its contents so we just check that the status matches + assert_eq(recv_data.status, expected.status); + }; + + tok + }(tok); + + send(tok, terminator, true); + } +} + + +// Largest allowed WindowLog accepted by libzstd decompression function +// https://github.com/facebook/zstd/blob/v1.4.7/lib/decompress/zstd_decompress.c#L296 +// Use only in C++ tests when comparing DSLX ZSTD Decoder with libzstd +pub const TEST_WINDOW_LOG_MAX_LIBZSTD = u32:30; + +proc FrameHeaderDecoderInst { + type Req = FrameHeaderDecoderReq; + type Resp = FrameHeaderDecoderResp; + type ReaderReq = mem_reader::MemReaderReq; + type ReaderResp = mem_reader::MemReaderResp; + + reader_req_s: chan out; + reader_resp_r: chan in; + + decode_req_r: chan in; + decode_resp_s: chan out; + + config( + reader_req_s: chan out, + reader_resp_r: chan in, + decode_req_r: chan in, + decode_resp_s: chan out, + ) { + spawn FrameHeaderDecoder( + reader_req_s, + reader_resp_r, + decode_req_r, + decode_resp_s + ); + (reader_req_s, reader_resp_r, decode_req_r, decode_resp_s) + } + + init {} + + next(state: ()) {} +} diff --git a/xls/modules/zstd/frame_header_test.cc b/xls/modules/zstd/frame_header_test.cc deleted file mode 100644 index 55530c80f5..0000000000 --- a/xls/modules/zstd/frame_header_test.cc +++ /dev/null @@ -1,407 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this kFile except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include - -// NOLINTBEGIN(build/include_order) - Silence include order warnings. -#include "xls/simulation/sim_test_base.h" -#define ZSTD_STATIC_LINKING_ONLY 1 - -#include -#include -#include -#include -#include // NOLINT -#include -#include -#include -// NOLINTEND(build/include_order) - Silence include order warnings. 
- -#include "gtest/gtest.h" -#include "xls/common/fuzzing/fuzztest.h" -#include "absl/container/flat_hash_map.h" -#include "absl/status/statusor.h" -#include "absl/types/span.h" -#include "xls/common/file/filesystem.h" -#include "xls/common/file/get_runfile_path.h" -#include "xls/common/status/matchers.h" -#include "xls/common/status/ret_check.h" -#include "xls/dslx/create_import_data.h" -#include "xls/dslx/import_data.h" -#include "xls/dslx/ir_convert/convert_options.h" -#include "xls/dslx/ir_convert/ir_converter.h" -#include "xls/dslx/parse_and_typecheck.h" -#include "xls/dslx/type_system/parametric_env.h" -#include "xls/ir/bits.h" -#include "xls/ir/value.h" -#include "xls/modules/zstd/data_generator.h" -#include "external/zstd/lib/zstd.h" -#include "external/zstd/lib/zstd_errors.h" - -namespace xls { -namespace { - -// Must be in sync with FrameHeaderStatus from -// xls/modules/zstd/frame_header.x -enum FrameHeaderStatus : uint8_t { - OK, - CORRUPTED, - NO_ENOUGH_DATA, - UNSUPPORTED_WINDOW_SIZE -}; - -class ZstdFrameHeader { - public: - absl::Span buffer() const { - return absl::MakeConstSpan(buffer_); - } - - ZSTD_frameHeader header() const { return header_; } - - size_t result() const { return result_; } - - ZstdFrameHeader(absl::Span buffer, ZSTD_frameHeader h, - size_t r) - : header_(h), result_(r) { - std::vector v(buffer.begin(), buffer.end()); - buffer_ = v; - } - // Parse a frame header from an arbitrary buffer with the ZSTD library. - static absl::StatusOr Parse( - absl::Span buffer) { - XLS_RET_CHECK(!buffer.empty()); - XLS_RET_CHECK(buffer.data() != nullptr); - ZSTD_frameHeader zstd_fh; - size_t result = ZSTD_getFrameHeader_advanced( - &zstd_fh, buffer.data(), buffer.size(), ZSTD_f_zstd1_magicless); - return ZstdFrameHeader(buffer, zstd_fh, result); - } - - private: - std::vector buffer_; - ZSTD_frameHeader header_; - size_t result_; -}; - -class FrameHeaderTest : public xls::SimTestBase { - public: - // Prepare simulation environment - void SetUp() override { - XLS_ASSERT_OK_AND_ASSIGN(std::filesystem::path path, - xls::GetXlsRunfilePath(this->kFile)); - XLS_ASSERT_OK_AND_ASSIGN(std::string module_text, - xls::GetFileContents(path)); - - auto import_data = xls::dslx::CreateImportDataForTest(); - XLS_ASSERT_OK_AND_ASSIGN( - xls::dslx::TypecheckedModule checked_module, - xls::dslx::ParseAndTypecheck(module_text, this->kFileName, - this->kModuleName, &import_data)); - - auto options = xls::dslx::ConvertOptions{}; - /* FIXME: The following code should work with a parametrized version of - * the `parse_frame_header` function. However, it seems that - * the symbolic_bindings are not correctly propagated inside - * ConvertOneFunction. To leverage the problem, a simple specialization - * of the function is used (`parse_frame_header_128`). - * Once the problem is solved, we can restore the code below. - */ - // auto symbolic_bindings = xls::dslx::ParametricEnv( - // absl::flat_hash_map{ - // {"CAPACITY", xls::dslx::InterpValue::MakeUBits(/*bit_count=*/32, - // /*value=*/32)}}); - dslx::ParametricEnv* symbolic_bindings = nullptr; - XLS_ASSERT_OK_AND_ASSIGN( - this->converted, xls::dslx::ConvertOneFunction( - checked_module.module, kFunctionName, &import_data, - symbolic_bindings, options)); - } - - // Prepare inputs for DSLX simulation based on the given zstd header, - // form the expected output from the simulation, - // run the simulation of frame header parser and compare the results against - // expected values. 
- void RunAndExpectFrameHeader(const ZstdFrameHeader& zstd_frame_header) { - // Extend buffer contents to 128 bits if necessary. - const absl::Span buffer = zstd_frame_header.buffer(); - std::vector buffer_extended(kDslxBufferSizeBytes, 0); - absl::Span input_buffer; - if (buffer.size() < kDslxBufferSizeBytes) { - std::copy(buffer.begin(), buffer.end(), buffer_extended.begin()); - input_buffer = absl::MakeSpan(buffer_extended); - } else { - input_buffer = buffer; - } - - // Decide on the expected status - ZSTD_frameHeader zstd_fh = zstd_frame_header.header(); - size_t result = zstd_frame_header.result(); - FrameHeaderStatus expected_status = FrameHeaderStatus::OK; - if (result != 0) { - if (ZSTD_isError(result)) { - switch (ZSTD_getErrorCode(result)) { - case ZSTD_error_frameParameter_windowTooLarge: - expected_status = FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE; - break; - case ZSTD_error_frameParameter_unsupported: - // Occurs when reserved_bit == 1, should result in CORRUPTED state - default: - // Provided data is corrupted. Unable to correctly parse ZSTD frame. - expected_status = FrameHeaderStatus::CORRUPTED; - break; - } - } else { - // Provided data is to small to correctly parse ZSTD frame, should - // have `result` bytes, got `buffer.size()` bytes. - expected_status = FrameHeaderStatus::NO_ENOUGH_DATA; - } - // Make sure that the FCS does not exceed max window buffer size - // Frame Header decoding failed - Special case - difference between the - // reference library and the decoder - } else if (!window_size_valid(zstd_fh.windowSize)) { - expected_status = FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE; - } - - auto input = CreateDslxSimulationInput(buffer.size(), input_buffer); - absl::flat_hash_map hashed_input = {{"buffer", input}}; - - auto expected_frame_header_result = CreateExpectedFrameHeaderResult( - &zstd_fh, input, buffer, expected_status); - - RunAndExpectEq(hashed_input, expected_frame_header_result, this->converted, - true, true); - } - - const std::string_view kFile = "xls/modules/zstd/frame_header_test.x"; - const std::string_view kModuleName = "frame_header_test"; - const std::string_view kFileName = "frame_header_test.x"; - const std::string_view kFunctionName = "parse_frame_header_128"; - std::string converted; - - private: - static const size_t kDslxBufferSize = 128; - static const size_t kDslxBufferSizeBytes = - (kDslxBufferSize + CHAR_BIT - 1) / CHAR_BIT; - - // Largest allowed WindowLog accepted by libzstd decompression function - // https://github.com/facebook/zstd/blob/v1.5.6/lib/decompress/zstd_decompress.c#L515 - // Use only in C++ tests when comparing DSLX ZSTD Decoder with libzstd - // Must be in sync with kTestWindowLogMaxLibZstd in frame_header_test.x - const uint64_t kTestWindowLogMaxLibZstd = 30; - - // Maximal mantissa value for calculating maximal accepted window_size - // as per https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor - const uint64_t kMaxMantissa = 0b111; - - // Calculate maximal accepted window_size for given WINDOW_LOG_MAX and return - // whether given window_size should be accepted or discarded. 
Based on - // window_size calculation from: RFC 8878 - // https://datatracker.ietf.org/doc/html/rfc8878#name-window-descriptor - bool window_size_valid(uint64_t window_size) { - auto max_window_size = - (1 << kTestWindowLogMaxLibZstd) + - (((1 << kTestWindowLogMaxLibZstd) >> 3) * kMaxMantissa); - - return window_size <= max_window_size; - } - - // Form DSLX Value representing ZSTD Frame header based on data parsed with - // ZSTD library. Represents DSLX struct `FrameHeader`. - Value CreateExpectedFrameHeader(ZSTD_frameHeader* fh, - FrameHeaderStatus expected_status) { - if (expected_status == FrameHeaderStatus::CORRUPTED || - expected_status == FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE) { - return Value::Tuple({ - /*window_size=*/Value(UBits(0, 64)), - /*frame_content_size=*/Value(UBits(0, 64)), - /*dictionary_id=*/Value(UBits(0, 32)), - /*content_checksum_flag=*/Value(UBits(0, 1)), - }); - } - return Value::Tuple({ - /*window_size=*/Value(UBits(fh->windowSize, 64)), - /*frame_content_size=*/Value(UBits(fh->frameContentSize, 64)), - /*dictionary_id=*/Value(UBits(fh->dictID, 32)), - /*content_checksum_flag=*/Value(UBits(fh->checksumFlag, 1)), - }); - } - - // Create DSLX Value representing Buffer contents after parsing frame header - // in simulation. Represents DSLX struct `Buffer`. - Value CreateExpectedBuffer(Value dslx_simulation_input, - absl::Span input_buffer, - size_t consumed_bytes_count, - FrameHeaderStatus expected_status) { - // Return original buffer contents - if (expected_status == FrameHeaderStatus::NO_ENOUGH_DATA) { - return dslx_simulation_input; - } - // Critical failure - return empty buffer - if (expected_status == FrameHeaderStatus::CORRUPTED || - expected_status == FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE) { - return Value::Tuple({/*contents:*/ Value(UBits(0, kDslxBufferSize)), - /*length:*/ Value(UBits(0, 32))}); - } - - // Frame Header parsing succeeded. Expect output buffer contents with - // removed first `consumed_bytes_count` bytes and extended to - // kDslxBufferSize if necessary - size_t bytes_to_extend = - kDslxBufferSizeBytes - (input_buffer.size() - consumed_bytes_count); - std::vector output_buffer(input_buffer.begin() + consumed_bytes_count, - input_buffer.end()); - for (int i = 0; i < bytes_to_extend; i++) { - output_buffer.push_back(0); - } - - auto expected_buffer_contents = - Value(Bits::FromBytes(output_buffer, kDslxBufferSize)); - size_t output_buffer_size_bits = - (input_buffer.size() - consumed_bytes_count) * CHAR_BIT; - size_t expected_buffer_size = output_buffer_size_bits > kDslxBufferSize - ? kDslxBufferSize - : output_buffer_size_bits; - - return Value::Tuple({/*contents:*/ expected_buffer_contents, - /*length:*/ Value(UBits(expected_buffer_size, 32))}); - } - - // Prepare DSLX Value representing Full Result of frame header parsing - // simulation. It consists of expected status, parsing result and buffer - // contents after parsing. Represents DSLX struct `FrameHeaderResult`. 
- Value CreateExpectedFrameHeaderResult(ZSTD_frameHeader* fh, - Value dslx_simulation_input, - absl::Span input_buffer, - FrameHeaderStatus expected_status) { - auto expected_buffer = - CreateExpectedBuffer(std::move(dslx_simulation_input), input_buffer, - fh->headerSize, expected_status); - auto expected_frame_header = CreateExpectedFrameHeader(fh, expected_status); - return Value::Tuple({/*status:*/ Value(UBits(expected_status, 2)), - /*header:*/ expected_frame_header, - /*buffer:*/ expected_buffer}); - } - - // Return DSLX Value used as input argument for running frame header parsing - // simulation. Represents DSLX struct `Buffer`. - Value CreateDslxSimulationInput(size_t buffer_size, - absl::Span input_buffer) { - size_t size = buffer_size; - - // ignore buffer contents that won't fit into specialized buffer - if (buffer_size > kDslxBufferSizeBytes) { - size = kDslxBufferSizeBytes; - } - - return Value::Tuple( - {/*contents:*/ Value(Bits::FromBytes(input_buffer, kDslxBufferSize)), - /*length:*/ Value(UBits(size * CHAR_BIT, 32))}); - } -}; - -/* TESTS */ - -TEST_F(FrameHeaderTest, Success) { - XLS_ASSERT_OK_AND_ASSIGN( - auto header, - ZstdFrameHeader::Parse({0xC2, 0x09, 0xFE, 0xCA, 0xEF, 0xCD, 0xAB, 0x90, - 0x78, 0x56, 0x34, 0x12})); - this->RunAndExpectFrameHeader(header); -} - -TEST_F(FrameHeaderTest, FailCorruptedReservedBit) { - XLS_ASSERT_OK_AND_ASSIGN( - auto header, ZstdFrameHeader::Parse({0xEA, 0xFE, 0xCA, 0xEF, 0xCD, 0xAB, - 0x90, 0x78, 0x56, 0x34, 0x12})); - this->RunAndExpectFrameHeader(header); -} - -TEST_F(FrameHeaderTest, FailUnsupportedWindowSizeTooBig) { - XLS_ASSERT_OK_AND_ASSIGN(auto header, ZstdFrameHeader::Parse({0x10, 0xD3})); - this->RunAndExpectFrameHeader(header); -} - -TEST_F(FrameHeaderTest, FailNoEnoughData) { - XLS_ASSERT_OK_AND_ASSIGN(auto header, ZstdFrameHeader::Parse({0xD3, 0xED})); - this->RunAndExpectFrameHeader(header); -} - -// NO_ENOUGH_DATA has priority over CORRUPTED from reserved bit -TEST_F(FrameHeaderTest, FailNoEnoughDataReservedBit) { - XLS_ASSERT_OK_AND_ASSIGN(auto header, ZstdFrameHeader::Parse({0xED, 0xD3})); - this->RunAndExpectFrameHeader(header); -} - -TEST_F(FrameHeaderTest, FailUnsupportedFrameContentSizeThroughSingleSegment) { - XLS_ASSERT_OK_AND_ASSIGN( - auto header, ZstdFrameHeader::Parse({0261, 015, 91, 91, 91, 0364})); - this->RunAndExpectFrameHeader(header); -} - -TEST_F(FrameHeaderTest, - FailUnsupportedVeryLargeFrameContentSizeThroughSingleSegment) { - XLS_ASSERT_OK_AND_ASSIGN( - auto header, - ZstdFrameHeader::Parse({0344, 'y', ':', 0245, '=', '?', 0263, 0026, ':', - 0201, 0266, 0235, 'e', 0300})); - this->RunAndExpectFrameHeader(header); -} - -TEST_F(FrameHeaderTest, FailUnsupportedWindowSize) { - XLS_ASSERT_OK_AND_ASSIGN( - auto header, - ZstdFrameHeader::Parse({'S', 0301, 'i', 0320, 0, 0256, 'd', 'D', 0226, - 'F', 'Z', 'Z', 0332, 0370, 'A'})); - this->RunAndExpectFrameHeader(header); -} - -class FrameHeaderSeededTest : public FrameHeaderTest, - public ::testing::WithParamInterface { - public: - static const uint32_t random_headers_count = 50; -}; - -// Test `random_headers_count` instances of randomly generated valid -// frame headers, generated with `decodecorpus` tool. 
-TEST_P(FrameHeaderSeededTest, ParseMultipleFrameHeaders) { - auto seed = GetParam(); - XLS_ASSERT_OK_AND_ASSIGN(auto buffer, zstd::GenerateFrameHeader(seed, false)); - XLS_ASSERT_OK_AND_ASSIGN(auto frame_header, ZstdFrameHeader::Parse(buffer)); - this->RunAndExpectFrameHeader(frame_header); -} - -INSTANTIATE_TEST_SUITE_P( - FrameHeaderSeededTest, FrameHeaderSeededTest, - ::testing::Range(0, FrameHeaderSeededTest::random_headers_count)); - -class FrameHeaderFuzzTest - : public fuzztest::PerFuzzTestFixtureAdapter { - public: - void ParseMultipleRandomFrameHeaders(const std::vector& buffer) { - auto frame_header = ZstdFrameHeader::Parse(buffer); - XLS_ASSERT_OK(frame_header); - this->RunAndExpectFrameHeader(frame_header.value()); - } -}; - -// Perform UNDETERMINISTIC FuzzTests with input vectors of variable length and -// contents. Frame Headers generated by FuzzTests can be invalid. -// This test checks if negative cases are handled correctly. -FUZZ_TEST_F(FrameHeaderFuzzTest, ParseMultipleRandomFrameHeaders) - .WithDomains(fuzztest::Arbitrary>() - .WithMinSize(1) - .WithMaxSize(16)); - -} // namespace -} // namespace xls diff --git a/xls/modules/zstd/frame_header_test.x b/xls/modules/zstd/frame_header_test.x deleted file mode 100644 index 9216dfab8d..0000000000 --- a/xls/modules/zstd/frame_header_test.x +++ /dev/null @@ -1,30 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -import std; -import xls.modules.zstd.buffer as buff; -import xls.modules.zstd.frame_header as frame_header; - -type Buffer = buff::Buffer; -type FrameHeaderResult = frame_header::FrameHeaderResult; -type WindowSize = frame_header::WindowSize; - -// Largest allowed WindowLog accepted by libzstd decompression function -// https://github.com/facebook/zstd/blob/v1.4.7/lib/decompress/zstd_decompress.c#L296 -// Use only in C++ tests when comparing DSLX ZSTD Decoder with libzstd -pub const TEST_WINDOW_LOG_MAX_LIBZSTD = WindowSize:30; - -pub fn parse_frame_header_128(buffer: Buffer<128>) -> FrameHeaderResult<128> { - frame_header::parse_frame_header(buffer) -} diff --git a/xls/modules/zstd/hash_table.x b/xls/modules/zstd/hash_table.x new file mode 100644 index 0000000000..4ed9211397 --- /dev/null +++ b/xls/modules/zstd/hash_table.x @@ -0,0 +1,475 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
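+
+// This file contains the implementation of HashTable.
+// Keys are mapped to table slots with a Knuth multiplicative hash, masked down
+// to the number of entries configured at runtime through `num_entries_log2`.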
+ +import std; + +import xls.examples.ram; + +// FIXME: Use default parameter value when fixed (https://github.com/google/xls/issues/1425) +// SIZE_W:u32 = {std::clog2(SIZE + u32:1)} +pub struct HashTableReadReq< + KEY_W: u32, SIZE: u32, SIZE_W:u32 +> { + num_entries_log2: uN[SIZE_W], // number of HashTable entries used in the runtime + key: uN[KEY_W], +} + +pub struct HashTableReadResp { + is_match: bool, + value: uN[VALUE_W] +} + +// FIXME: Use default parameter value when fixed (https://github.com/google/xls/issues/1425) +// SIZE_W:u32 = {std::clog2(SIZE + u32:1)} +pub struct HashTableWriteReq< + KEY_W: u32, VALUE_W: u32, SIZE: u32, SIZE_W:u32 +> { + num_entries_log2: uN[SIZE_W], // number of HashTable entries used in the runtime + key: uN[KEY_W], + value: uN[VALUE_W], +} + +pub struct HashTableWriteResp {} + +fn knuth_hash_slow(key: uN[KEY_W]) -> uN[HASH_W] { + (((key * CONSTANT) as u32) >> (u32:32 - HASH_W)) as uN[HASH_W] +} + +fn knuth_hash(key: uN[KEY_W]) -> uN[HASH_W] { + let result = for (i, result): (u32, uN[KEY_W]) in range(u32:0, u32:32) { + if (CONSTANT >> i) as u1 { result + (key << i) } else { result } + }(uN[KEY_W]:0); + + (result >> (u32:32 - HASH_W)) as uN[HASH_W] +} + +#[test] +fn knuth_hash_check() { + const KNUTH_CONSTANT = u32:0x1e35a7bd; + const HASH_W = u32:32; + + for (i, ()) in range(u32:0, u32:1 << u32:7) { + let hash_slow = knuth_hash_slow(i); + let hash_fast = knuth_hash(i); + assert_eq(hash_slow, hash_fast); + }(()); +} + +struct RamData { + value: uN[VALUE_W], + valid: bool, +} + +proc HashTableReadReqHandler< + KEY_W: u32, VALUE_W: u32, SIZE: u32, KNUTH_CONSTANT: u32, + SIZE_W: u32 = {std::clog2(SIZE + u32:1)}, + HASH_W: u32 = {std::clog2(SIZE)}, + RAM_DATA_W: u32 = {VALUE_W + u32:1}, + RAM_NUM_PARTITIONS: u32 = {ram::num_partitions(u32:1, RAM_DATA_W)} +> { + type ReadReq = HashTableReadReq; + type RamReadReq = ram::ReadReq; + + read_req_r: chan in; + ram_read_req_s: chan out; + + config( + read_req_r: chan in, + ram_read_req_s: chan out + ) { + (read_req_r, ram_read_req_s) + } + + init { } + + next(state: ()) { + let tok = join(); + + let (tok_read, read_req, read_req_valid) = + recv_non_blocking(tok, read_req_r, zero!()); + + let ram_read_req = if read_req_valid { + let hash_mask = (uN[HASH_W]:1 << read_req.num_entries_log2) - uN[HASH_W]:1; + let hash = knuth_hash(read_req.key) & hash_mask; + RamReadReq { addr: hash, mask: !uN[RAM_NUM_PARTITIONS]:0 } + } else { + zero!() + }; + + send_if(tok_read, ram_read_req_s, read_req_valid, ram_read_req); + } +} + +proc HashTableReadRespHandler< + VALUE_W: u32, + RAM_DATA_W: u32 = {VALUE_W + u32:1} // value width + data valid width, +> { + type RamReadResp = ram::ReadResp; + type ReadResp = HashTableReadResp; + + ram_read_resp_r: chan in; + read_resp_s: chan out; + + config( + ram_read_resp_r: chan in, + read_resp_s: chan out + ) { + (ram_read_resp_r, read_resp_s) + } + + init { } + + next(state: ()) { + let tok = join(); + + let (tok, ram_read_resp, ram_read_resp_valid) = + recv_non_blocking(tok, ram_read_resp_r, zero!()); + + let read_resp = if ram_read_resp_valid { + let ram_data = RamData { + value: (ram_read_resp.data >> u32:1) as uN[VALUE_W], + valid: ram_read_resp.data as u1, + }; + ReadResp { + is_match: ram_data.valid, + value: ram_data.value + } + } else { + zero!() + }; + + send_if(tok, read_resp_s, ram_read_resp_valid, read_resp); + } +} + +proc HashTableWriteReqHandler< + KEY_W: u32, VALUE_W: u32, SIZE: u32, KNUTH_CONSTANT: u32, + SIZE_W: u32 = {std::clog2(SIZE + u32:1)}, + HASH_W: u32 = 
{std::clog2(SIZE)}, + RAM_DATA_W: u32 = {VALUE_W + u32:1}, + RAM_NUM_PARTITIONS: u32 = {ram::num_partitions(u32:1, RAM_DATA_W)} +> { + type WriteReq = HashTableWriteReq; + type RamWriteReq = ram::WriteReq; + + write_req_r: chan in; + ram_write_req_s: chan out; + + config( + write_req_r: chan in, + ram_write_req_s: chan out + ) { + (write_req_r, ram_write_req_s) + } + + init { } + + next(state: ()) { + let tok = join(); + + let (tok_write, write_req, write_req_valid) = + recv_non_blocking(tok, write_req_r, zero!()); + + let ram_write_req = if write_req_valid { + let hash_mask = (uN[HASH_W]:1 << write_req.num_entries_log2) - uN[HASH_W]:1; + let hash = knuth_hash(write_req.key) & hash_mask; + let data = write_req.value ++ true; + RamWriteReq { addr: hash, data, mask: !uN[RAM_NUM_PARTITIONS]:0 } + } else { + zero!() + }; + + send_if(tok_write, ram_write_req_s, write_req_valid, ram_write_req); + } +} + +proc HashTableWriteRespHandler { + type RamWriteResp = ram::WriteResp; + type WriteResp = HashTableWriteResp; + + ram_write_resp_r: chan in; + write_resp_s: chan out; + + config( + ram_write_resp_r: chan in, + write_resp_s: chan out + ) { + (ram_write_resp_r, write_resp_s) + } + + init { } + + next(state: ()) { + let tok = join(); + + let (tok, _, ram_write_resp_valid) = + recv_non_blocking(tok, ram_write_resp_r, zero!()); + + send_if(tok, write_resp_s, ram_write_resp_valid, WriteResp {}); + } +} + +pub proc HashTable< + KEY_W: u32, VALUE_W: u32, SIZE: u32, + SIZE_W: u32 = {std::clog2(SIZE + u32:1)}, + HASH_W: u32 = {std::clog2(SIZE)}, + KNUTH_CONSTANT: u32 = {u32:0x1e35a7bd}, + RAM_DATA_W: u32 = {VALUE_W + u32:1}, + RAM_NUM_PARTITIONS: u32 = {ram::num_partitions(u32:1, RAM_DATA_W)} +> { + type ReadReq = HashTableReadReq; + type ReadResp = HashTableReadResp; + type WriteReq = HashTableWriteReq; + type WriteResp = HashTableWriteResp; + + type RamReadReq = ram::ReadReq; + type RamReadResp = ram::ReadResp; + type RamWriteReq = ram::WriteReq; + type RamWriteResp = ram::WriteResp; + + config( + read_req_r: chan in, + read_resp_s: chan out, + write_req_r: chan in, + write_resp_s: chan out, + ram_read_req_s: chan out, + ram_read_resp_r: chan in, + ram_write_req_s: chan out, + ram_write_resp_r: chan in + ) { + spawn HashTableReadReqHandler< + KEY_W, VALUE_W, SIZE, KNUTH_CONSTANT, + // FIXME: Remove below parameters when resolving default values is fixed + SIZE_W, HASH_W, RAM_DATA_W, RAM_NUM_PARTITIONS, + >( + read_req_r, ram_read_req_s + ); + spawn HashTableReadRespHandler< + VALUE_W, + // FIXME: Remove below parameters when resolving default values is fixed + RAM_DATA_W, + >( + ram_read_resp_r, read_resp_s + ); + spawn HashTableWriteReqHandler< + KEY_W, VALUE_W, SIZE, KNUTH_CONSTANT, + // FIXME: Remove below parameters when resolving default values is fixed + SIZE_W, HASH_W, RAM_DATA_W, RAM_NUM_PARTITIONS, + >( + write_req_r, ram_write_req_s + ); + spawn HashTableWriteRespHandler( + ram_write_resp_r, write_resp_s + ); + } + + init { } + + next(state: ()) { } +} + +const INST_KEY_W = u32:32; +const INST_VALUE_W = u32:32; +const INST_SIZE = u32:512; +const INST_SIZE_W = std::clog2(INST_SIZE + u32:1); +const INST_HASH_W = std::clog2(INST_SIZE); +const INST_RAM_DATA_W = INST_VALUE_W + u32:1; +const INST_RAM_NUM_PARTITIONS = ram::num_partitions(u32:1, INST_RAM_DATA_W); + +proc HashTableInst { + type InstReadReq = HashTableReadReq; + type InstReadResp = HashTableReadResp; + type InstWriteReq = HashTableWriteReq; + type InstWriteResp = HashTableWriteResp; + + type InstRamReadReq = ram::ReadReq; + type 
InstRamReadResp = ram::ReadResp; + type InstRamWriteReq = ram::WriteReq; + type InstRamWriteResp = ram::WriteResp; + + config( + read_req_r: chan in, + read_resp_s: chan out, + write_req_r: chan in, + write_resp_s: chan out, + ram_read_req_s: chan out, + ram_read_resp_r: chan in, + ram_write_req_s: chan out, + ram_write_resp_r: chan in + ) { + spawn HashTable( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r + ); + } + + init { } + + next(state: ()) { } +} + +const TEST_KEY_W = u32:32; +const TEST_VALUE_W = u32:32; +const TEST_SIZE = u32:512; +const TEST_SIZE_W = std::clog2(TEST_SIZE + u32:1); +const TEST_HASH_W = std::clog2(TEST_SIZE); +const TEST_RAM_DATA_W = TEST_VALUE_W + u32:1; +const TEST_WORD_PARTITION_SIZE = u32:1; +const TEST_RAM_NUM_PARTITIONS = ram::num_partitions(TEST_WORD_PARTITION_SIZE, TEST_RAM_DATA_W); +const TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_INITIALIZED = true; + +type TestReadReq = HashTableReadReq; +type TestReadResp = HashTableReadResp; +type TestWriteReq = HashTableWriteReq; +type TestWriteResp = HashTableWriteResp; + +type TestRamReadReq = ram::ReadReq; +type TestRamReadResp = ram::ReadResp; +type TestRamWriteReq = ram::WriteReq; +type TestRamWriteResp = ram::WriteResp; + +struct TestData { + num_entries_log2: uN[TEST_SIZE_W], + key: uN[TEST_KEY_W], + value: uN[TEST_VALUE_W] +} + +const TEST_DATA = TestData[32]:[ + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0x6109d84c, value: uN[TEST_VALUE_W]:0xdb370dd7}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:7, key: uN[TEST_KEY_W]:0xe773dc7f, value: uN[TEST_VALUE_W]:0xc8f9f817}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:8, key: uN[TEST_KEY_W]:0xd2254d4a, value: uN[TEST_VALUE_W]:0xa0b4c4bd}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0x4c794548, value: uN[TEST_VALUE_W]:0x8a3e6693}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:3, key: uN[TEST_KEY_W]:0xed1884be, value: uN[TEST_VALUE_W]:0x1787d635}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:5, key: uN[TEST_KEY_W]:0x6c40cc5d, value: uN[TEST_VALUE_W]:0x1e0916a3}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0xa7ad798c, value: uN[TEST_VALUE_W]:0x6efa1a96}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:3, key: uN[TEST_KEY_W]:0x8e3bb720, value: uN[TEST_VALUE_W]:0x6d0a7d57}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0xbf9f7bd4, value: uN[TEST_VALUE_W]:0x46ff026c}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:3, key: uN[TEST_KEY_W]:0xd8c1cd03, value: uN[TEST_VALUE_W]:0xdb5b0ded}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0xd1b33035, value: uN[TEST_VALUE_W]:0x7a21e0ed}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:5, key: uN[TEST_KEY_W]:0x8d512e0c, value: uN[TEST_VALUE_W]:0x708a536b}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0x1a950036, value: uN[TEST_VALUE_W]:0x9097f883}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:3, key: uN[TEST_KEY_W]:0x00707a86, value: uN[TEST_VALUE_W]:0xbcb29fa7}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0x2fcd78a1, value: uN[TEST_VALUE_W]:0x71bae380}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:8, key: uN[TEST_KEY_W]:0x34d8adc5, value: uN[TEST_VALUE_W]:0xdff20f62}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:5, key: uN[TEST_KEY_W]:0xd04ebdda, value: uN[TEST_VALUE_W]:0x9c785523}, + TestData {num_entries_log2: 
uN[TEST_SIZE_W]:5, key: uN[TEST_KEY_W]:0x9b419a1a, value: uN[TEST_VALUE_W]:0xf1d27361}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0x9eb7784d, value: uN[TEST_VALUE_W]:0x58a9d8f2}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:7, key: uN[TEST_KEY_W]:0x6d7499ef, value: uN[TEST_VALUE_W]:0x40387b18}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:8, key: uN[TEST_KEY_W]:0xb255d705, value: uN[TEST_VALUE_W]:0x73ecbb7b}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:8, key: uN[TEST_KEY_W]:0x132c9499, value: uN[TEST_VALUE_W]:0x48b85084}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0xd3acf006, value: uN[TEST_VALUE_W]:0xbbd2f2b9}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:2, key: uN[TEST_KEY_W]:0x0dd951cd, value: uN[TEST_VALUE_W]:0x975ab3fe}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:6, key: uN[TEST_KEY_W]:0x3d6cd6b1, value: uN[TEST_VALUE_W]:0xe18f2e83}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:5, key: uN[TEST_KEY_W]:0xf511fadb, value: uN[TEST_VALUE_W]:0xb99e2ab4}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:2, key: uN[TEST_KEY_W]:0x90bea2bb, value: uN[TEST_VALUE_W]:0xc88b54c2}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0xf2513572, value: uN[TEST_VALUE_W]:0x42ef67d9}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0x2dd80b55, value: uN[TEST_VALUE_W]:0x3b399d05}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0x823af460, value: uN[TEST_VALUE_W]:0x89d154ba}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:9, key: uN[TEST_KEY_W]:0x8ab8897e, value: uN[TEST_VALUE_W]:0xb30eb8c5}, + TestData {num_entries_log2: uN[TEST_SIZE_W]:2, key: uN[TEST_KEY_W]:0xe9499524, value: uN[TEST_VALUE_W]:0xb4a30d68}, +]; + + +#[test_proc] +proc HashTable_test { + terminator_s: chan out; + read_req_s: chan out; + read_resp_r: chan in; + write_req_s: chan out; + write_resp_r: chan in; + + config(terminator_s: chan out) { + let (read_req_s, read_req_r) = chan("read_req"); + let (read_resp_s, read_resp_r) = chan("read_resp"); + let (write_req_s, write_req_r) = chan("write_req"); + let (write_resp_s, write_resp_r) = chan("write_resp"); + + let (ram_read_req_s, ram_read_req_r) = chan("ram_read_req"); + let (ram_read_resp_s, ram_read_resp_r) = chan("ram_read_resp"); + let (ram_write_req_s, ram_write_req_r) = chan("ram_write_req"); + let (ram_write_resp_s, ram_write_resp_r) = chan("ram_write_resp"); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_SIZE, TEST_WORD_PARTITION_SIZE, + TEST_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_INITIALIZED + >(ram_read_req_r, ram_read_resp_s, ram_write_req_r, ram_write_resp_s); + + spawn HashTable( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r + ); + + ( + terminator_s, + read_req_s, read_resp_r, + write_req_s, write_resp_r + ) + } + + init { } + + next(state: ()) { + let tok = join(); + + let tok = for ((i, test_data), tok): ((u32, TestData), token) in enumerate(TEST_DATA) { + // try to read data that was not written + let read_req = TestReadReq { + num_entries_log2: test_data.num_entries_log2, + key: test_data.key + }; + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{}.1 read request {:#x}", i + u32:1, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{}.1 read response {:#x}", i + u32:1, read_resp); + if read_resp.is_match { + // there may be match in case of a conflict + assert_eq(false, test_data.value == read_resp.value); + } else { }; + 
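+ // The pre-write read above may hit an entry written earlier for a different key
+ // that hashed into the same slot; in that case only a value mismatch is checked,
+ // since the value for the current key has not been written yet.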
+ // write data + let write_req = TestWriteReq { + num_entries_log2: test_data.num_entries_log2, + key: test_data.key, + value: test_data.value, + }; + let tok = send(tok, write_req_s, write_req); + trace_fmt!("Sent #{} write request {:#x}", i + u32:1, write_req); + + let (tok, write_resp) = recv(tok, write_resp_r); + trace_fmt!("Received #{} write response {:#x}", i + u32:1, write_resp); + + // read data after it was written + let read_req = TestReadReq { + num_entries_log2: test_data.num_entries_log2, + key: test_data.key + }; + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{}.2 read request {:#x}", i + u32:1, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{}.2 read response {:#x}", i + u32:1, read_resp); + assert_eq(TestReadResp { is_match: true, value: test_data.value }, read_resp); + + tok + }(tok); + + send(tok, terminator_s, true); + } +} diff --git a/xls/modules/zstd/history_buffer.x b/xls/modules/zstd/history_buffer.x new file mode 100644 index 0000000000..bbb012d250 --- /dev/null +++ b/xls/modules/zstd/history_buffer.x @@ -0,0 +1,491 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// This file contains implementation of HistoryBuffer. + + +import std; + +import xls.examples.ram as ram; +import xls.modules.zstd.aligned_parallel_ram as aligned_parallel_ram; + +const RAM_NUM = aligned_parallel_ram::RAM_NUM; +const RAM_NUM_W = aligned_parallel_ram::RAM_NUM_W; + + +pub struct HistoryBufferReadReq { + offset: uN[OFFSET_W], +} + +pub struct HistoryBufferReadResp { + data: uN[DATA_W], +} + +pub struct HistoryBufferWriteReq { + data: uN[DATA_W], +} + +pub struct HistoryBufferWriteResp {} + +struct HistoryBufferState { + curr_offset: uN[OFFSET_W], + length: uN[OFFSET_W], +} + +proc HistoryBufferReadRespHandler { + type ReadResp = HistoryBufferReadResp; + type ParallelRamReadResp = aligned_parallel_ram::AlignedParallelRamReadResp; + + parallel_ram_read_resp_r: chan in; + read_resp_s: chan out; + + config( + parallel_ram_read_resp_r: chan in, + read_resp_s: chan out, + ) { + ( + parallel_ram_read_resp_r, + read_resp_s, + ) + } + + init { } + + next (state: ()) { + let (tok, parallel_ram_read_resp, parallel_ram_read_resp_valid) = recv_non_blocking( + join(), parallel_ram_read_resp_r, zero!() + ); + + let read_resp = ReadResp { + data: parallel_ram_read_resp.data, + }; + + send_if(tok, read_resp_s, parallel_ram_read_resp_valid, read_resp); + } +} + +proc HistoryBufferWriteRespHandler { + type WriteResp = HistoryBufferWriteResp; + type ParallelRamWriteResp = aligned_parallel_ram::AlignedParallelRamWriteResp; + + parallel_ram_write_resp_r: chan in; + write_resp_s: chan out; + + config( + parallel_ram_write_resp_r: chan in, + write_resp_s: chan out, + ) { + ( + parallel_ram_write_resp_r, + write_resp_s, + ) + } + + init { } + + next (state: ()) { + let (tok, _, parallel_ram_write_resp_valid) = recv_non_blocking( + join(), parallel_ram_write_resp_r, zero!() + ); + + let write_resp = WriteResp { 
}; + + send_if(tok, write_resp_s, parallel_ram_write_resp_valid, write_resp); + } +} + +pub proc HistoryBuffer< + SIZE: u32, + DATA_W: u32, + OFFSET_W: u32 = {std::clog2(SIZE)}, + RAM_SIZE: u32 = {SIZE / RAM_NUM}, + RAM_DATA_W: u32 = {DATA_W / RAM_NUM}, + RAM_ADDR_W: u32 = {std::clog2(RAM_SIZE)}, + RAM_PARTITION_SIZE: u32 = {RAM_DATA_W}, + RAM_NUM_PARTITIONS: u32 = {ram::num_partitions(RAM_PARTITION_SIZE, RAM_DATA_W)}, +>{ + type ReadReq = HistoryBufferReadReq; + type ReadResp = HistoryBufferReadResp; + type WriteReq = HistoryBufferWriteReq; + type WriteResp = HistoryBufferWriteResp; + + type ParallelRamReadReq = aligned_parallel_ram::AlignedParallelRamReadReq; + type ParallelRamReadResp = aligned_parallel_ram::AlignedParallelRamReadResp; + type ParallelRamWriteReq = aligned_parallel_ram::AlignedParallelRamWriteReq; + type ParallelRamWriteResp = aligned_parallel_ram::AlignedParallelRamWriteResp; + + type RamReadReq = ram::ReadReq; + type RamReadResp = ram::ReadResp; + type RamWriteReq = ram::WriteReq; + type RamWriteResp = ram::WriteResp; + + type State = HistoryBufferState; + type Offset = uN[OFFSET_W]; + + read_req_r: chan in; + write_req_r: chan in; + + // RAM interface + parallel_ram_read_req_s: chan out; + parallel_ram_write_req_s: chan out; + + config ( + read_req_r: chan in, + read_resp_s: chan out, + write_req_r: chan in, + write_resp_s: chan out, + ram_read_req_s: chan[RAM_NUM] out, + ram_read_resp_r: chan[RAM_NUM] in, + ram_write_req_s: chan[RAM_NUM] out, + ram_write_resp_r: chan[RAM_NUM] in, + ) { + let (parallel_ram_read_req_s, parallel_ram_read_req_r) = chan("parallel_ram_read_req"); + let (parallel_ram_read_resp_s, parallel_ram_read_resp_r) = chan("parallel_ram_read_resp"); + let (parallel_ram_write_req_s, parallel_ram_write_req_r) = chan("parallel_ram_write_req"); + let (parallel_ram_write_resp_s, parallel_ram_write_resp_r) = chan("parallel_ram_write_resp"); + + spawn HistoryBufferReadRespHandler ( + parallel_ram_read_resp_r, read_resp_s, + ); + + spawn HistoryBufferWriteRespHandler ( + parallel_ram_write_resp_r, write_resp_s, + ); + + spawn aligned_parallel_ram::AlignedParallelRam( + parallel_ram_read_req_r, parallel_ram_read_resp_s, + parallel_ram_write_req_r, parallel_ram_write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r, + ); + + ( + read_req_r, + write_req_r, + parallel_ram_read_req_s, + parallel_ram_write_req_s, + ) + } + + init { zero!() } + + next (state: State) { + const ONE_TRANSFER_WIDTH = ((DATA_W as uN[OFFSET_W]) >> u32:3); + const MAX_OFFSET = zero!(); + + // handle read request + let (tok, read_req, read_req_valid) = recv_non_blocking(join(), read_req_r, zero!()); + let offset_invalid = (read_req.offset > state.length - ONE_TRANSFER_WIDTH); + if read_req_valid & offset_invalid { + trace_fmt!("WARNING: Asking for too high offset (req: {:#x}, max: {:#x})", + read_req.offset, state.length - ONE_TRANSFER_WIDTH); + } else {}; + + let parallel_ram_read_req = ParallelRamReadReq { + addr: state.curr_offset - read_req.offset - ONE_TRANSFER_WIDTH, + }; + + send_if(tok, parallel_ram_read_req_s, read_req_valid, parallel_ram_read_req); + + // handle write request + let (tok, write_req, write_req_valid) = recv_non_blocking(join(), write_req_r, zero!()); + + let parallel_ram_write_req = ParallelRamWriteReq { + addr: state.curr_offset, + data: write_req.data, + }; + + send_if(tok, parallel_ram_write_req_s, write_req_valid, parallel_ram_write_req); + + // update offset + if write_req_valid { + let next_length = if state.length > MAX_OFFSET - 
ONE_TRANSFER_WIDTH { + MAX_OFFSET + } else { + state.length + ONE_TRANSFER_WIDTH + }; + + State { + curr_offset: state.curr_offset + ONE_TRANSFER_WIDTH, + length: next_length, + } + } else { + state + } + } +} + +const INST_SIZE = u32:1024; +const INST_DATA_W = u32:64; +const INST_OFFSET_W = {std::clog2(INST_SIZE)}; +const INST_RAM_SIZE = INST_SIZE / RAM_NUM; +const INST_RAM_DATA_W = {INST_DATA_W / RAM_NUM}; +const INST_RAM_ADDR_W = {std::clog2(INST_RAM_SIZE)}; +const INST_RAM_PARTITION_SIZE = {INST_RAM_DATA_W}; +const INST_RAM_NUM_PARTITIONS = {ram::num_partitions(INST_RAM_PARTITION_SIZE, INST_RAM_DATA_W)}; + +proc HistoryBufferInst { + type InstReadReq = HistoryBufferReadReq; + type InstReadResp = HistoryBufferReadResp; + type InstWriteReq = HistoryBufferWriteReq; + type InstWriteResp = HistoryBufferWriteResp; + + type InstParallelRamReadReq = aligned_parallel_ram::AlignedParallelRamReadReq; + type InstParallelRamReadResp = aligned_parallel_ram::AlignedParallelRamReadResp; + type InstParallelRamWriteReq = aligned_parallel_ram::AlignedParallelRamWriteReq; + type InstParallelRamWriteResp = aligned_parallel_ram::AlignedParallelRamWriteResp; + + type InstRamReadReq = ram::ReadReq; + type InstRamReadResp = ram::ReadResp; + type InstRamWriteReq = ram::WriteReq; + type InstRamWriteResp = ram::WriteResp; + + config ( + read_req_r: chan in, + read_resp_s: chan out, + write_req_r: chan in, + write_resp_s: chan out, + ram_read_req_s: chan[RAM_NUM] out, + ram_read_resp_r: chan[RAM_NUM] in, + ram_write_req_s: chan[RAM_NUM] out, + ram_write_resp_r: chan[RAM_NUM] in, + ) { + spawn HistoryBuffer ( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r, + ); + } + + init { } + + next (state: ()) { } +} + +const TEST_SIZE = u32:128; +const TEST_DATA_W = u32:64; +const TEST_OFFSET_W = {std::clog2(TEST_SIZE)}; +const TEST_RAM_SIZE = TEST_SIZE / aligned_parallel_ram::RAM_NUM; +const TEST_RAM_DATA_W = {TEST_DATA_W / RAM_NUM}; +const TEST_RAM_ADDR_W = {std::clog2(TEST_RAM_SIZE)}; +const TEST_RAM_PARTITION_SIZE = {TEST_RAM_DATA_W}; +const TEST_RAM_NUM_PARTITIONS = {ram::num_partitions(TEST_RAM_PARTITION_SIZE, TEST_RAM_DATA_W)}; + +const TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_RAM_INITIALIZED = true; + +type TestReadReq = HistoryBufferReadReq; +type TestReadResp = HistoryBufferReadResp; +type TestWriteReq = HistoryBufferWriteReq; +type TestWriteResp = HistoryBufferWriteResp; + +type TestRamReadReq = ram::ReadReq; +type TestRamReadResp = ram::ReadResp; +type TestRamWriteReq = ram::WriteReq; +type TestRamWriteResp = ram::WriteResp; + +const TEST_DATA = uN[TEST_DATA_W][64]:[ + uN[TEST_DATA_W]:0x44f9_072b_5ef2_8a80, uN[TEST_DATA_W]:0x4c01_2eda_5f3d_f4e1, + uN[TEST_DATA_W]:0x75ac_641d_fd42_0551, uN[TEST_DATA_W]:0x3bd0_3798_b29b_725f, + uN[TEST_DATA_W]:0x840e_71e9_e0e6_4fe1, uN[TEST_DATA_W]:0x9436_c1e7_bf3c_14e2, + uN[TEST_DATA_W]:0x1c64_9595_300d_4e0c, uN[TEST_DATA_W]:0x26ad_a821_926d_d9e5, + uN[TEST_DATA_W]:0x27e4_6ce8_a2b4_3a71, uN[TEST_DATA_W]:0xf9d6_cf94_6a39_5c5d, + uN[TEST_DATA_W]:0x7894_d415_88b5_dd0c, uN[TEST_DATA_W]:0x5c4e_4607_96bc_5d54, + uN[TEST_DATA_W]:0x29a4_3388_8b44_d9eb, uN[TEST_DATA_W]:0xda83_ee49_d921_7fb6, + uN[TEST_DATA_W]:0xf25f_c785_12e6_dfd0, uN[TEST_DATA_W]:0x1b8c_06fb_32ea_165a, + uN[TEST_DATA_W]:0x5dda_92e0_faca_af84, uN[TEST_DATA_W]:0x1157_8f9a_6e5c_7e78, + uN[TEST_DATA_W]:0xc908_a151_5b8c_b908, uN[TEST_DATA_W]:0xe978_4a80_f2e9_b11a, + 
uN[TEST_DATA_W]:0xc34e_96c0_4ae1_dfa9, uN[TEST_DATA_W]:0x4b06_4c8c_df6d_cae5, + uN[TEST_DATA_W]:0x9d51_4716_fd6f_afe9, uN[TEST_DATA_W]:0xfe42_4a9d_29ae_4bc4, + uN[TEST_DATA_W]:0x77ec_b4dd_9238_38b9, uN[TEST_DATA_W]:0xdf45_a790_a3da_1768, + uN[TEST_DATA_W]:0x45e9_5594_ffca_0604, uN[TEST_DATA_W]:0xe496_09f4_18ca_f955, + uN[TEST_DATA_W]:0x57cb_3c3d_ed78_62fd, uN[TEST_DATA_W]:0x0254_24bc_24fa_99f8, + uN[TEST_DATA_W]:0xc405_370f_a58a_1303, uN[TEST_DATA_W]:0xa451_310a_65b2_4785, + uN[TEST_DATA_W]:0x4373_65ac_f3ce_97ec, uN[TEST_DATA_W]:0x2a85_abd3_afde_133c, + uN[TEST_DATA_W]:0x836e_ce62_56cb_50ec, uN[TEST_DATA_W]:0x53ce_ab2f_d079_eb9a, + uN[TEST_DATA_W]:0xae76_7db7_0e64_8b88, uN[TEST_DATA_W]:0x079a_c187_642d_cbac, + uN[TEST_DATA_W]:0x2d07_5e3b_6150_d5c5, uN[TEST_DATA_W]:0x7865_5206_3c5a_98ed, + uN[TEST_DATA_W]:0xe905_351c_edda_0682, uN[TEST_DATA_W]:0xf41d_f3f2_1106_3639, + uN[TEST_DATA_W]:0xa44c_05c0_24b3_86ad, uN[TEST_DATA_W]:0xaa1f_c6b5_4c02_1f0c, + uN[TEST_DATA_W]:0xad67_cc1a_8740_87ae, uN[TEST_DATA_W]:0xf382_3bbf_f4b8_2f81, + uN[TEST_DATA_W]:0xe0cd_1eb3_b8c0_820b, uN[TEST_DATA_W]:0xb5d5_1c98_3415_1319, + uN[TEST_DATA_W]:0x583e_9722_ed31_84e6, uN[TEST_DATA_W]:0x6063_ccb6_6228_286e, + uN[TEST_DATA_W]:0xc642_cca8_e04f_769e, uN[TEST_DATA_W]:0x7cc7_ab72_7a9c_05d8, + uN[TEST_DATA_W]:0x4a66_f7c1_7b5e_6d30, uN[TEST_DATA_W]:0xd3d2_5e04_0310_7689, + uN[TEST_DATA_W]:0xe99d_a201_5dee_8e16, uN[TEST_DATA_W]:0xee15_ca30_c679_e1dd, + uN[TEST_DATA_W]:0xe61c_4ac3_183e_9478, uN[TEST_DATA_W]:0x2528_e948_2349_f8fd, + uN[TEST_DATA_W]:0xf15d_4275_a042_2135, uN[TEST_DATA_W]:0x05b5_3768_34e9_4bca, + uN[TEST_DATA_W]:0x1e00_a1a9_cffd_7a84, uN[TEST_DATA_W]:0x3396_a42c_2433_76f2, + uN[TEST_DATA_W]:0x80ba_e00e_9b93_7d76, uN[TEST_DATA_W]:0x85d4_10e6_404f_fa4d, +]; + +#[test_proc] +proc HistoryBuffer_test { + terminator: chan out; + + read_req_s: chan out; + read_resp_r: chan in; + write_req_s: chan out; + write_resp_r: chan in; + + config (terminator: chan out) { + let (read_req_s, read_req_r) = chan("read_req"); + let (read_resp_s, read_resp_r) = chan("read_resp"); + let (write_req_s, write_req_r) = chan("write_req"); + let (write_resp_s, write_resp_r) = chan("write_resp"); + + let (ram_read_req_s, ram_read_req_r) = chan[RAM_NUM]("ram_read_req"); + let (ram_read_resp_s, ram_read_resp_r) = chan[RAM_NUM]("ram_read_resp"); + let (ram_write_req_s, ram_write_req_r) = chan[RAM_NUM]("ram_write_req"); + let (ram_write_resp_s, ram_write_resp_r) = chan[RAM_NUM]("ram_write_resp"); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[0], ram_read_resp_s[0], ram_write_req_r[0], ram_write_resp_s[0], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[1], ram_read_resp_s[1], ram_write_req_r[1], ram_write_resp_s[1], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[2], ram_read_resp_s[2], ram_write_req_r[2], ram_write_resp_s[2], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[3], ram_read_resp_s[3], ram_write_req_r[3], ram_write_resp_s[3], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, 
TEST_RAM_INITIALIZED + >( + ram_read_req_r[4], ram_read_resp_s[4], ram_write_req_r[4], ram_write_resp_s[4], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[5], ram_read_resp_s[5], ram_write_req_r[5], ram_write_resp_s[5], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[6], ram_read_resp_s[6], ram_write_req_r[6], ram_write_resp_s[6], + ); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED + >( + ram_read_req_r[7], ram_read_resp_s[7], ram_write_req_r[7], ram_write_resp_s[7], + ); + + spawn HistoryBuffer ( + read_req_r, read_resp_s, + write_req_r, write_resp_s, + ram_read_req_s, ram_read_resp_r, + ram_write_req_s, ram_write_resp_r, + ); + + ( + terminator, + read_req_s, read_resp_r, + write_req_s, write_resp_r, + ) + } + + init { } + + next (state: ()) { + let tok = join(); + + let tok = for (i, tok) in range(u32:0, array_size(TEST_DATA)) { + let test_data = TEST_DATA[i]; + + // write current test data + let write_req = TestWriteReq { + data: test_data, + }; + let tok = send(tok, write_req_s, write_req); + trace_fmt!("Sent #{} write request {:#x}", i + u32:1, write_req); + + let (tok, _) = recv(tok, write_resp_r); + trace_fmt!("Received #{} write response", i + u32:1); + + // check written data + let read_req = TestReadReq { + offset: uN[TEST_OFFSET_W]:0, + }; + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{} read request {:#x}", i + u32:1, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{} read response {:#x}", i + u32:1, read_resp); + assert_eq(test_data, read_resp.data); + + // check previously saved data + let tok = for (offset, tok) in range(u32:0, TEST_SIZE - TEST_DATA_W) { + // check only offsets where data was written also dont check for all offsets + // to speedup the test + if (offset < RAM_NUM * i) && ((offset + i) % u32:13 == u32:0) { + let data_idx_0 = ((i * RAM_NUM - offset) >> RAM_NUM_W) % array_size(TEST_DATA); + let data_idx_1 = (((i * RAM_NUM - offset) >> RAM_NUM_W) + u32:1) % array_size(TEST_DATA); + let ram_offset = (offset) as uN[RAM_NUM_W]; + let prev_test_data = if ram_offset == uN[RAM_NUM_W]:0 { + TEST_DATA[data_idx_0] + } else { + ( + TEST_DATA[data_idx_1] << (TEST_RAM_DATA_W * ram_offset as u32) | + TEST_DATA[data_idx_0] >> (TEST_RAM_DATA_W * (aligned_parallel_ram::RAM_NUM - ram_offset as u32)) + ) + }; + + let read_req = TestReadReq { + offset: offset as uN[TEST_OFFSET_W], + }; + let tok = send(tok, read_req_s, read_req); + trace_fmt!("Sent #{}.{} read request {:#x}", i + u32:1, offset, read_req); + + let (tok, read_resp) = recv(tok, read_resp_r); + trace_fmt!("Received #{}.{} read response {:#x}", i + u32:1, offset, read_resp); + assert_eq(prev_test_data, read_resp.data); + + tok + } else { + tok + } + }(tok); + + tok + }(tok); + + send(tok, terminator, true); + } +} diff --git a/xls/modules/zstd/img/ZSTD_decoder.png b/xls/modules/zstd/img/ZSTD_decoder.png index f157751512..d52c11b3d8 100644 Binary files a/xls/modules/zstd/img/ZSTD_decoder.png and b/xls/modules/zstd/img/ZSTD_decoder.png differ diff --git a/xls/modules/zstd/img/ZSTD_decoder_wrapper.png b/xls/modules/zstd/img/ZSTD_decoder_wrapper.png new file mode 100644 index 0000000000..293420234f Binary files /dev/null and 
b/xls/modules/zstd/img/ZSTD_decoder_wrapper.png differ diff --git a/xls/modules/zstd/magic.x b/xls/modules/zstd/magic.x deleted file mode 100644 index 196f2f528f..0000000000 --- a/xls/modules/zstd/magic.x +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// This file contains utilities related to ZSTD magic number parsing -// More information about the ZSTD Magic Number can be found in: -// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1 - -import std; -import xls.modules.zstd.buffer as buff; - -type Buffer = buff::Buffer; -type BufferStatus = buff::BufferStatus; - -// Magic number value, as in: -// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1 -const MAGIC_NUMBER = u32:0xFD2FB528; - -// Status values reported by the magic number parsing function -pub enum MagicStatus: u2 { - OK = 0, - CORRUPTED = 1, - NO_ENOUGH_DATA = 2, -} - -// structure for returning results of magic number parsing -pub struct MagicResult { - buffer: Buffer, - status: MagicStatus, -} - -// Parses a Buffer and checks if it contains the magic number. -// The buffer is assumed to contain a valid beginning of the ZSTD file. -// The function returns MagicResult structure with the buffer after parsing -// the magic number and the status of the operation. On failure, the returned -// buffer is the same as the input buffer. 
-pub fn parse_magic_number(buffer: Buffer) -> MagicResult { - let (result, data) = buff::buffer_fixed_pop_checked(buffer); - - match result.status { - BufferStatus::OK => { - if data == MAGIC_NUMBER { - trace_fmt!("parse_magic_number: Magic number found!"); - MagicResult {status: MagicStatus::OK, buffer: result.buffer} - } else { - trace_fmt!("parse_magic_number: Magic number not found!"); - MagicResult {status: MagicStatus::CORRUPTED, buffer: buffer} - } - }, - _ => { - trace_fmt!("parse_frame_header: Not enough data to parse magic number!"); - MagicResult {status: MagicStatus::NO_ENOUGH_DATA, buffer: buffer} - } - } -} - -#[test] -fn test_parse_magic_number() { - let buffer = Buffer { content: MAGIC_NUMBER, length: u32:32}; - let result = parse_magic_number(buffer); - assert_eq(result, MagicResult { - status: MagicStatus::OK, - buffer: Buffer {content: u32:0, length: u32:0}, - }); - - let buffer = Buffer { content: u32:0x12345678, length: u32:32}; - let result = parse_magic_number(buffer); - assert_eq(result, MagicResult { - status: MagicStatus::CORRUPTED, - buffer: buffer - }); - - let buffer = Buffer { content: u32:0x1234, length: u32:16}; - let result = parse_magic_number(buffer); - assert_eq(result, MagicResult { - status: MagicStatus::NO_ENOUGH_DATA, - buffer: buffer, - }); -} diff --git a/xls/modules/zstd/match_finder.x b/xls/modules/zstd/match_finder.x new file mode 100644 index 0000000000..c351a58ce3 --- /dev/null +++ b/xls/modules/zstd/match_finder.x @@ -0,0 +1,1182 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +// This file contains implementation of MatchFinder + +import std; + +import xls.examples.ram as ram; +import xls.modules.zstd.common as common; +import xls.modules.zstd.memory.mem_reader as mem_reader; +import xls.modules.zstd.memory.mem_writer as mem_writer; +import xls.modules.zstd.memory.axi as axi; +import xls.modules.zstd.memory.axi_ram as axi_ram; +import xls.modules.zstd.history_buffer as history_buffer; +import xls.modules.zstd.hash_table as hash_table; +import xls.modules.zstd.aligned_parallel_ram as aligned_parallel_ram; + + +const SYMBOL_WIDTH = common::SYMBOL_WIDTH; + +struct ZstdParams { + num_entries_log2: uN[HT_SIZE_W], +} + +enum MatchFinderRespStatus : u1 { + OK = 0, + FAIL = 1, +} + +struct MatchFinderSequence { + literals_len: u16, + match_offset: u16, + match_len: u16, +} + +pub struct MatchFinderReq { + input_addr: uN[ADDR_W], + input_size: uN[ADDR_W], + output_lit_addr: uN[ADDR_W], + output_seq_addr: uN[ADDR_W], + zstd_params: ZstdParams, +} + +pub struct MatchFinderResp { + status: MatchFinderRespStatus, // indicate the state of the operation + lit_cnt: u32, // number of literals + seq_cnt: u32, // number of sequences +} + +struct MatchFinderInputBufferReq { + input_addr: uN[ADDR_W], + input_size: uN[ADDR_W], +} + +struct MatchFinderInputBufferResp { + data: uN[SYMBOL_WIDTH], + last: bool, +} + +struct MatchFinderInputBufferState< + ADDR_W: u32, DATA_W: u32, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, +> { + addr: uN[ADDR_W], + left_to_read: uN[ADDR_W], + buffer: uN[DATA_W], + buffer_len: uN[DATA_W_LOG2], +} + +proc MatchFinderInputBuffer< + ADDR_W: u32, DATA_W: u32, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, +> { + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + type MemReaderStatus = mem_reader::MemReaderStatus; + + type Req = MatchFinderInputBufferReq; + type Resp = MatchFinderInputBufferResp; + type State = MatchFinderInputBufferState; + + req_r: chan in; + next_r: chan<()> in; + out_s: chan out; + + // MemReader interface + mem_rd_req_s: chan out; + mem_rd_resp_r: chan in; + + config ( + req_r: chan in, + next_r: chan<()> in, + out_s: chan out, + mem_rd_req_s: chan out, + mem_rd_resp_r: chan in, + ) { + (req_r, next_r, out_s, mem_rd_req_s, mem_rd_resp_r) + } + + init { zero!() } + + next (state: State) { + // receive request + let do_recv_req = state.left_to_read == uN[ADDR_W]:0; + let (tok_req, req, req_valid) = recv_if_non_blocking(join(), req_r, do_recv_req, zero!()); + + // send memory read request + let mem_rd_req = MemReaderReq { + addr: req.input_addr, + length: req.input_size, + }; + send_if(tok_req, mem_rd_req_s, req_valid, mem_rd_req); + + // receive memory read response + let do_recv_mem_rd_resp = (state.left_to_read > uN[ADDR_W]:0) && (state.buffer_len == uN[DATA_W_LOG2]:0); + let (_, mem_rd_resp, mem_rd_resp_valid) = recv_if_non_blocking(join(), mem_rd_resp_r, do_recv_mem_rd_resp, zero!()); + + // receive next and send data + let do_recv_next = (state.left_to_read > uN[ADDR_W]:0 && (state.buffer_len > uN[DATA_W_LOG2]:0)); + let (tok_next, _, next_valid) = recv_if_non_blocking(join(), next_r, do_recv_next, ()); + + let resp = Resp { + data: state.buffer as uN[SYMBOL_WIDTH], + last: state.left_to_read == uN[ADDR_W]:1, + }; + send_if(tok_next, out_s, next_valid, resp); + + // update state + if req_valid { + State { + addr: req.input_addr, + left_to_read: req.input_size, + ..zero!() + } + } else if mem_rd_resp_valid { + State { + buffer: mem_rd_resp.data, + buffer_len: DATA_W as 
uN[DATA_W_LOG2], + ..state + } + } else if next_valid { + State { + left_to_read: state.left_to_read - uN[ADDR_W]:1, + buffer: state.buffer >> SYMBOL_WIDTH, + buffer_len: state.buffer_len - (SYMBOL_WIDTH as uN[DATA_W_LOG2]), + ..state + } + } else { + state + } + } +} + +enum MatchFinderFSM: u5 { + IDLE = 0, + INPUT_NEXT = 1, + INPUT_READ = 2, + HASH_TABLE_REQ = 4, + HASH_TABLE_RESP = 5, + HISTORY_BUFFER_RESP = 6, + OUTPUT_LITERAL_REQ_PACKET = 8, + OUTPUT_LITERAL_RESP = 9, + OUTPUT_SEQUENCE_REQ_PACKET = 10, + OUTPUT_SEQUENCE_RESP = 11, + INPUT_READ_NEXT_REQ = 15, + INPUT_READ_NEXT_RESP = 16, + SEND_RESP = 17, + FAILURE = 18, +} + +struct MatchFinderState< + DATA_W: u32, ADDR_W: u32, HT_SIZE_W: u32, MIN_SEQ_LEN: u32, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)} +> { + fsm: MatchFinderFSM, + req: MatchFinderReq, + input_data: uN[SYMBOL_WIDTH], + input_last: bool, + input_addr_offset: uN[ADDR_W], + output_lit_addr: uN[ADDR_W], + output_seq_addr: uN[ADDR_W], + lit_buffer: uN[MIN_SEQ_LEN][SYMBOL_WIDTH], + lit_buffer_last: bool, + literals_length: u16, + match_offset: u16, + match_length: u16, + lit_cnt: u32, + seq_cnt: u32, +} + +proc MatchFinder< + ADDR_W: u32, DATA_W: u32, HT_SIZE: u32, HB_SIZE: u32, MIN_SEQ_LEN: u32, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, + + HT_KEY_W: u32 = {SYMBOL_WIDTH}, + HT_VALUE_W: u32 = {SYMBOL_WIDTH + ADDR_W}, + HT_SIZE_W: u32 = {std::clog2(HT_SIZE + u32:1)}, + + HB_DATA_W: u32 = {SYMBOL_WIDTH}, + HB_OFFSET_W: u32 = {std::clog2(HB_SIZE)}, +> { + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + type MemReaderStatus = mem_reader::MemReaderStatus; + + type InputBufferReq = MatchFinderInputBufferReq; + type InputBufferResp = MatchFinderInputBufferResp; + + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + type MemWriterRespStatus = mem_writer::MemWriterRespStatus; + + type Addr = uN[ADDR_W]; + + type HashTableRdReq = hash_table::HashTableReadReq; + type HashTableRdResp = hash_table::HashTableReadResp; + type HashTableWrReq = hash_table::HashTableWriteReq; + type HashTableWrResp = hash_table::HashTableWriteResp; + + type HistoryBufferRdReq = history_buffer::HistoryBufferReadReq; + type HistoryBufferRdResp = history_buffer::HistoryBufferReadResp; + type HistoryBufferWrReq = history_buffer::HistoryBufferWriteReq; + type HistoryBufferWrResp = history_buffer::HistoryBufferWriteResp; + + type Req = MatchFinderReq; + type Resp = MatchFinderResp; + type RespStatus = MatchFinderRespStatus; + type State = MatchFinderState; + + type NumEntries = uN[HT_SIZE_W]; + type Key = uN[HT_KEY_W]; + type FSM = MatchFinderFSM; + + req_r: chan in; + resp_s: chan out; + + // InputBuffer interface + inp_buf_req_s: chan out; + inp_buf_next_s: chan<()> out; + inp_buf_out_r: chan in; + + // MemWriter interface + mem_wr_req_s: chan out; + mem_wr_packet_s: chan out; + mem_wr_resp_r: chan in; + + // HashTable interface + ht_rd_req_s: chan out; + ht_rd_resp_r: chan in; + ht_wr_req_s: chan out; + ht_wr_resp_r: chan in; + + // HistoryBuffer interface + hb_rd_req_s: chan out; + hb_rd_resp_r: chan in; + hb_wr_req_s: chan out; + hb_wr_resp_r: chan in; + + config ( + // Req & Resp + req_r: chan in, + resp_s: chan out, + + // Access to input + mem_rd_req_s: chan out, + mem_rd_resp_r: chan in, + + // Output + mem_wr_req_s: chan out, + mem_wr_packet_s: chan out, + mem_wr_resp_r: chan in, + + // HashTable RAM interface + ht_rd_req_s: chan 
out, + ht_rd_resp_r: chan in, + ht_wr_req_s: chan out, + ht_wr_resp_r: chan in, + + // HistoryBuffer RAM interface + hb_rd_req_s: chan out, + hb_rd_resp_r: chan in, + hb_wr_req_s: chan out, + hb_wr_resp_r: chan in, + ) { + let (inp_buf_req_s, inp_buf_req_r) = chan("inp_buf_req"); + let (inp_buf_next_s, inp_buf_next_r) = chan<(), u32:0>("inp_buf_next"); + let (inp_buf_out_s, inp_buf_out_r) = chan("inp_buf_out"); + + spawn MatchFinderInputBuffer ( + inp_buf_req_r, inp_buf_next_r, inp_buf_out_s, + mem_rd_req_s, mem_rd_resp_r, + ); + + ( + req_r, resp_s, + inp_buf_req_s, inp_buf_next_s, inp_buf_out_r, + mem_wr_req_s, mem_wr_packet_s, mem_wr_resp_r, + ht_rd_req_s, ht_rd_resp_r, ht_wr_req_s, ht_wr_resp_r, + hb_rd_req_s, hb_rd_resp_r, hb_wr_req_s, hb_wr_resp_r, + ) + } + + init { zero!() } + + next (state: State) { + let tok0 = join(); + + // [IDLE] + let (tok1_0, req) = recv_if(tok0, req_r, state.fsm == FSM::IDLE, state.req); + let inp_buf_req = InputBufferReq { input_addr: req.input_addr, input_size: req.input_size }; + let tok1_1 = send_if(tok0, inp_buf_req_s, state.fsm == FSM::IDLE, inp_buf_req); + + // [INPUT_NEXT] + let do_send_inp_buf_next = state.fsm == FSM::INPUT_NEXT || state.fsm == FSM::INPUT_READ_NEXT_REQ; + + // [INPUT_READ] + let do_recv_inp_buf_out = state.fsm == FSM::INPUT_READ || state.fsm == FSM::INPUT_READ_NEXT_RESP; + let (tok1_2, inp_buf_out) = recv_if(tok0, inp_buf_out_r, do_recv_inp_buf_out, zero!()); + + // [HASH_TABLE_REQ] + let ht_rd_req = HashTableRdReq { + num_entries_log2: state.req.zstd_params.num_entries_log2, + key: state.input_data, + }; + let tok1_3 = send_if(tok0, ht_rd_req_s, state.fsm == FSM::HASH_TABLE_REQ, ht_rd_req); + + // [HASH_TABLE_RESP] + let (tok1_4, ht_rd_resp) = recv_if(tok0, ht_rd_resp_r, state.fsm == FSM::HASH_TABLE_RESP, zero!()); + let ht_is_match = ht_rd_resp.is_match && (((ht_rd_resp.value >> ADDR_W) as uN[SYMBOL_WIDTH]) == state.input_data); + let hb_rd_req = if ht_is_match { + HistoryBufferRdReq { + offset: (state.req.input_addr + state.input_addr_offset - (ht_rd_resp.value as uN[ADDR_W])) as uN[HB_OFFSET_W], + } + } else { + zero!() + }; + let tok1_4 = send_if(tok1_4, hb_rd_req_s, ht_is_match, hb_rd_req); + + // [HISTORY_BUFFER_RESP] | [HISTORY_BUFFER_NEXT_RESP] + let (tok1_5, hb_rd_resp) = recv_if(tok0, hb_rd_resp_r, state.fsm == FSM::HISTORY_BUFFER_RESP, zero!()); + let (tok1_2, inp_buf_out) = recv_if(tok0, inp_buf_out_r, state.fsm == FSM::HISTORY_BUFFER_RESP, zero!()); + + let is_next_match = state.input_data == hb_rd_resp.data; + let tok1_2 = send_if(tok0, inp_buf_next_s, do_send_inp_buf_next || ht_is_match || is_next_match, ()); + + // write entry in hash table + let do_save_entry = ( + (state.fsm == FSM::HASH_TABLE_RESP && !ht_is_match) || + (state.fsm == FSM::HISTORY_BUFFER_RESP && !is_next_match) + ); + let ht_wr_req = HashTableWrReq { + num_entries_log2: state.req.zstd_params.num_entries_log2, + key: state.input_data, + value: ( + (state.input_data) ++ + (state.req.input_addr + state.input_addr_offset) + ), + }; + let tok1_2 = send_if(tok1_2, ht_wr_req_s, do_save_entry, ht_wr_req); + // write entry in history buffer + let hb_wr_req = HistoryBufferWrReq { + data: state.input_data + }; + let tok1_2 = send_if(tok1_2, hb_wr_req_s, do_save_entry, hb_wr_req); + + // [OUTPUT_LITERAL_REQ_PACKET] + let lit_mem_wr_req = MemWriterReq { + addr: state.output_lit_addr, + length: uN[ADDR_W]:1, + }; + let tok1_7 = send_if(tok0, mem_wr_req_s, state.fsm == FSM::OUTPUT_LITERAL_REQ_PACKET, lit_mem_wr_req); + + let lit_mem_wr_packet = 
MemWriterDataPacket { + data: state.input_data as uN[DATA_W], + length: uN[ADDR_W]:1, + last: true, + }; + let tok1_8 = send_if(tok0, mem_wr_packet_s, state.fsm == FSM::OUTPUT_LITERAL_REQ_PACKET, lit_mem_wr_packet); + + // [OUTPUT_LITERAL_RESP] + let (tok1_9, lit_mem_wr_resp) = recv_if(tok0, mem_wr_resp_r, state.fsm == FSM::OUTPUT_LITERAL_RESP, zero!()); + + // [OUTPUT_SEQUENCE_REQ_PACKET] + let seq_mem_wr_req = MemWriterReq { + addr: state.output_seq_addr, + length: uN[ADDR_W]:6, + }; + let tok1_9 = send_if(tok0, mem_wr_req_s, state.fsm == FSM::OUTPUT_SEQUENCE_REQ_PACKET, seq_mem_wr_req); + + let seq_mem_wr_packet = MemWriterDataPacket { + data: (state.literals_length ++ state.match_offset ++ state.match_length) as uN[DATA_W], + length: uN[ADDR_W]:6, + last: true, + }; + + let tok1_10 = send_if(tok0, mem_wr_packet_s, state.fsm == FSM::OUTPUT_SEQUENCE_REQ_PACKET, seq_mem_wr_packet); + + // [OUTPUT_SEQUENCE_RESP] + let (tok1_11, seq_mem_wr_resp) = recv_if(tok0, mem_wr_resp_r, state.fsm == FSM::OUTPUT_SEQUENCE_RESP, zero!()); + + // [INPUT_READ_NEXT_REQ] + let hb_rd_req = HistoryBufferRdReq { + offset: (state.match_offset + state.match_length) as uN[HB_OFFSET_W], + }; + let tok1_15 = send_if(tok0, hb_rd_req_s, state.fsm == FSM::INPUT_READ_NEXT_REQ, hb_rd_req); + + // [SEND_RESP] + let resp = Resp { + status: RespStatus::OK, + lit_cnt: state.lit_cnt, + seq_cnt: state.seq_cnt, + }; + + let tok1_19 = send_if(tok0, resp_s, state.fsm == FSM::SEND_RESP, resp); + + // [FAILURE] + let fail_resp = MatchFinderResp { + status: MatchFinderRespStatus::FAIL, + lit_cnt: u32:0, + seq_cnt: u32:0, + }; + let tok1_4 = send_if(tok0, resp_s, state.fsm == FSM::FAILURE, fail_resp); + + let (tok, _, _) = recv_if_non_blocking(tok0, ht_wr_resp_r, false, zero!()); + let (tok, _, _) = recv_if_non_blocking(tok0, hb_wr_resp_r, false, zero!()); + + match state.fsm { + FSM::IDLE => { + trace_fmt!("[IDLE] Received match finder request {:#x}", req); + State { + fsm: FSM::INPUT_NEXT, + req: req, + input_addr_offset: uN[ADDR_W]:0, + output_lit_addr: req.output_lit_addr, + output_seq_addr: req.output_seq_addr, + ..zero!() + } + }, + + FSM::INPUT_NEXT => { + trace_fmt!("[INPUT_NEXT] Sent next to input buffer"); + State { + fsm: FSM::INPUT_READ, + input_addr_offset: state.input_addr_offset + uN[ADDR_W]:1, + ..state + } + }, + + FSM::INPUT_READ => { + trace_fmt!("[INPUT_READ] Received input {:#x}", inp_buf_out); + State { + fsm: FSM::HASH_TABLE_REQ, + input_data: inp_buf_out.data, + input_last: inp_buf_out.last, + ..state + } + }, + + FSM::HASH_TABLE_REQ => { + trace_fmt!("[HASH_TABLE_REQ] Sent HT read request {:#x}", ht_rd_req); + State { + fsm: FSM::HASH_TABLE_RESP, + ..state + } + }, + + FSM::HASH_TABLE_RESP => { + trace_fmt!("[HASH_TABLE_RESP] Received HT read respose {:#x}", ht_rd_resp); + if ht_is_match { + trace_fmt!("[HASH_TABLE_RESP] Sent HB read request {:#x}", hb_rd_req); + State { + fsm: FSM::HISTORY_BUFFER_RESP, + match_offset: hb_rd_req.offset as u16, + ..state + } + } else { + State { + fsm: FSM::OUTPUT_LITERAL_REQ_PACKET, + ..state + } + } + }, + + FSM::HISTORY_BUFFER_RESP => { + trace_fmt!("[HISTORY_BUFFER_RESP] Received HB read response {:#x}", hb_rd_resp); + trace_fmt!("[HISTORY_BUFFER_RESP] Next symbol {:#x}", state.input_data); + if is_next_match { + State { + fsm: FSM::INPUT_NEXT, + match_length: state.match_length + u16:1, + ..state + } + } else if state.match_length as u32 < MIN_SEQ_LEN { + State { + fsm: FSM::OUTPUT_LITERAL_REQ_PACKET, + ..state + } + } else { + State { + fsm: 
FSM::OUTPUT_SEQUENCE_REQ_PACKET, + ..state + } + } + }, + + FSM::OUTPUT_LITERAL_REQ_PACKET => { + trace_fmt!("[OUTPUT_LITERAL_REQ_PACKET] Sent mem write request {:#x}", lit_mem_wr_req); + trace_fmt!("[OUTPUT_LITERAL_REQ_PACKET] Sent mem write data {:#x}", lit_mem_wr_packet); + State { + fsm: FSM::OUTPUT_LITERAL_RESP, + ..state + } + }, + + FSM::OUTPUT_LITERAL_RESP => { + trace_fmt!("[OUTPUT_LITERAL_RESP] Received mem write response {:#x}", lit_mem_wr_resp); + if state.input_last { + State { + fsm: FSM::OUTPUT_SEQUENCE_REQ_PACKET, + output_lit_addr: state.output_lit_addr + uN[ADDR_W]:1, + literals_length: state.literals_length + u16:1, + ..state + } + } else { + State { + fsm: FSM::INPUT_NEXT, + output_lit_addr: state.output_lit_addr + uN[ADDR_W]:1, + literals_length: state.literals_length + u16:1, + ..state + } + } + }, + + FSM::OUTPUT_SEQUENCE_REQ_PACKET => { + trace_fmt!("[OUTPUT_SEQUENCE_REQ_PACKET] Sent mem write request {:#x}", seq_mem_wr_req); + trace_fmt!("[OUTPUT_SEQUENCE_REQ_PACKET] Sent mem write data {:#x}", seq_mem_wr_packet); + State { + fsm: FSM::OUTPUT_SEQUENCE_RESP, + lit_cnt: state.lit_cnt + (state.literals_length as u32), + seq_cnt: state.seq_cnt + (state.match_length as u32), + ..state + } + }, + + FSM::OUTPUT_SEQUENCE_RESP => { + trace_fmt!("[OUTPUT_SEQUENCE_RESP] Received mem write response {:#x}", seq_mem_wr_resp); + if state.input_last { + State { + fsm: FSM::SEND_RESP, + output_seq_addr: state.output_seq_addr + uN[ADDR_W]:6, + ..state + } + } else { + State { + fsm: FSM::HASH_TABLE_REQ, // here we reuse last response, which was not matched + output_seq_addr: state.output_seq_addr + uN[ADDR_W]:6, + literals_length: u16:0, + match_length: u16:0, + ..state + } + } + }, + + FSM::INPUT_READ_NEXT_REQ => { + trace_fmt!("[INPUT_NEXT] Sent next to input buffer"); + State { + fsm: FSM::INPUT_READ_NEXT_RESP, + input_addr_offset: state.input_addr_offset + uN[ADDR_W]:1, + ..state + } + }, + + FSM::INPUT_READ_NEXT_RESP => { + trace_fmt!("[INPUT_READ_NEXT_RESP ] Received input {:#x}", inp_buf_out); + State { + fsm: FSM::HISTORY_BUFFER_RESP, + input_data: inp_buf_out.data, + input_last: inp_buf_out.last, + ..state + } + }, + + FSM::SEND_RESP => { + trace_fmt!("[SEND_RESP] Sent response {:#x}", resp); + State { + fsm: FSM::IDLE, + ..zero!() + } + }, + + FSM::FAILURE => { + trace_fmt!("[FAILURE] !!!"); + State { + fsm: FSM::IDLE, + ..zero!() + } + }, + + _ => state, + } + } +} + +const INST_ADDR_W = u32:32; +const INST_DATA_W = u32:64; +const INST_MIN_SEQ_LEN = u32:3; +const INST_DATA_W_LOG2 = std::clog2(INST_DATA_W + u32:1); + +const INST_HT_SIZE = u32:512; +const INST_HT_SIZE_W = std::clog2(INST_HT_SIZE + u32:1); +const INST_HT_KEY_W = SYMBOL_WIDTH; +const INST_HT_VALUE_W = SYMBOL_WIDTH + INST_ADDR_W; // original symbol + address +const INST_HB_DATA_W = SYMBOL_WIDTH; +const INST_HB_SIZE = u32:1024; +const INST_HB_OFFSET_W = std::clog2(INST_HB_SIZE); + +proc MatchFinderInst { + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + + type HashTableRdReq = hash_table::HashTableReadReq; + type HashTableRdResp = hash_table::HashTableReadResp; + type HashTableWrReq = hash_table::HashTableWriteReq; + type HashTableWrResp = hash_table::HashTableWriteResp; + + type HistoryBufferRdReq = history_buffer::HistoryBufferReadReq; + type HistoryBufferRdResp = 
history_buffer::HistoryBufferReadResp; + type HistoryBufferWrReq = history_buffer::HistoryBufferWriteReq; + type HistoryBufferWrResp = history_buffer::HistoryBufferWriteResp; + + type Req = MatchFinderReq; + type Resp = MatchFinderResp; + + config ( + // Req & Resp + req_r: chan in, + resp_s: chan out, + + // Access to input + mem_rd_req_s: chan out, + mem_rd_resp_r: chan in, + + // Output + mem_wr_req_s: chan out, + mem_wr_packet_s: chan out, + mem_wr_resp_r: chan in, + + // HashTable RAM interface + ht_rd_req_s: chan out, + ht_rd_resp_r: chan in, + ht_wr_req_s: chan out, + ht_wr_resp_r: chan in, + + // HistoryBuffer RAM interface + hb_rd_req_s: chan out, + hb_rd_resp_r: chan in, + hb_wr_req_s: chan out, + hb_wr_resp_r: chan in, + ) { + spawn MatchFinder< + INST_ADDR_W, INST_DATA_W, INST_HT_SIZE, INST_HB_SIZE, INST_MIN_SEQ_LEN, + INST_DATA_W_LOG2, + INST_HT_KEY_W, INST_HT_VALUE_W, INST_HT_SIZE_W, + INST_HB_DATA_W, INST_HB_OFFSET_W, + >( + req_r, resp_s, + mem_rd_req_s, mem_rd_resp_r, + mem_wr_req_s, mem_wr_packet_s, mem_wr_resp_r, + ht_rd_req_s, ht_rd_resp_r, ht_wr_req_s, ht_wr_resp_r, + hb_rd_req_s, hb_rd_resp_r, hb_wr_req_s, hb_wr_resp_r, + ); + } + + init {} + + next (state: ()) {} +} +const TEST_ADDR_W = u32:32; +const TEST_DATA_W = u32:64; +const TEST_MIN_SEQ_LEN = u32:3; +const TEST_HT_SIZE = u32:512; +const TEST_HB_SIZE = u32:1024; +const TEST_DATA_W_LOG2 = std::clog2(TEST_DATA_W + u32:1); +const TEST_DEST_W = u32:8; +const TEST_ID_W = u32:8; + +const TEST_HT_KEY_W = SYMBOL_WIDTH; +const TEST_HT_VALUE_W = SYMBOL_WIDTH + TEST_ADDR_W; // original symbol + address +const TEST_HT_HASH_W = std::clog2(TEST_HT_SIZE); +const TEST_HT_RAM_DATA_W = TEST_HT_VALUE_W + u32:1; // value + valid +const TEST_HT_RAM_WORD_PARTITION_SIZE = u32:1; +const TEST_HT_RAM_NUM_PARTITIONS = ram::num_partitions(TEST_HT_RAM_WORD_PARTITION_SIZE, TEST_HT_RAM_DATA_W); +const TEST_HT_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_HT_RAM_INITIALIZED = true; +const TEST_HT_SIZE_W = std::clog2(TEST_HT_SIZE + u32:1); + +const TEST_HB_RAM_NUM = u32:8; +const TEST_HB_DATA_W = SYMBOL_WIDTH; +const TEST_HB_OFFSET_W = std::clog2(TEST_HB_SIZE); +const TEST_HB_RAM_SIZE = TEST_HB_SIZE / TEST_HB_RAM_NUM; +const TEST_HB_RAM_DATA_W = SYMBOL_WIDTH / TEST_HB_RAM_NUM; +const TEST_HB_RAM_ADDR_W = std::clog2(TEST_HB_RAM_SIZE); +const TEST_HB_RAM_PARTITION_SIZE = TEST_HB_RAM_DATA_W; +const TEST_HB_RAM_NUM_PARTITIONS = ram::num_partitions(TEST_HB_RAM_PARTITION_SIZE, TEST_HB_RAM_DATA_W); +const TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_HB_RAM_INITIALIZED = true; + +const TEST_RAM_DATA_W = TEST_DATA_W; +const TEST_RAM_SIZE = u32:1024; +const TEST_RAM_ADDR_W = TEST_ADDR_W; +const TEST_RAM_PARTITION_SIZE = TEST_RAM_DATA_W / u32:8; +const TEST_RAM_NUM_PARTITIONS = ram::num_partitions(TEST_RAM_PARTITION_SIZE, TEST_RAM_DATA_W); +const TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_RAM_INITIALIZED = true; +const TEST_RAM_ASSERT_VALID_READ = true; + +const TEST_OUTPUT_LIT_ADDR = uN[TEST_ADDR_W]:0x100; +const TEST_OUTPUT_SEQ_ADDR = uN[TEST_ADDR_W]:0x200; +const TEST_OUTPUT_ADDR_MASK = uN[TEST_ADDR_W]:0xF00; + +// Test data +// 010A_0B0C_0102_0104_0A0B_0C05_0A0B_0C09 +// ^- ---- ^--- -- ^--- -- +// Expected output +// literals: 010A_0B0C_0102_0104_0509 +// sequences: (8, 7, 3), (1, 4, 3), (1, 0, 0) + +const TEST_DATA = uN[TEST_RAM_DATA_W][2]:[ + u64:0x0401_0201_0C0B_0A01, + 
u64:0x090C_0B0A_050C_0B0A, +]; + +const TEST_LITERALS = u8[10]:[ + u8:0x09, u8:0x05, u8:0x04, u8:0x01, u8:0x02, u8:0x01, u8:0x0C, u8:0x0B, u8:0x0A, u8:0x01, +]; + +const TEST_SEQUENCES = MatchFinderSequence[3]:[ + MatchFinderSequence { + literals_len: u16:8, + match_offset: u16:7, + match_len: u16:3, + }, + MatchFinderSequence { + literals_len: u16:1, + match_offset: u16:4, + match_len: u16:3, + }, + MatchFinderSequence { + literals_len: u16:1, + match_offset: u16:0, + match_len: u16:0, + }, +]; + +struct TestState { + iteration: u32, + lit_buffer: uN[SYMBOL_WIDTH][256], + seq_buffer: MatchFinderSequence[32], + wr_addr: uN[TEST_ADDR_W], + wr_offset: uN[TEST_ADDR_W], + wr_len: uN[TEST_ADDR_W], +} + +#[test_proc] +proc MatchFinderTest { + + // Memory Reader + Input + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + + type InputBufferRamRdReq = ram::ReadReq; + type InputBufferRamRdResp = ram::ReadResp; + type InputBufferRamWrReq = ram::WriteReq; + type InputBufferRamWrResp = ram::WriteResp; + + type AxiAr = axi::AxiAr; + type AxiR = axi::AxiR; + + // Memory Writer + + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + + // Hash Table + + type HashTableRdReq = hash_table::HashTableReadReq; + type HashTableRdResp = hash_table::HashTableReadResp; + type HashTableWrReq = hash_table::HashTableWriteReq; + type HashTableWrResp = hash_table::HashTableWriteResp; + + type HashTableRamRdReq = ram::ReadReq; + type HashTableRamRdResp = ram::ReadResp; + type HashTableRamWrReq = ram::WriteReq; + type HashTableRamWrResp = ram::WriteResp; + + // History Buffer + + type HistoryBufferRdReq = history_buffer::HistoryBufferReadReq; + type HistoryBufferRdResp = history_buffer::HistoryBufferReadResp; + type HistoryBufferWrReq = history_buffer::HistoryBufferWriteReq; + type HistoryBufferWrResp = history_buffer::HistoryBufferWriteResp; + + type HistoryBufferRamRdReq = ram::ReadReq; + type HistoryBufferRamRdResp = ram::ReadResp; + type HistoryBufferRamWrReq = ram::WriteReq; + type HistoryBufferRamWrResp = ram::WriteResp; + + // Match Finder + + type Req = MatchFinderReq; + type Resp = MatchFinderResp; + + // Other + + type NumEntriesLog2 = uN[TEST_HT_SIZE_W]; + type RamAddr = uN[TEST_RAM_ADDR_W]; + type RamData = uN[TEST_RAM_DATA_W]; + type RamMask = uN[TEST_RAM_NUM_PARTITIONS]; + + terminator: chan out; + + req_s: chan out; + resp_r: chan in; + + mem_wr_req_r: chan in; + mem_wr_packet_r: chan in; + mem_wr_resp_s: chan out; + + input_ram_rd_req_s: chan out; + input_ram_rd_resp_r: chan in; + input_ram_wr_req_s: chan out; + input_ram_wr_resp_r: chan in; + + config(terminator: chan out) { + + // Hash Table RAM + + let (ht_ram_rd_req_s, ht_ram_rd_req_r) = chan("ht_ram_rd_req"); + let (ht_ram_rd_resp_s, ht_ram_rd_resp_r) = chan("ht_ram_rd_resp"); + let (ht_ram_wr_req_s, ht_ram_wr_req_r) = chan("ht_ram_wr_req"); + let (ht_ram_wr_resp_s, ht_ram_wr_resp_r) = chan("ht_ram_wr_resp"); + + spawn ram::RamModel< + TEST_HT_RAM_DATA_W, TEST_HT_SIZE, TEST_HT_RAM_WORD_PARTITION_SIZE, + TEST_HT_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HT_RAM_INITIALIZED + >( + ht_ram_rd_req_r, ht_ram_rd_resp_s, + ht_ram_wr_req_r, ht_ram_wr_resp_s + ); + + // Hash Table + + let (ht_rd_req_s, ht_rd_req_r) = chan("ht_rd_req"); + let (ht_rd_resp_s, ht_rd_resp_r) = chan("ht_rd_resp"); + let (ht_wr_req_s, ht_wr_req_r) = chan("ht_wr_req"); + let (ht_wr_resp_s, ht_wr_resp_r) = chan("ht_wr_resp"); + + 
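+        // The HashTable proc under test exposes its read/write request and
+        // response channels to the MatchFinder, while the ht_ram_* channels
+        // declared above connect it to the RamModel that acts as its backing
+        // storage.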
spawn hash_table::HashTable( + ht_rd_req_r, ht_rd_resp_s, + ht_wr_req_r, ht_wr_resp_s, + ht_ram_rd_req_s, ht_ram_rd_resp_r, + ht_ram_wr_req_s, ht_ram_wr_resp_r, + ); + + // History Buffer RAM + + let (hb_ram_rd_req_s, hb_ram_rd_req_r) = chan[8]("hb_ram_rd_req"); + let (hb_ram_rd_resp_s, hb_ram_rd_resp_r) = chan[8]("hb_ram_rd_resp"); + let (hb_ram_wr_req_s, hb_ram_wr_req_r) = chan[8]("hb_ram_wr_req"); + let (hb_ram_wr_resp_s, hb_ram_wr_resp_r) = chan[8]("hb_ram_wr_resp"); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[0], hb_ram_rd_resp_s[0], + hb_ram_wr_req_r[0], hb_ram_wr_resp_s[0], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[1], hb_ram_rd_resp_s[1], + hb_ram_wr_req_r[1], hb_ram_wr_resp_s[1], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[2], hb_ram_rd_resp_s[2], + hb_ram_wr_req_r[2], hb_ram_wr_resp_s[2], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[3], hb_ram_rd_resp_s[3], + hb_ram_wr_req_r[3], hb_ram_wr_resp_s[3], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[4], hb_ram_rd_resp_s[4], + hb_ram_wr_req_r[4], hb_ram_wr_resp_s[4], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[5], hb_ram_rd_resp_s[5], + hb_ram_wr_req_r[5], hb_ram_wr_resp_s[5], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[6], hb_ram_rd_resp_s[6], + hb_ram_wr_req_r[6], hb_ram_wr_resp_s[6], + ); + + spawn ram::RamModel< + TEST_HB_RAM_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_HB_RAM_INITIALIZED + >( + hb_ram_rd_req_r[7], hb_ram_rd_resp_s[7], + hb_ram_wr_req_r[7], hb_ram_wr_resp_s[7], + ); + + // History Buffer + + let (hb_rd_req_s, hb_rd_req_r) = chan("hb_rd_req"); + let (hb_rd_resp_s, hb_rd_resp_r) = chan("hb_rd_resp"); + let (hb_wr_req_s, hb_wr_req_r) = chan("hb_wr_req"); + let (hb_wr_resp_s, hb_wr_resp_r) = chan("hb_wr_resp"); + + spawn history_buffer::HistoryBuffer( + hb_rd_req_r, hb_rd_resp_s, + hb_wr_req_r, hb_wr_resp_s, + hb_ram_rd_req_s, hb_ram_rd_resp_r, + hb_ram_wr_req_s, hb_ram_wr_resp_r, + ); + + // Input Memory + + let (input_ram_rd_req_s, input_ram_rd_req_r) = chan("input_ram_rd_req"); + let (input_ram_rd_resp_s, input_ram_rd_resp_r) = chan("input_ram_rd_resp"); + let (input_ram_wr_req_s, input_ram_wr_req_r) = chan("input_ram_wr_req"); + let (input_ram_wr_resp_s, input_ram_wr_resp_r) = chan("input_ram_wr_resp"); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_RW_BEHAVIOR, TEST_RAM_INITIALIZED, + TEST_RAM_ASSERT_VALID_READ, TEST_RAM_ADDR_W, + >( + input_ram_rd_req_r, input_ram_rd_resp_s, + input_ram_wr_req_r, input_ram_wr_resp_s, + ); + + // Input 
Memory Axi Reader + + let (axi_ar_s, axi_ar_r) = chan("axi_ar"); + let (axi_r_s, axi_r_r) = chan("axi_r"); + + spawn axi_ram::AxiRamReader< + TEST_ADDR_W, TEST_DATA_W, + TEST_DEST_W, TEST_ID_W, + TEST_RAM_SIZE, + >( + axi_ar_r, axi_r_s, + input_ram_rd_req_s, input_ram_rd_resp_r, + ); + + // Input Memory Reader + + let (mem_rd_req_s, mem_rd_req_r) = chan("mem_rd_req"); + let (mem_rd_resp_s, mem_rd_resp_r) = chan("mem_rd_resp"); + + spawn mem_reader::MemReader< + TEST_DATA_W, TEST_ADDR_W, TEST_DEST_W, TEST_ID_W, + >( + mem_rd_req_r, mem_rd_resp_s, + axi_ar_s, axi_r_r, + ); + + // Output Memory Writer + + let (mem_wr_req_s, mem_wr_req_r) = chan("mem_wr_req"); + let (mem_wr_packet_s, mem_wr_packet_r) = chan("mem_wr_packet"); + let (mem_wr_resp_s, mem_wr_resp_r) = chan("mem_wr_resp"); + + // Match Finder + + let (req_s, req_r) = chan("req"); + let (resp_s, resp_r) = chan("resp"); + + spawn MatchFinder< + TEST_ADDR_W, TEST_DATA_W, TEST_HT_SIZE, TEST_HB_SIZE, TEST_MIN_SEQ_LEN, + TEST_DATA_W_LOG2, + TEST_HT_KEY_W, TEST_HT_VALUE_W, TEST_HT_SIZE_W, + TEST_HB_DATA_W, TEST_HB_OFFSET_W, + >( + req_r, resp_s, + mem_rd_req_s, mem_rd_resp_r, + mem_wr_req_s, mem_wr_packet_s, mem_wr_resp_r, + ht_rd_req_s, ht_rd_resp_r, ht_wr_req_s, ht_wr_resp_r, + hb_rd_req_s, hb_rd_resp_r, hb_wr_req_s, hb_wr_resp_r, + ); + + ( + terminator, + req_s, resp_r, + mem_wr_req_r, mem_wr_packet_r, mem_wr_resp_s, + input_ram_rd_req_s, input_ram_rd_resp_r, input_ram_wr_req_s, input_ram_wr_resp_r, + ) + + } + + init { zero!() } + + next(state: TestState) { + let tok = join(); + + let tok = if state.iteration == u32:0 { + // Fill the input RAM + let tok = for ((i, test_data), tok) in enumerate(TEST_DATA) { + let ram_wr_req = InputBufferRamWrReq { + addr: i as RamAddr, + data: test_data, + mask: !RamMask:0, + }; + let tok = send(tok, input_ram_wr_req_s, ram_wr_req); + trace_fmt!("[TEST] Sent #{} data to input RAM {:#x}", i + u32:1, ram_wr_req); + let (tok, _) = recv(tok, input_ram_wr_resp_r); + tok + }(tok); + + // Start the request + + let req = Req { + input_addr: uN[TEST_ADDR_W]:0x0, + input_size: (array_size(TEST_DATA) * TEST_DATA_W / SYMBOL_WIDTH) as u32, + output_lit_addr: TEST_OUTPUT_LIT_ADDR, + output_seq_addr: TEST_OUTPUT_SEQ_ADDR, + zstd_params: ZstdParams { + num_entries_log2: NumEntriesLog2:8, + }, + }; + + let tok = send(tok, req_s, req); + trace_fmt!("[TEST] Sent request to the MatchFinder: {:#x}", req); + + tok + } else { + tok + }; + + let (tok, mem_wr_req, mem_wr_req_valid) = recv_if_non_blocking( + tok, mem_wr_req_r, state.wr_len == uN[TEST_ADDR_W]:0, zero!() + ); + let (tok, mem_wr_packet, mem_wr_packet_valid) = recv_if_non_blocking( + tok, mem_wr_packet_r, state.wr_len > uN[TEST_ADDR_W]:0, zero!() + ); + let (tok, resp, resp_valid) = recv_if_non_blocking( + tok, resp_r, state.wr_len == uN[TEST_ADDR_W]:0, zero!() + ); + + let tok = send_if(tok, mem_wr_resp_s, mem_wr_packet_valid, MemWriterResp{status: mem_writer::MemWriterRespStatus::OKAY}); + + let state = if mem_wr_req_valid { + TestState { + wr_addr: mem_wr_req.addr, + wr_offset: uN[TEST_ADDR_W]:0, + wr_len: mem_wr_req.length, + ..state + } + } else { + state + }; + + if mem_wr_req_valid { + trace_fmt!("[TEST] Received {:#x}", mem_wr_req); + } else {}; + if mem_wr_packet_valid { + trace_fmt!("[TEST] Received {:#x}", mem_wr_packet); + } else {}; + + let state = if mem_wr_packet_valid { + if mem_wr_packet.length > state.wr_len { + trace_fmt!("[TEST] Invalid packet length"); + fail!("invalid_packet_length", mem_wr_packet.length); + } else {}; + let state = match 
(state.wr_addr & TEST_OUTPUT_ADDR_MASK) { + TEST_OUTPUT_LIT_ADDR => { + trace_fmt!("[TEST] Received literals"); + let lit_buffer = for (i, lit_buffer) in range(u32:0, TEST_DATA_W / SYMBOL_WIDTH) { + if i < mem_wr_packet.length { + let literal = (mem_wr_packet.data >> (SYMBOL_WIDTH * i)) as uN[SYMBOL_WIDTH]; + let idx = (TEST_ADDR_W / SYMBOL_WIDTH) * (state.wr_addr - TEST_OUTPUT_LIT_ADDR + state.wr_offset) + i; + update(lit_buffer, idx, literal) + } else { + lit_buffer + } + }(state.lit_buffer); + TestState { + lit_buffer: lit_buffer, + wr_offset: state.wr_offset + mem_wr_packet.length, + wr_len: state.wr_len - mem_wr_packet.length, + ..state + } + }, + TEST_OUTPUT_SEQ_ADDR => { + trace_fmt!("[TEST] Received sequence"); + assert_eq(uN[TEST_ADDR_W]:6, mem_wr_packet.length); + let sequence = MatchFinderSequence { + literals_len: (mem_wr_packet.data >> u32:32) as u16, + match_offset: (mem_wr_packet.data >> u32:16) as u16, + match_len: mem_wr_packet.data as u16, + }; + let idx = ((TEST_ADDR_W / SYMBOL_WIDTH) * (state.wr_addr - TEST_OUTPUT_SEQ_ADDR + state.wr_offset)) / u32:3; + let seq_buffer = update(state.seq_buffer, idx, sequence); + TestState { + seq_buffer: seq_buffer, + wr_offset: state.wr_offset + mem_wr_packet.length, + wr_len: state.wr_len - mem_wr_packet.length, + ..state + } + }, + _ => { + trace_fmt!("[TEST] Invalid write addres"); + fail!("invalid_wr_addr", state.wr_addr); + state + }, + }; + state + } else { + state + }; + + if resp_valid { + // check buffers content + trace_fmt!("[TEST] Received Match Finder response {:#x}", resp); + for ((i, test_lit), ()) in enumerate(TEST_LITERALS) { + assert_eq(test_lit, state.lit_buffer[i]); + }(()); + for ((i, test_seq), ()) in enumerate(TEST_SEQUENCES) { + assert_eq(test_seq, state.seq_buffer[i]); + }(()); + } else { }; + + send_if(tok, terminator, resp_valid || state.iteration > u32:1000, true); + + TestState { + iteration: state.iteration + u32:1, + ..state + } + } +} diff --git a/xls/modules/zstd/math.x b/xls/modules/zstd/math.x new file mode 100644 index 0000000000..1b9a8dd1db --- /dev/null +++ b/xls/modules/zstd/math.x @@ -0,0 +1,88 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
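+
+// Shift helpers used by the ZSTD memory modules (e.g. axi_ram.x):
+// `fast_if` is a branch-free mux built from masking, and `logshiftl` /
+// `logshiftr` implement log-depth (barrel) shifts by conditionally
+// shifting by successive powers of two, one stage per bit of the shift
+// amount.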
+ +import std; + +fn fast_if(cond: bool, arg1: uN[N], arg2: uN[N]) -> uN[N] { + let mask = if cond { !bits[N]:0 } else { bits[N]:0 }; + (arg1 & mask) | (arg2 & !mask) +} + +#[test] +fn fast_if_test() { + assert_eq(if true { u32:1 } else { u32:5 }, fast_if(true, u32:1, u32:5)); + assert_eq(if false { u32:1 } else { u32:5 }, fast_if(false, u32:1, u32:5)); +} + +// Log-depth shift bits left +pub fn logshiftl(n: bits[N], r: bits[R]) -> bits[N] { + for (i, y) in u32:0..R { + fast_if(r[i+:u1], { y << (bits[R]:1 << i) }, { y }) + }(n as bits[N]) +} + +#[test] +fn logshiftl_test() { + // Test varying base + assert_eq(logshiftl(bits[64]:0, bits[6]:3), bits[64]:0 << u32:3); + assert_eq(logshiftl(bits[64]:1, bits[6]:3), bits[64]:1 << u32:3); + assert_eq(logshiftl(bits[64]:2, bits[6]:3), bits[64]:2 << u32:3); + assert_eq(logshiftl(bits[64]:3, bits[6]:3), bits[64]:3 << u32:3); + assert_eq(logshiftl(bits[64]:4, bits[6]:3), bits[64]:4 << u32:3); + + // Test varying exponent + assert_eq(logshiftl(bits[64]:50, bits[6]:0), bits[64]:50 << u32:0); + assert_eq(logshiftl(bits[64]:50, bits[6]:1), bits[64]:50 << u32:1); + assert_eq(logshiftl(bits[64]:50, bits[6]:2), bits[64]:50 << u32:2); + assert_eq(logshiftl(bits[64]:50, bits[6]:3), bits[64]:50 << u32:3); + assert_eq(logshiftl(bits[64]:50, bits[6]:4), bits[64]:50 << u32:4); + + // Test overflow + let max = std::unsigned_max_value(); + assert_eq(logshiftl(max, u4:4), max << u4:4); + assert_eq(logshiftl(max, u4:5), max << u4:5); + assert_eq(logshiftl(max, u4:15), max << u4:15); + assert_eq(logshiftl(bits[24]:0xc0ffee, u8:12), bits[24]:0xfee000); +} + +// Log-depth shift bits right +pub fn logshiftr(n: bits[N], r: bits[R]) -> bits[N] { + for (i, y) in u32:0..R { + fast_if(r[i+:u1], { y >> (bits[R]:1 << i) }, { y }) + }(n as bits[N]) +} + +#[test] +fn logshiftr_test() { + // Test varying base + assert_eq(logshiftr(bits[64]:0x0fac4e782, bits[6]:3), bits[64]:0x0fac4e782 >> u32:3); + assert_eq(logshiftr(bits[64]:0x1fac4e782, bits[6]:3), bits[64]:0x1fac4e782 >> u32:3); + assert_eq(logshiftr(bits[64]:0x2fac4e782, bits[6]:3), bits[64]:0x2fac4e782 >> u32:3); + assert_eq(logshiftr(bits[64]:0x3fac4e782, bits[6]:3), bits[64]:0x3fac4e782 >> u32:3); + assert_eq(logshiftr(bits[64]:0x4fac4e782, bits[6]:3), bits[64]:0x4fac4e782 >> u32:3); + + // Test varying exponent + assert_eq(logshiftr(bits[64]:0x50fac4e782, bits[6]:0), bits[64]:0x50fac4e782 >> u32:0); + assert_eq(logshiftr(bits[64]:0x50fac4e782, bits[6]:1), bits[64]:0x50fac4e782 >> u32:1); + assert_eq(logshiftr(bits[64]:0x50fac4e782, bits[6]:2), bits[64]:0x50fac4e782 >> u32:2); + assert_eq(logshiftr(bits[64]:0x50fac4e782, bits[6]:3), bits[64]:0x50fac4e782 >> u32:3); + assert_eq(logshiftr(bits[64]:0x50fac4e782, bits[6]:4), bits[64]:0x50fac4e782 >> u32:4); + + // Test overflow + let max = std::unsigned_max_value(); + assert_eq(logshiftr(max, u4:4), max >> u4:4); + assert_eq(logshiftr(max, u4:5), max >> u4:5); + assert_eq(logshiftr(max, u4:15), max >> u4:15); + assert_eq(logshiftr(bits[24]:0xc0ffee, u8:12), bits[24]:0x000c0f); +} diff --git a/xls/modules/zstd/memory/BUILD b/xls/modules/zstd/memory/BUILD index ca5e0a155f..82214b6bbf 100644 --- a/xls/modules/zstd/memory/BUILD +++ b/xls/modules/zstd/memory/BUILD @@ -15,6 +15,7 @@ load("@rules_hdl//place_and_route:build_defs.bzl", "place_and_route") load("@rules_hdl//synthesis:build_defs.bzl", "benchmark_synth", "synthesize_rtl") load("@rules_hdl//verilog:providers.bzl", "verilog_library") +load("@xls_pip_deps//:requirements.bzl", "requirement") load( 
"//xls/build_rules:xls_build_defs.bzl", "xls_benchmark_ir", @@ -58,8 +59,6 @@ CLOCK_PERIOD_PS = "750" # Clock periods for modules that exceed the 750ps critical path in IR benchmark AXI_READER_CLOCK_PERIOD_PS = "1800" -AXI_STREAM_REMOVE_EMPTY_CLOCK_PERIOD_PS = "1300" - MEM_READER_CLOCK_PERIOD_PS = "2600" common_codegen_args = { @@ -166,9 +165,68 @@ xls_dslx_test( tags = ["manual"], ) +axi_stream_remove_empty_internal_codegen_args = common_codegen_args | { + "module_name": "axi_stream_remove_empty_internal", + "pipeline_stages": "1", +} + +xls_dslx_verilog( + name = "axi_stream_remove_empty_internal_verilog", + codegen_args = axi_stream_remove_empty_internal_codegen_args, + dslx_top = "AxiStreamRemoveEmptyInternalInst", + library = ":axi_stream_remove_empty_dslx", + tags = ["manual"], + verilog_file = "axi_stream_remove_empty_internal.v", +) + +xls_benchmark_ir( + name = "axi_stream_remove_empty_internal_opt_ir_benchmark", + src = ":axi_stream_remove_empty_internal_verilog.opt.ir", + benchmark_ir_args = axi_stream_remove_empty_internal_codegen_args | { + "pipeline_stages": "10", + "top": "__axi_stream_remove_empty__AxiStreamRemoveEmptyInternalInst__AxiStreamRemoveEmptyInternal_0__32_4_6_32_32_next", + }, + tags = ["manual"], +) + +verilog_library( + name = "axi_stream_remove_empty_internal_verilog_lib", + srcs = [ + ":axi_stream_remove_empty_internal.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "axi_stream_remove_empty_internal_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "axi_stream_remove_empty_internal", + deps = [ + ":axi_stream_remove_empty_internal_verilog_lib", + ], +) + +benchmark_synth( + name = "axi_stream_remove_empty_internal_benchmark_synth", + synth_target = ":axi_stream_remove_empty_internal_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "axi_stream_remove_empty_internal_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":axi_stream_remove_empty_internal_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + axi_stream_remove_empty_codegen_args = common_codegen_args | { "module_name": "axi_stream_remove_empty", - "clock_period_ps": AXI_STREAM_REMOVE_EMPTY_CLOCK_PERIOD_PS, "pipeline_stages": "2", } @@ -181,20 +239,11 @@ xls_dslx_verilog( verilog_file = "axi_stream_remove_empty.v", ) -xls_benchmark_ir( - name = "axi_stream_remove_empty_opt_ir_benchmark", - src = ":axi_stream_remove_empty_verilog.opt.ir", - benchmark_ir_args = axi_stream_remove_empty_codegen_args | { - "pipeline_stages": "10", - "top": "__axi_stream_remove_empty__AxiStreamRemoveEmptyInst__AxiStreamRemoveEmpty_0__32_4_6_32_32_next", - }, - tags = ["manual"], -) - verilog_library( name = "axi_stream_remove_empty_verilog_lib", srcs = [ ":axi_stream_remove_empty.v", + "//xls/modules/zstd:xls_fifo_wrapper.v", ], tags = ["manual"], ) @@ -227,6 +276,66 @@ place_and_route( target_die_utilization_percentage = "10", ) +remove_empty_bytes_codegen_args = common_codegen_args | { + "module_name": "remove_empty_bytes", + "pipeline_stages": "2", +} + +xls_dslx_verilog( + name = "remove_empty_bytes_verilog", + codegen_args = remove_empty_bytes_codegen_args, + dslx_top = "RemoveEmptyBytesInst", + library = ":axi_stream_remove_empty_dslx", + tags = ["manual"], + verilog_file = "remove_empty_bytes.v", +) + +xls_benchmark_ir( + name = 
"remove_empty_bytes_opt_ir_benchmark", + src = ":remove_empty_bytes_verilog.opt.ir", + benchmark_ir_args = remove_empty_bytes_codegen_args | { + "top": "__axi_stream_remove_empty__RemoveEmptyBytesInst__RemoveEmptyBytes_0__32_4_6_32_9_32_next", + "pipeline_stages": "10", + }, + tags = ["manual"], +) + +verilog_library( + name = "remove_empty_bytes_verilog_lib", + srcs = [ + ":remove_empty_bytes.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "remove_empty_bytes_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "remove_empty_bytes", + deps = [ + ":remove_empty_bytes_verilog_lib", + ], +) + +benchmark_synth( + name = "remove_empty_bytes_benchmark_synth", + synth_target = ":remove_empty_bytes_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "remove_empty_bytes_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":remove_empty_bytes_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + xls_dslx_library( name = "axi_stream_downscaler_dslx", srcs = ["axi_stream_downscaler.x"], @@ -301,6 +410,94 @@ place_and_route( target_die_utilization_percentage = "10", ) +xls_dslx_library( + name = "axi_ram_dslx", + srcs = ["axi_ram.x"], + deps = [ + ":axi_dslx", + "//xls/examples:ram_dslx", + "//xls/modules/zstd:math_dslx", + ], +) + +xls_dslx_test( + name = "axi_ram_dslx_test", + library = ":axi_ram_dslx", +) + +xls_dslx_verilog( + name = "axi_ram_verilog", + codegen_args = { + "module_name": "AxiRam", + "delay_model": "asap7", + "ram_configurations": "{ram_name}:1R1W:{rd_req}:{rd_resp}:{wr_req}:{wr_resp}:{latency}".format( + latency = 5, + ram_name = "ram", + rd_req = "axi_ram__rd_req_s", + rd_resp = "axi_ram__rd_resp_r", + wr_req = "axi_ram__wr_req_s", + wr_resp = "axi_ram__wr_resp_r", + ), + "pipeline_stages": "8", + "reset": "rst", + "use_system_verilog": "false", + }, + dslx_top = "AxiRamReaderInstWithEmptyWrites", + library = ":axi_ram_dslx", + opt_ir_args = { + "inline_procs": "true", + "top": "__axi_ram__AxiRamReaderInstWithEmptyWrites__AxiRamReader_0__AxiRamReaderResponder_0__32_32_4_5_6_8_8_32768_7_32_5_6_4_100_next", + }, + tags = ["manual"], + verilog_file = "axi_ram.v", +) + +verilog_library( + name = "axi_ram_verilog_lib", + srcs = [ + ":axi_ram.v", + ], + tags = ["manual"], +) + +xls_benchmark_ir( + name = "axi_ram_opt_ir_benchmark", + src = ":axi_ram_verilog.opt.ir", + benchmark_ir_args = { + "pipeline_stages": "4", + "delay_model": "asap7", + }, + tags = ["manual"], +) + +synthesize_rtl( + name = "axi_ram_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "AxiRam", + deps = [ + ":axi_ram_verilog_lib", + ], +) + +benchmark_synth( + name = "axi_ram_benchmark_synth", + synth_target = ":axi_ram_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "axi_ram_place_and_route", + clock_period = "750", + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":axi_ram_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + xls_dslx_library( name = "mem_reader_dslx", srcs = ["mem_reader.x"], @@ -380,12 +577,12 @@ place_and_route( ) mem_reader_codegen_args = common_codegen_args | { + "clock_period_ps": 
MEM_READER_CLOCK_PERIOD_PS, "module_name": "mem_reader", "pipeline_stages": "4", "streaming_channel_data_suffix": "_data", "flop_inputs_kind": "skid", "flop_outputs_kind": "skid", - "clock_period_ps": MEM_READER_CLOCK_PERIOD_PS, "materialize_internal_fifos": "true", } @@ -402,6 +599,7 @@ verilog_library( name = "mem_reader_verilog_lib", srcs = [ ":mem_reader.v", + "//xls/modules/zstd:xls_fifo_wrapper.v", ], tags = ["manual"], ) @@ -435,12 +633,12 @@ place_and_route( ) mem_reader_adv_codegen_args = common_codegen_args | { + "clock_period_ps": MEM_READER_CLOCK_PERIOD_PS, "module_name": "mem_reader_adv", "pipeline_stages": "4", "streaming_channel_data_suffix": "_data", "flop_inputs_kind": "skid", "flop_outputs_kind": "skid", - "clock_period_ps": MEM_READER_CLOCK_PERIOD_PS, "materialize_internal_fifos": "true", } @@ -457,6 +655,7 @@ verilog_library( name = "mem_reader_adv_verilog_lib", srcs = [ ":mem_reader_adv.v", + "//xls/modules/zstd:xls_fifo_wrapper.v", ], tags = ["manual"], ) @@ -489,6 +688,34 @@ place_and_route( target_die_utilization_percentage = "10", ) +py_test( + name = "mem_reader_cocotb_test", + srcs = ["mem_reader_cocotb_test.py"], + data = [ + ":mem_reader_adv.v", + ":mem_reader_wrapper.v", + "//xls/modules/zstd:xls_fifo_wrapper.v", + "@com_icarus_iverilog//:iverilog", + "@com_icarus_iverilog//:vvp", + ], + env = {"BUILD_WORKING_DIRECTORY": "sim_build"}, + tags = ["manual"], + visibility = ["//xls:xls_users"], + deps = [ + requirement("cocotb"), + requirement("cocotbext-axi"), + requirement("pytest"), + "//xls/common:runfiles", + "//xls/modules/zstd/cocotb:channel", + "//xls/modules/zstd/cocotb:memory", + "//xls/modules/zstd/cocotb:utils", + "//xls/modules/zstd/cocotb:xlsstruct", + "@com_google_absl_py//absl:app", + "@com_google_absl_py//absl/flags", + "@com_google_protobuf//:protobuf_python", + ], +) + xls_dslx_library( name = "axi_writer_dslx", srcs = ["axi_writer.x"], @@ -568,6 +795,33 @@ place_and_route( target_die_utilization_percentage = "10", ) +py_test( + name = "axi_writer_cocotb_test", + srcs = ["axi_writer_cocotb_test.py"], + data = [ + ":axi_writer.v", + ":axi_writer_wrapper.v", + "@com_icarus_iverilog//:iverilog", + "@com_icarus_iverilog//:vvp", + ], + env = {"BUILD_WORKING_DIRECTORY": "sim_build"}, + imports = ["."], + tags = ["manual"], + visibility = ["//xls:xls_users"], + deps = [ + requirement("cocotb"), + requirement("cocotbext-axi"), + requirement("pytest"), + "//xls/common:runfiles", + "//xls/modules/zstd/cocotb:channel", + "//xls/modules/zstd/cocotb:utils", + "//xls/modules/zstd/cocotb:xlsstruct", + "@com_google_absl_py//absl:app", + "@com_google_absl_py//absl/flags", + "@com_google_protobuf//:protobuf_python", + ], +) + xls_dslx_library( name = "axi_stream_add_empty_dslx", srcs = ["axi_stream_add_empty.x"], @@ -652,6 +906,7 @@ xls_dslx_library( ":axi_dslx", ":axi_st_dslx", ":axi_stream_add_empty_dslx", + ":axi_stream_remove_empty_dslx", ":axi_writer_dslx", ":common_dslx", ], @@ -662,14 +917,72 @@ xls_dslx_test( library = ":mem_writer_dslx", ) +mem_writer_internal_codegen_args = common_codegen_args | { + "module_name": "mem_writer_internal", + "pipeline_stages": "2", +} + +xls_dslx_verilog( + name = "mem_writer_internal_verilog", + codegen_args = mem_writer_internal_codegen_args, + dslx_top = "MemWriterInternalInst", + library = ":mem_writer_dslx", + tags = ["manual"], + verilog_file = "mem_writer_internal.v", +) + +xls_benchmark_ir( + name = "mem_writer_internal_opt_ir_benchmark", + src = ":mem_writer_internal_verilog.opt.ir", + benchmark_ir_args = 
common_codegen_args | { + "pipeline_stages": "10", + "top": "__mem_writer__MemWriterInternalInst__MemWriterInternal_0__16_32_4_4_4_2_next", + }, + tags = ["manual"], +) + +verilog_library( + name = "mem_writer_internal_verilog_lib", + srcs = [ + ":mem_writer_internal.v", + ], + tags = ["manual"], +) + +synthesize_rtl( + name = "mem_writer_internal_synth_asap7", + standard_cells = "@org_theopenroadproject_asap7sc7p5t_28//:asap7-sc7p5t_rev28_rvt", + tags = ["manual"], + top_module = "mem_writer_internal", + deps = [ + ":mem_writer_internal_verilog_lib", + ], +) + +benchmark_synth( + name = "mem_writer_internal_benchmark_synth", + synth_target = ":mem_writer_internal_synth_asap7", + tags = ["manual"], +) + +place_and_route( + name = "mem_writer_internal_place_and_route", + clock_period = CLOCK_PERIOD_PS, + core_padding_microns = 2, + min_pin_distance = "0.5", + placement_density = "0.30", + stop_after_step = "global_routing", + synthesized_rtl = ":mem_writer_internal_synth_asap7", + tags = ["manual"], + target_die_utilization_percentage = "10", +) + mem_writer_codegen_args = common_codegen_args | { "module_name": "mem_writer", - "pipeline_stages": "2", + "pipeline_stages": "10", "streaming_channel_data_suffix": "_data", - "multi_proc": "true", "flop_inputs_kind": "skid", "flop_outputs_kind": "skid", - "worst_case_throughput": "1", "materialize_internal_fifos": "true", } @@ -686,6 +999,7 @@ verilog_library( name = "mem_writer_verilog_lib", srcs = [ ":mem_writer.v", + "//xls/modules/zstd:xls_fifo_wrapper.v", ], tags = ["manual"], ) @@ -717,3 +1031,31 @@ place_and_route( tags = ["manual"], target_die_utilization_percentage = "10", ) + +py_test( + name = "mem_writer_cocotb_test", + srcs = ["mem_writer_cocotb_test.py"], + data = [ + ":mem_writer.v", + ":mem_writer_wrapper.v", + "//xls/modules/zstd:xls_fifo_wrapper.v", + "@com_icarus_iverilog//:iverilog", + "@com_icarus_iverilog//:vvp", + ], + env = {"BUILD_WORKING_DIRECTORY": "sim_build"}, + imports = ["."], + tags = ["manual"], + visibility = ["//xls:xls_users"], + deps = [ + requirement("cocotb"), + requirement("cocotbext-axi"), + requirement("pytest"), + "//xls/common:runfiles", + "//xls/modules/zstd/cocotb:channel", + "//xls/modules/zstd/cocotb:utils", + "//xls/modules/zstd/cocotb:xlsstruct", + "@com_google_absl_py//absl:app", + "@com_google_absl_py//absl/flags", + "@com_google_protobuf//:protobuf_python", + ], +) diff --git a/xls/modules/zstd/memory/README.md b/xls/modules/zstd/memory/README.md index 6a0e4aedfb..17367fa710 100644 --- a/xls/modules/zstd/memory/README.md +++ b/xls/modules/zstd/memory/README.md @@ -87,3 +87,43 @@ The list below shows the usage of the `MemWriter` proc: 3. Wait for the response submitted on the `resp_s` channel, which indicates if the write operation was successful or an error occurred. + +# Cocotb Simulation + +This directory also contains Verilog simulations of the created modules, +which test their interaction with RAM attached to the AXI bus. These Verilog +simulations provide insight into the design's latency and achievable throughput. + +The simulation interacts with verilog file generated from the particular DSLX proc +through a verilog wrapper. The wrapper is used to create an interface that is +compliant with the AXI specification so that the cocotb testbench can interact +with the DUT with the help of an extension tailored for handling the AXI bus. + +## Usage + +1. Run the simulation with the following command: + +``` +bazel run -c opt //xls/modules/zstd/memory:_cocotb_test -- --logtostderr +``` + +2. 
Observe simulation results, e.g. for `mem_writer_cocotb_test`: + +``` +************************************************************************************************************************************************************* +** TEST STATUS SIM TIME (ns) REAL TIME (s) RATIO (ns/s) ** +************************************************************************************************************************************************************* +** mem_writer_cocotb_test.ram_test_single_burst_1_transfer PASS 1970000.00 0.05 40004933.01 ** +** mem_writer_cocotb_test.ram_test_single_burst_2_transfers PASS 2140000.00 0.04 52208013.80 ** +** mem_writer_cocotb_test.ram_test_single_burst_almost_max_burst_transfer PASS 42620000.00 1.00 42734572.11 ** +** mem_writer_cocotb_test.ram_test_single_burst_max_burst_transfer PASS 43380000.00 1.03 42245987.95 ** +** mem_writer_cocotb_test.ram_test_multiburst_2_full_bursts PASS 85940000.00 2.00 42978720.13 ** +** mem_writer_cocotb_test.ram_test_multiburst_1_full_burst_and_single_transfer PASS 44510000.00 1.02 43487911.16 ** +** mem_writer_cocotb_test.ram_test_multiburst_crossing_4kb_boundary PASS 3740000.00 0.06 60190612.91 ** +** mem_writer_cocotb_test.ram_test_multiburst_crossing_4kb_boundary_with_perfectly_aligned_full_bursts PASS 21440000.00 0.50 42469371.00 ** +** mem_writer_cocotb_test.ram_test_multiburst_crossing_4kb_boundary_with_2_full_bursts_and_1_transfer PASS 87070000.00 2.01 43348812.05 ** +** mem_writer_cocotb_test.ram_test_random PASS 4491230000.00 109.05 41184670.96 ** +************************************************************************************************************************************************************* +** TESTS=10 PASS=10 FAIL=0 SKIP=0 4824040000.01 116.82 41296261.92 ** +************************************************************************************************************************************************************* +``` diff --git a/xls/modules/zstd/memory/axi.x b/xls/modules/zstd/memory/axi.x index 09bfc194e2..d4b347f013 100644 --- a/xls/modules/zstd/memory/axi.x +++ b/xls/modules/zstd/memory/axi.x @@ -25,7 +25,18 @@ pub enum AxiAxSize : u3 { MAX_128B_TRANSFER = 7, } -pub enum AxiWriteResp : u3 { +pub const AXI_AXSIZE_ENCODING_TO_SIZE = u11[8]:[ + u11:8, + u11:16, + u11:32, + u11:64, + u11:128, + u11:256, + u11:512, + u11:1024, +]; + +pub enum AxiWriteResp: u3 { OKAY = 0, EXOKAY = 1, SLVERR = 2, @@ -95,12 +106,12 @@ pub struct AxiAw { pub struct AxiW { data: uN[DATA_W], strb: uN[STRB_W], - last: u1 + last: u1, } pub struct AxiB { resp: AxiWriteResp, - id: uN[ID_W] + id: uN[ID_W], } pub struct AxiAr { diff --git a/xls/modules/zstd/memory/axi_ram.x b/xls/modules/zstd/memory/axi_ram.x new file mode 100644 index 0000000000..9683e61fc6 --- /dev/null +++ b/xls/modules/zstd/memory/axi_ram.x @@ -0,0 +1,759 @@ +// Copyright 2023-2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
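+
+// AxiRamReader services AXI read transactions out of a RamModel-backed
+// memory. AxiRamReaderRequester decodes the AR bundle, walks the burst
+// (FIXED/INCR/WRAP) and issues RAM read requests; AxiRamReaderResponder
+// aligns the returned RAM words and emits them as AXI R beats. The two
+// halves coordinate through the internal AxiRamReaderSync channel.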
+ +import std; + +import xls.modules.zstd.math; +import xls.modules.zstd.memory.axi; +import xls.examples.ram; + +type AxiAr = axi::AxiAr; +type AxiR = axi::AxiR; + +type AxiReadResp = axi::AxiReadResp; +type AxiAxBurst = axi::AxiAxBurst; + +const AXI_AXSIZE_ENCODING_TO_SIZE = axi::AXI_AXSIZE_ENCODING_TO_SIZE; + +enum AxiRamReaderStatus: u1 { + IDLE = 0, + READ_BURST = 1, +} + +// FIXME: add default value for RAM_DATA_W_PLUS1_LOG2 = {std::clog2(AXI_DATA_W + u32:1)} (https://github.com/google/xls/issues/992) +struct AxiRamReaderSync { + do_recv_ram_resp: bool, + read_data_size: uN[RAM_DATA_W_PLUS1_LOG2], + read_data_offset: uN[RAM_DATA_W_PLUS1_LOG2], + send_data: bool, + resp: AxiReadResp, + id: uN[AXI_ID_W], + last: bool, +} + +struct AxiRamReaderRequesterState { + status: AxiRamReaderStatus, + ar_bundle: AxiAr, + read_data_size: u32, + addr: uN[AXI_ADDR_W], + ram_rd_req_idx: u8, +} + +// FIXME: add default value for AXI_DATA_W_PLUS1_LOG2 = {std::clog2(AXI_DATA_W + u32:1)} (https://github.com/google/xls/issues/992) +struct AxiRamReaderResponderState { + data: uN[AXI_DATA_W], + data_size: uN[AXI_DATA_W_PLUS1_LOG2], +} + +// Translates RAM requests to AXI read requests +proc AxiRamReaderRequester< + // AXI parameters + AXI_ADDR_W: u32, AXI_DATA_W: u32, AXI_DEST_W: u32, AXI_ID_W: u32, + + // RAM parameters + RAM_SIZE: u32, + BASE_ADDR: u32 = {u32:0}, + RAM_DATA_W: u32 = {AXI_DATA_W}, + RAM_ADDR_W: u32 = {AXI_ADDR_W}, + RAM_NUM_PARTITIONS: u32 = {AXI_DATA_W / u32:8 }, + + AXI_DATA_W_DIV8: u32 = { AXI_DATA_W / u32:8 }, + RAM_DATA_W_LOG2: u32 = { std::clog2(RAM_DATA_W) }, + AXI_DATA_W_LOG2: u32 = { std::clog2(AXI_DATA_W) }, + AXI_DATA_W_PLUS1_LOG2: u32 = { std::clog2(AXI_DATA_W + u32:1) }, + RAM_DATA_W_PLUS1_LOG2: u32 = { std::clog2(RAM_DATA_W + u32:1) }, +> { + type AxiAr = axi::AxiAr; + type ReadReq = ram::ReadReq; + + type State = AxiRamReaderRequesterState; + type Status = AxiRamReaderStatus; + type Sync = AxiRamReaderSync; + + axi_ar_r: chan in; + rd_req_s: chan out; + + sync_s: chan out; + + init { zero!() } + + config( + // AXI interface + axi_ar_r: chan in, + rd_req_s: chan out, + sync_s: chan out, + ) { + (axi_ar_r, rd_req_s, sync_s) + } + + next(state: State) { + const RAM_DATA_W_DIV8 = RAM_DATA_W >> u32:3; + + // receive AXI read request + let (tok, ar_bundle, ar_bundle_valid) = recv_if_non_blocking(join(), axi_ar_r, state.status == Status::IDLE, zero!()); + + // validate bundle + let ar_bundle_ok = ar_bundle_valid && ((ar_bundle.size as u32 + u32:3) <= AXI_DATA_W_LOG2); + if ar_bundle_valid { + trace_fmt!("{:#x}", ar_bundle); + } else {}; + let tok = send_if(tok, sync_s, ar_bundle_valid && !ar_bundle_ok, Sync { + id: ar_bundle.id, + resp: AxiReadResp::SLVERR, + last: true, + send_data: true, + ..zero!() + }); + + // send RAM read reqest + let addr_valid = state.addr < ((RAM_SIZE * RAM_DATA_W_DIV8) as uN[AXI_ADDR_W]); + let addr = (state.addr / RAM_DATA_W_DIV8) as uN[RAM_ADDR_W]; + + let do_read_from_ram = ( + (state.status == Status::READ_BURST) && + addr_valid && + (state.ram_rd_req_idx <= state.ar_bundle.len) + ); + let ram_read_req = ReadReq { + addr: addr, + mask: !uN[RAM_NUM_PARTITIONS]:0, + }; + let tok = send_if(join(), rd_req_s, do_read_from_ram, ram_read_req); + if do_read_from_ram { + trace_fmt!("Sent RAM read request {:#x}", ram_read_req); + } else {}; + + // send sync + let resp = if addr_valid { + AxiReadResp::OKAY + } else { + AxiReadResp::DECERR + }; + + // calculate read size and offset + let arsize_bits = AXI_AXSIZE_ENCODING_TO_SIZE[state.ar_bundle.size as u3] as 
uN[AXI_DATA_W_PLUS1_LOG2]; + + let (read_data_size, read_data_offset) = if (arsize_bits > RAM_DATA_W as uN[AXI_DATA_W_PLUS1_LOG2]) { + ( + RAM_DATA_W as uN[RAM_DATA_W_PLUS1_LOG2], + uN[RAM_DATA_W_PLUS1_LOG2]:0, + ) + } else { + ( + arsize_bits, + ((state.addr % RAM_DATA_W_DIV8) << u32:3) as uN[RAM_DATA_W_PLUS1_LOG2], + ) + }; + + let tok = send_if(tok, sync_s, state.status == Status::READ_BURST, Sync { + do_recv_ram_resp: do_read_from_ram, + read_data_size: read_data_size, + read_data_offset: read_data_offset, + send_data: read_data_size == arsize_bits, + resp: resp, + id: state.ar_bundle.id, + last: state.ram_rd_req_idx == state.ar_bundle.len, + }); + + // update state + match state.status { + Status::IDLE => { + if ar_bundle_ok { + State { + status: AxiRamReaderStatus::READ_BURST, + ar_bundle: ar_bundle, + addr: ar_bundle.addr, + ram_rd_req_idx: u8:0, + read_data_size: u32:0, + } + } else { state } + }, + Status::READ_BURST => { + if (state.ram_rd_req_idx == state.ar_bundle.len) { + State { + status: Status::IDLE, + ..state + } + } else { + let incr = math::logshiftl(uN[AXI_ADDR_W]:1, state.ar_bundle.size as uN[AXI_ADDR_W]); + let addr = match state.ar_bundle.burst { + AxiAxBurst::FIXED => state.addr, + AxiAxBurst::INCR => state.addr + incr, + AxiAxBurst::WRAP => if ((state.addr + incr) >= (RAM_SIZE * RAM_DATA_W_DIV8)) { + uN[AXI_ADDR_W]:0 + } else { + state.addr + incr + }, + _ => fail!("invalid_burst_mode", state.addr), + }; + State { + ram_rd_req_idx: state.ram_rd_req_idx + u8:1, + addr: addr, + ..state + } + } + }, + _ => state, + } + } +} + +// Should translate RAM responses to AXI read responses +proc AxiRamReaderResponder< + // AXI parameters + AXI_ADDR_W: u32, AXI_DATA_W: u32, AXI_DEST_W: u32, AXI_ID_W: u32, + + // RAM parameters + RAM_SIZE: u32, + BASE_ADDR: u32 = {u32:0}, + RAM_DATA_W: u32 = {AXI_DATA_W}, + RAM_ADDR_W: u32 = {AXI_ADDR_W}, + RAM_NUM_PARTITIONS: u32 = {AXI_DATA_W / u32:8 }, + + AXI_DATA_W_DIV8: u32 = { AXI_DATA_W / u32:8 }, + AXI_DATA_W_LOG2: u32 = { std::clog2(AXI_DATA_W) }, + RAM_DATA_W_LOG2: u32 = { std::clog2(RAM_DATA_W) }, + AXI_DATA_W_PLUS1_LOG2: u32 = { std::clog2(AXI_DATA_W + u32:1) }, + RAM_DATA_W_PLUS1_LOG2: u32 = { std::clog2(RAM_DATA_W + u32:1) }, +> { + type AxiR = axi::AxiR; + type ReadResp = ram::ReadResp; + + type State = AxiRamReaderResponderState; + type Sync = AxiRamReaderSync; + + rd_resp_r: chan in; + axi_r_s: chan out; + + sync_r: chan in; + + init { zero!() } + + config( + rd_resp_r: chan in, + axi_r_s: chan out, + sync_r: chan in, + ) { + (rd_resp_r, axi_r_s, sync_r) + } + + next(state: State) { + let tok = join(); + + // receive sync + let (tok, sync_data) = recv(tok, sync_r); + trace_fmt!("Received sync {:#x}", sync_data); + + // receive RAM read respose + let (tok, ram_read_resp) = recv_if(tok, rd_resp_r, sync_data.do_recv_ram_resp, zero!()); + if sync_data.do_recv_ram_resp { + trace_fmt!("Received RAM response {:#x}", ram_read_resp); + } else {}; + + let mask = math::logshiftl(uN[RAM_DATA_W]:1, sync_data.read_data_size as uN[RAM_DATA_W]) - uN[RAM_DATA_W]:1; + let mask = math::logshiftl(mask, state.data_size); + + let ram_data_shifted = if (sync_data.read_data_offset > state.data_size) { + math::logshiftr(ram_read_resp.data, sync_data.read_data_offset - state.data_size) as uN[AXI_DATA_W] & mask + } else { + math::logshiftl(ram_read_resp.data, state.data_size - sync_data.read_data_offset) as uN[AXI_DATA_W] & mask + }; + + // update state + let state = State { + data: ram_data_shifted, + data_size: state.data_size + 
sync_data.read_data_size, + }; + + // send AXI read response + let axi_r_bundle = AxiR { + id: sync_data.id, + data: state.data, + resp: sync_data.resp, + last: sync_data.last, + }; + let tok = send_if(tok, axi_r_s, sync_data.send_data, axi_r_bundle); + + if sync_data.send_data { + zero!() + } else { + state + } + } +} + +pub proc AxiRamReader< + // AXI parameters + AXI_ADDR_W: u32, + AXI_DATA_W: u32, + AXI_DEST_W: u32, + AXI_ID_W: u32, + + // RAM parameters + RAM_SIZE: u32, + BASE_ADDR: u32 = {u32:0}, + RAM_DATA_W: u32 = {AXI_DATA_W}, + RAM_ADDR_W: u32 = {AXI_ADDR_W}, + RAM_NUM_PARTITIONS: u32 = { AXI_DATA_W / u32:8 }, + + AXI_DATA_W_DIV8: u32 = { AXI_DATA_W / u32:8 }, + RAM_DATA_W_LOG2: u32 = { std::clog2(RAM_DATA_W) }, + RAM_DATA_W_PLUS1_LOG2: u32 = { std::clog2(RAM_DATA_W + u32:1) }, +> { + type AxiAr = axi::AxiAr; + type AxiR = axi::AxiR; + + type ReadReq = ram::ReadReq; + type ReadResp = ram::ReadResp; + + type Sync = AxiRamReaderSync; + + init { } + + config( + // AXI interface + axi_ar_r: chan in, + axi_r_s: chan out, + + // RAM interface + rd_req_s: chan out, + rd_resp_r: chan in, + ) { + let (sync_s, sync_r) = chan("sync"); + + spawn AxiRamReaderRequester< + AXI_ADDR_W, AXI_DATA_W, AXI_DEST_W, AXI_ID_W, + RAM_SIZE, BASE_ADDR, RAM_DATA_W, RAM_ADDR_W, RAM_NUM_PARTITIONS, + AXI_DATA_W_DIV8, + >(axi_ar_r, rd_req_s, sync_s); + spawn AxiRamReaderResponder< + AXI_ADDR_W, AXI_DATA_W, AXI_DEST_W, AXI_ID_W, + RAM_SIZE, BASE_ADDR, RAM_DATA_W, RAM_ADDR_W, RAM_NUM_PARTITIONS, + AXI_DATA_W_DIV8, + >(rd_resp_r, axi_r_s, sync_r); + } + + next(state: ()) { } +} + +const INST_AXI_ADDR_W = u32:32; +const INST_AXI_DATA_W = u32:32; +const INST_AXI_DEST_W = u32:8; +const INST_AXI_ID_W = u32:8; +const INST_AXI_DATA_W_DIV8 = INST_AXI_DATA_W / u32:8; + +const INST_RAM_SIZE = u32:100; +const INST_RAM_DATA_W = INST_AXI_DATA_W; +const INST_RAM_ADDR_W = std::clog2(INST_RAM_SIZE); +const INST_RAM_WORD_PARTITION_SIZE = u32:8; +const INST_RAM_NUM_PARTITIONS = INST_RAM_DATA_W / INST_RAM_WORD_PARTITION_SIZE; + +const INST_BASE_ADDR = u32:0x8000; + +proc AxiRamReaderInst< + FAKE_PARAM: u32 = {u32:0} // FIXME: remove after https://github.com/google/xls/issues/1415 is fixed +> { + type AxiAr = axi::AxiAr; + type AxiR = axi::AxiR; + type ReadReq = ram::ReadReq; + type ReadResp = ram::ReadResp; + + init { } + + config( + // AXI interface + axi_ar_r: chan in, + axi_r_s: chan out, + // RAM interface + rd_req_s: chan out, + rd_resp_r: chan in, + ) { + spawn AxiRamReader< + INST_AXI_ADDR_W, INST_AXI_DATA_W, INST_AXI_DEST_W, INST_AXI_ID_W, + INST_RAM_SIZE, INST_BASE_ADDR, INST_RAM_DATA_W, INST_RAM_ADDR_W, INST_RAM_NUM_PARTITIONS, + INST_AXI_DATA_W_DIV8 + > (axi_ar_r, axi_r_s, rd_req_s, rd_resp_r); + } + + next(state: ()) { } +} + +// only for RAM rewrite +proc AxiRamReaderInstWithEmptyWrites { + type AxiAr = axi::AxiAr; + type AxiR = axi::AxiR; + type ReadReq = ram::ReadReq; + type ReadResp = ram::ReadResp; + type WriteReq = ram::WriteReq; + type WriteResp = ram::WriteResp; + + wr_req_s: chan out; + wr_resp_r: chan in; + + init { } + + config( + // AXI interface + axi_ar_r: chan in, + axi_r_s: chan out, + // RAM interface + rd_req_s: chan out, + rd_resp_r: chan in, + wr_req_s: chan out, + wr_resp_r: chan in, + ) { + spawn AxiRamReader< + INST_AXI_ADDR_W, INST_AXI_DATA_W, INST_AXI_DEST_W, INST_AXI_ID_W, + INST_RAM_SIZE, INST_BASE_ADDR, INST_RAM_DATA_W, INST_RAM_ADDR_W, INST_RAM_NUM_PARTITIONS, + INST_AXI_DATA_W_DIV8 + > (axi_ar_r, axi_r_s, rd_req_s, rd_resp_r); + + ( + wr_req_s, wr_resp_r + ) + } + + next(state: ()) { + 
send_if(join(), wr_req_s, false, zero!()); + recv_if(join(), wr_resp_r, false, zero!()); + } +} + +const TEST_AXI_ADDR_W = u32:32; +const TEST_AXI_DATA_W = u32:32; +const TEST_AXI_DEST_W = u32:8; +const TEST_AXI_ID_W = u32:8; +const TEST_AXI_DATA_W_DIV8 = TEST_AXI_DATA_W / u32:8; + +const TEST_RAM_SIZE = u32:100; +const TEST_RAM_DATA_W = TEST_AXI_DATA_W; +const TEST_RAM_ADDR_W = std::clog2(TEST_RAM_SIZE); +const TEST_RAM_WORD_PARTITION_SIZE = u32:8; +const TEST_RAM_NUM_PARTITIONS = TEST_RAM_DATA_W / TEST_RAM_WORD_PARTITION_SIZE; +const TEST_RAM_SIZE_BYTES = TEST_RAM_SIZE * (TEST_RAM_DATA_W / u32:8); + +const TEST_BASE_ADDR = u32:0x8000; + +type TestAxiAr = axi::AxiAr; +type TestAxiR = axi::AxiR; + +type TestReadReq = ram::ReadReq; +type TestReadResp = ram::ReadResp; +type TestWriteReq = ram::WriteReq; +type TestWriteResp = ram::WriteResp; + +const ZERO_AXI_AR_BUNDLE = zero!(); + +type TestAxiId = uN[TEST_AXI_ID_W]; +type TestAxiAddr = uN[TEST_AXI_ADDR_W]; +type TestAxiRegion = uN[4]; +type TestAxiLen = uN[8]; +type TestAxiSize = axi::AxiAxSize; +type TestAxiBurst = axi::AxiAxBurst; +type TestAxiCache = axi::AxiArCache; +type TestAxiProt = uN[3]; +type TestAxiQos = uN[4]; + +const TEST_RAM_DATA = u32[TEST_RAM_SIZE]:[ + u32:0xD945_50A5, u32:0xA20C_D8D3, u32:0xB0BE_D046, u32:0xF83C_6D26, u32:0xFAE4_B0C4, + u32:0x9A78_91C4, u32:0xFDA0_9B1E, u32:0x5E66_D76D, u32:0xCB7D_76CB, u32:0x4033_5F2F, + u32:0x2128_9B0B, u32:0xD263_365F, u32:0xD989_DD81, u32:0xE4CB_45C9, u32:0x0425_06B6, + u32:0x5D31_107C, u32:0x2282_7A67, u32:0xCAC7_0C94, u32:0x23A9_5FD8, u32:0x6122_BBC3, + u32:0x1F99_F3D0, u32:0xA70C_FB34, u32:0x3812_5EF2, u32:0x9157_61BC, u32:0x171A_C1B1, + + u32:0xDE6F_1B08, u32:0x420D_F1AF, u32:0xAEE9_F51B, u32:0xB31E_E3A3, u32:0x66AC_09D6, + u32:0x18E9_9703, u32:0xEE87_1E7A, u32:0xB63D_47DE, u32:0x59BF_4F52, u32:0x94D8_5636, + u32:0x2B81_34EE, u32:0x6711_9968, u32:0xFB2B_F8CB, u32:0x173F_CB1B, u32:0xFB94_3A67, + u32:0xF40B_714F, u32:0x383B_82FE, u32:0xA692_055E, u32:0x58A6_2110, u32:0x0185_B5E0, + u32:0x9DF0_9C22, u32:0x54CA_DB57, u32:0xC626_097F, u32:0xEA04_3110, u32:0xF11C_4D36, + + u32:0xB8CC_FAB0, u32:0x7801_3B20, u32:0x8189_BF9C, u32:0xE380_A505, u32:0x4672_AE34, + u32:0x1CD5_1B3A, u32:0x5F95_EE9E, u32:0xBC5C_9931, u32:0xBCE6_50D2, u32:0xC10D_0544, + u32:0x5AB4_DEA1, u32:0x5E20_3394, u32:0x7FDA_0CA1, u32:0x6FEC_112E, u32:0x107A_2F81, + u32:0x86CA_4491, u32:0xEA68_0EB7, u32:0x50F1_AA22, u32:0x3F47_F2CA, u32:0xE407_92F7, + u32:0xF35C_EEE0, u32:0x1D6B_E819, u32:0x3FA7_05FA, u32:0x08BB_A499, u32:0x7C0C_4812, + + u32:0xF5A5_3D5C, u32:0x079A_BE16, u32:0xACA1_F84B, u32:0x4D2B_9402, u32:0x45B1_28FD, + u32:0x2C7C_CBA5, u32:0x6874_FC32, u32:0x95A0_8288, u32:0xFB13_E707, u32:0x61F9_2FEF, + u32:0xF6E3_DAFC, u32:0xDBA0_0A80, u32:0xBB84_831B, u32:0xAD63_2520, u32:0xEFB3_D817, + u32:0xD190_C435, u32:0x9064_1E4F, u32:0x0839_3D28, u32:0x1C07_874C, u32:0xBBEB_D633, + u32:0xB0A9_C751, u32:0x83B9_A340, u32:0x028A_FF8A, u32:0xB4ED_EE5C, u32:0xD700_BD9C, +]; + +const TEST_AXI_AR_BUNDLES = TestAxiAr[16]:[ + AxiAr { + id: TestAxiId:0, + addr: TestAxiAddr:40, + len: TestAxiLen:8, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::FIXED, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:0, + addr: TestAxiAddr:440, + len: TestAxiLen:8, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::FIXED, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:1, + addr: TestAxiAddr:32, + len: TestAxiLen:8, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::FIXED, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { 
+ id: TestAxiId:2, + addr: TestAxiAddr:16, + len: TestAxiLen:8, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:3, + addr: TestAxiAddr:92, + len: TestAxiLen:4, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:4, + addr: TestAxiAddr:0, + len: TestAxiLen:2, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:5, + addr: TestAxiAddr:52, + len: TestAxiLen:20, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:6, + addr: TestAxiAddr:96, + len: TestAxiLen:10, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:7, + addr: TestAxiAddr:128, + len: TestAxiLen:16, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::WRAP, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:8, + addr: TestAxiAddr:256, + len: TestAxiLen:2, + size: TestAxiSize::MAX_4B_TRANSFER, + burst: TestAxiBurst::WRAP, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:9, + addr: TestAxiAddr:32, + len: TestAxiLen:4, + size: TestAxiSize::MAX_2B_TRANSFER, + burst: TestAxiBurst::FIXED, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:10, + addr: TestAxiAddr:80, + len: TestAxiLen:4, + size: TestAxiSize::MAX_1B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:11, + addr: TestAxiAddr:256, + len: TestAxiLen:16, + size: TestAxiSize::MAX_2B_TRANSFER, + burst: TestAxiBurst::WRAP, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:12, + addr: TestAxiAddr:64, + len: TestAxiLen:2, + size: TestAxiSize::MAX_8B_TRANSFER, + burst: TestAxiBurst::FIXED, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:13, + addr: TestAxiAddr:192, + len: TestAxiLen:16, + size: TestAxiSize::MAX_64B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, + AxiAr { + id: TestAxiId:14, + addr: TestAxiAddr:16, + len: TestAxiLen:16, + size: TestAxiSize::MAX_128B_TRANSFER, + burst: TestAxiBurst::INCR, + ..ZERO_AXI_AR_BUNDLE + }, +]; + +#[test_proc] +proc AxiRamReaderTest { + terminator: chan out; + + axi_ar_s: chan out; + axi_r_r: chan in; + + wr_req_s: chan out; + wr_resp_r: chan in; + + init {} + + config( + terminator: chan out, + ) { + let (rd_req_s, rd_req_r) = chan("rd_req"); + let (rd_resp_s, rd_resp_r) = chan("rd_resp"); + let (wr_req_s, wr_req_r) = chan("wr_req"); + let (wr_resp_s, wr_resp_r) = chan("wr_resp"); + + spawn ram::RamModel ( + rd_req_r, rd_resp_s, wr_req_r, wr_resp_s + ); + + let (axi_ar_s, axi_ar_r) = chan("axi_ar"); + let (axi_r_s, axi_r_r) = chan("axi_r"); + + spawn AxiRamReader< + TEST_AXI_ADDR_W, TEST_AXI_DATA_W, TEST_AXI_DEST_W, TEST_AXI_ID_W, + TEST_RAM_SIZE, TEST_BASE_ADDR, TEST_RAM_DATA_W, TEST_RAM_ADDR_W, TEST_RAM_NUM_PARTITIONS, + TEST_AXI_DATA_W_DIV8, + >(axi_ar_r, axi_r_s, rd_req_s, rd_resp_r); + + ( + terminator, + axi_ar_s, axi_r_r, wr_req_s, wr_resp_r, + ) + } + + next(state: ()) { + type RamAddr = bits[TEST_RAM_ADDR_W]; + type RamData = bits[TEST_RAM_DATA_W]; + type RamMask = bits[TEST_RAM_NUM_PARTITIONS]; + + let tok = join(); + + // write test RAM data + let tok = for ((i, data), tok): ((u32, u32), token) in enumerate(TEST_RAM_DATA) { + let tok = send(tok, wr_req_s, TestWriteReq { + addr: i as RamAddr, + data: data, + mask: !bits[TEST_RAM_NUM_PARTITIONS]:0, + }); + let (tok, _) = recv(tok, wr_resp_r); + + tok + 
}(tok); + + let tok = for ((i, axi_ar_bundle), tok): ((u32, TestAxiAr), token) in enumerate(TEST_AXI_AR_BUNDLES) { + let tok = send(tok, axi_ar_s, axi_ar_bundle); + trace_fmt!("Sent bundle #{} {:#x}", i + u32:1, axi_ar_bundle); + + let size_valid = (u32:1 << (axi_ar_bundle.size as u32 + u32:3)) <= TEST_AXI_DATA_W; + + let data_len = if size_valid { + axi_ar_bundle.len as u32 + } else { + u32:0 + }; + + for (j, tok): (u32, token) in range(u32:0, TEST_RAM_SIZE) { + if (j <= data_len) { + let (tok, data) = recv(tok, axi_r_r); + trace_fmt!("Received data #{} {:#x}", j, data); + // compute address + let araddr = match axi_ar_bundle.burst { + AxiAxBurst::FIXED => { + axi_ar_bundle.addr + }, + AxiAxBurst::INCR => { + axi_ar_bundle.addr + j * (u32:1 << (axi_ar_bundle.size as u32)) + }, + AxiAxBurst::WRAP => { + (axi_ar_bundle.addr + j * (u32:1 << (axi_ar_bundle.size as u32))) % (TEST_RAM_SIZE * (TEST_RAM_DATA_W / u32:8)) + }, + }; + // create expected data using RAM data + let (expected_data, addr_valid) = for (k, (expected_data, addr_valid)): (u32, (uN[TEST_AXI_DATA_W], bool)) in range(u32:0, TEST_AXI_DATA_W / u32:8) { + if k < (u32:1 << (axi_ar_bundle.size as u32)) { + let ram_addr = (araddr + k) / (TEST_RAM_DATA_W / u32:8); + let ram_offset = ((araddr + k) % (TEST_RAM_DATA_W / u32:8)) * u32:8; + if ram_addr < TEST_RAM_SIZE { + ( + expected_data | (((TEST_RAM_DATA[ram_addr] >> ram_offset) & u32:0xFF) << (u32:8 * k)), + addr_valid, + ) + } else { + ( + uN[TEST_AXI_DATA_W]:0, + false, + ) + } + } else { + ( + expected_data, + addr_valid + ) + } + }((uN[TEST_AXI_DATA_W]:0, true)); + + let expected_rresp = if !size_valid { + AxiReadResp::SLVERR + } else if addr_valid { + AxiReadResp::OKAY + } else { + AxiReadResp::DECERR + }; + + assert_eq(expected_rresp, data.resp); + assert_eq(j == data_len, data.last); + assert_eq(axi_ar_bundle.id, data.id); + if expected_rresp == AxiReadResp::OKAY { + // valid read + assert_eq(expected_data, data.data); + } else { }; + tok + } else { tok } + }(tok) + }(tok); + + send(tok, terminator, true); + } +} + + diff --git a/xls/modules/zstd/memory/axi_stream_remove_empty.x b/xls/modules/zstd/memory/axi_stream_remove_empty.x index a61ec479fc..40c8aa9208 100644 --- a/xls/modules/zstd/memory/axi_stream_remove_empty.x +++ b/xls/modules/zstd/memory/axi_stream_remove_empty.x @@ -29,34 +29,185 @@ struct AxiStreamRemoveEmptyState< dest: uN[DEST_W], } +pub struct ContinuousStream< + DATA_W: u32, + DEST_W: u32, + ID_W: u32, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, +> { + data: uN[DATA_W], + len: uN[DATA_W_LOG2], + id: uN[ID_W], + dest: uN[DEST_W], + last: u1 +} + +const INST_DATA_W = u32:32; +const INST_DATA_W_DIV8 = INST_DATA_W / u32:8; +const INST_DATA_W_LOG2 = std::clog2(INST_DATA_W + u32:1); +const INST_DEST_W = u32:32; +const INST_ID_W = u32:32; +const TEST_DATA_W = u32:32; +const TEST_DATA_W_DIV8 = TEST_DATA_W / u32:8; +const TEST_DATA_W_LOG2 = std::clog2(TEST_DATA_W + u32:1); +const TEST_DEST_W = u32:32; +const TEST_ID_W = u32:32; // Returns a tuple containing data and length, afer removing non-data // bytes from the in_data varaiable, using information from keep and str fields -fn remove_empty_bytes ( - in_data: uN[DATA_W], keep: uN[DATA_W_DIV8], str: uN[DATA_W_DIV8] -) -> (uN[DATA_W], uN[DATA_W_LOG2]) { - - const EXT_OFFSET_W = DATA_W_LOG2 + u32:3; - +pub proc RemoveEmptyBytes< + DATA_W: u32, DEST_W: u32, ID_W: u32, + DATA_W_DIV8: u32 = {DATA_W / u32:8}, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, + EXT_OFFSET_W: u32 = {(std::clog2(DATA_W + u32:1)) + 
u32:3}, +> { type Data = uN[DATA_W]; type Str = uN[DATA_W_DIV8]; - type Keep = uN[DATA_W_DIV8]; type Offset = uN[DATA_W_LOG2]; type OffsetExt = uN[EXT_OFFSET_W]; type Length = uN[DATA_W_LOG2]; - let (data, len, _) = for (i, (data, len, offset)): (u32, (Data, Length, Offset)) in range(u32:0, DATA_W_DIV8) { - if str[i +: u1] & keep[i +: u1] { - ( - data | (in_data & (Data:0xFF << (u32:8 * i))) >> (OffsetExt:8 * offset as OffsetExt), - len + Length:8, - offset, - ) - } else { - (data, len, offset + Offset:1) - } - }((Data:0, Length:0, Offset:0)); - (data, len) + type AxiStream = axi_st::AxiStream; + type StrobedStream = ContinuousStream; + + stream_r: chan in; + continuous_stream_s: chan out; + + config ( + stream_r: chan in, + continuous_stream_s: chan out, + ) { + (stream_r, continuous_stream_s) + } + + init { () } + + next (state: ()) { + let (tok, frame) = recv(join(), stream_r); + let (in_data, str) = (frame.data, frame.str); + + let (data, len, _) = unroll_for! (i, (data, len, offset)): (u32, (Data, Length, Offset)) in range(u32:0, DATA_W_DIV8) { + if str[i +: u1] { + ( + data | (in_data & (Data:0xFF << (u32:8 * i))) >> (OffsetExt:8 * offset as OffsetExt), + len + Length:8, + offset, + ) + } else { + (data, len, offset + Offset:1) + } + }((Data:0, Length:0, Offset:0)); + + let continuous_stream = StrobedStream { + data: data, + len: len, + id: frame.id, + dest: frame.dest, + last: frame.last, + }; + send(tok, continuous_stream_s, continuous_stream); + } +} + +pub proc RemoveEmptyBytesInst { + type AxiStream = axi_st::AxiStream; + type StrobedStream = ContinuousStream; + + config ( + stream_r: chan in, + continuous_stream_s: chan out, + ) { + spawn RemoveEmptyBytes( + stream_r, continuous_stream_s + ); + } + + init { () } + + next (state: ()) {} +} + +#[test_proc] +proc RemoveEmptyBytesTest { + type TestAxiStream = axi_st::AxiStream; + type TestStrobedStream = ContinuousStream; + terminator: chan out; + stream_s: chan out; + continuous_stream_r: chan in; + + config ( + terminator: chan out, + ) { + let (stream_s, stream_r) = chan("frame_data"); + let (continuous_stream_s, continuous_stream_r) = chan("bare_data"); + + spawn RemoveEmptyBytes( + stream_r, continuous_stream_s + ); + + (terminator, stream_s, continuous_stream_r) + } + + init { } + + next (state: ()) { + type Data = uN[TEST_DATA_W]; + type Str = uN[TEST_DATA_W_DIV8]; + type Id = uN[TEST_ID_W]; + type Dest = uN[TEST_DEST_W]; + type Length = uN[TEST_DATA_W_LOG2]; + + let tok = join(); + + let data = Data:0xDEADBEEF; + let input_data: TestAxiStream[16] = [ + TestAxiStream{data: data, str: Str:0b0000, keep: Str:0b0000, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0001, keep: Str:0b0001, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0010, keep: Str:0b0010, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0011, keep: Str:0b0011, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0100, keep: Str:0b0100, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0101, keep: Str:0b0101, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0110, keep: Str:0b0110, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b0111, keep: Str:0b0111, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1000, keep: Str:0b1000, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1001, keep: Str:0b1001, id: Id:0, dest: 
Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1010, keep: Str:0b1010, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1011, keep: Str:0b1011, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1100, keep: Str:0b1100, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1101, keep: Str:0b1101, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1110, keep: Str:0b1110, id: Id:0, dest: Dest:0, last: false}, + TestAxiStream{data: data, str: Str:0b1111, keep: Str:0b1111, id: Id:0, dest: Dest:0, last: true} + ]; + let expected_output: TestStrobedStream[16] = [ + TestStrobedStream{data: Data:0x00, len: Length:0, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xEF, len: Length:8, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xBE, len: Length:8, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xBEEF, len: Length:16, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xAD, len: Length:8, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xADEF, len: Length:16, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xADBE, len: Length:16, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xADBEEF, len: Length:24, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDE, len: Length:8, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEEF, len: Length:16, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEBE, len: Length:16, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEBEEF, len: Length:24, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEAD, len: Length:16, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEADEF, len: Length:24, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEADBE, len: Length:24, id: Id:0, dest: Dest:0, last: false}, + TestStrobedStream{data: Data:0xDEADBEEF, len: Length:32, id: Id:0, dest: Dest:0, last: true} + ]; + + let tok = for (i, tok): (u32, token) in range(u32:0, u32:16) { + let tok = send(tok, stream_s, input_data[i]); + trace_fmt!("TestRemoveEmptyBytes: Sent #{} strobed packet: {:#x}", i + u32:1, input_data[i]); + let (tok, continuous_stream) = recv(tok, continuous_stream_r); + trace_fmt!("TestRemoveEmptyBytes: Received #{} continuous packet: {:#x}", i + u32:1, continuous_stream); + assert_eq(continuous_stream, expected_output[i]); + (tok) + } (tok); + + send(tok, terminator, true); + } } // Returns the number of bytes that should be soted in the state in case we @@ -71,22 +222,23 @@ fn get_overflow_len(len1: uN[LENGTH_W], len2: uN[LEN // Return the new mask for keep and str fields, calculated using new data length fn get_mask(len: uN[DATA_W_LOG2]) -> uN[DATA_W_DIV8] { - const MAX_LEN = DATA_W as uN[DATA_W_LOG2]; - const MASK = !uN[DATA_W_DIV8]:0; + let len_bytes = std::div_pow2(len, uN[DATA_W_LOG2]:8); + let mask = (uN[DATA_W_DIV8]:1 << len_bytes as uN[DATA_W_DIV8]) - uN[DATA_W_DIV8]:1; - let shift = std::div_pow2((MAX_LEN - len), uN[DATA_W_LOG2]:8); - MASK >> shift + mask } // A proc that removes empty bytes from the Axi Stream and provides aligned data // to other procs, allowing for a simpler implementation of the receiving side // of the design. 
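+// Note on this refactor (editorial summary of the change below): the byte
+// compaction step now lives in the RemoveEmptyBytes proc defined above, which
+// drops bytes not covered by the strobe and emits a ContinuousStream;
+// AxiStreamRemoveEmptyInternal repacks those continuous payloads into full
+// AxiStream transfers, and the public AxiStreamRemoveEmpty proc spawns the two
+// back to back.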
-pub proc AxiStreamRemoveEmpty< +pub proc AxiStreamRemoveEmptyInternal< DATA_W: u32, DEST_W: u32, ID_W: u32, DATA_W_DIV8: u32 = {DATA_W / u32:8}, DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, > { type AxiStream = axi_st::AxiStream; + type StrobedStream = ContinuousStream; + type State = AxiStreamRemoveEmptyState; type Offset = uN[DATA_W_LOG2]; @@ -95,11 +247,11 @@ pub proc AxiStreamRemoveEmpty< type Str = uN[DATA_W_DIV8]; type Data = uN[DATA_W]; - stream_in_r: chan in; + stream_in_r: chan in; stream_out_s: chan out; config ( - stream_in_r: chan in, + stream_in_r: chan in, stream_out_s: chan out, ) { (stream_in_r, stream_out_s) @@ -112,17 +264,13 @@ pub proc AxiStreamRemoveEmpty< const MAX_MASK = !uN[DATA_W_DIV8]:0; let do_recv = !state.last; - let (tok, stream_in) = recv_if(join(), stream_in_r, !state.last, zero!()); - let (id, dest) = if !state.last { - (stream_in.id, stream_in.dest) + let (tok, stream_in) = recv_if(join(), stream_in_r, do_recv, zero!()); + let (id, dest, data, len) = if do_recv { + (stream_in.id, stream_in.dest, stream_in.data, stream_in.len) } else { - (state.id, state.dest) + (state.id, state.dest, Data:0, Length:0) }; - let (data, len) = remove_empty_bytes( - stream_in.data, stream_in.keep, stream_in.str - ); - let empty_input_bytes = MAX_LEN - len; let empty_state_bytes = MAX_LEN - state.len; @@ -130,14 +278,12 @@ pub proc AxiStreamRemoveEmpty< let exact_transfer = (empty_input_bytes == state.len); let combined_state_data = state.data | data << state.len; - let combined_input_data = data | state.data << len; - let overflow_len = get_overflow_len(state.len, len); let sum_len = state.len + len; - let sum_mask = get_mask(sum_len); let (next_state, do_send, data) = if !state.last & exceeds_transfer { // flush and store + let overflow_len = get_overflow_len(state.len, len); ( State { data: data >> empty_state_bytes, @@ -157,6 +303,7 @@ pub proc AxiStreamRemoveEmpty< ) } else if state.last | stream_in.last | exact_transfer { // flush only + let sum_mask = get_mask(sum_len); ( zero!(), true, @@ -172,7 +319,7 @@ pub proc AxiStreamRemoveEmpty< // store ( State { - data: combined_input_data, + data: combined_state_data, len: sum_len, ..state }, @@ -186,15 +333,55 @@ pub proc AxiStreamRemoveEmpty< } } +type InstAxiStream = axi_st::AxiStream; +type InstStrobedStream = ContinuousStream; -const INST_DATA_W = u32:32; -const INST_DEST_W = u32:32; -const INST_ID_W = u32:32; +proc AxiStreamRemoveEmptyInternalInst { + config ( + stream_in_r: chan in, + stream_out_s: chan out, + ) { + spawn AxiStreamRemoveEmptyInternal ( + stream_in_r, + stream_out_s + ); + } -const INST_DATA_W_DIV8 = INST_DATA_W / u32:8; -const INST_DATA_W_LOG2 = std::clog2(INST_DATA_W + u32:1); + init { } -type InstAxiStream = axi_st::AxiStream; + next (state:()) { } +} + +pub proc AxiStreamRemoveEmpty< + DATA_W: u32, DEST_W: u32, ID_W: u32, + DATA_W_DIV8: u32 = {DATA_W / u32:8}, + DATA_W_LOG2: u32 = {std::clog2(DATA_W + u32:1)}, +> { + type AxiStream = axi_st::AxiStream; + type StrobedStream = ContinuousStream; + + config ( + stream_in_r: chan in, + stream_out_s: chan out, + ) { + let (continuous_stream_s, continuous_stream_r) = chan("continuous_stream"); + + spawn RemoveEmptyBytes( + stream_in_r, + continuous_stream_s + ); + spawn AxiStreamRemoveEmptyInternal ( + continuous_stream_r, + stream_out_s + ); + + () + } + + init { () } + + next (state: ()) {} +} proc AxiStreamRemoveEmptyInst { config ( @@ -212,12 +399,6 @@ proc AxiStreamRemoveEmptyInst { next (state:()) { } } - -const TEST_DATA_W = u32:32; -const 
TEST_DEST_W = u32:32; -const TEST_ID_W = u32:32; -const TEST_DATA_W_DIV8 = TEST_DATA_W / u32:8; - type TestAxiStream = axi_st::AxiStream; #[test_proc] @@ -405,6 +586,344 @@ proc AxiStreamRemoveEmptyTest { dest: Dest:0, }); + // Test 6: Some bits set, last set in the last transfer. + + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x0000_00B9, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x0000_007F, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x0000_0069, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x00DF_5EF7, + str: Str:0b0111, + keep: Keep:0b0111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x0000_C735, + str: Str:0b0011, + keep: Keep:0b0011, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xF769_7FB9, + str: Str:0xF, + keep: Keep:0xF, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xC735_DF5E, + str: Str:0xF, + keep: Keep:0xF, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + + // Test 7: Some bits set, last set in the last transfer. + + + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0xf7697fb9, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0xc735df5e, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x70d3da1f, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x0000001d, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x01eaf614, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x00001734, + str: Str:0b0011, + keep: Keep:0b0011, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0xe935b870, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x00f149f5, + str: Str:0b0111, + keep: Keep:0b0111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0xf073eed1, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0xce97b5bd, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x950cddd9, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x08f0ebd4, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = 
send(tok, stream_in_s, TestAxiStream { + data: Data:0xABEB9592, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0xB16E2D5C, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x157CF9C6, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let tok = send(tok, stream_in_s, TestAxiStream { + data: Data:0x00000019, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xf7697fb9, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xc735df5e, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x70d3da1f, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x0000001d, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x01eaf614, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x00001734, + str: Str:0b0011, + keep: Keep:0b0011, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xe935b870, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x00f149f5, + str: Str:0b0111, + keep: Keep:0b0111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xf073eed1, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xce97b5bd, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x950cddd9, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x08f0ebd4, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xABEB9592, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0xB16E2D5C, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + 
assert_eq(stream_out, TestAxiStream { + data: Data:0x157CF9C6, + str: Str:0b1111, + keep: Keep:0b1111, + last: u1:0, + id: Id:0, + dest: Dest:0, + }); + let (tok, stream_out) = recv(tok, stream_out_r); + assert_eq(stream_out, TestAxiStream { + data: Data:0x00000019, + str: Str:0b0001, + keep: Keep:0b0001, + last: u1:1, + id: Id:0, + dest: Dest:0, + }); + send(tok, terminator, true); } } diff --git a/xls/modules/zstd/memory/axi_writer.x b/xls/modules/zstd/memory/axi_writer.x index 2f62307731..21dd09baf4 100644 --- a/xls/modules/zstd/memory/axi_writer.x +++ b/xls/modules/zstd/memory/axi_writer.x @@ -124,6 +124,7 @@ pub proc AxiWriter< next(state: State) { const BYTES_IN_TRANSFER = DATA_W_DIV8 as Addr; const MAX_AXI_BURST_BYTES = Addr:256 * BYTES_IN_TRANSFER; + const MAX_LANE = std::unsigned_max_value(); let tok_0 = join(); @@ -280,7 +281,7 @@ pub proc AxiWriter< Fsm::AXI_WRITE_W => { let last = state.burst_counter == state.burst_end; let low_lane = state.req_low_lane; - let high_lane = if (last) { state.req_high_lane } else {Lane:3}; + let high_lane = if (last) { state.req_high_lane } else {MAX_LANE}; let mask = common::lane_mask(low_lane, high_lane); AxiW { diff --git a/xls/modules/zstd/memory/axi_writer_cocotb_test.py b/xls/modules/zstd/memory/axi_writer_cocotb_test.py new file mode 100644 index 0000000000..b30876a687 --- /dev/null +++ b/xls/modules/zstd/memory/axi_writer_cocotb_test.py @@ -0,0 +1,245 @@ +#!/usr/bin/env python +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
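+
+# Overview (editorial note): this cocotb testbench drives the axi_writer_wrapper
+# DUT with write requests over an XLS channel and with payload data over an
+# AXI-Stream source, while cocotbext-axi's AxiRamWrite models the AXI write
+# target. Expected writes are mirrored into a SparseMemory reference, write
+# responses are checked with a Scoreboard, and the RAM contents are compared
+# against the reference once all responses have been received.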
+ + +import random +import logging +from pathlib import Path + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import ClockCycles, Event +from cocotb.binary import BinaryValue +from cocotb_bus.scoreboard import Scoreboard + +from cocotbext.axi.axis import AxiStreamSource, AxiStreamBus, AxiStreamFrame +from cocotbext.axi.axi_channels import AxiAWBus, AxiWBus, AxiBBus, AxiWriteBus, AxiAWMonitor, AxiWMonitor, AxiBMonitor, AxiBTransaction, AxiBSource, AxiBSink +from cocotbext.axi.axi_ram import AxiRamWrite +from cocotbext.axi.sparse_memory import SparseMemory + +from xls.modules.zstd.cocotb.channel import ( + XLSChannel, + XLSChannelDriver, + XLSChannelMonitor, +) +from xls.modules.zstd.cocotb.utils import reset, run_test +from xls.modules.zstd.cocotb.xlsstruct import XLSStruct, xls_dataclass + +ID_WIDTH = 4 +ADDR_WIDTH = 16 + +# Override default widths of AXI response signals +signal_widths = {"bresp": 3} +AxiBBus._signal_widths = signal_widths +AxiBTransaction._signal_widths = signal_widths +AxiBSource._signal_widths = signal_widths +AxiBSink._signal_widths = signal_widths +AxiBMonitor._signal_widths = signal_widths + +@xls_dataclass +class AxiWriterRespStruct(XLSStruct): + status: 1 + +@xls_dataclass +class WriteRequestStruct(XLSStruct): + address: ADDR_WIDTH + length: ADDR_WIDTH + +def set_termination_event(monitor, event, transactions): + def terminate_cb(_): + if monitor.stats.received_transactions == transactions: + event.set() + monitor.add_callback(terminate_cb) + +@cocotb.test(timeout_time=20000, timeout_unit="ms") +async def ram_test(dut): + GENERIC_ADDR_REQ_CHANNEL = "write_req" + GENERIC_ADDR_RESP_CHANNEL = "write_resp" + AXI_STREAM_CHANNEL = "axi_st_read" + AXI_AW_CHANNEL = "axi_aw" + AXI_W_CHANNEL = "axi_w" + AXI_B_CHANNEL = "axi_b" + + terminate = Event() + + mem_size = 2**ADDR_WIDTH + test_count = 200 + + (addr_req_input, axi_st_input, addr_resp_expect, memory_verification, expected_memory) = generate_test_data_random(test_count, mem_size) + + dut.rst.setimmediatevalue(0) + + clock = Clock(dut.clk, 10, units="us") + cocotb.start_soon(clock.start()) + + resp_bus = XLSChannel(dut, GENERIC_ADDR_RESP_CHANNEL, dut.clk, start_now=True) + + driver_addr_req = XLSChannelDriver(dut, GENERIC_ADDR_REQ_CHANNEL, dut.clk) + driver_axi_st = AxiStreamSource(AxiStreamBus.from_prefix(dut, AXI_STREAM_CHANNEL), dut.clk, dut.rst) + + bus_axi_aw = AxiAWBus.from_prefix(dut, AXI_AW_CHANNEL) + bus_axi_w = AxiWBus.from_prefix(dut, AXI_W_CHANNEL) + bus_axi_b = AxiBBus.from_prefix(dut, AXI_B_CHANNEL) + bus_axi_write = AxiWriteBus(bus_axi_aw, bus_axi_w, bus_axi_b) + + monitor_addr_req = XLSChannelMonitor(dut, GENERIC_ADDR_REQ_CHANNEL, dut.clk, WriteRequestStruct) + monitor_addr_resp = XLSChannelMonitor(dut, GENERIC_ADDR_RESP_CHANNEL, dut.clk, AxiWriterRespStruct) + monitor_axi_aw = AxiAWMonitor(bus_axi_aw, dut.clk, dut.rst) + monitor_axi_w = AxiWMonitor(bus_axi_w, dut.clk, dut.rst) + monitor_axi_b = AxiBMonitor(bus_axi_b, dut.clk, dut.rst) + + set_termination_event(monitor_addr_resp, terminate, test_count) + + memory = AxiRamWrite(bus_axi_write, dut.clk, dut.rst, size=mem_size) + + log = logging.getLogger("cocotb.tb") + log.setLevel(logging.WARNING) + memory.log.setLevel(logging.WARNING) + driver_axi_st.log.setLevel(logging.WARNING) + + scoreboard = Scoreboard(dut) + scoreboard.add_interface(monitor_addr_resp, addr_resp_expect) + + await reset(dut.clk, dut.rst, cycles=10) + await cocotb.start(driver_addr_req.send(addr_req_input)) + await cocotb.start(drive_axi_st(driver_axi_st, 
axi_st_input)) + await terminate.wait() + + for bundle in memory_verification: + memory_contents = bytearray(memory.read(bundle["base_address"], bundle["length"])) + expected_memory_contents = bytearray(expected_memory.read(bundle["base_address"], bundle["length"])) + assert memory_contents == expected_memory_contents, "{} bytes of memory contents at base address {}:\n{}\nvs\n{}\nHEXDUMP:\n{}\nvs\n{}".format(hex(bundle["length"]), hex(bundle["base_address"]), memory_contents, expected_memory_contents, memory.hexdump(bundle["base_address"], bundle["length"]), expected_memory.hexdump(bundle["base_address"], bundle["length"])) + +@cocotb.coroutine +async def drive_axi_st(driver, inputs): + for axi_st_input in inputs: + await driver.send(axi_st_input) + +def generate_test_data_random(test_count, mem_size): + AXI_AXSIZE_ENCODING_MAX_4B_TRANSFER = 2 # Must be in sync with AXI_AXSIZE_ENCODING enum in axi.x + + addr_req_input = [] + axi_st_input = [] + addr_resp_expect = [] + memory_verification = [] + memory = SparseMemory(mem_size) + + random.seed(1234) + + for i in range(test_count): + xfer_addr = random.randrange(0, mem_size) + # Don't allow unaligned writes + xfer_addr_aligned = (xfer_addr // 4) * 4 + # Make sure we don't write beyond available memory + memory_size_max_xfer_len = mem_size - xfer_addr_aligned + arbitrary_max_xfer_len = 0x5000 # 20kB + xfer_max_len = min(arbitrary_max_xfer_len, memory_size_max_xfer_len) + xfer_len = random.randrange(1, xfer_max_len) + transfer_req = WriteRequestStruct( + address = xfer_addr_aligned, + length = xfer_len, + ) + addr_req_input.append(transfer_req) + + data_to_write = random.randbytes(xfer_len) + axi_st_frame = AxiStreamFrame(tdata=data_to_write, tkeep=[15]*xfer_len, tid=(i % (1 << ID_WIDTH)), tdest=(i % (1 << ID_WIDTH))) + axi_st_input.append(axi_st_frame) + + write_expected_memory(transfer_req, axi_st_frame.tdata, memory) + + memory_bundle = { + "base_address": transfer_req.address, + "length": transfer_req.length, + } + memory_verification.append(memory_bundle) + + addr_resp_expect = [AxiWriterRespStruct(status=False)] * test_count + + return (addr_req_input, axi_st_input, addr_resp_expect, memory_verification, memory) + +def bytes_to_4k_boundary(addr): + AXI_4K_BOUNDARY = 0x1000 + return AXI_4K_BOUNDARY - (addr % AXI_4K_BOUNDARY) + +def write_expected_memory(transfer_req, data_to_write, memory): + """ + Write test data to reference memory keeping the AXI 4kb boundary + by spliting the write requests into smaller ones. 
+ """ + prev_id = 0 + address = transfer_req.address + length = transfer_req.length + + BYTES_IN_TRANSFER = 4 + MAX_AXI_BURST_BYTES = 256 * BYTES_IN_TRANSFER + + while (length > 0): + bytes_to_4k = bytes_to_4k_boundary(address) + new_len = min(length, min(bytes_to_4k, MAX_AXI_BURST_BYTES)) + new_data = data_to_write[prev_id:prev_id+new_len] + memory.write(address, new_data) + address = address + new_len + length = length - new_len + prev_id = prev_id + new_len + +def generate_test_data_arbitrary(mem_size): + AXI_AXSIZE_ENCODING_MAX_4B_TRANSFER = 2 # Must be in sync with AXI_AXSIZE_ENCODING enum in axi.x + + addr_req_input = [] + axi_st_input = [] + addr_resp_expect = [] + memory_verification = [] + memory = SparseMemory(mem_size) + + xfer_addr_begin = [0, 8, 512, 1000, 0x1234, 256] + xfer_len = [1, 2, 4, 8, 0x48d, 4] + assert len(xfer_len) == len(xfer_addr_begin) + testcase_num = len(xfer_addr_begin) # test cases to execute + for i in range(testcase_num): + transfer_req = WriteRequestStruct( + address = xfer_addr_begin[i], + length = xfer_len[i] * 4, # xfer_len[i] transfers per 4 bytes + ) + addr_req_input.append(transfer_req) + + data_chunks = [] + data_bytes = [[(0xEF + j) & 0xFF, 0xBE, 0xAD, 0xDE] for j in range(xfer_len[i])] + data_words = [int.from_bytes(data_bytes[j]) for j in range(xfer_len[i])] + for j in range(xfer_len[i]): + data_chunks += data_bytes[j] + data_to_write = bytearray(data_chunks) + axi_st_frame = AxiStreamFrame(tdata=data_to_write, tkeep=[15]*xfer_len[i], tid=i, tdest=i) + axi_st_input.append(axi_st_frame) + + write_expected_memory(transfer_req, axi_st_frame.tdata, memory) + + memory_bundle = { + "base_address": transfer_req.address, + "length": transfer_req.length, # 4 byte words + } + memory_verification.append(memory_bundle) + + addr_resp_expect = [AxiWriterRespStruct(status=False)] * testcase_num + + return (addr_req_input, axi_st_input, addr_resp_expect, memory_verification, memory) + +if __name__ == "__main__": + toplevel = "axi_writer_wrapper" + verilog_sources = [ + "xls/modules/zstd/memory/axi_writer.v", + "xls/modules/zstd/memory/axi_writer_wrapper.v", + ] + test_module=[Path(__file__).stem] + run_test(toplevel, test_module, verilog_sources) diff --git a/xls/modules/zstd/memory/axi_writer_wrapper.v b/xls/modules/zstd/memory/axi_writer_wrapper.v new file mode 100644 index 0000000000..556f839284 --- /dev/null +++ b/xls/modules/zstd/memory/axi_writer_wrapper.v @@ -0,0 +1,119 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
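+
+// Overview (editorial note): this wrapper maps the flattened channel ports of
+// the XLS-generated axi_writer module onto conventional AXI4 write-channel
+// (AW/W/B) and AXI-Stream signal names, so that the cocotbext-axi bus models
+// used in axi_writer_cocotb_test.py can be attached directly.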
+ +`default_nettype none + +module axi_writer_wrapper ( + input wire clk, + input wire rst, + + output wire write_resp_data, + output wire write_resp_vld, + input wire write_resp_rdy, + + input wire [31:0] write_req_data, + input wire write_req_vld, + output wire write_req_rdy, + + input wire [31:0] axi_st_read_tdata, + input wire [3:0] axi_st_read_tstr, + input wire [3:0] axi_st_read_tkeep, + input wire [0:0] axi_st_read_tlast, + input wire [3:0] axi_st_read_tid, + input wire [3:0] axi_st_read_tdest, + input wire axi_st_read_tvalid, + output wire axi_st_read_tready, + + output wire [3:0] axi_aw_awid, + output wire [15:0] axi_aw_awaddr, + output wire [2:0] axi_aw_awsize, + output wire [7:0] axi_aw_awlen, + output wire [1:0] axi_aw_awburst, + output wire axi_aw_awvalid, + input wire axi_aw_awready, + + output wire [31:0] axi_w_wdata, + output wire [3:0] axi_w_wstrb, + output wire [0:0] axi_w_wlast, + output wire axi_w_wvalid, + input wire axi_w_wready, + + input wire [2:0] axi_b_bresp, + input wire [3:0] axi_b_bid, + input wire axi_b_bvalid, + output wire axi_b_bready + +); + + wire [32:0] axi_writer__ch_axi_aw_data; + wire [36:0] axi_writer__ch_axi_w_data; + wire [ 6:0] axi_writer__ch_axi_b_data; + + wire [15:0] write_req_data_address; + wire [15:0] write_req_data_length; + + wire [48:0] axi_st_read_data; + + assign {write_req_data_address, write_req_data_length} = write_req_data; + + assign { axi_aw_awid, + axi_aw_awaddr, + axi_aw_awsize, + axi_aw_awlen, + axi_aw_awburst } = axi_writer__ch_axi_aw_data; + + assign {axi_w_wdata, axi_w_wstrb, axi_w_wlast} = axi_writer__ch_axi_w_data; + + assign axi_writer__ch_axi_b_data = {axi_b_bresp, axi_b_bid}; + + assign axi_st_read_data = { + axi_st_read_tdata, + axi_st_read_tstr, + axi_st_read_tkeep, + axi_st_read_tlast, + axi_st_read_tid, + axi_st_read_tdest + }; + + axi_writer axi_writer ( + .clk(clk), + .rst(rst), + + .axi_writer__ch_write_req_data(write_req_data), + .axi_writer__ch_write_req_rdy (write_req_rdy), + .axi_writer__ch_write_req_vld (write_req_vld), + + .axi_writer__ch_write_resp_rdy (write_resp_rdy), + .axi_writer__ch_write_resp_vld (write_resp_vld), + .axi_writer__ch_write_resp_data(write_resp_data), + + .axi_writer__ch_axi_aw_data(axi_writer__ch_axi_aw_data), + .axi_writer__ch_axi_aw_rdy (axi_aw_awready), + .axi_writer__ch_axi_aw_vld (axi_aw_awvalid), + + .axi_writer__ch_axi_w_data(axi_writer__ch_axi_w_data), + .axi_writer__ch_axi_w_rdy (axi_w_wready), + .axi_writer__ch_axi_w_vld (axi_w_wvalid), + + .axi_writer__ch_axi_b_data(axi_writer__ch_axi_b_data), + .axi_writer__ch_axi_b_rdy (axi_b_bready), + .axi_writer__ch_axi_b_vld (axi_b_bvalid), + + .axi_writer__ch_axi_st_read_data(axi_st_read_data), + .axi_writer__ch_axi_st_read_rdy (axi_st_read_tready), + .axi_writer__ch_axi_st_read_vld (axi_st_read_tvalid) + ); + + +endmodule : axi_writer_wrapper diff --git a/xls/modules/zstd/memory/mem_reader.x b/xls/modules/zstd/memory/mem_reader.x index ea96264728..7360e2b03b 100644 --- a/xls/modules/zstd/memory/mem_reader.x +++ b/xls/modules/zstd/memory/mem_reader.x @@ -583,7 +583,8 @@ proc MemReaderTest { let tok = send(tok, axi_r_s, AxiR { id: AxiId:0x0, - data: AxiData:0x1122_3344_5566_7788_9900_AABB_CCDD_EEFF, + data: AxiData:0x1122_3344_5566_7788_9900_AABB_CCDD_EE55, + // Addresses: ^ 0xFFF ^ 0xFF0 resp: AxiResp::OKAY, last: AxiLast:true }); @@ -603,7 +604,8 @@ proc MemReaderTest { let tok = send(tok, axi_r_s, AxiR { id: AxiId:0x0, - data: AxiData:0x1122_3344_5566_7788_9900_AABB_CCDD_EEFF, + data: 
AxiData:0x5522_3344_5566_7788_9900_AABB_CCDD_EEFF, + // Addresses: ^ 0x100F ^ 0x1000 resp: AxiResp::OKAY, last: AxiLast:true }); @@ -611,7 +613,8 @@ proc MemReaderTest { let (tok, resp) = recv(tok, resp_r); assert_eq(resp, Resp { status: Status::OKAY, - data: Data:0x11FF, + data: Data:0xFF11, + // 0x1000 ^ ^ 0x0FFF length: Length:2, last: true }); diff --git a/xls/modules/zstd/memory/mem_reader_cocotb_test.py b/xls/modules/zstd/memory/mem_reader_cocotb_test.py new file mode 100644 index 0000000000..65f683c0b3 --- /dev/null +++ b/xls/modules/zstd/memory/mem_reader_cocotb_test.py @@ -0,0 +1,272 @@ +#!/usr/bin/env python +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import random +import sys +import warnings +from pathlib import Path + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import ClockCycles, Event +from cocotb_bus.scoreboard import Scoreboard +from cocotbext.axi.axi_channels import AxiARBus, AxiRBus, AxiReadBus, AxiRTransaction, AxiRSource, AxiRSink, AxiRMonitor +from cocotbext.axi.axi_ram import AxiRamRead +from cocotbext.axi.sparse_memory import SparseMemory + +from xls.modules.zstd.cocotb.channel import ( + XLSChannel, + XLSChannelDriver, + XLSChannelMonitor, +) +from xls.modules.zstd.cocotb.utils import reset, run_test +from xls.modules.zstd.cocotb.xlsstruct import XLSStruct, xls_dataclass + +# to disable warnings from hexdiff used by cocotb's Scoreboard +warnings.filterwarnings("ignore", category=DeprecationWarning) + +DSLX_DATA_W = 64 +DSLX_ADDR_W = 16 + +AXI_DATA_W = 128 +AXI_ADDR_W = 16 + +LAST_W = 1 +STATUS_W = 1 +ERROR_W = 1 +ID_W = 4 +DEST_W = 4 + +# AXI +AXI_AR_PREFIX = "axi_ar" +AXI_R_PREFIX = "axi_r" + +# MemReader +MEM_READER_REQ_CHANNEL = "req" +MEM_READER_RESP_CHANNEL = "resp" + +# Override default widths of AXI response signals +signal_widths = {"rresp": 3, "rlast": 1} +AxiRBus._signal_widths = signal_widths +AxiRTransaction._signal_widths = signal_widths +AxiRSource._signal_widths = signal_widths +AxiRSink._signal_widths = signal_widths +AxiRMonitor._signal_widths = signal_widths + +@xls_dataclass +class MemReaderReq(XLSStruct): + addr: DSLX_ADDR_W + length: DSLX_ADDR_W + + +@xls_dataclass +class MemReaderResp(XLSStruct): + status: STATUS_W + data: DSLX_DATA_W + length: DSLX_ADDR_W + last: LAST_W + + +@xls_dataclass +class AxiReaderReq(XLSStruct): + addr: AXI_ADDR_W + len: AXI_ADDR_W + + +@xls_dataclass +class AxiStream(XLSStruct): + data: AXI_DATA_W + str: AXI_DATA_W // 8 + keep: AXI_DATA_W // 8 = 0 + last: LAST_W = 0 + id: ID_W = 0 + dest: DEST_W = 0 + + +@xls_dataclass +class AxiReaderError(XLSStruct): + error: ERROR_W + + +@xls_dataclass +class AxiAr(XLSStruct): + id: ID_W + addr: AXI_ADDR_W + region: 4 + len: 8 + size: 3 + burst: 2 + cache: 4 + prot: 3 + qos: 4 + + +@xls_dataclass +class AxiR(XLSStruct): + id: ID_W + data: AXI_DATA_W + resp: 3 + last: 1 + + +def print_callback(name: str = "monitor"): + def _print_callback(transaction): + print(f" [{name}]: {transaction}") + + return _print_callback + 
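+
+# Example use (mirrors the call further down in this file): attach the callback
+# to a channel monitor so every observed transaction is printed, e.g.
+#   XLSChannelMonitor(dut, MEM_READER_RESP_CHANNEL, dut.clk, MemReaderResp,
+#                     callback=print_callback())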
+ +def set_termination_event(monitor, event, transactions): + def terminate_cb(_): + if monitor.stats.received_transactions == transactions: + print("all transactions received") + event.set() + + monitor.add_callback(terminate_cb) + + +def generate_test_data(test_cases, xfer_base=0x0, seed=1234): + random.seed(seed) + mem_size = 2**AXI_ADDR_W + data_w_div8 = DSLX_DATA_W // 8 + + assert xfer_base < mem_size, "Base address outside the memory span" + + req = [] + resp = [] + mem_writes = {} + + for xfer_offset, xfer_length in test_cases: + xfer_addr = xfer_base + xfer_offset + xfer_max_addr = xfer_addr + xfer_length + + if xfer_length == 0: + req += [MemReaderReq(addr=xfer_addr, length=0)] + resp += [MemReaderResp(status=0, data=0, length=0, last=1)] + + assert xfer_max_addr < mem_size, "Max address outside the memory span" + req += [MemReaderReq(addr=xfer_addr, length=xfer_length)] + + rem = xfer_length % data_w_div8 + for addr in range(xfer_addr, xfer_max_addr - (data_w_div8 - 1), data_w_div8): + last = ((addr + data_w_div8) >= xfer_max_addr) & (rem == 0) + data = random.randint(0, 1 << (data_w_div8 * 8)) + mem_writes.update({addr: data}) + resp += [MemReaderResp(status=0, data=data, length=data_w_div8, last=last)] + + if rem > 0: + addr = xfer_max_addr - rem + mask = (1 << (rem * 8)) - 1 + data = random.randint(0, 1 << (data_w_div8 * 8)) + mem_writes.update({addr: data}) + resp += [MemReaderResp(status=0, data=data & mask, length=rem, last=1)] + + return (req, resp, mem_writes) + + +async def test_mem_reader(dut, req_input, resp_output, mem_contents={}): + clock = Clock(dut.clk, 10, units="us") + cocotb.start_soon(clock.start()) + + mem_reader_resp_bus = XLSChannel( + dut, MEM_READER_RESP_CHANNEL, dut.clk, start_now=True + ) + mem_reader_req_driver = XLSChannelDriver(dut, MEM_READER_REQ_CHANNEL, dut.clk) + mem_reader_resp_monitor = XLSChannelMonitor( + dut, MEM_READER_RESP_CHANNEL, dut.clk, MemReaderResp, callback=print_callback() + ) + + terminate = Event() + set_termination_event(mem_reader_resp_monitor, terminate, len(resp_output)) + + scoreboard = Scoreboard(dut) + scoreboard.add_interface(mem_reader_resp_monitor, resp_output) + + ar_bus = AxiARBus.from_prefix(dut, AXI_AR_PREFIX) + r_bus = AxiRBus.from_prefix(dut, AXI_R_PREFIX) + axi_read_bus = AxiReadBus(ar=ar_bus, r=r_bus) + + mem_size = 2**AXI_ADDR_W + sparse_mem = SparseMemory(mem_size) + for addr, data in mem_contents.items(): + sparse_mem.write(addr, (data).to_bytes(8, "little")) + + memory = AxiRamRead(axi_read_bus, dut.clk, dut.rst, size=mem_size, mem=sparse_mem) + + await reset(dut.clk, dut.rst, cycles=10) + await mem_reader_req_driver.send(req_input) + await terminate.wait() + + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def mem_reader_zero_length_req(dut): + req, resp, _ = generate_test_data( + xfer_base=0xFFF, test_cases=[(0x101, 0)] + ) + await test_mem_reader(dut, req, resp) + + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def mem_reader_aligned_transfer_shorter_than_bus(dut): + req, resp, mem_contents = generate_test_data( + xfer_base=0xFFF, test_cases=[(0x101, 1)] + ) + await test_mem_reader(dut, req, resp, mem_contents) + + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def mem_reader_aligned_transfer_shorter_than_bus1(dut): + req, resp, mem_contents = generate_test_data( + xfer_base=0xFFF, test_cases=[(0x2, 1)] + ) + await test_mem_reader(dut, req, resp, mem_contents) + + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def 
mem_reader_aligned_transfer_shorter_than_bus2(dut): + req, resp, mem_contents = generate_test_data( + xfer_base=0xFFF, test_cases=[(0x2, 17)] + ) + await test_mem_reader(dut, req, resp, mem_contents) + + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def mem_reader_aligned_transfer_shorter_than_bus3(dut): + req, resp, mem_contents = generate_test_data( + xfer_base=0xFFF, test_cases=[(0x0, 0x1000)] + ) + await test_mem_reader(dut, req, resp, mem_contents) + + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def mem_reader_aligned_transfer_shorter_than_bus4(dut): + req, resp, mem_contents = generate_test_data( + xfer_base=0x1, test_cases=[(0x0, 0xFFF), (0x1000, 0x1)] + ) + await test_mem_reader(dut, req, resp, mem_contents) + + +if __name__ == "__main__": + sys.path.append(str(Path(__file__).parent)) + + toplevel = "mem_reader_wrapper" + verilog_sources = [ + "xls/modules/zstd/xls_fifo_wrapper.v", + "xls/modules/zstd/memory/mem_reader_adv.v", + "xls/modules/zstd/memory/mem_reader_wrapper.v", + ] + test_module = [Path(__file__).stem] + run_test(toplevel, test_module, verilog_sources) diff --git a/xls/modules/zstd/memory/mem_reader_wrapper.v b/xls/modules/zstd/memory/mem_reader_wrapper.v new file mode 100644 index 0000000000..3601bcbb0e --- /dev/null +++ b/xls/modules/zstd/memory/mem_reader_wrapper.v @@ -0,0 +1,111 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
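+
+// Overview (editorial note): this wrapper re-packs the flattened request,
+// response, and AXI channel vectors of the XLS-generated mem_reader_adv module
+// into individual AXI4 AR/R signals, so the cocotb testbench in
+// mem_reader_cocotb_test.py can attach cocotbext-axi's AxiRamRead model
+// directly.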
+ +`default_nettype none + +module mem_reader_wrapper #( + parameter DSLX_DATA_W = 64, + parameter DSLX_ADDR_W = 16, + parameter AXI_DATA_W = 128, + parameter AXI_ADDR_W = 16, + parameter AXI_DEST_W = 8, + parameter AXI_ID_W = 8, + + parameter CTRL_W = (DSLX_ADDR_W), + parameter REQ_W = (2 * DSLX_ADDR_W), + parameter RESP_W = (1 + DSLX_DATA_W + DSLX_ADDR_W + 1), + parameter AXI_AR_W = (AXI_ID_W + AXI_ADDR_W + 28), + parameter AXI_R_W = (AXI_ID_W + AXI_DATA_W + 4) +) ( + input wire clk, + input wire rst, + + output wire req_rdy, + input wire req_vld, + input wire [REQ_W-1:0] req_data, + + output wire resp_vld, + input wire resp_rdy, + output wire [RESP_W-1:0] resp_data, + + output wire axi_ar_arvalid, + input wire axi_ar_arready, + output wire [ AXI_ID_W-1:0] axi_ar_arid, + output wire [AXI_ADDR_W-1:0] axi_ar_araddr, + output wire [ 3:0] axi_ar_arregion, + output wire [ 7:0] axi_ar_arlen, + output wire [ 2:0] axi_ar_arsize, + output wire [ 1:0] axi_ar_arburst, + output wire [ 3:0] axi_ar_arcache, + output wire [ 2:0] axi_ar_arprot, + output wire [ 3:0] axi_ar_arqos, + + input wire axi_r_rvalid, + output wire axi_r_rready, + input wire [ AXI_ID_W-1:0] axi_r_rid, + input wire [AXI_DATA_W-1:0] axi_r_rdata, + input wire [ 2:0] axi_r_rresp, + input wire axi_r_rlast +); + + wire [AXI_AR_W-1:0] axi_ar_data; + wire axi_ar_rdy; + wire axi_ar_vld; + + assign axi_ar_rdy = axi_ar_arready; + + assign axi_ar_arvalid = axi_ar_vld; + assign { + axi_ar_arid, + axi_ar_araddr, + axi_ar_arregion, + axi_ar_arlen, + axi_ar_arsize, + axi_ar_arburst, + axi_ar_arcache, + axi_ar_arprot, + axi_ar_arqos +} = axi_ar_data; + + wire [AXI_R_W-1:0] axi_r_data; + wire axi_r_vld; + wire axi_r_rdy; + + assign axi_r_data = {axi_r_rid, axi_r_rdata, axi_r_rresp, axi_r_rlast}; + assign axi_r_vld = axi_r_rvalid; + + assign axi_r_rready = axi_r_rdy; + + mem_reader_adv mem_reader_adv ( + .clk(clk), + .rst(rst), + + .mem_reader__req_r_data(req_data), + .mem_reader__req_r_rdy (req_rdy), + .mem_reader__req_r_vld (req_vld), + + .mem_reader__resp_s_data(resp_data), + .mem_reader__resp_s_rdy (resp_rdy), + .mem_reader__resp_s_vld (resp_vld), + + .mem_reader__axi_ar_s_data(axi_ar_data), + .mem_reader__axi_ar_s_rdy (axi_ar_rdy), + .mem_reader__axi_ar_s_vld (axi_ar_vld), + + .mem_reader__axi_r_r_data(axi_r_data), + .mem_reader__axi_r_r_vld (axi_r_vld), + .mem_reader__axi_r_r_rdy (axi_r_rdy) + ); + +endmodule diff --git a/xls/modules/zstd/memory/mem_writer.x b/xls/modules/zstd/memory/mem_writer.x index 277c9910ef..f49d147785 100644 --- a/xls/modules/zstd/memory/mem_writer.x +++ b/xls/modules/zstd/memory/mem_writer.x @@ -35,6 +35,7 @@ import xls.modules.zstd.memory.axi; import xls.modules.zstd.memory.axi_st; import xls.modules.zstd.memory.common; import xls.modules.zstd.memory.axi_writer; +import xls.modules.zstd.memory.axi_stream_remove_empty; import xls.modules.zstd.memory.axi_stream_add_empty; pub struct MemWriterReq { @@ -42,6 +43,9 @@ pub struct MemWriterReq { length: uN[ADDR_W], } +pub type MemWriterResp = axi_writer::AxiWriterResp; +pub type MemWriterRespStatus = axi_writer::AxiWriterRespStatus; + pub struct MemWriterDataPacket { data: uN[DATA_W], length: uN[ADDR_W], // Expressed in bytes @@ -67,20 +71,15 @@ struct MemWriterState< axi_writer_req: axi_writer::AxiWriterRequest, } -proc MemWriter< +proc MemWriterInternal< ADDR_W: u32, DATA_W: u32, DEST_W: u32, ID_W: u32, WRITER_ID: u32, - DATA_W_DIV8: u32 = {DATA_W / u32:8}, - DATA_W_LOG2: u32 = {std::clog2(DATA_W / u32:8)} + DATA_W_DIV8: u32 = {DATA_W / u32:8} > { type Req = MemWriterReq; 
type Data = MemWriterDataPacket; type AxiWriterReq = axi_writer::AxiWriterRequest; - type AxiWriterResp = axi_writer::AxiWriterResp; type PaddingReq = axi_writer::AxiWriterRequest; type AxiStream = axi_st::AxiStream; - type AxiAW = axi::AxiAw; - type AxiW = axi::AxiW; - type AxiB = axi::AxiB; type State = MemWriterState; type Fsm = MemWriterFsm; @@ -95,29 +94,16 @@ proc MemWriter< axi_writer_req_s: chan out; padding_req_s: chan out; axi_st_raw_s: chan out; - resp_s: chan out; config( req_in_r: chan in, data_in_r: chan in, - axi_aw_s: chan out, - axi_w_s: chan out, - axi_b_r: chan in, - resp_s: chan out, + axi_writer_req_s: chan out, + padding_req_s: chan out, + axi_st_raw_s: chan out, ) { - let (axi_writer_req_s, axi_writer_req_r) = chan("axi_writer_req"); - let (padding_req_s, padding_req_r) = chan("padding_req"); - let (axi_st_raw_s, axi_st_raw_r) = chan("axi_st_raw"); - let (axi_st_padded_s, axi_st_padded_r) = chan("axi_st_padded"); - - spawn axi_stream_add_empty::AxiStreamAddEmpty< - DATA_W, DEST_W, ID_W, ADDR_W - >(padding_req_r, axi_st_raw_r, axi_st_padded_s); - spawn axi_writer::AxiWriter< - ADDR_W, DATA_W, DEST_W, ID_W - >(axi_writer_req_r, resp_s, axi_aw_s, axi_w_s, axi_b_r, axi_st_padded_r); - (req_in_r, data_in_r, axi_writer_req_s, padding_req_s, axi_st_raw_s, resp_s) + (req_in_r, data_in_r, axi_writer_req_s, padding_req_s, axi_st_raw_s) } init { zero!() } @@ -147,7 +133,7 @@ proc MemWriter< } }, Fsm::SEND_DATA => { - let next_req_len = state.req_len - sLength:4; + let next_req_len = state.req_len - data_in.length as sLength; State { fsm: if (next_req_len <= sLength:0) {Fsm::RECV_REQ} else {Fsm::SEND_DATA}, req_len: next_req_len, @@ -162,7 +148,7 @@ proc MemWriter< let raw_axi_st_frame = match(state.fsm) { Fsm::SEND_DATA => { - let next_req_len = state.req_len - sLength:4; + let next_req_len = next_state.req_len; let str_keep = ((Length:1 << data_in.length) - Length:1) as Strobe; AxiStream { data: data_in.data, @@ -189,9 +175,90 @@ const INST_DATA_W = u32:32; const INST_DATA_W_DIV8 = INST_DATA_W / u32:8; const INST_DEST_W = INST_DATA_W / u32:8; const INST_ID_W = INST_DATA_W / u32:8; -const INST_DATA_W_LOG2 = u32:6; const INST_WRITER_ID = u32:2; +proc MemWriterInternalInst { + type Req = MemWriterReq; + type Data = MemWriterDataPacket; + type AxiWriterReq = axi_writer::AxiWriterRequest; + type PaddingReq = axi_writer::AxiWriterRequest; + type AxiStream = axi_st::AxiStream; + + config( + req_in_r: chan in, + data_in_r: chan in, + axi_writer_req_s: chan out, + padding_req_s: chan out, + axi_st_raw_s: chan out, + ) { + + spawn MemWriterInternal< + INST_ADDR_W, INST_DATA_W, INST_DEST_W, INST_ID_W, INST_WRITER_ID + >(req_in_r, data_in_r, axi_writer_req_s, padding_req_s, axi_st_raw_s); + () + } + + init {} + + next(state: ()) {} +} + +pub proc MemWriter< + ADDR_W: u32, DATA_W: u32, DEST_W: u32, ID_W: u32, WRITER_ID: u32, + DATA_W_DIV8: u32 = {DATA_W / u32:8} +> { + type Req = MemWriterReq; + type Data = MemWriterDataPacket; + type AxiWriterReq = axi_writer::AxiWriterRequest; + type PaddingReq = axi_writer::AxiWriterRequest; + type AxiStream = axi_st::AxiStream; + type AxiAW = axi::AxiAw; + type AxiW = axi::AxiW; + type AxiB = axi::AxiB; + type State = MemWriterState; + type Fsm = MemWriterFsm; + + type Length = uN[ADDR_W]; + type sLength = sN[ADDR_W]; + type Strobe = uN[DATA_W_DIV8]; + type Id = uN[ID_W]; + type Dest = uN[DEST_W]; + + config( + req_in_r: chan in, + data_in_r: chan in, + axi_aw_s: chan out, + axi_w_s: chan out, + axi_b_r: chan in, + resp_s: chan out, + ) { + let 
(axi_writer_req_s, axi_writer_req_r) = chan("axi_writer_req"); + let (padding_req_s, padding_req_r) = chan("padding_req"); + let (axi_st_raw_s, axi_st_raw_r) = chan("axi_st_raw"); + let (axi_st_clean_s, axi_st_clean_r) = chan("axi_st_clean"); + let (axi_st_padded_s, axi_st_padded_r) = chan("axi_st_padded"); + + spawn MemWriterInternal< + ADDR_W, DATA_W, DEST_W, ID_W, WRITER_ID + >(req_in_r, data_in_r, axi_writer_req_s, padding_req_s, axi_st_raw_s); + spawn axi_stream_remove_empty::AxiStreamRemoveEmpty< + DATA_W, DEST_W, ID_W + >(axi_st_raw_r, axi_st_clean_s); + spawn axi_stream_add_empty::AxiStreamAddEmpty< + DATA_W, DEST_W, ID_W, ADDR_W + >(padding_req_r, axi_st_clean_r, axi_st_padded_s); + spawn axi_writer::AxiWriter< + ADDR_W, DATA_W, DEST_W, ID_W + >(axi_writer_req_r, resp_s, axi_aw_s, axi_w_s, axi_b_r, axi_st_padded_r); + + () + } + + init {} + + next(state: ()) {} +} + proc MemWriterInst { type InstReq = MemWriterReq; type InstData = MemWriterDataPacket; @@ -199,7 +266,7 @@ proc MemWriterInst { type InstAxiAW = axi::AxiAw; type InstAxiW = axi::AxiW; type InstAxiB = axi::AxiB; - type InstAxiWriterResp = axi_writer::AxiWriterResp; + type InstMemWriterResp = MemWriterResp; config( req_in_r: chan in, @@ -207,7 +274,7 @@ proc MemWriterInst { axi_aw_s: chan out, axi_w_s: chan out, axi_b_r: chan in, - resp_s: chan out + resp_s: chan out ) { spawn MemWriter< INST_ADDR_W, INST_DATA_W, INST_DEST_W, INST_ID_W, INST_WRITER_ID @@ -225,13 +292,12 @@ const TEST_DATA_W = u32:32; const TEST_DATA_W_DIV8 = TEST_DATA_W / u32:8; const TEST_DEST_W = TEST_DATA_W / u32:8; const TEST_ID_W = TEST_DATA_W / u32:8; -const TEST_DATA_W_LOG2 = u32:6; const TEST_WRITER_ID = u32:2; type TestReq = MemWriterReq; type TestData = MemWriterDataPacket; -type TestAxiWriterResp = axi_writer::AxiWriterResp; -type TestAxiWriterRespStatus = axi_writer::AxiWriterRespStatus; +type TestMemWriterResp = MemWriterResp; +type TestMemWriterRespStatus = MemWriterRespStatus; type TestAxiStream = axi_st::AxiStream; type TestAxiAW = axi::AxiAw; type TestAxiW = axi::AxiW; @@ -255,7 +321,7 @@ proc MemWriterTest { axi_aw_r: chan in; axi_w_r: chan in; axi_b_s: chan out; - resp_r: chan in; + resp_r: chan in; config( terminator: chan out, @@ -265,7 +331,7 @@ proc MemWriterTest { let (axi_aw_s, axi_aw_r) = chan("axi_aw"); let (axi_w_s, axi_w_r) = chan("axi_w"); let (axi_b_s, axi_b_r) = chan("axi_b"); - let (resp_s, resp_r) = chan("resp"); + let (resp_s, resp_r) = chan("resp"); spawn MemWriter< TEST_ADDR_W, TEST_DATA_W, TEST_DEST_W, TEST_ID_W, TEST_WRITER_ID >(req_in_r, data_in_r, axi_aw_s, axi_w_s, axi_b_r, resp_s); @@ -306,7 +372,7 @@ proc MemWriterTest { id: TestId:1, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Unaligned single transfer let tok = send(tok, req_in_s, TestReq { @@ -337,7 +403,7 @@ proc MemWriterTest { id: TestId:2, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Unaligned single transfer let tok = send(tok, req_in_s, TestReq { @@ -368,7 +434,7 @@ proc MemWriterTest { id: TestId:3, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Unaligned single transfer let tok = send(tok, req_in_s, 
TestReq { @@ -399,7 +465,7 @@ proc MemWriterTest { id: TestId:4, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Unaligned single transfer let tok = send(tok, req_in_s, TestReq { @@ -430,7 +496,7 @@ proc MemWriterTest { id: TestId:5, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Unaligned 2 transfers let tok = send(tok, req_in_s, TestReq { @@ -467,7 +533,7 @@ proc MemWriterTest { id: TestId:6, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Unligned 3 transfers let tok = send(tok, req_in_s, TestReq { @@ -515,7 +581,7 @@ proc MemWriterTest { id: TestId:7, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Crossing AXI 4kB boundary, aligned 2 burst transfers let tok = send(tok, req_in_s, TestReq { @@ -569,7 +635,7 @@ proc MemWriterTest { id: TestId:9, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); // Crossing AXI 4kB boundary, unaligned 2 burst transfers let tok = send(tok, req_in_s, TestReq { @@ -629,7 +695,82 @@ proc MemWriterTest { id: TestId:11, }); let (tok, resp) = recv(tok, resp_r); - assert_eq(resp, TestAxiWriterResp{status: TestAxiWriterRespStatus::OKAY}); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); + + // Unligned 3 transfers + let tok = send(tok, req_in_s, TestReq { + addr: TestAddr:0x1f3, + length: TestLength:15 + }); + let tok = send(tok, data_in_s, TestData { + data: TestDataBits:0x11223344, + length: TestLength:4, + last: false, + }); + let tok = send(tok, data_in_s, TestData { + data: TestDataBits:0x00005566, + length: TestLength:2, + last: false, + }); + let tok = send(tok, data_in_s, TestData { + data: TestDataBits:0x778899aa, + length: TestLength:4, + last: false, + }); + let tok = send(tok, data_in_s, TestData { + data: TestDataBits:0x00bbccdd, + length: TestLength:3, + last: false, + }); + let tok = send(tok, data_in_s, TestData { + data: TestDataBits:0x0000eeff, + length: TestLength:2, + last: true, + }); + let (tok, aw) = recv(tok, axi_aw_r); + assert_eq(aw, TestAxiAW { + id: TestId:12, + addr: TestAddr:0x1f0, + size: TestAxiAxSize::MAX_4B_TRANSFER, + len: u8:4, + burst: TestAxiAxBurst::INCR, + }); + let (tok, w) = recv(tok, axi_w_r); + assert_eq(w, TestAxiW { + data: TestDataBits:0x44000000, + strb: TestStrobe:0x8, + last: false, + }); + let (tok, w) = recv(tok, axi_w_r); + assert_eq(w, TestAxiW { + data: TestDataBits:0x66112233, + strb: TestStrobe:0xF, + last: false, + }); + let (tok, w) = recv(tok, axi_w_r); + assert_eq(w, TestAxiW { + data: TestDataBits:0x8899aa55, + strb: TestStrobe:0xf, + last: false, + }); + let (tok, w) = recv(tok, axi_w_r); + assert_eq(w, TestAxiW { + data: TestDataBits:0xbbccdd77, + strb: TestStrobe:0xf, + last: false, + }); + let (tok, w) = recv(tok, axi_w_r); + assert_eq(w, TestAxiW { + data: TestDataBits:0x0000eeff, + strb: TestStrobe:0x3, + last: true, + }); + let tok = 
send(tok, axi_b_s, TestAxiB { + resp: TestAxiWriteResp::OKAY, + id: TestId:12, + }); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, TestMemWriterResp{status: TestMemWriterRespStatus::OKAY}); send(tok, terminator, true); } diff --git a/xls/modules/zstd/memory/mem_writer_cocotb_test.py b/xls/modules/zstd/memory/mem_writer_cocotb_test.py new file mode 100644 index 0000000000..bc7050a99d --- /dev/null +++ b/xls/modules/zstd/memory/mem_writer_cocotb_test.py @@ -0,0 +1,668 @@ +#!/usr/bin/env python +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import random +import logging +from enum import Enum +from pathlib import Path + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import ClockCycles, Event +from cocotb.binary import BinaryValue +from cocotb_bus.scoreboard import Scoreboard + +from cocotbext.axi.axis import AxiStreamSource, AxiStreamBus, AxiStreamFrame +from cocotbext.axi.axi_channels import AxiAWBus, AxiWBus, AxiBBus, AxiWriteBus, AxiAWMonitor, AxiWMonitor, AxiBMonitor, AxiBTransaction, AxiBSource, AxiBSink +from cocotbext.axi.axi_ram import AxiRamWrite +from cocotbext.axi.sparse_memory import SparseMemory + +from xls.modules.zstd.cocotb.channel import ( + XLSChannel, + XLSChannelDriver, + XLSChannelMonitor, +) +from xls.modules.zstd.cocotb.utils import reset, run_test +from xls.modules.zstd.cocotb.xlsstruct import XLSStruct, xls_dataclass + +DATA_WIDTH = 32 +ADDR_WIDTH = 16 + +# Override default widths of AXI response signals +signal_widths = {"bresp": 3} +AxiBBus._signal_widths = signal_widths +AxiBTransaction._signal_widths = signal_widths +AxiBSource._signal_widths = signal_widths +AxiBSink._signal_widths = signal_widths +AxiBMonitor._signal_widths = signal_widths + +@xls_dataclass +class DataInStruct(XLSStruct): + data: DATA_WIDTH + length: ADDR_WIDTH + last: 1 + +@xls_dataclass +class WriteReqStruct(XLSStruct): + offset: ADDR_WIDTH + length: ADDR_WIDTH + +@xls_dataclass +class MemWriterRespStruct(XLSStruct): + status: 1 + +class MemWriterRespStatus(Enum): + OKAY = 0 + ERROR = 1 + +@xls_dataclass +class WriteRequestStruct(XLSStruct): + address: ADDR_WIDTH + length: ADDR_WIDTH + +def set_termination_event(monitor, event, transactions): + def terminate_cb(_): + if monitor.stats.received_transactions == transactions: + event.set() + monitor.add_callback(terminate_cb) + +async def test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt): + GENERIC_WRITE_REQ_CHANNEL = "req" + GENERIC_WRITE_RESP_CHANNEL = "resp" + GENERIC_DATA_IN_CHANNEL = "data_in" + AXI_AW_CHANNEL = "axi_aw" + AXI_W_CHANNEL = "axi_w" + AXI_B_CHANNEL = "axi_b" + + terminate = Event() + + dut.rst.setimmediatevalue(0) + + clock = Clock(dut.clk, 10, units="us") + cocotb.start_soon(clock.start()) + + resp_bus = XLSChannel(dut, GENERIC_WRITE_RESP_CHANNEL, dut.clk, start_now=True) + + driver_write_req = XLSChannelDriver(dut, GENERIC_WRITE_REQ_CHANNEL, dut.clk) + driver_data_in = 
XLSChannelDriver(dut, GENERIC_DATA_IN_CHANNEL, dut.clk) + + bus_axi_aw = AxiAWBus.from_prefix(dut, AXI_AW_CHANNEL) + bus_axi_w = AxiWBus.from_prefix(dut, AXI_W_CHANNEL) + bus_axi_b = AxiBBus.from_prefix(dut, AXI_B_CHANNEL) + bus_axi_write = AxiWriteBus(bus_axi_aw, bus_axi_w, bus_axi_b) + + monitor_write_req = XLSChannelMonitor(dut, GENERIC_WRITE_REQ_CHANNEL, dut.clk, WriteRequestStruct) + monitor_data_in = XLSChannelMonitor(dut, GENERIC_DATA_IN_CHANNEL, dut.clk, WriteRequestStruct) + monitor_write_resp = XLSChannelMonitor(dut, GENERIC_WRITE_RESP_CHANNEL, dut.clk, MemWriterRespStruct) + monitor_axi_aw = AxiAWMonitor(bus_axi_aw, dut.clk, dut.rst) + monitor_axi_w = AxiWMonitor(bus_axi_w, dut.clk, dut.rst) + monitor_axi_b = AxiBMonitor(bus_axi_b, dut.clk, dut.rst) + + set_termination_event(monitor_write_resp, terminate, resp_cnt) + + memory = AxiRamWrite(bus_axi_write, dut.clk, dut.rst, size=mem_size) + + log = logging.getLogger("cocotb.tb") + log.setLevel(logging.WARNING) + memory.log.setLevel(logging.WARNING) + + scoreboard = Scoreboard(dut) + scoreboard.add_interface(monitor_write_resp, write_resp_expect) + + await reset(dut.clk, dut.rst, cycles=10) + await cocotb.start(driver_write_req.send(write_req_input)) + await cocotb.start(driver_data_in.send(data_in_input)) + + await terminate.wait() + + for bundle in memory_verification: + memory_contents = bytearray(memory.read(bundle["base_address"], bundle["length"])) + expected_memory_contents = bytearray(expected_memory.read(bundle["base_address"], bundle["length"])) + assert memory_contents == expected_memory_contents, "{} bytes of memory contents at base address {}:\n{}\nvs\n{}\nHEXDUMP:\n{}\nvs\n{}".format(hex(bundle["length"]), hex(bundle["base_address"]), memory_contents, expected_memory_contents, memory.hexdump(bundle["base_address"], bundle["length"]), expected_memory.hexdump(bundle["base_address"], bundle["length"])) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_single_burst_1_transfer(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_single_burst_1_transfer) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_single_burst_2_transfers(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_single_burst_2_transfers) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_single_burst_almost_max_burst_transfer(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_single_burst_almost_max_burst_transfer) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_single_burst_max_burst_transfer(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = 
generate_test_data_arbitrary(mem_size, test_cases_single_burst_max_burst_transfer) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_multiburst_2_full_bursts(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_multiburst_2_full_bursts) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_multiburst_1_full_burst_and_single_transfer(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_multiburst_1_full_burst_and_single_transfer) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_multiburst_crossing_4kb_boundary(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_multiburst_crossing_4kb_boundary) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_multiburst_crossing_4kb_boundary_with_perfectly_aligned_full_bursts(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_multiburst_crossing_4kb_boundary_with_perfectly_aligned_full_bursts) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=2000, timeout_unit="ms") +async def ram_test_multiburst_crossing_4kb_boundary_with_2_full_bursts_and_1_transfer(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_arbitrary(mem_size, test_cases_multiburst_crossing_4kb_boundary_with_2_full_bursts_and_1_transfer) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=5000, timeout_unit="ms") +async def ram_test_not_full_packets(dut): + mem_size = 2**ADDR_WIDTH + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_padded_test_data_arbitrary(mem_size, test_cases_not_full_packets) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +@cocotb.test(timeout_time=5000, timeout_unit="ms") +async def ram_test_random(dut): + mem_size = 2**ADDR_WIDTH + test_count = 50 + + (write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) = generate_test_data_random(test_count, mem_size) + await test_writer(dut, mem_size, write_req_input, data_in_input, write_resp_expect, memory_verification, expected_memory, resp_cnt) + +def 
generate_test_data_random(test_count, mem_size): + AXI_AXSIZE_ENCODING_MAX_4B_TRANSFER = 2 # Must be in sync with AXI_AXSIZE_ENCODING enum in axi.x + + write_req_input = [] + data_in_input = [] + write_resp_expect = [] + memory_verification = [] + memory = SparseMemory(mem_size) + + random.seed(1234) + + xfer_baseaddr = 0 + + for i in range(test_count): + # Generate offset from the absolute address + max_xfer_offset = mem_size - xfer_baseaddr + xfer_offset = random.randrange(0, max_xfer_offset) + xfer_addr = xfer_baseaddr + xfer_offset + # Make sure we don't write beyond available memory + memory_size_max_xfer_len = mem_size - xfer_addr + arbitrary_max_xfer_len = 0x5000 # 20kB + xfer_max_len = min(arbitrary_max_xfer_len, memory_size_max_xfer_len) + xfer_len = random.randrange(1, xfer_max_len) + + write_req = WriteReqStruct( + offset = xfer_offset, + length = xfer_len, + ) + write_req_input.append(write_req) + + data_to_write = random.randbytes(xfer_len) + rem = xfer_len % 4 + for j in list(range(0, xfer_len-3, 4)): + last = ((j + 4) >= xfer_len) & (rem == 0) + data_in = DataInStruct( + data = int.from_bytes(data_to_write[j:j+4], byteorder='little'), + length = 4, + last = last + ) + data_in_input.append(data_in) + if (rem > 0): + data_in = DataInStruct( + data = int.from_bytes(data_to_write[-rem:], byteorder='little'), + length = rem, + last = True + ) + data_in_input.append(data_in) + + + transfer_req = WriteRequestStruct( + address = xfer_addr, + length = xfer_len, + ) + write_expected_memory(transfer_req, data_to_write, memory) + + memory_bundle = { + "base_address": transfer_req.address, + "length": transfer_req.length, + } + memory_verification.append(memory_bundle) + + write_resp_expect = [MemWriterRespStruct(status=MemWriterRespStatus.OKAY.value)] * test_count + + return (write_req_input, data_in_input, write_resp_expect, memory_verification, memory, test_count) + +def bytes_to_4k_boundary(addr): + AXI_4K_BOUNDARY = 0x1000 + return AXI_4K_BOUNDARY - (addr % AXI_4K_BOUNDARY) + +def write_expected_memory(transfer_req, data_to_write, memory): + """ + Write test data to reference memory keeping the AXI 4kb boundary + by spliting the write requests into smaller ones. 
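+  Requests are split at AXI 4 KiB boundaries and capped at the maximum AXI
+  burst size (256 transfers of 4 bytes each), matching the constraints the
+  DUT has to obey on its AXI write channel.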
+ """ + prev_id = 0 + address = transfer_req.address + length = transfer_req.length + + BYTES_IN_TRANSFER = 4 + MAX_AXI_BURST_BYTES = 256 * BYTES_IN_TRANSFER + + while (length > 0): + bytes_to_4k = bytes_to_4k_boundary(address) + new_len = min(length, min(bytes_to_4k, MAX_AXI_BURST_BYTES)) + new_data = data_to_write[prev_id:prev_id+new_len] + memory.write(address, new_data) + address = address + new_len + length = length - new_len + prev_id = prev_id + new_len + +def generate_test_data_arbitrary(mem_size, test_cases): + AXI_AXSIZE_ENCODING_MAX_4B_TRANSFER = 2 # Must be in sync with AXI_AXSIZE_ENCODING enum in axi.x + test_count = len(test_cases) + + random.seed(1234) + + write_req_input = [] + data_in_input = [] + write_resp_expect = [] + memory_verification = [] + memory = SparseMemory(mem_size) + + xfer_baseaddr = 0x0 + assert xfer_baseaddr < mem_size + + max_xfer_offset = mem_size - xfer_baseaddr + + for xfer_offset, xfer_len in test_cases: + assert xfer_offset <= max_xfer_offset + xfer_addr = xfer_baseaddr + xfer_offset + # Make sure we don't write beyond available memory + memory_size_max_xfer_len = mem_size - xfer_addr + arbitrary_max_xfer_len = 0x5000 # 20kB + xfer_max_len = min(arbitrary_max_xfer_len, memory_size_max_xfer_len) + assert xfer_len <= xfer_max_len + + write_req = WriteReqStruct( + offset = xfer_offset, + length = xfer_len, + ) + write_req_input.append(write_req) + + data_to_write = random.randbytes(xfer_len) + rem = xfer_len % 4 + for j in list(range(0, xfer_len-3, 4)): + last = ((j + 4) >= xfer_len) & (rem == 0) + data_in = DataInStruct( + data = int.from_bytes(data_to_write[j:j+4], byteorder='little'), + length = 4, + last = last + ) + data_in_input.append(data_in) + if (rem > 0): + data_in = DataInStruct( + data = int.from_bytes(data_to_write[-rem:], byteorder='little'), + length = rem, + last = True + ) + data_in_input.append(data_in) + + + transfer_req = WriteRequestStruct( + address = xfer_addr, + length = xfer_len, + ) + write_expected_memory(transfer_req, data_to_write, memory) + + memory_bundle = { + "base_address": transfer_req.address, + "length": transfer_req.length, + } + memory_verification.append(memory_bundle) + + write_resp_expect = [MemWriterRespStruct(status=MemWriterRespStatus.OKAY.value)] * test_count + + return (write_req_input, data_in_input, write_resp_expect, memory_verification, memory, test_count) + +def generate_padded_test_data_arbitrary(mem_size, test_cases): + AXI_AXSIZE_ENCODING_MAX_4B_TRANSFER = 2 # Must be in sync with AXI_AXSIZE_ENCODING enum in axi.x + test_count = len(test_cases) + + random.seed(1234) + + write_req_input = [] + data_in_input = [] + write_resp_expect = [] + memory_verification = [] + memory = SparseMemory(mem_size) + + xfer_baseaddr = 0x0 + assert xfer_baseaddr < mem_size + + max_xfer_offset = mem_size - xfer_baseaddr + + for xfer_offset, xfer_len in test_cases: + assert xfer_offset <= max_xfer_offset + xfer_addr = xfer_baseaddr + xfer_offset + # Make sure we don't write beyond available memory + memory_size_max_xfer_len = mem_size - xfer_addr + arbitrary_max_xfer_len = 0x5000 # 20kB + xfer_max_len = min(arbitrary_max_xfer_len, memory_size_max_xfer_len) + assert xfer_len <= xfer_max_len + + write_req = WriteReqStruct( + offset = xfer_offset, + length = xfer_len, + ) + write_req_input.append(write_req) + + data_to_write = random.randbytes(xfer_len) + bytes_to_packetize = xfer_len + packetized_bytes = 0 + while(bytes_to_packetize): + packet_len = random.randint(1, 4) + + if (bytes_to_packetize < packet_len): + 
packet_len = bytes_to_packetize + + last = packet_len == bytes_to_packetize + + data_in = DataInStruct( + data = int.from_bytes(data_to_write[packetized_bytes:packetized_bytes+packet_len], byteorder='little'), + length = packet_len, + last = last + ) + data_in_input.append(data_in) + + bytes_to_packetize -= packet_len + packetized_bytes += packet_len + assert xfer_len == packetized_bytes + + + transfer_req = WriteRequestStruct( + address = xfer_addr, + length = xfer_len, + ) + write_expected_memory(transfer_req, data_to_write, memory) + + memory_bundle = { + "base_address": transfer_req.address, + "length": transfer_req.length, + } + memory_verification.append(memory_bundle) + + write_resp_expect = [MemWriterRespStruct(status=MemWriterRespStatus.OKAY.value)] * test_count + + return (write_req_input, data_in_input, write_resp_expect, memory_verification, memory, test_count) + +if __name__ == "__main__": + toplevel = "mem_writer_wrapper" + verilog_sources = [ + "xls/modules/zstd/xls_fifo_wrapper.v", + "xls/modules/zstd/memory/mem_writer.v", + "xls/modules/zstd/memory/mem_writer_wrapper.v", + ] + test_module=[Path(__file__).stem] + run_test(toplevel, test_module, verilog_sources) + +test_cases_single_burst_1_transfer = [ + # Aligned Address; Aligned Length + (0x0, 0x4), + # Aligned Address; Unaligned Length + (0x10, 0x1), + (0x24, 0x2), + (0x38, 0x3), + # Unaligned Address; Aligned Length + (0x41, 0x4), + (0x52, 0x4), + (0x63, 0x4), + # Unaligned Address; Unaligned Length + (0x71, 0x1), + (0x81, 0x2), + (0x91, 0x3), + (0xa2, 0x1), + (0xb2, 0x2), + (0xc2, 0x3), + (0xd3, 0x1), + (0xe3, 0x2), + (0xf3, 0x3) +] + +test_cases_single_burst_2_transfers = [ + # Aligned Address; Aligned Length + (0x100, 0x8), + # Aligned Address; Unaligned Length + (0x110, 0x5), + (0x120, 0x6), + (0x130, 0x7), + # Unaligned Address; Aligned Length + (0x141, 0x8), + (0x152, 0x8), + (0x163, 0x8), + # Unaligned Address; Unaligned Length + (0x171, 0x5), + (0x182, 0x5), + (0x193, 0x5), + (0x1A1, 0x6), + (0x1B2, 0x6), + (0x1C3, 0x6), + (0x1D1, 0x7), + (0x1E2, 0x7), + (0x1F3, 0x7) +] + +test_cases_single_burst_almost_max_burst_transfer = [ + # Aligned Address; Aligned Length + (0x200, 0x3FC), + # Aligned Address; Unaligned Length + (0x600, 0x3F9), + (0xA00, 0x3FA), + (0x1000, 0x3FB), + # Unaligned Address; Aligned Length + (0x1401, 0x3FC), + (0x1802, 0x3FC), + (0x2003, 0x3FC), + # Unaligned Address; Unaligned Length + (0x2401, 0x3F9), + (0x2802, 0x3F9), + (0x2C03, 0x3F9), + (0x3001, 0x3FA), + (0x3402, 0x3FA), + (0x3803, 0x3FA), + (0x3C01, 0x3FB), + (0x4002, 0x3FB), + (0x4403, 0x3FB) +] + +test_cases_single_burst_max_burst_transfer = [ + # Aligned Address; Aligned Length + (0x4800, 0x400), + # Aligned Address; Unaligned Length + (0x4C00, 0x3FD), + (0x5000, 0x3FE), + (0x5400, 0x3FF), + # Unaligned Address; Aligned Length + (0x5801, 0x400), + (0x6002, 0x400), + (0x6803, 0x400), + # Unaligned Address; Unaligned Length + (0x7001, 0x3FD), + (0x7802, 0x3FD), + (0x8003, 0x3FD), + (0x8801, 0x3FE), + (0x9002, 0x3FE), + (0x9803, 0x3FE), + (0xA001, 0x3FF), + (0xA802, 0x3FF), + (0xB003, 0x3FF) +] + +test_cases_multiburst_2_full_bursts = [ + # Aligned Address; Aligned Length + (0x0400, 0x800), + # Aligned Address; Unaligned Length + (0x1000, 0x7FD), + (0x1800, 0x7FE), + (0x2000, 0x7FF), + # Unaligned Address; Aligned Length + (0x2801, 0x800), + (0x3002, 0x800), + (0x3803, 0x800), + # Unaligned Address; Unaligned Length + (0x4001, 0x7FD), + (0x5002, 0x7FD), + (0x6003, 0x7FD), + (0x7001, 0x7FE), + (0x8002, 0x7FE), + (0x9003, 0x7FE), + 
(0xA001, 0x7FF), + (0xB002, 0x7FF), + (0xF003, 0x7FF) +] + +test_cases_multiburst_1_full_burst_and_single_transfer = [ + # Aligned Address; Aligned Length; Multi-Burst + (0x0000, 0x404), + # Aligned Address; Unaligned Length; Multi-Burst + (0x0800, 0x401), + (0x1000, 0x402), + (0x1800, 0x403), + # Unaligned Address; Aligned Length; Multi-Burst + (0x2000, 0x404), + (0x2800, 0x404), + (0x3000, 0x404), + # Unaligned Address; Unaligned Length; Multi-Burst + (0x3801, 0x401), + (0x5002, 0x401), + (0x5803, 0x401), + (0x6001, 0x402), + (0x6802, 0x402), + (0x7003, 0x402), + (0x7801, 0x403), + (0x8002, 0x403), + (0x8803, 0x403) +] + +test_cases_multiburst_crossing_4kb_boundary = [ + # Aligned Address; Aligned Length + (0x0FFC, 0x8), + # Aligned Address; Unaligned Length + (0x1FFC, 0x5), + (0x2FFC, 0x6), + (0x3FFC, 0x7), + # Unaligned Address; Aligned Length + (0x4FFD, 0x8), + (0x5FFE, 0x8), + (0x6FFF, 0x8), + # Unaligned Address; Unaligned Length + (0x7FFD, 0x5), + (0x8FFD, 0x6), + (0x9FFD, 0x7), + (0xAFFE, 0x5), + (0xBFFE, 0x6), + (0xCFFE, 0x7), + (0xDFFF, 0x5), + (0xEFFF, 0x6), + # End of address space - wrap around + (0x0FFF, 0x7), +] + +test_cases_multiburst_crossing_4kb_boundary_with_perfectly_aligned_full_bursts = [ + # Aligned Address; Aligned Length; Multi-Burst; crossing 4kB boundary with perfectly aligned full bursts + (0x0C00, 0x800), + # Unaligned Address; Unaligned Length; Multi-Burst; crossing 4kB boundary with perfectly aligned full bursts + (0x1C01, 0x7FF), + (0x2C02, 0x7FE), + (0x3C03, 0x7FD), +] + +test_cases_multiburst_crossing_4kb_boundary_with_2_full_bursts_and_1_transfer = [ + # Aligned Address; Aligned Length + (0x0C04, 0x800), + # Aligned Address; Unaligned Length + (0x1C04, 0x801), + (0x2C04, 0x802), + (0x3C04, 0x803), + # Unaligned Address; Aligned Length + (0x4C01, 0x800), + (0x5C02, 0x800), + (0x6C03, 0x800), + # Unaligned Address; Unaligned Length + (0x7C01, 0x801), + (0x8C02, 0x802), + (0x9C03, 0x803), + (0xAC01, 0x802), + (0xBC02, 0x802), + (0xCC03, 0x802), + (0xDC01, 0x803), + (0xEC02, 0x803), + # End of address space - wrap around + (0x0C03, 0x803), +] + +test_cases_not_full_packets = [ + # Aligned Address; Aligned Length + (0x0000, 0x20), + # Aligned Address; Unaligned Length + (0x100, 0x21), + (0x200, 0x22), + (0x300, 0x23), + # Unaligned Address; Aligned Length + (0x401, 0x20), + (0x502, 0x20), + (0x603, 0x20), + # Unaligned Address; Unaligned Length + (0x701, 0x21), + (0x802, 0x22), + (0x903, 0x23), + (0xA01, 0x22), + (0xB02, 0x22), + (0xC03, 0x22), + (0xD01, 0x23), + (0xE02, 0x23), + (0xF03, 0x23), +] diff --git a/xls/modules/zstd/memory/mem_writer_wrapper.v b/xls/modules/zstd/memory/mem_writer_wrapper.v new file mode 100644 index 0000000000..c7513af58a --- /dev/null +++ b/xls/modules/zstd/memory/mem_writer_wrapper.v @@ -0,0 +1,193 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
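+
+// Verification wrapper around the generated `mem_writer` module. It unpacks
+// the flattened XLS channel buses into discrete request/data/response
+// ready-valid channels and AXI AW/W/B signals for the cocotb testbench
+// (mem_writer_cocotb_test.py), and brings selected internal channels of the
+// MemWriter hierarchy out as named wires (AXI writer requests, padding
+// requests and the raw/clean/padded AXI streams).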
+ +`default_nettype none + +module mem_writer_wrapper ( + input wire clk, + input wire rst, + + input wire [31:0] req_data, + input wire req_vld, + output wire req_rdy, + + input wire [48:0] data_in_data, + input wire data_in_vld, + output wire data_in_rdy, + + output wire resp_data, + output wire resp_vld, + input wire resp_rdy, + + output wire [3:0] axi_aw_awid, + output wire [15:0] axi_aw_awaddr, + output wire [2:0] axi_aw_awsize, + output wire [7:0] axi_aw_awlen, + output wire [1:0] axi_aw_awburst, + output wire axi_aw_awvalid, + input wire axi_aw_awready, + + output wire [31:0] axi_w_wdata, + output wire [3:0] axi_w_wstrb, + output wire [0:0] axi_w_wlast, + output wire axi_w_wvalid, + input wire axi_w_wready, + + input wire [2:0] axi_b_bresp, + input wire [3:0] axi_b_bid, + input wire axi_b_bvalid, + output wire axi_b_bready +); + + wire [15:0] req_f_addr; + wire [15:0] req_f_length; + + wire [31:0] data_in_f_data; + wire [15:0] data_in_f_length; + wire [0:0] data_in_f_last; + + wire [36:0] axi_w_data; + wire axi_w_vld; + wire axi_w_rdy; + + wire [32:0] axi_aw_data; + wire axi_aw_vld; + wire axi_aw_rdy; + + wire [6:0] axi_b_data; + wire axi_b_rdy; + wire axi_b_vld; + + assign {req_f_addr, req_f_length} = req_data; + + assign {data_in_f_data, data_in_f_length, data_in_f_last} = data_in_data; + + assign {axi_aw_awid, axi_aw_awaddr, axi_aw_awsize, axi_aw_awlen, axi_aw_awburst} = axi_aw_data; + assign axi_aw_awvalid = axi_aw_vld; + assign axi_aw_rdy = axi_aw_awready; + + assign {axi_w_wdata, axi_w_wstrb, axi_w_wlast} = axi_w_data; + assign axi_w_wvalid = axi_w_vld; + assign axi_w_rdy = axi_w_wready; + + assign axi_b_data = {axi_b_bresp, axi_b_bid}; + assign axi_b_vld = axi_b_bvalid; + assign axi_b_bready = axi_b_rdy; + + wire [15:0] axi_writer_write_req_address; + wire [15:0] axi_writer_write_req_length; + wire [ 0:0] axi_writer_write_req_valid; + wire [ 0:0] axi_writer_write_req_ready; + + wire [15:0] padding_write_req_address; + wire [15:0] padding_write_req_length; + wire [ 0:0] padding_write_req_valid; + wire [ 0:0] padding_write_req_ready; + + wire [31:0] axi_stream_raw_tdata; + wire [ 3:0] axi_stream_raw_tstr; + wire [ 3:0] axi_stream_raw_tkeep; + wire [ 0:0] axi_stream_raw_tlast; + wire [ 3:0] axi_stream_raw_tid; + wire [ 3:0] axi_stream_raw_tdest; + wire [ 0:0] axi_stream_raw_tvalid; + wire [ 0:0] axi_stream_raw_tready; + + wire [31:0] axi_stream_clean_tdata; + wire [ 3:0] axi_stream_clean_tstr; + wire [ 3:0] axi_stream_clean_tkeep; + wire [ 0:0] axi_stream_clean_tlast; + wire [ 3:0] axi_stream_clean_tid; + wire [ 3:0] axi_stream_clean_tdest; + wire [ 0:0] axi_stream_clean_tvalid; + wire [ 0:0] axi_stream_clean_tready; + + wire [31:0] axi_stream_padded_tdata; + wire [ 3:0] axi_stream_padded_tstr; + wire [ 3:0] axi_stream_padded_tkeep; + wire [ 0:0] axi_stream_padded_tlast; + wire [ 3:0] axi_stream_padded_tid; + wire [ 3:0] axi_stream_padded_tdest; + wire [ 0:0] axi_stream_padded_tvalid; + wire [ 0:0] axi_stream_padded_tready; + + assign {axi_writer_write_req_address, axi_writer_write_req_length} = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__axi_writer_req_data; + assign axi_writer_write_req_valid = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__axi_writer_req_vld; + assign axi_writer_write_req_ready = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__axi_writer_req_rdy; + + assign 
{padding_write_req_address, padding_write_req_length} = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__padding_req_data; + assign padding_write_req_valid = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__padding_req_vld; + assign padding_write_req_ready = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__padding_req_rdy; + + assign { axi_stream_raw_tdata, + axi_stream_raw_tstr, + axi_stream_raw_tkeep, + axi_stream_raw_tid, + axi_stream_raw_tdest, + axi_stream_raw_tlast} = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__axi_st_raw_data; + assign axi_stream_raw_tvalid = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__axi_st_raw_vld; + assign axi_stream_raw_tready = mem_writer.__mem_writer__MemWriterInst__MemWriter_0__MemWriterInternal_0__16_32_4_4_4_2_next_inst0.mem_writer__axi_st_raw_rdy; + + assign { axi_stream_clean_tdata, + axi_stream_clean_tstr, + axi_stream_clean_tkeep, + axi_stream_clean_tid, + axi_stream_clean_tdest, + axi_stream_clean_tlast} = mem_writer.__xls_modules_zstd_memory_axi_stream_add_empty__MemWriterInst__MemWriter_0__AxiStreamAddEmpty_0__16_32_4_2_4_4_next_inst1.mem_writer__axi_st_clean_data; + assign axi_stream_clean_tvalid = mem_writer.__xls_modules_zstd_memory_axi_stream_add_empty__MemWriterInst__MemWriter_0__AxiStreamAddEmpty_0__16_32_4_2_4_4_next_inst1.mem_writer__axi_st_clean_vld; + assign axi_stream_clean_tready = mem_writer.__xls_modules_zstd_memory_axi_stream_add_empty__MemWriterInst__MemWriter_0__AxiStreamAddEmpty_0__16_32_4_2_4_4_next_inst1.mem_writer__axi_st_clean_rdy; + + assign { axi_stream_padded_tdata, + axi_stream_padded_tstr, + axi_stream_padded_tkeep, + axi_stream_padded_tid, + axi_stream_padded_tdest, + axi_stream_padded_tlast} = mem_writer.__xls_modules_zstd_memory_axi_writer__MemWriterInst__MemWriter_0__AxiWriter_0__16_32_4_4_4_2_next_inst4.mem_writer__axi_st_padded_data; + assign axi_stream_padded_tvalid = mem_writer.__xls_modules_zstd_memory_axi_writer__MemWriterInst__MemWriter_0__AxiWriter_0__16_32_4_4_4_2_next_inst4.mem_writer__axi_st_padded_vld; + assign axi_stream_padded_tready = mem_writer.__xls_modules_zstd_memory_axi_writer__MemWriterInst__MemWriter_0__AxiWriter_0__16_32_4_4_4_2_next_inst4.mem_writer__axi_st_padded_rdy; + + mem_writer mem_writer ( + .clk(clk), + .rst(rst), + + // MemWriter Write Request + .mem_writer__req_in_r_data(req_data), + .mem_writer__req_in_r_vld (req_vld), + .mem_writer__req_in_r_rdy (req_rdy), + + // Data to write + .mem_writer__data_in_r_data(data_in_data), + .mem_writer__data_in_r_vld (data_in_vld), + .mem_writer__data_in_r_rdy (data_in_rdy), + + // Response channel + .mem_writer__resp_s_data(resp_data), + .mem_writer__resp_s_rdy (resp_rdy), + .mem_writer__resp_s_vld (resp_vld), + + // Memory AXI + .mem_writer__axi_w_s_data(axi_w_data), + .mem_writer__axi_w_s_vld (axi_w_vld), + .mem_writer__axi_w_s_rdy (axi_w_rdy), + + .mem_writer__axi_aw_s_data(axi_aw_data), + .mem_writer__axi_aw_s_vld (axi_aw_vld), + .mem_writer__axi_aw_s_rdy (axi_aw_rdy), + + .mem_writer__axi_b_r_data(axi_b_data), + .mem_writer__axi_b_r_vld (axi_b_vld), + .mem_writer__axi_b_r_rdy (axi_b_rdy) + ); + +endmodule : mem_writer_wrapper diff --git a/xls/modules/zstd/raw_block_dec.x b/xls/modules/zstd/raw_block_dec.x index 
a3656011b0..669b66d5b1 100644 --- a/xls/modules/zstd/raw_block_dec.x +++ b/xls/modules/zstd/raw_block_dec.x @@ -17,6 +17,7 @@ // https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.2.2 import xls.modules.zstd.common as common; +import xls.modules.zstd.memory.mem_reader as mem_reader; type BlockDataPacket = common::BlockDataPacket; type BlockPacketLength = common::BlockPacketLength; @@ -26,92 +27,289 @@ type CopyOrMatchContent = common::CopyOrMatchContent; type CopyOrMatchLength = common::CopyOrMatchLength; type SequenceExecutorMessageType = common::SequenceExecutorMessageType; -struct RawBlockDecoderState { - prev_id: u32, // ID of the previous block - prev_last: bool, // if the previous packet was the last one that makes up the whole block - prev_valid: bool, // if prev_id and prev_last contain valid data +pub struct RawBlockDecoderReq { + id: u32, + addr: uN[ADDR_W], + length: uN[ADDR_W], + last_block: bool, } -const ZERO_RAW_BLOCK_DECODER_STATE = zero!(); +pub enum RawBlockDecoderStatus: u1 { + OKAY = 0, + ERROR = 1, +} + +pub struct RawBlockDecoderResp { + status: RawBlockDecoderStatus, +} + +struct RawBlockDecoderState { + id: u32, // ID of the block + last_block: bool, // if the block is the last one +} // RawBlockDecoder is responsible for decoding Raw Blocks, // it should be a part of the ZSTD Decoder pipeline. -pub proc RawBlockDecoder { - input_r: chan in; - output_s: chan out; +pub proc RawBlockDecoder { + type Req = RawBlockDecoderReq; + type Resp = RawBlockDecoderResp; + type Output = ExtendedBlockDataPacket; + type Status = RawBlockDecoderStatus; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + type MemReaderStatus = mem_reader::MemReaderStatus; + + type State = RawBlockDecoderState; - init { (ZERO_RAW_BLOCK_DECODER_STATE) } + // decoder input + req_r: chan in; + resp_s: chan out; + + // decoder output + output_s: chan out; + + // memory interface + mem_req_s: chan out; + mem_resp_r: chan in; + + init { zero!() } config( - input_r: chan in, - output_s: chan out - ) {(input_r, output_s)} + req_r: chan in, + resp_s: chan out, + output_s: chan out, - next(state: RawBlockDecoderState) { - let tok = join(); - let (tok, data) = recv(tok, input_r); - if state.prev_valid && (data.id != state.prev_id) && (state.prev_last == false) { - trace_fmt!("ID changed but previous packet have no last!"); - fail!("no_last", ()); - } else {}; - - let output_data = ExtendedBlockDataPacket { - // Decoded RAW block is always a literal + mem_req_s: chan out, + mem_resp_r: chan in, + ) { + ( + req_r, resp_s, output_s, + mem_req_s, mem_resp_r, + ) + } + + next(state: State) { + let tok0 = join(); + + // receive request + let (tok1_0, req, req_valid) = recv_non_blocking(tok0, req_r, zero!>()); + + // update ID and last in state + let state = if req_valid { + State { id: req.id, last_block: req.last_block} + } else { state }; + + // send memory read request + let req = MemReaderReq { addr: req.addr, length: req.length }; + let tok2_0 = send_if(tok1_0, mem_req_s, req_valid, req); + + // receive memory read response + let (tok1_1, mem_resp, mem_resp_valid) = recv_non_blocking(tok0, mem_resp_r, zero!()); + let mem_resp_error = (mem_resp.status != MemReaderStatus::OKAY); + + // prepare output data, decoded RAW block is always a literal + let output_data = Output { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { - last: data.last, - last_block: data.last_block, - id: data.id, - data: data.data as BlockData, - length: 
data.length as BlockPacketLength, + last: mem_resp.last, + last_block: state.last_block, + id: state.id, + data: checked_cast(mem_resp.data), + length: checked_cast(mem_resp.length ++ u3:0), }, }; - let tok = send(tok, output_s, output_data); + // send output data + let mem_resp_correct = mem_resp_valid && !mem_resp_error; + let tok2_1 = send_if(tok1_1, output_s, mem_resp_correct, output_data); + + // send response after block end + let resp = if mem_resp_correct { + Resp { status: Status::OKAY } + } else { + Resp { status: Status::ERROR } + }; + + let do_send_resp = mem_resp_valid && mem_resp.last; + let tok2_2 = send_if(tok1_1, resp_s, do_send_resp, resp); - RawBlockDecoderState { - prev_valid: true, - prev_id: output_data.packet.id, - prev_last: output_data.packet.last - } + state } } +const INST_DATA_W = u32:32; +const INST_ADDR_W = u32:32; + +pub proc RawBlockDecoderInst { + type Req = RawBlockDecoderReq; + type Resp = RawBlockDecoderResp; + type Output = ExtendedBlockDataPacket; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + + config ( + req_r: chan in, + resp_s: chan out, + output_s: chan out, + mem_req_s: chan out, + mem_resp_r: chan in, + ) { + spawn RawBlockDecoder( + req_r, resp_s, output_s, mem_req_s, mem_resp_r + ); + } + + init { } + + next (state: ()) { } +} + +const TEST_DATA_W = u32:64; +const TEST_ADDR_W = u32:32; + #[test_proc] proc RawBlockDecoderTest { + type Req = RawBlockDecoderReq; + type Resp = RawBlockDecoderResp; + type Output = ExtendedBlockDataPacket; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + + type Data = uN[TEST_DATA_W]; + type Addr = uN[TEST_ADDR_W]; + type Length = uN[TEST_ADDR_W]; + terminator: chan out; - dec_input_s: chan out; - dec_output_r: chan in; + + req_s: chan out; + resp_r: chan in; + output_r: chan in; + + mem_req_r: chan in; + mem_resp_s: chan out; config(terminator: chan out) { - let (dec_input_s, dec_input_r) = chan("dec_input"); - let (dec_output_s, dec_output_r) = chan("dec_output"); - spawn RawBlockDecoder(dec_input_r, dec_output_s); - (terminator, dec_input_s, dec_output_r) + let (req_s, req_r) = chan("req"); + let (resp_s, resp_r) = chan("resp"); + let (output_s, output_r) = chan("output"); + + let (mem_req_s, mem_req_r) = chan("mem_req"); + let (mem_resp_s, mem_resp_r) = chan("mem_resp"); + + spawn RawBlockDecoder( + req_r, resp_s, output_s, mem_req_s, mem_resp_r + ); + + (terminator, req_s, resp_r, output_r, mem_req_r, mem_resp_s) } init { } next(state: ()) { + let tok = join(); - let data_to_send: BlockDataPacket[5] = [ - BlockDataPacket { id: u32:1, last: u1:false, last_block: u1:false, data: BlockData:1, length: BlockPacketLength:32 }, - BlockDataPacket { id: u32:1, last: u1:false, last_block: u1:false, data: BlockData:2, length: BlockPacketLength:32 }, - BlockDataPacket { id: u32:1, last: u1:true, last_block: u1:false, data: BlockData:3, length: BlockPacketLength:32 }, - BlockDataPacket { id: u32:2, last: u1:false, last_block: u1:false, data: BlockData:4, length: BlockPacketLength:32 }, - BlockDataPacket { id: u32:2, last: u1:true, last_block: u1:true, data: BlockData:5, length: BlockPacketLength:32 }, - ]; - - let tok = for ((_, data), tok): ((u32, BlockDataPacket), token) in enumerate(data_to_send) { - let tok = send(tok, dec_input_s, data); - let (tok, received_data) = recv(tok, dec_output_r); - let expected_data = ExtendedBlockDataPacket { - msg_type: SequenceExecutorMessageType::LITERAL, - packet: data, - }; - 
assert_eq(expected_data, received_data); - (tok) - }(tok); + + // Test 0 + let req = Req { id: u32:0, last_block: false, addr: Addr:0, length: Length:8 }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr: Addr:0, length: Length:8 }); + + let mem_resp = MemReaderResp { + status: mem_reader::MemReaderStatus::OKAY, + data: Data:0x1122_3344, + length: Length:8, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: true, + last_block: false, + id: u32:0, + data: Data:0x1122_3344, + length: Length:64, + }, + }); + + // Test 1 + let req = Req { id: u32:1, last_block: true, addr: Addr:0x1001, length: Length:15 }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr: Addr:0x1001, length: Length:15 }); + + let mem_resp = MemReaderResp { + status: mem_reader::MemReaderStatus::OKAY, + data: Data:0x1122_3344_5566_7788, + length: Length:8, + last: false + }; + let tok = send(tok, mem_resp_s, mem_resp); + + let mem_resp = MemReaderResp { + status: mem_reader::MemReaderStatus::OKAY, + data: Data:0xAA_BBCC_DDEE_FF99, + length: Length:7, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: false, + last_block: true, + id: u32:1, + data: Data:0x1122_3344_5566_7788, + length: Length:64, + }, + }); + + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: true, + last_block: true, + id: u32:1, + data: Data:0xAA_BBCC_DDEE_FF99, + length: Length:56, + }, + }); + + // Test 2 + let req = Req {id: u32:2, last_block: false, addr: Addr:0x2000, length: Length:0 }; + let tok = send(tok, req_s, req); + + let (tok, mem_req) = recv(tok, mem_req_r); + assert_eq(mem_req, MemReaderReq { addr: Addr:0x2000, length: Length:0 }); + + let mem_resp = MemReaderResp { + status: mem_reader::MemReaderStatus::OKAY, + data: Data:0x0, + length: Length:0, + last: true, + }; + let tok = send(tok, mem_resp_s, mem_resp); + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: true, + last_block: false, + id: u32:2, + data: Data:0x0, + length: Length:0, + }, + }); send(tok, terminator, true); } diff --git a/xls/modules/zstd/repacketizer.x b/xls/modules/zstd/repacketizer.x deleted file mode 100644 index f2abd638d1..0000000000 --- a/xls/modules/zstd/repacketizer.x +++ /dev/null @@ -1,215 +0,0 @@ -// Copyright 2024 The XLS Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
- -// Repacketizer -// -// Remove invalid bytes from input packets, -// form new packets with all bits valid if possible. - -import std; -import xls.modules.zstd.common as common; - -type ZstdDecodedPacket = common::ZstdDecodedPacket; -type BlockData = common::BlockData; -type BlockPacketLength = common::BlockPacketLength; - -const DATA_WIDTH = common::DATA_WIDTH; - -struct RepacketizerState { - repacked_data: BlockData, - valid_length: BlockPacketLength, - to_fill: BlockPacketLength, - send_last_leftover: bool -} - -const ZERO_ZSTD_DECODED_PACKET = zero!(); -const ZERO_REPACKETIZER_STATE = zero!(); -const INIT_REPACKETIZER_STATE = RepacketizerState {to_fill: DATA_WIDTH, ..ZERO_REPACKETIZER_STATE}; - -pub proc Repacketizer { - input_r: chan in; - output_s: chan out; - - init {(INIT_REPACKETIZER_STATE)} - - config ( - input_r: chan in, - output_s: chan out, - ) { - (input_r, output_s) - } - - next (state: RepacketizerState) { - let tok = join(); - // Don't receive if we process leftovers - let (tok, decoded_packet) = recv_if(tok, input_r, !state.send_last_leftover, ZERO_ZSTD_DECODED_PACKET); - - // Will be able to send repacketized packet in current next() evaluation - let send_now = state.to_fill <= decoded_packet.length || decoded_packet.last || state.send_last_leftover; - // Received last packet in frame which won't fit into currently processed repacketized packet. - // Set flag indicating that Repacketizer will send another packet to finish the frame in - // next evaluation. - let next_send_last_leftover = decoded_packet.last && state.to_fill < decoded_packet.length; - - let combined_length = state.valid_length + decoded_packet.length; - let leftover_length = (combined_length - DATA_WIDTH) as s32; - let next_valid_length = if leftover_length >= s32:0 {leftover_length as BlockPacketLength} else {combined_length}; - let next_to_fill = DATA_WIDTH - next_valid_length; - - let current_valid_length = if leftover_length >= s32:0 {DATA_WIDTH} else {combined_length}; - let bits_to_take_length = if leftover_length >= s32:0 {state.to_fill} else {decoded_packet.length}; - - // Append lest signifiant bits of received packet to most significant positions of repacked data buffer - let masked_data = ((BlockData:1 << bits_to_take_length) - BlockData:1) & decoded_packet.data; - let repacked_data = state.repacked_data | (masked_data << state.valid_length); - - // Prepare buffer state for the next evaluation - take leftover most significant bits of - // received packet - let leftover_mask = (BlockData:1 << (decoded_packet.length - bits_to_take_length)) - BlockData:1; - let leftover_masked_data = (decoded_packet.data >> bits_to_take_length) & leftover_mask; - let next_repacked_data = if (send_now) {leftover_masked_data} else {repacked_data}; - - let packet_to_send = ZstdDecodedPacket { - data: repacked_data, - length: current_valid_length, - last: state.send_last_leftover || (decoded_packet.last && !next_send_last_leftover), - }; - let tok = send_if(tok, output_s, send_now, packet_to_send); - - let next_state = if (state.send_last_leftover || (decoded_packet.last && !next_send_last_leftover)) { - INIT_REPACKETIZER_STATE - } else { - RepacketizerState { - repacked_data: next_repacked_data, - valid_length: next_valid_length, - to_fill: next_to_fill, - send_last_leftover: next_send_last_leftover, - } - }; - - trace_fmt!("Repacketizer: state: {:#x}", state); - if (!state.send_last_leftover) { - trace_fmt!("Repacketizer: Received packet: {:#x}", decoded_packet); - } else {}; - trace_fmt!("Repacketizer: send_now: 
{}", send_now); - trace_fmt!("Repacketizer: next_send_last_leftover: {}", next_send_last_leftover); - trace_fmt!("Repacketizer: combined_length: {}", combined_length); - trace_fmt!("Repacketizer: leftover_length: {}", leftover_length); - trace_fmt!("Repacketizer: next_valid_length: {}", next_valid_length); - trace_fmt!("Repacketizer: next_to_fill: {}", next_to_fill); - trace_fmt!("Repacketizer: current_valid_length: {}", current_valid_length); - trace_fmt!("Repacketizer: bits_to_take_length: {}", bits_to_take_length); - trace_fmt!("Repacketizer: masked_data: {:#x}", masked_data); - trace_fmt!("Repacketizer: repacked_data: {:#x}", repacked_data); - trace_fmt!("Repacketizer: leftover_mask: {:#x}", leftover_mask); - trace_fmt!("Repacketizer: leftover_masked_data: {:#x}", leftover_masked_data); - trace_fmt!("Repacketizer: next_repacked_data: {:#x}", next_repacked_data); - if (send_now) { - trace_fmt!("Repacketizer: Sent repacketized packet: {:#x}", packet_to_send); - } else {}; - trace_fmt!("Repacketizer: next_state: {:#x}", next_state); - - next_state - } -} - -#[test_proc] -proc RepacketizerTest { - terminator: chan out; - input_s: chan out; - output_r: chan in; - - init {} - - config (terminator: chan out) { - let (input_s, input_r) = chan("input"); - let (output_s, output_r) = chan("output"); - - spawn Repacketizer(input_r, output_s); - (terminator, input_s, output_r) - } - - next(state: ()) { - let tok = join(); - let DecodedInputs: ZstdDecodedPacket[24] = [ - // Full packet - no need for removing alignment zeros - ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false}, - // Data in 4 packets - should be batched together into one full output packet - ZstdDecodedPacket {data: BlockData:0x78, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x56, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x1234, length: BlockPacketLength:16, last:false}, - ZstdDecodedPacket {data: BlockData:0xDEADBEEF, length: BlockPacketLength:32, last:false}, - // Small last packet - should be send out separatelly - ZstdDecodedPacket {data: BlockData:0x9A, length: BlockPacketLength:8, last:true}, - // One not-full packet and consecutive last packet packet in frame which completes previous packet and - // starts new one which should be marked as last - ZstdDecodedPacket {data: BlockData:0xADBEEF12345678, length: BlockPacketLength:56, last:false}, - ZstdDecodedPacket {data: BlockData:0x9ADE, length: BlockPacketLength:16, last:true}, - // 8 1-byte packets forming single output packet - ZstdDecodedPacket {data: BlockData:0xEF, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0xCD, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0xAB, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x89, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x67, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x45, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x23, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x01, length: BlockPacketLength:8, last:false}, - // 7 1-byte packets and 1 8-byte packet forming 1 full and 1 7-byte output packet - // marked as last - ZstdDecodedPacket {data: BlockData:0xEF, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0xCD, length: BlockPacketLength:8, 
last:false}, - ZstdDecodedPacket {data: BlockData:0xAB, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x89, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x67, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x45, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0x23, length: BlockPacketLength:8, last:false}, - ZstdDecodedPacket {data: BlockData:0xFEDCBA9876543201, length: BlockPacketLength:64, last:true}, - ]; - - let DecodedOutputs: ZstdDecodedPacket[8] = [ - // Full packet - no need for removing alignment zeros - ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false}, - // Data in 4 packets - should be batched together into one full output packet - ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false}, - // Small last packet - should be send out separatelly - ZstdDecodedPacket {data: BlockData:0x9A, length: BlockPacketLength:8, last:true}, - // One not-full packet and consecutive last packet packet in frame which completes previous packet and - // starts new one which should be marked as last - ZstdDecodedPacket {data: BlockData:0xDEADBEEF12345678, length: BlockPacketLength:64, last:false}, - ZstdDecodedPacket {data: BlockData:0x9A, length: BlockPacketLength:8, last:true}, - // 8 1-byte packets forming single output packet - ZstdDecodedPacket {data: BlockData:0x0123456789ABCDEF, length: BlockPacketLength:64, last:false}, - // 7 1-byte packets and 1 8-byte packet forming 1 full and 1 7-byte output packet - // marked as last - ZstdDecodedPacket {data: BlockData:0x0123456789ABCDEF, length: BlockPacketLength:64, last:false}, - ZstdDecodedPacket {data: BlockData:0xFEDCBA98765432, length: BlockPacketLength:56, last:true}, - ]; - - let tok = for ((counter, decoded_input), tok): ((u32, ZstdDecodedPacket), token) in enumerate(DecodedInputs) { - let tok = send(tok, input_s, decoded_input); - trace_fmt!("Sent #{} decoded zero-filled packet, {:#x}", counter + u32:1, decoded_input); - (tok) - } (tok); - - let tok = for ((counter, expected_output), tok): ((u32, ZstdDecodedPacket), token) in enumerate(DecodedOutputs) { - let (tok, decoded_output) = recv(tok, output_r); - trace_fmt!("Received #{} decoded non-zero-filled packet, {:#x}", counter + u32:1, decoded_output); - trace_fmt!("Expected #{} decoded non-zero-filled packet, {:#x}", counter + u32:1, expected_output); - assert_eq(decoded_output, expected_output); - (tok) - } (tok); - - send(tok, terminator, true); - } -} diff --git a/xls/modules/zstd/rle_block_dec.x b/xls/modules/zstd/rle_block_dec.x index 232d9a6381..c5529c978b 100644 --- a/xls/modules/zstd/rle_block_dec.x +++ b/xls/modules/zstd/rle_block_dec.x @@ -12,44 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. -// This file contains the implementation of RleBlockDecoder responsible for decoding -// ZSTD RLE Blocks. 
More Information about Rle Block's format can be found in: -// https://datatracker.ietf.org/doc/html/rfc8878#section-3.1.1.2.2 -// -// The implementation consist of 3 procs: -// * RleDataPacker -// * RunLengthDecoder -// * BatchPacker -// Connections between those is represented on the diagram below: -// -// RleBlockDecoder -// ┌─────────────────────────────────────────────────────────────┐ -// │ RleDataPacker RunLengthDecoder BatchPacker │ -// │ ┌───────────────┐ ┌──────────────────┐ ┌─────────────┐ │ -// ───┼─►│ ├──►│ ├──►│ ├─┼──► -// │ └───────┬───────┘ └──────────────────┘ └─────────────┘ │ -// │ │ ▲ │ -// │ │ SynchronizationData │ │ -// │ └─────────────────────────────────────────┘ │ -// └─────────────────────────────────────────────────────────────┘ -// -// RleDataPacker is responsible for receiving the incoming packets of block data, converting -// those to format accepted by RunLengthDecoder and passing the data to the actual decoder block. -// It also extracts from the input packets the synchronization data like block_id and last_block -// and then passes those to BatchPacker proc. -// RunLengthDecoder decodes RLE blocks and outputs one symbol for each transaction on output -// channel. -// BatchPacker then gathers those symbols into packets, appends synchronization data received from -// RleDataPacker and passes such packets to the output of the RleBlockDecoder. +import std; import xls.modules.zstd.common; -import xls.modules.rle.rle_dec; -import xls.modules.rle.rle_common; -const SYMBOL_WIDTH = common::SYMBOL_WIDTH; -const BLOCK_SIZE_WIDTH = common::BLOCK_SIZE_WIDTH; -const DATA_WIDTH = common::DATA_WIDTH; -const BATCH_SIZE = DATA_WIDTH / SYMBOL_WIDTH; type BlockDataPacket = common::BlockDataPacket; type BlockPacketLength = common::BlockPacketLength; @@ -61,696 +27,244 @@ type CopyOrMatchContent = common::CopyOrMatchContent; type CopyOrMatchLength = common::CopyOrMatchLength; type SequenceExecutorMessageType = common::SequenceExecutorMessageType; -type RleInput = rle_common::CompressedData; -type RleOutput = rle_common::PlainData; -type Symbol = bits[SYMBOL_WIDTH]; -type SymbolCount = BlockSize; -struct BlockSyncData { - last_block: bool, - count: SymbolCount, - id: u32 +pub enum RleBlockDecoderStatus: u1 { + OKAY = 0, } -proc RleDataPacker { - block_data_r: chan in; - rle_data_s: chan out; - sync_s: chan out; - - config( - block_data_r: chan in, - rle_data_s: chan out, - sync_s: chan out - ) { - (block_data_r, rle_data_s, sync_s) - } - - init { } - - next(state: ()) { - let tok = join(); - let (tok, input) = recv(tok, block_data_r); - let rle_dec_data = RleInput { - symbol: input.data as Symbol, count: input.length as SymbolCount, last: true - }; - // send RLE packet for decoding unless it has symbol count == 0 - let send_always = rle_dec_data.count != SymbolCount:0; - let data_tok = send_if(tok, rle_data_s, send_always, rle_dec_data); - let sync_data = BlockSyncData { last_block: input.last_block, count: rle_dec_data.count, id: input.id }; - // send last block packet even if it has symbol count == 0 - let sync_tok = send(data_tok, sync_s, sync_data); - } +pub struct RleBlockDecoderReq { + id: u32, + symbol: u8, + length: BlockSize, + last_block: bool, } -type RleTestVector = (Symbol, SymbolCount); - -#[test_proc] -proc RleDataPacker_test { - terminator: chan out; - in_s: chan out; - out_r: chan in; - sync_r: chan in; - - config(terminator: chan out) { - let (in_s, in_r) = chan("in"); - let (out_s, out_r) = chan("out"); - let (sync_s, sync_r) = chan("sync"); - - spawn 
RleDataPacker(in_r, out_s, sync_s); - - (terminator, in_s, out_r, sync_r) - } - - init { } - - next(state: ()) { - let tok = join(); - let EncodedRleBlocks: RleTestVector[6] = [ - (Symbol:0x1, SymbolCount:0x1), - (Symbol:0x2, SymbolCount:0x2), - (Symbol:0x3, SymbolCount:0x4), - (Symbol:0x4, SymbolCount:0x8), - (Symbol:0x5, SymbolCount:0x10), - (Symbol:0x6, SymbolCount:0x1F), - ]; - let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { - let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); - let data_in = BlockDataPacket { - last: true, - last_block, - id: counter, - data: block.0 as BlockData, - length: block.1 as BlockPacketLength - }; - let tok = send(tok, in_s, data_in); - trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); - - let data_out = RleInput { - last: true, symbol: block.0 as Symbol, count: block.1 as BlockSize - }; - let (tok, dec_output) = recv(tok, out_r); - trace_fmt!("Received #{} packed rle encoded block, {:#x}", counter + u32:1, dec_output); - assert_eq(dec_output, data_out); - - let sync_out = BlockSyncData { - id: counter, - count: block.1, - last_block: counter == (array_size(EncodedRleBlocks) - u32:1), - }; - let (tok, sync_output) = recv(tok, sync_r); - trace_fmt!("Received #{} synchronization data, {:#x}", counter + u32:1, sync_output); - assert_eq(sync_output, sync_out); - (tok) - }(tok); - send(tok, terminator, true); - } +pub struct RleBlockDecoderResp { + status: RleBlockDecoderStatus } -#[test_proc] -proc RleDataPacker_empty_blocks_test { - terminator: chan out; - in_s: chan out; - out_r: chan in; - sync_r: chan in; - - config(terminator: chan out) { - let (in_s, in_r) = chan("in"); - let (out_s, out_r) = chan("out"); - let (sync_s, sync_r) = chan("sync"); - - spawn RleDataPacker(in_r, out_s, sync_s); - - (terminator, in_s, out_r, sync_r) - } - - init { } - - next(state: ()) { - let tok = join(); - let EncodedRleBlocks: RleTestVector[8] = [ - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0x1, SymbolCount:0x1), - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0x3, SymbolCount:0x4), - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0x5, SymbolCount:0x10), - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0xFF, SymbolCount:0x0), - ]; - let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { - let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); - let data_in = BlockDataPacket { - last: true, - last_block, - id: counter, - data: block.0 as BlockData, - length: block.1 as BlockPacketLength - }; - let tok = send(tok, in_s, data_in); - trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); - (tok) - }(tok); - - let RleInputs: RleInput[3] = [ - RleInput {last: true, symbol: Symbol:0x1, count: BlockSize:0x1}, - RleInput {last: true, symbol: Symbol:0x3, count: BlockSize:0x4}, - RleInput {last: true, symbol: Symbol:0x5, count: BlockSize:0x10}, - ]; - let tok = for ((counter, rle_in), tok): ((u32, RleInput), token) in enumerate(RleInputs) { - let (tok, dec_output) = recv(tok, out_r); - trace_fmt!("Received #{} packed rle encoded block, {:#x}", counter + u32:1, dec_output); - assert_eq(dec_output, rle_in); - (tok) - }(tok); - - let BlockSyncDataInputs: BlockSyncData[8] = [ - BlockSyncData { id: 0, count: BlockSize:0x0, last_block: false }, - BlockSyncData { id: 1, count: BlockSize:0x1, last_block: false }, - BlockSyncData { id: 2, count: BlockSize:0x0, last_block: false }, - BlockSyncData { id: 3, count: BlockSize:0x4, 
last_block: false }, - BlockSyncData { id: 4, count: BlockSize:0x0, last_block: false }, - BlockSyncData { id: 5, count: BlockSize:0x10, last_block: false }, - BlockSyncData { id: 6, count: BlockSize:0x0, last_block: false }, - BlockSyncData { id: 7, count: BlockSize:0x0, last_block: true }, - ]; - let tok = for ((counter, sync_data), tok): ((u32, BlockSyncData), token) in enumerate(BlockSyncDataInputs) { - let (tok, sync_output) = recv(tok, sync_r); - trace_fmt!("Received #{} synchronization data, {:#x}", counter + u32:1, sync_output); - assert_eq(sync_output, sync_data); - (tok) - }(tok); - send(tok, terminator, true); - } +struct RleBlockDecoderState { + req: RleBlockDecoderReq, + req_valid: bool, } -struct BatchPackerState { - batch: BlockData, - symbols_in_batch: BlockPacketLength, - symbols_in_block: BlockPacketLength, - prev_last: bool, - prev_sync: BlockSyncData, -} +pub proc RleBlockDecoder { + type Req = RleBlockDecoderReq; + type Resp = RleBlockDecoderResp; + type Output = ExtendedBlockDataPacket; -const ZERO_BATCH_STATE = zero!(); -const ZERO_BLOCK_SYNC_DATA = zero!(); -const ZERO_RLE_OUTPUT = zero!(); -const EMPTY_RLE_OUTPUT = RleOutput {last: true, ..ZERO_RLE_OUTPUT}; + type State = RleBlockDecoderState; -proc BatchPacker { - rle_data_r: chan in; - sync_r: chan in; - block_data_s: chan out; + req_r: chan in; + resp_s: chan out; + output_s: chan out; - config( - rle_data_r: chan in, - sync_r: chan in, - block_data_s: chan out - ) { - (rle_data_r, sync_r, block_data_s) - } + config( req_r: chan in, + resp_s: chan out, + output_s: chan out, + ) { (req_r, resp_s, output_s) } - // Init the state to signal new batch to process - init { (BatchPackerState { prev_last: true, ..ZERO_BATCH_STATE }) } + init { zero!() } - next(state: BatchPackerState) { - let tok = join(); - trace_fmt!("start state: {:#x}", state); - let prev_expected_symbols_in_block = state.prev_sync.count as BlockPacketLength; - let symbols_in_batch = state.symbols_in_batch; - let symbols_in_block = state.symbols_in_block; - let block_in_progress = (symbols_in_block != prev_expected_symbols_in_block); - trace_fmt!("block_in_progress: {:#x}", block_in_progress); - - // Finished receiving RLE data of the previous block - // Proceed with receiving sync data for the next block - let start_new_block = !block_in_progress; - let (tok, sync_data) = recv_if(tok, sync_r, start_new_block, state.prev_sync); - if (start_new_block) { - trace_fmt!("received sync_data: {:#x}", sync_data); - } else { - trace_fmt!("got sync_data from the state: {:#x}", sync_data); - }; + next(state: State) { + const MAX_OUTPUT_SYMBOLS = (DATA_W / u32:8); + const MAX_LEN = MAX_OUTPUT_SYMBOLS as uN[common::BLOCK_SIZE_WIDTH]; - let expected_symbols_in_block = if (start_new_block) { sync_data.count as BlockPacketLength } else { prev_expected_symbols_in_block }; - trace_fmt!("expected_symbols_in_block: {:#x}", expected_symbols_in_block); + let tok0 = join(); - let batch = state.batch; - let empty_block = (expected_symbols_in_block == BlockPacketLength:0); - trace_fmt!("batch: {:#x}", batch); - trace_fmt!("empty_block: {:#x}", empty_block); + let (tok1, req) = recv_if(tok0, req_r, !state.req_valid, state.req); - let do_recv_rle = !empty_block && block_in_progress; - let default_rle_output = if (empty_block) { EMPTY_RLE_OUTPUT } else { ZERO_RLE_OUTPUT }; - let (tok, decoded_data) = recv_if(tok, rle_data_r, do_recv_rle, default_rle_output); - if (do_recv_rle) { - trace_fmt!("received rle_data: {:#x}", decoded_data); - } else { - trace_fmt!("got empty rle_data: 
{:#x}", decoded_data); - }; + let last = req.length <= MAX_LEN; + let length = if last { req.length } else { MAX_LEN }; + let data = unroll_for! (i, data): (u32, uN[DATA_W]) in range(u32:0, MAX_OUTPUT_SYMBOLS) { + bit_slice_update(data, i * u32:8, req.symbol) + }(uN[DATA_W]:0); - let (batch, symbols_in_batch, symbols_in_block) = if (do_recv_rle) { - // TODO: Improve performance: remove variable shift - let shift = symbols_in_batch << u32:3; // multiply by 8 bits - let updated_batch = batch | ((decoded_data.symbol as BlockData) << shift); - let updated_symbols_in_batch = symbols_in_batch + BlockPacketLength:1; - let updated_symbols_in_block = symbols_in_block + BlockPacketLength:1; - (updated_batch, updated_symbols_in_batch, updated_symbols_in_block) - } else { - (batch, symbols_in_batch, symbols_in_block) - }; - trace_fmt!("updated batch: {:#x}", batch); - trace_fmt!("updated symbols_in_batch: {:#x}", symbols_in_batch); - trace_fmt!("updated symbols_in_block: {:#x}", symbols_in_block); - - let block_in_progress = (symbols_in_block != expected_symbols_in_block); - trace_fmt!("updated block_in_progress: {:#x}", block_in_progress); - - // Last should not occur when batch is still being processed - assert!(!(!block_in_progress ^ decoded_data.last), "corrupted_decoding_flow"); - - let batch_full = symbols_in_batch >= BATCH_SIZE; - trace_fmt!("batch_full: {:#x}", batch_full); - // Send decoded RLE packet when - // - batch size reached the maximal size - // - RLE block decoding is finished - // - Decoded RLE block is empty and is the last block in ZSTD frame - let last = decoded_data.last || (sync_data.last_block && empty_block); - let do_send_batch = (batch_full || last); - trace_fmt!("do_send_batch: {:#x}", do_send_batch); - - let decoded_batch_data = ExtendedBlockDataPacket { - // Decoded RLE block is always a literal + let output = Output { msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { - last: last, - last_block: sync_data.last_block, - id: sync_data.id, - data: batch as BlockData, - // length in bits - length: (symbols_in_batch << 3) as BlockPacketLength, + last, + last_block: req.last_block, + id: req.id, + data: checked_cast(data), + length: checked_cast(length << 3), } }; - let data_tok = - send_if(tok, block_data_s, do_send_batch, decoded_batch_data); - if (do_send_batch) { - trace_fmt!("sent decoded_batch_data: {:#x}", decoded_batch_data); - } else { - trace_fmt!("decoded_batch_data: {:#x}", decoded_batch_data); - }; - - let (new_batch, new_symbols_in_batch) = if (do_send_batch) { - (BlockData:0, BlockPacketLength:0) - } else { - (batch, symbols_in_batch) - }; + send_if(tok1, resp_s, last, zero!()); + send(tok1, output_s, output); - let (new_sync_data, new_symbols_in_block) = if (decoded_data.last || (sync_data.last_block && empty_block)) { - (ZERO_BLOCK_SYNC_DATA, BlockPacketLength:0) + if last { + zero!() } else { - (sync_data, symbols_in_block) - }; - - let new_state = BatchPackerState { - batch: new_batch, - symbols_in_batch: new_symbols_in_batch, - symbols_in_block: new_symbols_in_block, - prev_last: decoded_data.last, - prev_sync: new_sync_data - }; - - trace_fmt!("new_state: {:#x}", new_state); - - new_state + let length = req.length - MAX_LEN; + State { + req: Req { length, ..req }, + req_valid: true, + } + } } } -type BatchTestVector = (Symbol, bool); - -#[test_proc] -proc BatchPacker_test { - terminator: chan out; - in_s: chan out; - sync_s: chan out; - out_r: chan in; - - config(terminator: chan out) { - let (in_s, in_r) = chan("in"); - let 
(sync_s, sync_r) = chan("sync"); - let (out_s, out_r) = chan("out"); - spawn BatchPacker(in_r, sync_r, out_s); - - (terminator, in_s, sync_s, out_r) - } +const TEST_DATA_W = u32:64; - init { } +#[test_proc] +proc RleBlockDecoderTest { + type Req = RleBlockDecoderReq; + type Resp = RleBlockDecoderResp; + type Output = ExtendedBlockDataPacket; - next(state: ()) { - let tok = join(); - let SyncData: BlockSyncData[6] = [ - BlockSyncData { last_block: false, count: SymbolCount:1, id: u32:0 }, - BlockSyncData { last_block: false, count: SymbolCount:2, id: u32:1 }, - BlockSyncData { last_block: false, count: SymbolCount:4, id: u32:2 }, - BlockSyncData { last_block: false, count: SymbolCount:8, id: u32:3 }, - BlockSyncData { last_block: false, count: SymbolCount:16, id: u32:4 }, - BlockSyncData { last_block: true, count: SymbolCount:31, id: u32:5 }, - ]; - let tok = for ((counter, sync_data), tok): ((u32, BlockSyncData), token) in enumerate(SyncData) { - let tok = send(tok, sync_s, sync_data); - trace_fmt!("Sent #{} synchronization data, {:#x}", counter + u32:1, sync_data); - (tok) - }(tok); - - let DecodedRleBlocks: BatchTestVector[62] = [ - // 1st block - (Symbol:0x01, bool:true), - // 2nd block - (Symbol:0x02, bool:false), (Symbol:0x02, bool:true), - // 3rd block - (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), - (Symbol:0x03, bool:true), - // 4th block - (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), - (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), (Symbol:0x04, bool:false), - (Symbol:0x04, bool:false), (Symbol:0x04, bool:true), - // 5th block - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:true), - // 6th block - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), (Symbol:0x06, bool:false), - (Symbol:0x06, bool:true), - ]; - let tok = for ((counter, test_data), tok): ((u32, BatchTestVector), token) in enumerate(DecodedRleBlocks) { - let symbol = test_data.0 as Symbol; - let last = test_data.1; - let data_in = RleOutput { symbol, last }; - let tok = send(tok, in_s, data_in); - trace_fmt!("Sent #{} decoded rle symbol, {:#x}", counter + u32:1, data_in); - (tok) - }(tok); - - let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[10] = [ - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x01, 
length: BlockPacketLength:8}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x0202, length: BlockPacketLength:16}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x03030303, length: BlockPacketLength:32}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x0404040404040404, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:true, id: u32:5, data: BlockData:0x06060606060606, length: BlockPacketLength:56}}, - ]; - - let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { - let (tok, dec_output) = recv(tok, out_r); - trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); - assert_eq(dec_output, expected); - (tok) - }(tok); - send(tok, terminator, true); - } -} + type Data = uN[TEST_DATA_W]; + type Length = uN[common::BLOCK_SIZE_WIDTH]; -#[test_proc] -proc BatchPacker_empty_blocks_test { terminator: chan out; - in_s: chan out; - sync_s: chan out; - out_r: chan in; - config(terminator: chan out) { - let (in_s, in_r) = chan("in"); - let (sync_s, sync_r) = chan("sync"); - let (out_s, out_r) = chan("out"); + req_s: chan out; + resp_r: chan in; + output_r: chan in; + + config (terminator: chan out) { + let (req_s, req_r) = chan("req"); + let (resp_s, resp_r) = chan("resp"); + let (output_s, output_r) = chan("output"); - spawn BatchPacker(in_r, sync_r, out_s); + spawn RleBlockDecoder( + req_r, resp_s, output_s + ); - (terminator, in_s, sync_s, out_r) + (terminator, req_s, resp_r, output_r) } - init { } + init { } - next(state: ()) { + next (state: ()) { let tok = join(); - let SyncData: BlockSyncData[8] = [ - BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:0 }, - BlockSyncData { last_block: false, count: SymbolCount:1, id: u32:1 }, - BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:2 }, - BlockSyncData { last_block: false, count: SymbolCount:4, id: u32:3 }, - BlockSyncData { last_block: false, count: SymbolCount:0, 
id: u32:4 }, - BlockSyncData { last_block: false, count: SymbolCount:16, id: u32:5 }, - BlockSyncData { last_block: false, count: SymbolCount:0, id: u32:6 }, - BlockSyncData { last_block: true, count: SymbolCount:0, id: u32:7 }, - ]; - let tok = for ((counter, sync_data), tok): ((u32, BlockSyncData), token) in enumerate(SyncData) { - let tok = send(tok, sync_s, sync_data); - trace_fmt!("Sent #{} synchronization data, {:#x}", counter + u32:1, sync_data); - (tok) - }(tok); - - let DecodedRleBlocks: BatchTestVector[21] = [ - // 0 block - // EMPTY - // 1st block - (Symbol:0x01, bool:true), - // 2nd block - // EMPTY - // 3rd block - (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), (Symbol:0x03, bool:false), - (Symbol:0x03, bool:true), - // 4th block - // EMPTY - // 5th block - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), (Symbol:0x05, bool:false), - (Symbol:0x05, bool:true), - // 6th block - // EMPTY - // 7th block - // EMPTY - ]; - let tok = for ((counter, test_data), tok): ((u32, BatchTestVector), token) in enumerate(DecodedRleBlocks) { - let symbol = test_data.0 as Symbol; - let last = test_data.1; - let data_in = RleOutput { symbol, last }; - let tok = send(tok, in_s, data_in); - trace_fmt!("Sent #{} decoded rle symbol, {:#x}", counter + u32:1, data_in); - (tok) - }(tok); - - let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[9] = [ - // 0 block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 1st block - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x01, length: BlockPacketLength:8}}, - // 2nd block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 3rd block - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x03030303, length: BlockPacketLength:32}}, - // 4th block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 5th block - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - // 6th block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:6, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 7th block - // EMPTY with 
LAST_BLOCK - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:true, id: u32:7, data: BlockData:0x0, length: BlockPacketLength:0}}, - ]; - - let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { - let (tok, dec_output) = recv(tok, out_r); - trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); - assert_eq(dec_output, expected); - (tok) - }(tok); - send(tok, terminator, true); - } -} -pub proc RleBlockDecoder { - input_r: chan in; - output_s: chan out; + let tok = send(tok, req_s, Req { id: u32:5, symbol: u8:0xAB, length: Length:0x28, last_block: true }); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { status: RleBlockDecoderStatus::OKAY }); - config(input_r: chan in, output_s: chan out) { - let (in_s, in_r) = chan("in"); - let (out_s, out_r) = chan("out"); - let (sync_s, sync_r) = chan("sync"); + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: false, + last_block: true, + id: u32:5, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:64 + } + }); - spawn RleDataPacker(input_r, in_s, sync_s); - spawn rle_dec::RunLengthDecoder( - in_r, out_s); - spawn BatchPacker(out_r, sync_r, output_s); + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: false, + last_block: true, + id: u32:5, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:64 + } + }); - (input_r, output_s) - } - init { } + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: false, + last_block: true, + id: u32:5, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:64 + } + }); - next(state: ()) { } -} -#[test_proc] -proc RleBlockDecoder_test { - terminator: chan out; - in_s: chan out; - out_r: chan in; + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: false, + last_block: true, + id: u32:5, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:64 + } + }); - config(terminator: chan out) { - let (in_s, in_r) = chan("in"); - let (out_s, out_r) = chan("out"); - spawn RleBlockDecoder(in_r, out_s); + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: true, + last_block: true, + id: u32:5, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:64 + } + }); - (terminator, in_s, out_r) - } + let tok = send(tok, req_s, Req { id: u32:1, symbol: u8:0xAB, length: Length:0, last_block: true }); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { status: RleBlockDecoderStatus::OKAY }); - init { } + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: true, + last_block: true, + id: u32:1, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:0 + } + }); + + let tok = send(tok, req_s, Req { id: u32:10, symbol: u8:0xAB, length: Length:0, last_block: false }); + let (tok, resp) = recv(tok, resp_r); + assert_eq(resp, Resp { 
status: RleBlockDecoderStatus::OKAY }); + + let (tok, output) = recv(tok, output_r); + assert_eq(output, Output { + msg_type: SequenceExecutorMessageType::LITERAL, + packet: BlockDataPacket { + last: true, + last_block: false, + id: u32:10, + data: BlockData:0xABAB_ABAB_ABAB_ABAB, + length: BlockPacketLength:0 + } + }); - next(state: ()) { - let tok = join(); - let EncodedRleBlocks: RleTestVector[6] = [ - (Symbol:0x1, SymbolCount:0x1), - (Symbol:0x2, SymbolCount:0x2), - (Symbol:0x3, SymbolCount:0x4), - (Symbol:0x4, SymbolCount:0x8), - (Symbol:0x5, SymbolCount:0x10), - (Symbol:0x6, SymbolCount:0x1F), - ]; - let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { - let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); - let data_in = BlockDataPacket { - last: true, // RLE block fits into single packet, each will be last for given block - last_block, - id: counter, - data: block.0 as BlockData, - length: block.1 as BlockPacketLength - }; - let tok = send(tok, in_s, data_in); - trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); - (tok) - }(tok); - - let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[10] = [ - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x01, length: BlockPacketLength:8}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x0202, length: BlockPacketLength:16}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x03030303, length: BlockPacketLength:32}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x0404040404040404, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:false, last_block: bool:true, id: u32:5, data: BlockData:0x0606060606060606, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket { last: bool:true, last_block: bool:true, id: u32:5, data: BlockData:0x06060606060606, length: BlockPacketLength:56}}, - ]; - - let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { - let (tok, dec_output) = recv(tok, 
out_r); - trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); - assert_eq(dec_output, expected); - (tok) - }(tok); send(tok, terminator, true); } } -#[test_proc] -proc RleBlockDecoder_empty_blocks_test { - terminator: chan out; - in_s: chan out; - out_r: chan in; - config(terminator: chan out) { - let (in_s, in_r) = chan("in"); - let (out_s, out_r) = chan("out"); +const INST_DATA_W = u32:64; - spawn RleBlockDecoder(in_r, out_s); +proc RleBlockDecoderInst { + type Req = RleBlockDecoderReq; + type Resp = RleBlockDecoderResp; + type Output = ExtendedBlockDataPacket; - (terminator, in_s, out_r) + type Data = uN[INST_DATA_W]; + type Length = uN[common::BLOCK_SIZE_WIDTH]; + + config( + req_r: chan in, + resp_s: chan out, + output_s: chan out, + ) { + spawn RleBlockDecoder(req_r, resp_s, output_s); } - init { } - next(state: ()) { - let tok = join(); - let EncodedRleBlocks: RleTestVector[8] = [ - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0x1, SymbolCount:0x1), - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0x3, SymbolCount:0x4), - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0x5, SymbolCount:0x10), - (Symbol:0xFF, SymbolCount:0x0), - (Symbol:0xFF, SymbolCount:0x0), - ]; - let tok = for ((counter, block), tok): ((u32, RleTestVector), token) in enumerate(EncodedRleBlocks) { - let last_block = (counter == (array_size(EncodedRleBlocks) - u32:1)); - let data_in = BlockDataPacket { - last: true, // RLE block fits into single packet, each will be last for given block - last_block, - id: counter, - data: block.0 as BlockData, - length: block.1 as BlockPacketLength - }; - let tok = send(tok, in_s, data_in); - trace_fmt!("Sent #{} raw encoded block, {:#x}", counter + u32:1, data_in); - (tok) - }(tok); - - let BatchedDecodedRleSymbols: ExtendedBlockDataPacket[9] = [ - // 0 block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:0, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 1st block - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:1, data: BlockData:0x01, length: BlockPacketLength:8}}, - // 2nd block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:2, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 3rd block - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:3, data: BlockData:0x03030303, length: BlockPacketLength:32}}, - // 4th block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:4, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 5th block - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:false, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:false, id: u32:5, data: BlockData:0x0505050505050505, length: BlockPacketLength:64}}, - // 6th block - // EMPTY - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, 
last_block: bool:false, id: u32:6, data: BlockData:0x0, length: BlockPacketLength:0}}, - // 7th block - // EMPTY with LAST_BLOCK - ExtendedBlockDataPacket {msg_type: SequenceExecutorMessageType::LITERAL, packet: BlockDataPacket {last: bool:true, last_block: bool:true, id: u32:7, data: BlockData:0x0, length: BlockPacketLength:0}}, - ]; - - let tok = for ((counter, expected), tok): ((u32, ExtendedBlockDataPacket), token) in enumerate(BatchedDecodedRleSymbols) { - let (tok, dec_output) = recv(tok, out_r); - trace_fmt!("Received #{} batched decoded rle symbols, {:#x}", counter + u32:1, dec_output); - assert_eq(dec_output, expected); - (tok) - }(tok); - send(tok, terminator, true); - } + init { } + + next (state: ()) {} } diff --git a/xls/modules/zstd/sequence_executor.x b/xls/modules/zstd/sequence_executor.x index a1fea91d50..6fb707141f 100644 --- a/xls/modules/zstd/sequence_executor.x +++ b/xls/modules/zstd/sequence_executor.x @@ -14,6 +14,7 @@ import std; import xls.modules.zstd.common as common; +import xls.modules.zstd.memory.mem_writer as mem_writer; import xls.modules.zstd.ram_printer as ram_printer; import xls.examples.ram; @@ -53,6 +54,8 @@ fn ram_addr_width(hb_size_kb: u32) -> u32 { std::clog2(ram_size(hb_size_kb)) } // RAM related constants common for tests const TEST_HISTORY_BUFFER_SIZE_KB = u32:1; +const TEST_DATA_W = u32:64; +const TEST_ADDR_W = u32:16; const TEST_RAM_SIZE = ram_size(TEST_HISTORY_BUFFER_SIZE_KB); const TEST_RAM_ADDR_WIDTH = ram_addr_width(TEST_HISTORY_BUFFER_SIZE_KB); pub const TEST_RAM_INITIALIZED = true; @@ -114,6 +117,36 @@ fn test_decode_literal_packet() { }) } +fn convert_output_packet(packet: ZstdDecodedPacket) -> mem_writer::MemWriterDataPacket { + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + MemWriterDataPacket { + data: packet.data as uN[DATA_W], + length: std::div_pow2(packet.length, u32:8) as uN[ADDR_W], + last: packet.last + } +} + +#[test] +fn test_convert_output_packet() { + const DATA_W = u32:64; + const ADDR_W = u32:16; + + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + + let packet = ZstdDecodedPacket { + data: CopyOrMatchContent:0xAA00BB11CC22DD33, + length: BlockPacketLength:64, + last: false + }; + let expected = MemWriterDataPacket { + data: uN[DATA_W]:0xAA00BB11CC22DD33, + length: uN[ADDR_W]:8, + last: false + }; + + assert_eq(convert_output_packet(packet), expected) +} + fn round_up_to_pow2(x: uN[N]) -> uN[N] { let base = x[Y_CLOG2 as s32:]; let reminder = x[0:Y_CLOG2 as s32] != bits[Y_CLOG2]:0; @@ -808,14 +841,17 @@ fn handle_reapeated_offset_for_sequences } pub proc SequenceExecutor { + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + input_r: chan in; - output_s: chan out; + output_mem_wr_data_in_s: chan out; ram_comp_input_s: chan> out; ram_comp_output_r: chan> in; ram_resp_input_s: chan out; @@ -839,7 +875,7 @@ pub proc SequenceExecutor in, - output_s: chan out, + output_mem_wr_data_in_s: chan out, ram_resp_output_r: chan in, ram_resp_output_s: chan out, rd_req_m0_s: chan> out, @@ -890,7 +926,7 @@ pub proc SequenceExecutor(decode_literal_packet(packet)); + if do_write_output { trace_fmt!("Sending output MemWriter data: {:#x}", output_mem_wr_data_in); } else { }; + let tok2_10_1 = send_if(tok1, output_mem_wr_data_in_s, do_write_output, output_mem_wr_data_in); // Ask for response let tok2_11 = send_if(tok1, rd_req_m0_s, (read_reqs[0]).mask != RAM_REQ_MASK_NONE, read_reqs[0]); @@ -1140,16 +1176,19 @@ pub proc SequenceExecutor; init { } config( input_r: chan in, - output_s: chan out, + 
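// Decoded data now leaves the decoder as MemWriter data packets aimed at the memory-writer interface.
+        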
output_mem_wr_data_in_s: chan out, looped_channel_r: chan in, looped_channel_s: chan out, rd_req_m0_s: chan> out, @@ -1185,8 +1224,9 @@ pub proc SequenceExecutorZstd { wr_resp_m6_r: chan in, wr_resp_m7_r: chan in ) { - spawn SequenceExecutor ( - input_r, output_s, + spawn SequenceExecutor ( + input_r, output_mem_wr_data_in_s, looped_channel_r, looped_channel_s, rd_req_m0_s, rd_req_m1_s, rd_req_m2_s, rd_req_m3_s, rd_req_m4_s, rd_req_m5_s, rd_req_m6_s, rd_req_m7_s, @@ -1298,10 +1338,11 @@ const LITERAL_TEST_MEMORY_CONTENT:(TestRamAddr, RamData)[3][RAM_NUM] = [ #[test_proc] proc SequenceExecutorLiteralsTest { + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; terminator: chan out; input_s: chan> out; - output_r: chan in; + output_mem_wr_data_in_r: chan in; print_start_s: chan<()> out; print_finish_r: chan<()> in; @@ -1313,7 +1354,7 @@ proc SequenceExecutorLiteralsTest { config(terminator: chan out) { let (input_s, input_r) = chan>("input"); - let (output_s, output_r) = chan("output"); + let (output_mem_wr_data_in_s, output_mem_wr_data_in_r) = chan("output_mem_wr_data_in"); let (looped_channel_s, looped_channel_r) = chan("looped_channels"); @@ -1328,11 +1369,12 @@ proc SequenceExecutorLiteralsTest { let INIT_HB_PTR_ADDR = u32:127; spawn SequenceExecutor< TEST_HISTORY_BUFFER_SIZE_KB, + TEST_DATA_W, TEST_ADDR_W, TEST_RAM_SIZE, TEST_RAM_ADDR_WIDTH, INIT_HB_PTR_ADDR, > ( - input_r, output_s, + input_r, output_mem_wr_data_in_s, looped_channel_r, looped_channel_s, ram_rd_req_s[0], ram_rd_req_s[1], ram_rd_req_s[2], ram_rd_req_s[3], ram_rd_req_s[4], ram_rd_req_s[5], ram_rd_req_s[6], ram_rd_req_s[7], @@ -1384,7 +1426,7 @@ proc SequenceExecutorLiteralsTest { ( terminator, - input_s, output_r, + input_s, output_mem_wr_data_in_r, print_start_s, print_finish_r, ram_rd_req_s, ram_rd_resp_r, ram_wr_req_s, ram_wr_resp_r @@ -1401,9 +1443,10 @@ proc SequenceExecutorLiteralsTest { if (LITERAL_TEST_INPUT_DATA[i].msg_type != SequenceExecutorMessageType::LITERAL || LITERAL_TEST_INPUT_DATA[i].length != CopyOrMatchLength:0 || LITERAL_TEST_INPUT_DATA[i].last) { - let (tok, recv_data) = recv(tok, output_r); let expected = decode_literal_packet(LITERAL_TEST_INPUT_DATA[i]); - assert_eq(expected, recv_data); + let expected_mem_writer_data = convert_output_packet(expected); + let (tok, recv_mem_writer_data) = recv(tok, output_mem_wr_data_in_r); + assert_eq(expected_mem_writer_data, recv_mem_writer_data); } else {} }(()); @@ -1494,70 +1537,72 @@ const SEQUENCE_TEST_INPUT_SEQUENCES = SequenceExecutorPacket[11]: [ }, ]; -const SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS:ZstdDecodedPacket[11] = [ - ZstdDecodedPacket { - data: BlockData:0x8C_7E_B8_B9_7C_A3_9D_AF, - length: BlockPacketLength:64, +type TestMemWriterDataPacket = mem_writer::MemWriterDataPacket; +const SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS:TestMemWriterDataPacket[11] = [ + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0x8C_7E_B8_B9_7C_A3_9D_AF, + length: uN[TEST_ADDR_W]:8, last: false }, - ZstdDecodedPacket { - data: BlockData:0x7D, - length: BlockPacketLength:8, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0x7D, + length: uN[TEST_ADDR_W]:1, last: false }, - ZstdDecodedPacket { - data: BlockData:0xB8, - length: BlockPacketLength:8, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0xB8, + length: uN[TEST_ADDR_W]:1, last: false }, - ZstdDecodedPacket { - data: BlockData:0xB8, - length: BlockPacketLength:8, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0xB8, + length: uN[TEST_ADDR_W]:1, last: false }, - ZstdDecodedPacket { - data: 
BlockData:0xB8_B9_7C_A3_9D, - length: BlockPacketLength:40, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0xB8_B9_7C_A3_9D, + length: uN[TEST_ADDR_W]:5, last: false }, - ZstdDecodedPacket { - data: BlockData:0xB9_7C_A3, - length: BlockPacketLength:24, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0xB9_7C_A3, + length: uN[TEST_ADDR_W]:3, last: false }, - ZstdDecodedPacket { - data: BlockData:0xB8, - length: BlockPacketLength:8, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0xB8, + length: uN[TEST_ADDR_W]:1, last: false }, - ZstdDecodedPacket { - data: BlockData:0x7C, - length: BlockPacketLength:8, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0x7C, + length: uN[TEST_ADDR_W]:1, last: false }, - ZstdDecodedPacket { - data: BlockData:0xB9_7C_A3_B8_B9_7C_A3_9D, - length: BlockPacketLength:64, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0xB9_7C_A3_B8_B9_7C_A3_9D, + length: uN[TEST_ADDR_W]:8, last: false }, - ZstdDecodedPacket { - data: BlockData:0x7C_B8, - length: BlockPacketLength:16, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0x7C_B8, + length: uN[TEST_ADDR_W]:2, last: true }, - ZstdDecodedPacket { - data: BlockData:0x9D, - length: BlockPacketLength:8, + TestMemWriterDataPacket { + data: uN[TEST_DATA_W]:0x9D, + length: uN[TEST_ADDR_W]:1, last: false } ]; #[test_proc] proc SequenceExecutorSequenceTest { + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; terminator: chan out; input_s: chan out; - output_r: chan in; + output_mem_wr_data_in_r: chan in; print_start_s: chan<()> out; print_finish_r: chan<()> in; @@ -1569,7 +1614,7 @@ proc SequenceExecutorSequenceTest { config(terminator: chan out) { let (input_s, input_r) = chan("input"); - let (output_s, output_r) = chan("output"); + let (output_mem_wr_data_in_s, output_mem_wr_data_in_r) = chan("output_mem_wr_data_in"); let (looped_channel_s, looped_channel_r) = chan("looped_channel"); @@ -1584,11 +1629,12 @@ proc SequenceExecutorSequenceTest { let INIT_HB_PTR_ADDR = u32:127; spawn SequenceExecutor< TEST_HISTORY_BUFFER_SIZE_KB, + TEST_DATA_W, TEST_ADDR_W, TEST_RAM_SIZE, TEST_RAM_ADDR_WIDTH, INIT_HB_PTR_ADDR, > ( - input_r, output_s, + input_r, output_mem_wr_data_in_s, looped_channel_r, looped_channel_s, ram_rd_req_s[0], ram_rd_req_s[1], ram_rd_req_s[2], ram_rd_req_s[3], ram_rd_req_s[4], ram_rd_req_s[5], ram_rd_req_s[6], ram_rd_req_s[7], @@ -1640,7 +1686,7 @@ proc SequenceExecutorSequenceTest { ( terminator, - input_s, output_r, + input_s, output_mem_wr_data_in_r, print_start_s, print_finish_r, ram_rd_req_s, ram_rd_resp_r, ram_wr_req_s, ram_wr_resp_r ) @@ -1656,9 +1702,10 @@ proc SequenceExecutorSequenceTest { if (LITERAL_TEST_INPUT_DATA[i].msg_type != SequenceExecutorMessageType::LITERAL || LITERAL_TEST_INPUT_DATA[i].length != CopyOrMatchLength:0 || LITERAL_TEST_INPUT_DATA[i].last) { - let (tok, recv_data) = recv(tok, output_r); let expected = decode_literal_packet(LITERAL_TEST_INPUT_DATA[i]); - assert_eq(expected, recv_data); + let expected_mem_writer_data = convert_output_packet(expected); + let (tok, recv_mem_writer_data) = recv(tok, output_mem_wr_data_in_r); + assert_eq(expected_mem_writer_data, recv_mem_writer_data); } else {} }(()); @@ -1667,45 +1714,45 @@ proc SequenceExecutorSequenceTest { let (tok, _) = recv(tok, print_finish_r); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[0]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[0], recv_data); - let (tok, recv_data) = 
recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[1], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[1]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[2], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[2]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[3], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[3]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[4], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[4]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[5], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[5]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[6], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[6]); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[7]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[7], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[8]); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[9]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[8], recv_data); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[9], recv_data); let tok = send(tok, input_s, SEQUENCE_TEST_INPUT_SEQUENCES[10]); - let (tok, recv_data) = recv(tok, output_r); + let (tok, recv_data) = recv(tok, output_mem_wr_data_in_r); assert_eq(SEQUENCE_TEST_EXPECTED_SEQUENCE_RESULTS[10], recv_data); // Print RAM content diff --git a/xls/modules/zstd/xls_fifo_wrapper.v b/xls/modules/zstd/xls_fifo_wrapper.v new file mode 100644 index 0000000000..1336042b29 --- /dev/null +++ b/xls/modules/zstd/xls_fifo_wrapper.v @@ -0,0 +1,53 @@ +// simple fifo implementation +module xls_fifo_wrapper ( +clk, rst, +push_ready, push_data, push_valid, +pop_ready, pop_data, pop_valid); + parameter Width = 32, + Depth = 32, + EnableBypass = 0, + RegisterPushOutputs = 1, + RegisterPopOutputs = 1; + localparam AddrWidth = $clog2(Depth) + 1; + input wire clk; + input wire rst; + output wire push_ready; + input wire [Width-1:0] push_data; + input wire push_valid; + input wire pop_ready; + output wire [Width-1:0] pop_data; + output wire pop_valid; + + // Require depth be 1 and bypass disabled. + //initial begin + // if (EnableBypass || Depth != 1 || !RegisterPushOutputs || RegisterPopOutputs) begin + // // FIFO configuration not supported. 
+ // $fatal(1); + // end + //end + + + reg [Width-1:0] mem; + reg full; + + assign push_ready = !full; + assign pop_valid = full; + assign pop_data = mem; + + always @(posedge clk) begin + if (rst == 1'b1) begin + full <= 1'b0; + end else begin + if (push_valid && push_ready) begin + mem <= push_data; + full <= 1'b1; + end else if (pop_valid && pop_ready) begin + mem <= mem; + full <= 1'b0; + end else begin + mem <= mem; + full <= full; + end + end + end +endmodule diff --git a/xls/modules/zstd/zstd_dec.x b/xls/modules/zstd/zstd_dec.x index 0f9fac906e..7bbd356cf4 100644 --- a/xls/modules/zstd/zstd_dec.x +++ b/xls/modules/zstd/zstd_dec.x @@ -17,482 +17,1316 @@ // https://datatracker.ietf.org/doc/html/rfc8878 import std; +import xls.examples.ram; +import xls.modules.zstd.axi_csr_accessor; +import xls.modules.zstd.common; +import xls.modules.zstd.memory.axi; +import xls.modules.zstd.csr_config; +import xls.modules.zstd.memory.mem_reader; +import xls.modules.zstd.memory.mem_writer; +import xls.modules.zstd.frame_header_dec; import xls.modules.zstd.block_header; -import xls.modules.zstd.block_dec; +import xls.modules.zstd.block_header_dec; +import xls.modules.zstd.raw_block_dec; +import xls.modules.zstd.rle_block_dec; +import xls.modules.zstd.dec_mux; import xls.modules.zstd.sequence_executor; -import xls.modules.zstd.buffer as buff; -import xls.modules.zstd.common; -import xls.modules.zstd.frame_header; -import xls.modules.zstd.frame_header_test; -import xls.modules.zstd.magic; -import xls.modules.zstd.repacketizer; -import xls.examples.ram; -type Buffer = buff::Buffer; -type BlockDataPacket = common::BlockDataPacket; -type BlockData = common::BlockData; type BlockSize = common::BlockSize; -type SequenceExecutorPacket = common::SequenceExecutorPacket; -type ZstdDecodedPacket = common::ZstdDecodedPacket; - -// TODO: all of this porboably should be in common.x -const TEST_WINDOW_LOG_MAX_LIBZSTD = frame_header_test::TEST_WINDOW_LOG_MAX_LIBZSTD; - -const ZSTD_RAM_ADDR_WIDTH = sequence_executor::ZSTD_RAM_ADDR_WIDTH; -const RAM_DATA_WIDTH = sequence_executor::RAM_DATA_WIDTH; -const RAM_NUM_PARTITIONS = sequence_executor::RAM_NUM_PARTITIONS; -const ZSTD_HISTORY_BUFFER_SIZE_KB = sequence_executor::ZSTD_HISTORY_BUFFER_SIZE_KB; - -const BUFFER_WIDTH = common::BUFFER_WIDTH; -const DATA_WIDTH = common::DATA_WIDTH; -const ZERO_FRAME_HEADER = frame_header::ZERO_FRAME_HEADER; -const ZERO_BLOCK_HEADER = block_header::ZERO_BLOCK_HEADER; - -enum ZstdDecoderStatus : u8 { - DECODE_MAGIC_NUMBER = 0, - DECODE_FRAME_HEADER = 1, - DECODE_BLOCK_HEADER = 2, - FEED_BLOCK_DECODER = 3, - DECODE_CHECKSUM = 4, - ERROR = 255, +type BlockType = common::BlockType; +type BlockHeader = block_header::BlockHeader; + +enum ZstdDecoderInternalFsm: u4 { + IDLE = 0, + READ_CONFIG = 1, + DECODE_FRAME_HEADER = 2, + DECODE_BLOCK_HEADER = 3, + DECODE_RAW_BLOCK = 4, + DECODE_RLE_BLOCK = 5, + DECODE_COMPRESSED_BLOCK = 6, + DECODE_CHECKSUM = 7, + WRITE_OUTPUT = 8, + FINISH = 9, + ERROR = 13, + INVALID = 15, } -struct ZstdDecoderState { - status: ZstdDecoderStatus, - buffer: Buffer, - frame_header: frame_header::FrameHeader, - block_size_bytes: BlockSize, - last: bool, - bytes_sent: BlockSize, +enum ZstdDecoderStatus: u5 { + IDLE = 0, + RUNNING = 1, + READ_CONFIG_OK = 2, + FRAME_HEADER_OK = 3, + FRAME_HEADER_CORRUPTED = 4, + FRAME_HEADER_UNSUPPORTED_WINDOW_SIZE = 5, + BLOCK_HEADER_OK = 6, + BLOCK_HEADER_CORRUPTED = 7, + BLOCK_HEADER_MEMORY_ACCESS_ERROR = 8, + RAW_BLOCK_OK = 9, + RAW_BLOCK_ERROR = 10, + RLE_BLOCK_OK = 11, } -const 
ZERO_DECODER_STATE = zero!(); - -fn decode_magic_number(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { - trace_fmt!("zstd_dec: decode_magic_number: DECODING NEW FRAME"); - trace_fmt!("zstd_dec: decode_magic_number: state: {:#x}", state); - trace_fmt!("zstd_dec: decode_magic_number: Decoding magic number"); - let magic_result = magic::parse_magic_number(state.buffer); - trace_fmt!("zstd_dec: decode_magic_number: magic_result: {:#x}", magic_result); - let new_state = match magic_result.status { - magic::MagicStatus::OK => ZstdDecoderState { - status: ZstdDecoderStatus::DECODE_FRAME_HEADER, - buffer: magic_result.buffer, - ..state - }, - magic::MagicStatus::CORRUPTED => ZstdDecoderState { - status: ZstdDecoderStatus::ERROR, - ..ZERO_DECODER_STATE - }, - magic::MagicStatus::NO_ENOUGH_DATA => state, - _ => state, - }; - trace_fmt!("zstd_dec: decode_magic_number: new_state: {:#x}", new_state); - - (false, zero!(), new_state) +pub enum Csr: u3 { + STATUS = 0, // Keeps the code describing the current state of the ZSTD Decoder + START = 1, // Writing 1 when decoder is in IDLE state starts the decoding process + RESET = 2, // Writing 1 will reset the decoder to the IDLE state + INPUT_BUFFER = 3, // Keeps the base address for the input buffer that is used for storing the frame to decode + OUTPUT_BUFFER = 4, // Keeps the base address for the output buffer, ZSTD Decoder will write the decoded frame into memory starting from this address. + WHO_AM_I = 5, // Contains the identification number of the ZSTD Decoder } -fn decode_frame_header(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { - trace_fmt!("zstd_dec: decode_frame_header: DECODING FRAME HEADER"); - trace_fmt!("zstd_dec: decode_frame_header: state: {:#x}", state); - let frame_header_result = frame_header::parse_frame_header(state.buffer); - trace_fmt!("zstd_dec: decode_frame_header: frame_header_result: {:#x}", frame_header_result); - let new_state = match frame_header_result.status { - frame_header::FrameHeaderStatus::OK => ZstdDecoderState { - status: ZstdDecoderStatus::DECODE_BLOCK_HEADER, - buffer: frame_header_result.buffer, - frame_header: frame_header_result.header, - ..state - }, - frame_header::FrameHeaderStatus::CORRUPTED => ZstdDecoderState { - status: ZstdDecoderStatus::ERROR, - ..ZERO_DECODER_STATE - }, - frame_header::FrameHeaderStatus::NO_ENOUGH_DATA => state, - frame_header::FrameHeaderStatus::UNSUPPORTED_WINDOW_SIZE => ZstdDecoderState { - status: ZstdDecoderStatus::ERROR, - ..ZERO_DECODER_STATE - }, - _ => state, - }; - trace_fmt!("zstd_dec: decode_frame_header: new_state: {:#x}", new_state); - - (false, zero!(), new_state) +fn csr(c: Csr) -> uN[LOG2_REGS_N] { + c as uN[LOG2_REGS_N] } -fn decode_block_header(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { - trace_fmt!("zstd_dec: decode_block_header: DECODING BLOCK HEADER"); - trace_fmt!("zstd_dec: decode_block_header: state: {:#x}", state); - let block_header_result = block_header::parse_block_header(state.buffer); - trace_fmt!("zstd_dec: decode_block_header: block_header_result: {:#x}", block_header_result); - let new_state = match block_header_result.status { - block_header::BlockHeaderStatus::OK => { - trace_fmt!("zstd_dec: BlockHeader: {:#x}", block_header_result.header); - match block_header_result.header.btype { - common::BlockType::RAW => ZstdDecoderState { - status: ZstdDecoderStatus::FEED_BLOCK_DECODER, - buffer: state.buffer, - block_size_bytes: block_header_result.header.size as BlockSize + 
BlockSize:3, - last: block_header_result.header.last, - bytes_sent: BlockSize:0, - ..state - }, - common::BlockType::RLE => ZstdDecoderState { - status: ZstdDecoderStatus::FEED_BLOCK_DECODER, - buffer: state.buffer, - block_size_bytes: BlockSize:4, - last: block_header_result.header.last, - bytes_sent: BlockSize:0, - ..state - }, - common::BlockType::COMPRESSED => ZstdDecoderState { - status: ZstdDecoderStatus::FEED_BLOCK_DECODER, - buffer: state.buffer, - block_size_bytes: block_header_result.header.size as BlockSize + BlockSize:3, - last: block_header_result.header.last, - bytes_sent: BlockSize:0, - ..state - }, - _ => { - fail!("impossible_case", state) - } - } - }, - block_header::BlockHeaderStatus::CORRUPTED => ZstdDecoderState { - status: ZstdDecoderStatus::ERROR, - ..ZERO_DECODER_STATE - }, - block_header::BlockHeaderStatus::NO_ENOUGH_DATA => state, - _ => state, - }; - trace_fmt!("zstd_dec: decode_block_header: new_state: {:#x}", new_state); - - (false, zero!(), new_state) +struct ZstdDecoderInternalState { + fsm: ZstdDecoderInternalFsm, + + // Reading CSRs + conf_cnt: uN[LOG2_REGS_N], + conf_send: bool, + input_buffer: uN[AXI_ADDR_W], + input_buffer_valid: bool, + output_buffer: uN[AXI_ADDR_W], + output_buffer_valid: bool, + + // Writing to CSRs + csr_wr_req: csr_config::CsrWrReq, + csr_wr_req_valid: bool, + + // BH address + bh_addr: uN[AXI_ADDR_W], + + // Block + block_addr: uN[AXI_ADDR_W], + block_length: uN[AXI_ADDR_W], + block_last: bool, + block_id: u32, + block_rle_symbol: u8, + + // Req + req_sent: bool, } -fn feed_block_decoder(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { - trace_fmt!("zstd_dec: feed_block_decoder: FEEDING BLOCK DECODER"); - trace_fmt!("zstd_dec: feed_block_decoder: state: {:#x}", state); - let remaining_bytes_to_send = state.block_size_bytes - state.bytes_sent; - trace_fmt!("zstd_dec: feed_block_decoder: remaining_bytes_to_send: {}", remaining_bytes_to_send); - let buffer_length_bytes = state.buffer.length >> 3; - trace_fmt!("zstd_dec: feed_block_decoder: buffer_length_bytes: {}", buffer_length_bytes); - let data_width_bytes = (DATA_WIDTH >> 3) as BlockSize; - trace_fmt!("zstd_dec: feed_block_decoder: data_width_bytes: {}", data_width_bytes); - let remaining_bytes_to_send_now = std::min(remaining_bytes_to_send, data_width_bytes); - trace_fmt!("zstd_dec: feed_block_decoder: remaining_bytes_to_send_now: {}", remaining_bytes_to_send_now); - if (buffer_length_bytes >= remaining_bytes_to_send_now as u32) { - let remaining_bits_to_send_now = (remaining_bytes_to_send_now as u32) << 3; - trace_fmt!("zstd_dec: feed_block_decoder: remaining_bits_to_send_now: {}", remaining_bits_to_send_now); - let last_packet = (remaining_bytes_to_send == remaining_bytes_to_send_now); - trace_fmt!("zstd_dec: feed_block_decoder: last_packet: {}", last_packet); - let (buffer_result, data_to_send) = buff::buffer_pop_checked(state.buffer, remaining_bits_to_send_now); - match buffer_result.status { - buff::BufferStatus::OK => { - let decoder_channel_data = BlockDataPacket { - last: last_packet, - last_block: state.last, - id: u32:0, - data: data_to_send[0: DATA_WIDTH as s32], - length: remaining_bits_to_send_now, +proc ZstdDecoderInternal< + AXI_DATA_W: u32, AXI_ADDR_W: u32, REGS_N: u32, + LOG2_REGS_N:u32 = {std::clog2(REGS_N)}, + HB_RAM_N:u32 = {u32:8}, +> { + + type State = ZstdDecoderInternalState; + type Fsm = ZstdDecoderInternalFsm; + type Reg = uN[LOG2_REGS_N]; + type Data = uN[AXI_DATA_W]; + type Addr = uN[AXI_ADDR_W]; + + type CsrRdReq = 
csr_config::CsrRdReq; + type CsrRdResp = csr_config::CsrRdResp; + type CsrWrReq = csr_config::CsrWrReq; + type CsrWrResp = csr_config::CsrWrResp; + type CsrChange = csr_config::CsrChange; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; + + type FrameHeaderDecoderStatus = frame_header_dec::FrameHeaderDecoderStatus; + type FrameHeaderDecoderReq = frame_header_dec::FrameHeaderDecoderReq; + type FrameHeaderDecoderResp = frame_header_dec::FrameHeaderDecoderResp; + + type BlockHeaderDecoderStatus = block_header_dec::BlockHeaderDecoderStatus; + type BlockHeaderDecoderReq = block_header_dec::BlockHeaderDecoderReq; + type BlockHeaderDecoderResp = block_header_dec::BlockHeaderDecoderResp; + + type RawBlockDecoderStatus = raw_block_dec::RawBlockDecoderStatus; + type RawBlockDecoderReq = raw_block_dec::RawBlockDecoderReq; + type RawBlockDecoderResp = raw_block_dec::RawBlockDecoderResp; + + type RleBlockDecoderStatus = rle_block_dec::RleBlockDecoderStatus; + type RleBlockDecoderReq = rle_block_dec::RleBlockDecoderReq; + type RleBlockDecoderResp = rle_block_dec::RleBlockDecoderResp; + + // CsrConfig + csr_rd_req_s: chan out; + csr_rd_resp_r: chan in; + csr_wr_req_s: chan out; + csr_wr_resp_r: chan in; + csr_change_r: chan in; + + // MemReader + FameHeaderDecoder + fh_req_s: chan out; + fh_resp_r: chan in; + + // MemReader + BlockHeaderDecoder + bh_req_s: chan out; + bh_resp_r: chan in; + + // MemReader + RawBlockDecoder + raw_req_s: chan out; + raw_resp_r: chan in; + + // MemReader + RleBlockDecoder + rle_req_s: chan out; + rle_resp_r: chan in; + + // Output MemWriter + output_mem_wr_req_s: chan out; + output_mem_wr_resp_r: chan in; + + notify_s: chan<()> out; + reset_s: chan<()> out; + + init { + zero!() + } + + config( + csr_rd_req_s: chan out, + csr_rd_resp_r: chan in, + csr_wr_req_s: chan out, + csr_wr_resp_r: chan in, + csr_change_r: chan in, + + // MemReader + FameHeaderDecoder + fh_req_s: chan out, + fh_resp_r: chan in, + + // MemReader + BlockHeaderDecoder + bh_req_s: chan out, + bh_resp_r: chan in, + + // MemReader + RawBlockDecoder + raw_req_s: chan out, + raw_resp_r: chan in, + + // MemReader + RleBlockDecoder + rle_req_s: chan out, + rle_resp_r: chan in, + + // Output MemWriter + output_mem_wr_req_s: chan out, + output_mem_wr_resp_r: chan in, + + notify_s: chan<()> out, + reset_s: chan<()> out, + ) { + ( + csr_rd_req_s, csr_rd_resp_r, csr_wr_req_s, csr_wr_resp_r, csr_change_r, + fh_req_s, fh_resp_r, + bh_req_s, bh_resp_r, + raw_req_s, raw_resp_r, + rle_req_s, rle_resp_r, + output_mem_wr_req_s, output_mem_wr_resp_r, + notify_s, reset_s, + ) + } + + next (state: State) { + let tok0 = join(); + + const CSR_REQS = CsrRdReq[2]:[ + CsrRdReq {csr: csr(Csr::INPUT_BUFFER)}, + CsrRdReq {csr: csr(Csr::OUTPUT_BUFFER)} + ]; + + const CSR_REQS_MAX = checked_cast(array_size(CSR_REQS) - u32:1); + + let (tok1_0, csr_change, csr_change_valid) = recv_non_blocking(tok0, csr_change_r, zero!()); + let is_start = (csr_change_valid && (csr_change.csr == csr(Csr::START))); + + let is_reset = (csr_change_valid && (csr_change.csr == csr(Csr::RESET))); + let tok = send_if(tok0, reset_s, is_reset, ()); + if is_reset { + trace_fmt!("[[RESET]]"); + } else {}; + + if csr_change_valid { + trace_fmt!("[CSR CHANGE] {:#x}", csr_change); + } else {}; + + let do_send_csr_req = (state.fsm == Fsm::READ_CONFIG) && (!state.conf_send); + let csr_req = CSR_REQS[state.conf_cnt]; + let 
tok1_1 = send_if(tok0, csr_rd_req_s, do_send_csr_req, csr_req); + if do_send_csr_req { + trace_fmt!("[READ_CONFIG] Sending read request {:#x}", csr_req); + } else {}; + + let do_recv_csr_resp = (state.fsm == Fsm::READ_CONFIG); + let (tok1_2, csr_data, csr_data_valid) = recv_if_non_blocking(tok0, csr_rd_resp_r, do_recv_csr_resp, zero!()); + if csr_data_valid { + trace_fmt!("[READ_CONFIG] Received CSR data: {:#x}", csr_data); + } else {}; + + let do_send_fh_req = (state.fsm == Fsm::DECODE_FRAME_HEADER) && !state.req_sent; + let fh_req = FrameHeaderDecoderReq { addr: state.input_buffer }; + let tok1_3 = send_if(tok0, fh_req_s, do_send_fh_req, fh_req); + if do_send_fh_req { + trace_fmt!("[DECODE_FRAME_HEADER] Sending FH request {:#x}", fh_req); + } else {}; + + let do_recv_fh_resp = (state.fsm == Fsm::DECODE_FRAME_HEADER); + let (tok1_4, fh_resp, fh_resp_valid) = recv_if_non_blocking(tok0, fh_resp_r, do_recv_fh_resp, zero!()); + if fh_resp_valid { + trace_fmt!("[DECODE_FRAME_HEADER]: Received FH {:#x}", fh_resp); + } else {}; + + let output_mem_wr_req = MemWriterReq {addr: state.output_buffer, length: fh_resp.header.frame_content_size as uN[AXI_ADDR_W]}; + let tok = send_if(tok0, output_mem_wr_req_s, fh_resp_valid, output_mem_wr_req); + + let do_recv_output_mem_wr_resp = (state.fsm == Fsm::WRITE_OUTPUT); + let (tok_x, output_write_resp, output_write_done) = recv_if_non_blocking(tok0, output_mem_wr_resp_r, do_recv_output_mem_wr_resp, zero!()); + if output_write_done { + trace_fmt!("[WRITE_OUTPUT]: Received response {:#x}", output_write_resp); + } else {}; + + let do_send_notify = (state.fsm == Fsm::ERROR || state.fsm == Fsm::FINISH); + let tok = send_if(tok0, notify_s, do_send_notify, ()); + if do_send_notify { + trace_fmt!("[[NOTIFY]]"); + } else {}; + + let tok1_5 = send_if(tok0, csr_wr_req_s, state.csr_wr_req_valid, state.csr_wr_req); + let (tok, _, _) = recv_non_blocking(tok0, csr_wr_resp_r, zero!()); + if state.csr_wr_req_valid { + trace_fmt!("[[CSR_WR_REQ]] Request: {:#x}", state.csr_wr_req); + } else {}; + + let do_send_bh_req = (state.fsm == Fsm::DECODE_BLOCK_HEADER) && !state.req_sent; + let bh_req = BlockHeaderDecoderReq { addr: state.bh_addr }; + let tok1_6 = send_if(tok0, bh_req_s, do_send_bh_req, bh_req); + if do_send_bh_req { + trace_fmt!("[DECODE_BLOCK_HEADER]: Sending BH request: {:#x}", bh_req); + } else {}; + + let do_recv_bh_resp = (state.fsm == Fsm::DECODE_BLOCK_HEADER); + let (tok1_4, bh_resp, bh_resp_valid) = recv_if_non_blocking(tok0, bh_resp_r, do_recv_bh_resp, zero!()); + if bh_resp_valid { + trace_fmt!("[DECODE_BLOCK_HEADER]: Received BH {:#x}", bh_resp); + } else {}; + + let do_send_raw_req = (state.fsm == Fsm::DECODE_RAW_BLOCK) && !state.req_sent; + let raw_req = RawBlockDecoderReq { + id: state.block_id, + last_block: state.block_last, + addr: state.block_addr, + length: state.block_length, + }; + let tok1_6 = send_if(tok0, raw_req_s, do_send_raw_req, raw_req); + if do_send_raw_req { + trace_fmt!("[DECODE_RAW_BLOCK]: Sending RAW request: {:#x}", raw_req); + } else {}; + + let do_recv_raw_resp = (state.fsm == Fsm::DECODE_RAW_BLOCK); + let (tok1_7, raw_resp, raw_resp_valid) = recv_if_non_blocking(tok0, raw_resp_r, do_recv_raw_resp, zero!()); + if raw_resp_valid { + trace_fmt!("[DECODE_RAW_BLOCK]: Received RAW {:#x}", raw_resp); + } else {}; + + let do_send_rle_req = (state.fsm == Fsm::DECODE_RLE_BLOCK) && !state.req_sent; + let rle_req = RleBlockDecoderReq { + id: state.block_id, + symbol: state.block_rle_symbol, + length: checked_cast(state.block_length), + 
last_block: state.block_last, + }; + let tok1_7 = send_if(tok0, rle_req_s, do_send_rle_req, rle_req); + if do_send_rle_req { + trace_fmt!("[DECODE_RLE_BLOCK]: Sending RLE request: {:#x}", rle_req); + } else {}; + + let do_recv_rle_resp = (state.fsm == Fsm::DECODE_RLE_BLOCK); + let (tok1_8, rle_resp, rle_resp_valid) = recv_if_non_blocking(tok0, rle_resp_r, do_recv_rle_resp, zero!()); + if raw_resp_valid { + trace_fmt!("[DECODE_RLE_BLOCK]: Received RAW {:#x}", raw_resp); + } else {}; + + let new_state = match (state.fsm) { + Fsm::IDLE => { + trace_fmt!("[IDLE]"); + if is_start { + let status = ZstdDecoderStatus::RUNNING; + + let csr_wr_req_valid = true; + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: status as Data, + }; + + State { fsm: Fsm::READ_CONFIG, csr_wr_req, csr_wr_req_valid, conf_cnt: CSR_REQS_MAX, ..zero!() } + } else { zero!() } + }, + + Fsm::READ_CONFIG => { + trace_fmt!("[READ_CONFIG]"); + let is_input_buffer_csr = (csr_data.csr == csr(Csr::INPUT_BUFFER)); + let input_buffer = if csr_data_valid && is_input_buffer_csr { checked_cast(csr_data.value) } else { state.input_buffer }; + let input_buffer_valid = if csr_data_valid && is_input_buffer_csr { true } else { state.input_buffer_valid }; + + let is_output_buffer_csr = (csr_data.csr == csr(Csr::OUTPUT_BUFFER)); + let output_buffer = if (csr_data_valid && is_output_buffer_csr) { checked_cast(csr_data.value) } else { state.output_buffer }; + let output_buffer_valid = if (csr_data_valid && is_output_buffer_csr) { true } else { state.output_buffer_valid }; + + let all_collected = input_buffer_valid & output_buffer_valid; + let fsm = if all_collected { Fsm::DECODE_FRAME_HEADER } else { Fsm::READ_CONFIG }; + + let conf_send = (state.conf_cnt == Reg:0); + let conf_cnt = if conf_send { Reg:0 } else {state.conf_cnt - Reg:1}; + + let status = match(all_collected) { + true => ZstdDecoderStatus::READ_CONFIG_OK, + _ => ZstdDecoderStatus::RUNNING, + }; + + let csr_wr_req_valid = all_collected; + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: status as Data, }; - let new_fsm_status = if (last_packet) { - if (state.last) { - if (state.frame_header.content_checksum_flag) { - ZstdDecoderStatus::DECODE_CHECKSUM - } else { - ZstdDecoderStatus::DECODE_MAGIC_NUMBER - } + + State { + fsm, csr_wr_req, csr_wr_req_valid, conf_cnt, conf_send, input_buffer, input_buffer_valid, output_buffer, output_buffer_valid, + ..zero!() + } + }, + + Fsm::DECODE_FRAME_HEADER => { + trace_fmt!("[DECODE_FRAME_HEADER]"); + let error = (fh_resp.status != FrameHeaderDecoderStatus::OKAY); + + let status = match(fh_resp_valid, fh_resp.status) { + (true, FrameHeaderDecoderStatus::OKAY) => ZstdDecoderStatus::FRAME_HEADER_OK, + (true, FrameHeaderDecoderStatus::CORRUPTED) => ZstdDecoderStatus::FRAME_HEADER_CORRUPTED, + (true, FrameHeaderDecoderStatus::UNSUPPORTED_WINDOW_SIZE) => ZstdDecoderStatus::FRAME_HEADER_UNSUPPORTED_WINDOW_SIZE, + (_, _) => ZstdDecoderStatus::RUNNING, + }; + + let csr_wr_req_valid = (fh_resp_valid); + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: status as Data, + }; + + let fsm = match (fh_resp_valid, error) { + ( true, false) => Fsm::DECODE_BLOCK_HEADER, + ( true, true) => Fsm::ERROR, + ( _, _) => Fsm::DECODE_FRAME_HEADER, + }; + + let bh_addr = state.input_buffer + fh_resp.length as Addr; + let req_sent = if !fh_resp_valid && !error { true } else { false }; + State {fsm, csr_wr_req, csr_wr_req_valid, bh_addr, req_sent, ..state } + }, + + Fsm::DECODE_BLOCK_HEADER => { + trace_fmt!("[DECODE_BLOCK_HEADER]"); + 
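+                // Parse the 3-byte Zstd block header and work out where the next
+                // header starts: an RLE block stores only the single repeated symbol
+                // (1 byte after the header), while a RAW block stores `size` bytes of
+                // literal data. COMPRESSED blocks are not handled yet and go to ERROR.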
let error = (bh_resp.status != BlockHeaderDecoderStatus::OKAY); + + let status = match(bh_resp_valid, bh_resp.status) { + (true, BlockHeaderDecoderStatus::OKAY) => ZstdDecoderStatus::BLOCK_HEADER_OK, + (true, BlockHeaderDecoderStatus::CORRUPTED) => ZstdDecoderStatus::BLOCK_HEADER_CORRUPTED, + (true, BlockHeaderDecoderStatus::MEMORY_ACCESS_ERROR) => ZstdDecoderStatus::BLOCK_HEADER_MEMORY_ACCESS_ERROR, + (_, _) => ZstdDecoderStatus::RUNNING, + }; + + let csr_wr_req_valid = (bh_resp_valid); + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: status as Data, + }; + + let fsm = match (bh_resp_valid, error, bh_resp.header.btype) { + ( true, false, BlockType::RAW ) => Fsm::DECODE_RAW_BLOCK, + ( true, false, BlockType::RLE ) => Fsm::DECODE_RLE_BLOCK, + ( true, false, BlockType::COMPRESSED) => Fsm::ERROR, + ( true, true, _) => Fsm::ERROR, + ( _, _, _) => Fsm::DECODE_BLOCK_HEADER, + }; + + let (block_addr, block_length, block_last, block_rle_symbol, bh_addr) = if bh_resp_valid { + let block_addr = state.bh_addr + Addr:3; + let block_length = checked_cast(bh_resp.header.size); + let block_rle_symbol = bh_resp.rle_symbol; + let bh_addr = if bh_resp.header.btype == BlockType::RLE { + block_addr + Addr:1 } else { - ZstdDecoderStatus::DECODE_BLOCK_HEADER - } + block_addr + block_length + }; + + trace_fmt!("bh_addr: {:#x}", bh_addr); + + (block_addr, block_length, bh_resp.header.last, block_rle_symbol, bh_addr) } else { - ZstdDecoderStatus::FEED_BLOCK_DECODER + (state.block_addr, state.block_length, state.block_last, state.block_rle_symbol, state.bh_addr) }; - trace_fmt!("zstd_dec: feed_block_decoder: packet to decode: {:#x}", decoder_channel_data); - let new_state = (true, decoder_channel_data, ZstdDecoderState { - bytes_sent: state.bytes_sent + remaining_bytes_to_send_now, - buffer: buffer_result.buffer, - status: new_fsm_status, + + let req_sent = if !bh_resp_valid && !error { true } else { false }; + State { + fsm, bh_addr, req_sent, + block_addr, block_length, block_last, block_rle_symbol, + csr_wr_req, csr_wr_req_valid, ..state - }); - trace_fmt!("zstd_dec: feed_block_decoder: new_state: {:#x}", new_state); - new_state + } + }, + + Fsm::DECODE_RAW_BLOCK => { + trace_fmt!("[DECODE_RAW_BLOCK]"); + + let error = (raw_resp.status != RawBlockDecoderStatus::OKAY); + + let status = match(raw_resp_valid, raw_resp.status) { + (true, RawBlockDecoderStatus::OKAY) => ZstdDecoderStatus::RAW_BLOCK_OK, + (true, RawBlockDecoderStatus::ERROR) => ZstdDecoderStatus::RAW_BLOCK_ERROR, + (_, _) => ZstdDecoderStatus::RUNNING, + }; + + let csr_wr_req_valid = (raw_resp_valid); + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: status as Data, + }; + + let fsm = match (raw_resp_valid, error, state.block_last) { + (true, false, false) => Fsm::DECODE_BLOCK_HEADER, + (true, false, true) => Fsm::DECODE_CHECKSUM, + (true, true, _) => Fsm::ERROR, + ( _, _, _) => Fsm::DECODE_RAW_BLOCK, + }; + + let req_sent = if !raw_resp_valid && !error { true } else { false }; + let block_id = if raw_resp_valid { state.block_id + u32:1} else {state.block_id }; + + let state = State {fsm, block_id, csr_wr_req, csr_wr_req_valid, req_sent, ..state}; + if fsm == Fsm::DECODE_BLOCK_HEADER { + trace_fmt!("Going to decode block header: {:#x}", state); + } else {}; + + state }, - _ => { - fail!("should_not_happen_1", (false, zero!(), state)) - } - } - } else { - trace_fmt!("zstd_dec: feed_block_decoder: Not enough data for intermediate FEED_BLOCK_DECODER block dump"); - (false, zero!(), state) + + Fsm::DECODE_RLE_BLOCK => { + 
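+                // The RLE arm mirrors the RAW arm: on an OKAY response it either moves
+                // on to the next block header or, for the last block of the frame,
+                // proceeds to checksum handling before the output is written out.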
trace_fmt!("[DECODE_RLE_BLOCK]"); + let error = (rle_resp.status != RleBlockDecoderStatus::OKAY); + + let status = match(rle_resp_valid, rle_resp.status) { + (true, RleBlockDecoderStatus::OKAY) => ZstdDecoderStatus::RLE_BLOCK_OK, + (_, _) => ZstdDecoderStatus::RUNNING, + }; + + let csr_wr_req_valid = (rle_resp_valid); + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: status as Data, + }; + + let fsm = match (rle_resp_valid, error, state.block_last) { + (true, false, false) => Fsm::DECODE_BLOCK_HEADER, + (true, false, true) => Fsm::DECODE_CHECKSUM, + (true, true, _) => Fsm::ERROR, + ( _, _, _) => Fsm::DECODE_RLE_BLOCK, + }; + + let req_sent = if !rle_resp_valid && !error { true } else { false }; + let block_id = if rle_resp_valid { state.block_id + u32:1} else {state.block_id }; + + let state = State {fsm, block_id, csr_wr_req, csr_wr_req_valid, req_sent, ..state}; + if fsm == Fsm::DECODE_BLOCK_HEADER { + trace_fmt!("Going to decode block header: {:#x}", state); + } else {}; + + state + }, + + Fsm::DECODE_CHECKSUM => { + trace_fmt!("[DECODE_CHECKSUM]"); + State {fsm: Fsm::WRITE_OUTPUT, ..zero!() } + + }, + + Fsm::WRITE_OUTPUT => { + trace_fmt!("[WRITE_OUTPUT]"); + let error = (output_write_resp.status != mem_writer::MemWriterRespStatus::OKAY); + let fsm = match (output_write_done, error) { + (true, false) => Fsm::FINISH, + (true, true) => Fsm::ERROR, + ( _, _) => Fsm::WRITE_OUTPUT, + }; + + State {fsm: fsm, ..zero!() } + }, + + Fsm::ERROR => { + trace_fmt!("[ERROR]"); + State { fsm: Fsm::IDLE, ..zero!() } + }, + + Fsm::FINISH => { + trace_fmt!("[FINISH]"); + let csr_wr_req_valid = true; + let csr_wr_req = CsrWrReq { + csr: csr(Csr::STATUS), + value: ZstdDecoderStatus::IDLE as Data, + }; + + State { fsm: Fsm::IDLE, csr_wr_req, csr_wr_req_valid, ..zero!() } + }, + + _ => zero!(), + }; + + new_state } } -fn decode_checksum(state: ZstdDecoderState) -> (bool, BlockDataPacket, ZstdDecoderState) { - trace_fmt!("zstd_dec: decode_checksum: DECODE CHECKSUM"); - trace_fmt!("zstd_dec: decode_checksum: state: {:#x}", state); - // Pop fixed checksum size of 4 bytes - let (buffer_result, _) = buff::buffer_pop_checked(state.buffer, u32:32); +const TEST_AXI_DATA_W = u32:64; +const TEST_AXI_ADDR_W = u32:32; +const TEST_REGS_N = u32:5; +const TEST_LOG2_REGS_N = std::clog2(TEST_REGS_N); + +#[test_proc] +proc ZstdDecoderInternalTest { + + type BlockType = common::BlockType; + type BlockSize = common::BlockSize; + type BlockHeader = block_header::BlockHeader; + type BlockHeaderDecoderStatus = block_header_dec::BlockHeaderDecoderStatus; + + type FrameHeaderDecoderReq = frame_header_dec::FrameHeaderDecoderReq; + type FrameHeaderDecoderResp = frame_header_dec::FrameHeaderDecoderResp; + type FrameHeaderDecoderStatus = frame_header_dec::FrameHeaderDecoderStatus; + type FrameContentSize = frame_header_dec::FrameContentSize; + type FrameHeader = frame_header_dec::FrameHeader; + type WindowSize = frame_header_dec::WindowSize; + type DictionaryId = frame_header_dec::DictionaryId; + + type CsrRdReq = csr_config::CsrRdReq; + type CsrRdResp = csr_config::CsrRdResp; + type CsrWrReq = csr_config::CsrWrReq; + type CsrWrResp = csr_config::CsrWrResp; + type CsrChange = csr_config::CsrChange; + + type FrameHeaderDecoderReq = frame_header_dec::FrameHeaderDecoderReq; + type FrameHeaderDecoderResp = frame_header_dec::FrameHeaderDecoderResp; + + type BlockHeaderDecoderReq = block_header_dec::BlockHeaderDecoderReq; + type BlockHeaderDecoderResp = block_header_dec::BlockHeaderDecoderResp; + + type RawBlockDecoderReq = 
raw_block_dec::RawBlockDecoderReq; + type RawBlockDecoderResp = raw_block_dec::RawBlockDecoderResp; + type RawBlockDecoderStatus = raw_block_dec::RawBlockDecoderStatus; + + type RleBlockDecoderReq = rle_block_dec::RleBlockDecoderReq; + type RleBlockDecoderResp = rle_block_dec::RleBlockDecoderResp; + type RleBlockDecoderStatus = rle_block_dec::RleBlockDecoderStatus; + + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; - let new_state = ZstdDecoderState { - status: ZstdDecoderStatus::DECODE_MAGIC_NUMBER, - buffer: buffer_result.buffer, - ..state - }; - trace_fmt!("zstd_dec: decode_checksum: new_state: {:#x}", new_state); + terminator: chan out; - (false, zero!(), new_state) + csr_rd_req_r: chan in; + csr_rd_resp_s: chan out; + csr_wr_req_r: chan in; + csr_wr_resp_s: chan out; + csr_change_s: chan out; + + fh_req_r: chan in; + fh_resp_s: chan out; + + bh_req_r: chan in; + bh_resp_s: chan out; + + raw_req_r: chan in; + raw_resp_s: chan out; + + rle_req_r: chan in; + rle_resp_s: chan out; + + output_mem_wr_req_r: chan in; + output_mem_wr_resp_s: chan out; + + notify_r: chan<()> in; + reset_r: chan<()> in; + + init {} + + config(terminator: chan out) { + let (csr_rd_req_s, csr_rd_req_r) = chan("csr_rd_req"); + let (csr_rd_resp_s, csr_rd_resp_r) = chan("csr_rd_resp"); + let (csr_wr_req_s, csr_wr_req_r) = chan("csr_wr_req"); + let (csr_wr_resp_s, csr_wr_resp_r) = chan("csr_wr_resp"); + let (csr_change_s, csr_change_r) = chan("csr_change"); + + let (fh_req_s, fh_req_r) = chan("fh_req"); + let (fh_resp_s, fh_resp_r) = chan("fh_resp"); + + let (bh_req_s, bh_req_r) = chan("bh_req"); + let (bh_resp_s, bh_resp_r) = chan("bh_resp"); + + let (raw_req_s, raw_req_r) = chan("raw_req"); + let (raw_resp_s, raw_resp_r) = chan("raw_resp"); + + let (rle_req_s, rle_req_r) = chan("rle_req"); + let (rle_resp_s, rle_resp_r) = chan("rle_resp"); + + let (output_mem_wr_req_s, output_mem_wr_req_r) = chan("output_mem_wr_req"); + let (output_mem_wr_resp_s, output_mem_wr_resp_r) = chan("output_mem_wr_resp"); + + let (notify_s, notify_r) = chan<()>("notify"); + let (reset_s, reset_r) = chan<()>("reset"); + + spawn ZstdDecoderInternal( + csr_rd_req_s, csr_rd_resp_r, csr_wr_req_s, csr_wr_resp_r, csr_change_r, + fh_req_s, fh_resp_r, + bh_req_s, bh_resp_r, + raw_req_s, raw_resp_r, + rle_req_s, rle_resp_r, + output_mem_wr_req_s, output_mem_wr_resp_r, + notify_s, reset_s, + ); + + ( + terminator, + csr_rd_req_r, csr_rd_resp_s, csr_wr_req_r, csr_wr_resp_s, csr_change_s, + fh_req_r, fh_resp_s, + bh_req_r, bh_resp_s, + raw_req_r, raw_resp_s, + rle_req_r, rle_resp_s, + output_mem_wr_req_r, output_mem_wr_resp_s, + notify_r, reset_r, + ) + } + + next (state: ()) { + type Addr = uN[TEST_AXI_ADDR_W]; + type Length = uN[TEST_AXI_ADDR_W]; + + let tok = join(); + + // Error in frame header + + let tok = send(tok, csr_change_s, CsrChange { csr: csr(Csr::START)}); + let (tok, csr_data) = recv(tok, csr_rd_req_r); + assert_eq(csr_data, CsrRdReq {csr: csr(Csr::OUTPUT_BUFFER)}); + let (tok, csr_data) = recv(tok, csr_rd_req_r); + assert_eq(csr_data, CsrRdReq {csr: csr(Csr::INPUT_BUFFER)}); + + send(tok, csr_rd_resp_s, CsrRdResp { + csr: csr(Csr::INPUT_BUFFER), + value: uN[TEST_AXI_DATA_W]:0x1000 + }); + send(tok, csr_rd_resp_s, CsrRdResp { + csr: csr(Csr::OUTPUT_BUFFER), + value: uN[TEST_AXI_DATA_W]:0x2000 + }); + let (tok, fh_req) = recv(tok, fh_req_r); + assert_eq(fh_req, FrameHeaderDecoderReq { addr: Addr:0x1000 }); + + let tok = send(tok, fh_resp_s, FrameHeaderDecoderResp { + status: 
FrameHeaderDecoderStatus::CORRUPTED, + header: FrameHeader { + window_size: WindowSize:100, + frame_content_size: FrameContentSize:200, + dictionary_id: DictionaryId:123, + content_checksum_flag: u1:1, + }, + length: u5:3, + }); + + + let (tok, ()) = recv(tok, notify_r); + + // Correct case + let tok = send(tok, csr_change_s, CsrChange { csr: csr(Csr::START)}); + let (tok, csr_data) = recv(tok, csr_rd_req_r); + assert_eq(csr_data, CsrRdReq {csr: csr(Csr::OUTPUT_BUFFER)}); + let (tok, csr_data) = recv(tok, csr_rd_req_r); + assert_eq(csr_data, CsrRdReq {csr: csr(Csr::INPUT_BUFFER)}); + + send(tok, csr_rd_resp_s, CsrRdResp { + csr: csr(Csr::INPUT_BUFFER), + value: uN[TEST_AXI_DATA_W]:0x1000 + }); + send(tok, csr_rd_resp_s, CsrRdResp { + csr: csr(Csr::OUTPUT_BUFFER), + value: uN[TEST_AXI_DATA_W]:0x2000 + }); + let (tok, fh_req) = recv(tok, fh_req_r); + assert_eq(fh_req, FrameHeaderDecoderReq { addr: Addr:0x1000 }); + + let tok = send(tok, fh_resp_s, FrameHeaderDecoderResp { + status: FrameHeaderDecoderStatus::OKAY, + header: FrameHeader { + window_size: WindowSize:100, + frame_content_size: FrameContentSize:200, + dictionary_id: DictionaryId:123, + content_checksum_flag: u1:1, + }, + length: u5:3, + }); + + let (tok, bh_req) = recv(tok, bh_req_r); + assert_eq(bh_req, BlockHeaderDecoderReq { + addr: Addr:0x1003, + }); + + let tok = send(tok, bh_resp_s, BlockHeaderDecoderResp { + status: BlockHeaderDecoderStatus::OKAY, + header: BlockHeader { + last: false, + btype: BlockType::RAW, + size: BlockSize:0x1000, + }, + rle_symbol: u8:0, + }); + + let (tok, raw_req) = recv(tok, raw_req_r); + assert_eq(raw_req, RawBlockDecoderReq { + last_block: false, + id: u32:0, + addr: Addr:0x1006, + length: Length:0x1000 + }); + + let tok = send(tok, raw_resp_s, RawBlockDecoderResp { + status: RawBlockDecoderStatus::OKAY, + }); + + let (tok, bh_req) = recv(tok, bh_req_r); + assert_eq(bh_req, BlockHeaderDecoderReq { + addr: Addr:0x2006 + }); + let tok = send(tok, bh_resp_s, BlockHeaderDecoderResp { + status: BlockHeaderDecoderStatus::OKAY, + header: BlockHeader { + last: false, + btype: BlockType::RLE, + size: BlockSize:0x1000, + }, + rle_symbol: u8:123, + }); + + let (tok, rle_req) = recv(tok, rle_req_r); + assert_eq(rle_req, RleBlockDecoderReq { + id: u32:1, + symbol: u8:123, + last_block: false, + length: checked_cast(Length:0x1000), + }); + let tok = send(tok, rle_resp_s, RleBlockDecoderResp { + status: RleBlockDecoderStatus::OKAY, + }); + + let (tok, bh_req) = recv(tok, bh_req_r); + assert_eq(bh_req, BlockHeaderDecoderReq { + addr: Addr:0x200A, + }); + + let tok = send(tok, bh_resp_s, BlockHeaderDecoderResp { + status: BlockHeaderDecoderStatus::OKAY, + header: BlockHeader { + last: true, + btype: BlockType::RAW, + size: BlockSize:0x1000, + }, + rle_symbol: u8:0, + }); + + let (tok, raw_req) = recv(tok, raw_req_r); + assert_eq(raw_req, RawBlockDecoderReq { + last_block: true, + id: u32:2, + addr: Addr:0x200D, + length: Length:0x1000 + }); + + let tok = send(tok, raw_resp_s, RawBlockDecoderResp { + status: RawBlockDecoderStatus::OKAY, + }); + + let (tok, mem_wr_req) = recv(tok, output_mem_wr_req_r); + assert_eq(mem_wr_req, MemWriterReq { + addr: uN[TEST_AXI_ADDR_W]:0x2000, + length: uN[TEST_AXI_ADDR_W]:200 + }); + let tok = send(tok, output_mem_wr_resp_s, MemWriterResp {status: mem_writer::MemWriterRespStatus::OKAY}); + + let (tok, ()) = recv(tok, notify_r); + + send(tok, terminator, true); + } } -pub proc ZstdDecoder { - input_r: chan in; - block_dec_in_s: chan out; - output_s: chan out; - looped_channel_r: 
chan in; - looped_channel_s: chan out; - ram_rd_req_0_s: chan> out; - ram_rd_req_1_s: chan> out; - ram_rd_req_2_s: chan> out; - ram_rd_req_3_s: chan> out; - ram_rd_req_4_s: chan> out; - ram_rd_req_5_s: chan> out; - ram_rd_req_6_s: chan> out; - ram_rd_req_7_s: chan> out; - ram_rd_resp_0_r: chan> in; - ram_rd_resp_1_r: chan> in; - ram_rd_resp_2_r: chan> in; - ram_rd_resp_3_r: chan> in; - ram_rd_resp_4_r: chan> in; - ram_rd_resp_5_r: chan> in; - ram_rd_resp_6_r: chan> in; - ram_rd_resp_7_r: chan> in; - ram_wr_req_0_s: chan> out; - ram_wr_req_1_s: chan> out; - ram_wr_req_2_s: chan> out; - ram_wr_req_3_s: chan> out; - ram_wr_req_4_s: chan> out; - ram_wr_req_5_s: chan> out; - ram_wr_req_6_s: chan> out; - ram_wr_req_7_s: chan> out; - ram_wr_resp_0_r: chan in; - ram_wr_resp_1_r: chan in; - ram_wr_resp_2_r: chan in; - ram_wr_resp_3_r: chan in; - ram_wr_resp_4_r: chan in; - ram_wr_resp_5_r: chan in; - ram_wr_resp_6_r: chan in; - ram_wr_resp_7_r: chan in; - - init {(ZERO_DECODER_STATE)} - - config ( - input_r: chan in, - output_s: chan out, - looped_channel_r: chan in, - looped_channel_s: chan out, - ram_rd_req_0_s: chan> out, - ram_rd_req_1_s: chan> out, - ram_rd_req_2_s: chan> out, - ram_rd_req_3_s: chan> out, - ram_rd_req_4_s: chan> out, - ram_rd_req_5_s: chan> out, - ram_rd_req_6_s: chan> out, - ram_rd_req_7_s: chan> out, - ram_rd_resp_0_r: chan> in, - ram_rd_resp_1_r: chan> in, - ram_rd_resp_2_r: chan> in, - ram_rd_resp_3_r: chan> in, - ram_rd_resp_4_r: chan> in, - ram_rd_resp_5_r: chan> in, - ram_rd_resp_6_r: chan> in, - ram_rd_resp_7_r: chan> in, - ram_wr_req_0_s: chan> out, - ram_wr_req_1_s: chan> out, - ram_wr_req_2_s: chan> out, - ram_wr_req_3_s: chan> out, - ram_wr_req_4_s: chan> out, - ram_wr_req_5_s: chan> out, - ram_wr_req_6_s: chan> out, - ram_wr_req_7_s: chan> out, - ram_wr_resp_0_r: chan in, - ram_wr_resp_1_r: chan in, - ram_wr_resp_2_r: chan in, - ram_wr_resp_3_r: chan in, - ram_wr_resp_4_r: chan in, - ram_wr_resp_5_r: chan in, - ram_wr_resp_6_r: chan in, - ram_wr_resp_7_r: chan in, + +pub proc ZstdDecoder< + // AXI parameters + AXI_DATA_W: u32, AXI_ADDR_W: u32, AXI_ID_W: u32, AXI_DEST_W: u32, + // decoder parameters + REGS_N: u32, WINDOW_LOG_MAX: u32, + HB_ADDR_W: u32, HB_DATA_W: u32, HB_NUM_PARTITIONS: u32, HB_SIZE_KB: u32, + // calculated parameters + AXI_DATA_W_DIV8: u32 = {AXI_DATA_W / u32:8}, + LOG2_REGS_N: u32 = {std::clog2(REGS_N)}, + HB_RAM_N: u32 = {u32:8}, + MEM_WRITER_ID: u32 = {u32:0}, +> { + type CsrAxiAr = axi::AxiAr; + type CsrAxiR = axi::AxiR; + type CsrAxiAw = axi::AxiAw; + type CsrAxiW = axi::AxiW; + type CsrAxiB = axi::AxiB; + + type CsrRdReq = csr_config::CsrRdReq; + type CsrRdResp = csr_config::CsrRdResp; + type CsrWrReq = csr_config::CsrWrReq; + type CsrWrResp = csr_config::CsrWrResp; + type CsrChange = csr_config::CsrChange; + + type MemAxiAr = axi::AxiAr; + type MemAxiR = axi::AxiR; + type MemAxiAw = axi::AxiAw; + type MemAxiW = axi::AxiW; + type MemAxiB = axi::AxiB; + + type MemReaderReq = mem_reader::MemReaderReq; + type MemReaderResp = mem_reader::MemReaderResp; + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; + type MemWriterDataPacket = mem_writer::MemWriterDataPacket; + + type FrameHeaderDecoderReq = frame_header_dec::FrameHeaderDecoderReq; + type FrameHeaderDecoderResp = frame_header_dec::FrameHeaderDecoderResp; + + type BlockHeaderDecoderReq = block_header_dec::BlockHeaderDecoderReq; + type BlockHeaderDecoderResp = block_header_dec::BlockHeaderDecoderResp; + + type RawBlockDecoderReq = 
raw_block_dec::RawBlockDecoderReq; + type RawBlockDecoderResp = raw_block_dec::RawBlockDecoderResp; + type ExtendedBlockDataPacket = common::ExtendedBlockDataPacket; + + type RleBlockDecoderReq = rle_block_dec::RleBlockDecoderReq; + type RleBlockDecoderResp = rle_block_dec::RleBlockDecoderResp; + + type SequenceExecutorPacket = common::SequenceExecutorPacket; + type ZstdDecodedPacket = common::ZstdDecodedPacket; + + type RamRdReq = ram::ReadReq; + type RamRdResp = ram::ReadResp; + type RamWrReq = ram::WriteReq; + type RamWrResp = ram::WriteResp; + + // Complex Block Decoder + cmp_output_s: chan out; + + init {} + + config( + // AXI Ctrl (subordinate) + csr_axi_aw_r: chan in, + csr_axi_w_r: chan in, + csr_axi_b_s: chan out, + csr_axi_ar_r: chan in, + csr_axi_r_s: chan out, + + // AXI Frame Header Decoder (manager) + fh_axi_ar_s: chan out, + fh_axi_r_r: chan in, + + //// AXI Block Header Decoder (manager) + bh_axi_ar_s: chan out, + bh_axi_r_r: chan in, + + //// AXI RAW Block Decoder (manager) + raw_axi_ar_s: chan out, + raw_axi_r_r: chan in, + + //// AXI Output Writer (manager) + output_axi_aw_s: chan out, + output_axi_w_s: chan out, + output_axi_b_r: chan in, + + // History Buffer + ram_rd_req_0_s: chan out, + ram_rd_req_1_s: chan out, + ram_rd_req_2_s: chan out, + ram_rd_req_3_s: chan out, + ram_rd_req_4_s: chan out, + ram_rd_req_5_s: chan out, + ram_rd_req_6_s: chan out, + ram_rd_req_7_s: chan out, + ram_rd_resp_0_r: chan in, + ram_rd_resp_1_r: chan in, + ram_rd_resp_2_r: chan in, + ram_rd_resp_3_r: chan in, + ram_rd_resp_4_r: chan in, + ram_rd_resp_5_r: chan in, + ram_rd_resp_6_r: chan in, + ram_rd_resp_7_r: chan in, + ram_wr_req_0_s: chan out, + ram_wr_req_1_s: chan out, + ram_wr_req_2_s: chan out, + ram_wr_req_3_s: chan out, + ram_wr_req_4_s: chan out, + ram_wr_req_5_s: chan out, + ram_wr_req_6_s: chan out, + ram_wr_req_7_s: chan out, + ram_wr_resp_0_r: chan in, + ram_wr_resp_1_r: chan in, + ram_wr_resp_2_r: chan in, + ram_wr_resp_3_r: chan in, + ram_wr_resp_4_r: chan in, + ram_wr_resp_5_r: chan in, + ram_wr_resp_6_r: chan in, + ram_wr_resp_7_r: chan in, + + notify_s: chan<()> out, + reset_s: chan<()> out, ) { - let (block_dec_in_s, block_dec_in_r) = chan("block_dec_in"); - let (seq_exec_in_s, seq_exec_in_r) = chan("seq_exec_in"); - let (repacketizer_in_s, repacketizer_in_r) = chan("repacketizer_in"); + const CHANNEL_DEPTH = u32:1; + + // CSRs + + let (ext_csr_rd_req_s, ext_csr_rd_req_r) = chan("csr_rd_req"); + let (ext_csr_rd_resp_s, ext_csr_rd_resp_r) = chan("csr_rd_resp"); + let (ext_csr_wr_req_s, ext_csr_wr_req_r) = chan("csr_wr_req"); + let (ext_csr_wr_resp_s, ext_csr_wr_resp_r) = chan("csr_wr_resp"); + + let (csr_rd_req_s, csr_rd_req_r) = chan("csr_rd_req"); + let (csr_rd_resp_s, csr_rd_resp_r) = chan("csr_rd_resp"); + let (csr_wr_req_s, csr_wr_req_r) = chan("csr_wr_req"); + let (csr_wr_resp_s, csr_wr_resp_r) = chan("csr_wr_resp"); + + let (csr_change_s, csr_change_r) = chan("csr_change"); - spawn block_dec::BlockDecoder(block_dec_in_r, seq_exec_in_s); + spawn axi_csr_accessor::AxiCsrAccessor( + csr_axi_aw_r, csr_axi_w_r, csr_axi_b_s, // csr write from AXI + csr_axi_ar_r, csr_axi_r_s, // csr read from AXI + ext_csr_rd_req_s, ext_csr_rd_resp_r, // csr read to CsrConfig + ext_csr_wr_req_s, ext_csr_wr_resp_r, // csr write to CsrConfig + ); + + spawn csr_config::CsrConfig( + ext_csr_rd_req_r, ext_csr_rd_resp_s, // csr read from AxiCsrAccessor + ext_csr_wr_req_r, ext_csr_wr_resp_s, // csr write from AxiCsrAccessor + csr_rd_req_r, csr_rd_resp_s, // csr read from design + csr_wr_req_r, 
csr_wr_resp_s, // csr write from design + csr_change_s, // notification about csr change + ); + + // Frame Header + + let (fh_mem_rd_req_s, fh_mem_rd_req_r) = chan("fh_mem_rd_req"); + let (fh_mem_rd_resp_s, fh_mem_rd_resp_r) = chan("fh_mem_rd_resp"); + + spawn mem_reader::MemReader( + fh_mem_rd_req_r, fh_mem_rd_resp_s, + fh_axi_ar_s, fh_axi_r_r, + ); + + let (fh_req_s, fh_req_r) = chan("fh_req"); + let (fh_resp_s, fh_resp_r) = chan("fh_resp"); + + spawn frame_header_dec::FrameHeaderDecoder( + fh_mem_rd_req_s, fh_mem_rd_resp_r, + fh_req_r, fh_resp_s, + ); + + // Block Header + + let (bh_mem_rd_req_s, bh_mem_rd_req_r) = chan("bh_mem_rd_req"); + let (bh_mem_rd_resp_s, bh_mem_rd_resp_r) = chan("bh_mem_rd_resp"); + + spawn mem_reader::MemReader( + bh_mem_rd_req_r, bh_mem_rd_resp_s, + bh_axi_ar_s, bh_axi_r_r, + ); + + let (bh_req_s, bh_req_r) = chan("bh_req"); + let (bh_resp_s, bh_resp_r) = chan("bh_resp"); + + spawn block_header_dec::BlockHeaderDecoder( + bh_req_r, bh_resp_s, + bh_mem_rd_req_s, bh_mem_rd_resp_r, + ); + + // Raw Block Decoder - spawn sequence_executor::SequenceExecutor( - seq_exec_in_r, repacketizer_in_s, - looped_channel_r, looped_channel_s, - ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, - ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, + let (raw_mem_rd_req_s, raw_mem_rd_req_r) = chan("raw_mem_rd_req"); + let (raw_mem_rd_resp_s, raw_mem_rd_resp_r) = chan("raw_mem_rd_resp"); + + spawn mem_reader::MemReader( + raw_mem_rd_req_r, raw_mem_rd_resp_s, + raw_axi_ar_s, raw_axi_r_r, + ); + + let (raw_req_s, raw_req_r) = chan("raw_req"); + let (raw_resp_s, raw_resp_r) = chan("raw_resp"); + let (raw_output_s, raw_output_r) = chan("raw_output"); + + spawn raw_block_dec::RawBlockDecoder( + raw_req_r, raw_resp_s, raw_output_s, + raw_mem_rd_req_s, raw_mem_rd_resp_r, + ); + + // RLE Block Decoder + + let (rle_req_s, rle_req_r) = chan("rle_req"); + let (rle_resp_s, rle_resp_r) = chan("rle_resp"); + let (rle_output_s, rle_output_r) = chan("rle_output"); + + spawn rle_block_dec::RleBlockDecoder( + rle_req_r, rle_resp_s, rle_output_s + ); + + // Collecting Packets + + let (cmp_output_s, cmp_output_r) = chan("cmp_output"); + let (seq_exec_input_s, seq_exec_input_r) = chan("demux_output"); + + spawn dec_mux::DecoderMux( + raw_output_r, rle_output_r, cmp_output_r, + seq_exec_input_s, + ); + + // Sequence Execution + let (seq_exec_looped_s, seq_exec_looped_r) = chan("seq_exec_looped"); + let (output_mem_wr_data_in_s, output_mem_wr_data_in_r) = chan("output_mem_wr_data_in"); + + spawn sequence_executor::SequenceExecutor( + seq_exec_input_r, output_mem_wr_data_in_s, + seq_exec_looped_r, seq_exec_looped_s, + ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, + ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, ram_rd_resp_0_r, ram_rd_resp_1_r, ram_rd_resp_2_r, ram_rd_resp_3_r, ram_rd_resp_4_r, ram_rd_resp_5_r, ram_rd_resp_6_r, ram_rd_resp_7_r, - ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, - ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, + ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, + ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, ram_wr_resp_0_r, ram_wr_resp_1_r, ram_wr_resp_2_r, ram_wr_resp_3_r, - ram_wr_resp_4_r, ram_wr_resp_5_r, ram_wr_resp_6_r, ram_wr_resp_7_r, + ram_wr_resp_4_r, ram_wr_resp_5_r, ram_wr_resp_6_r, ram_wr_resp_7_r + ); + + // Zstd Decoder Control + let (output_mem_wr_req_s, output_mem_wr_req_r) = chan("output_mem_wr_req"); + let (output_mem_wr_resp_s, 
output_mem_wr_resp_r) = chan("output_mem_wr_resp"); + + spawn mem_writer::MemWriter( + output_mem_wr_req_r, output_mem_wr_data_in_r, + output_axi_aw_s, output_axi_w_s, output_axi_b_r, output_mem_wr_resp_s + ); + + spawn ZstdDecoderInternal ( + csr_rd_req_s, csr_rd_resp_r, csr_wr_req_s, csr_wr_resp_r, csr_change_r, + fh_req_s, fh_resp_r, + bh_req_s, bh_resp_r, + raw_req_s, raw_resp_r, + rle_req_s, rle_resp_r, + output_mem_wr_req_s, output_mem_wr_resp_r, + notify_s, reset_s, ); - spawn repacketizer::Repacketizer(repacketizer_in_r, output_s); - - (input_r, block_dec_in_s, output_s, looped_channel_r, looped_channel_s, - ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, - ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, - ram_rd_resp_0_r, ram_rd_resp_1_r, ram_rd_resp_2_r, ram_rd_resp_3_r, - ram_rd_resp_4_r, ram_rd_resp_5_r, ram_rd_resp_6_r, ram_rd_resp_7_r, - ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, - ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, - ram_wr_resp_0_r, ram_wr_resp_1_r, ram_wr_resp_2_r, ram_wr_resp_3_r, - ram_wr_resp_4_r, ram_wr_resp_5_r, ram_wr_resp_6_r, ram_wr_resp_7_r) + (cmp_output_s,) } - next (state: ZstdDecoderState) { - let tok = join(); - trace_fmt!("zstd_dec: next(): state: {:#x}", state); - let can_fit = buff::buffer_can_fit(state.buffer, BlockData:0); - trace_fmt!("zstd_dec: next(): can_fit: {}", can_fit); - let (tok, data, recv_valid) = recv_if_non_blocking(tok, input_r, can_fit, BlockData:0); - let state = if (can_fit && recv_valid) { - let buffer = buff::buffer_append(state.buffer, data); - trace_fmt!("zstd_dec: next(): received more data: {:#x}", data); - ZstdDecoderState {buffer, ..state} - } else { - state - }; - trace_fmt!("zstd_dec: next(): state after receive: {:#x}", state); - - let (do_send, data_to_send, state) = match state.status { - ZstdDecoderStatus::DECODE_MAGIC_NUMBER => - decode_magic_number(state), - ZstdDecoderStatus::DECODE_FRAME_HEADER => - decode_frame_header(state), - ZstdDecoderStatus::DECODE_BLOCK_HEADER => - decode_block_header(state), - ZstdDecoderStatus::FEED_BLOCK_DECODER => - feed_block_decoder(state), - ZstdDecoderStatus::DECODE_CHECKSUM => - decode_checksum(state), - _ => (false, zero!(), state) - }; + next (state: ()) { + send_if(join(), cmp_output_s, false, zero!()); + } +} + +const INST_AXI_DATA_W = u32:64; +const INST_AXI_ADDR_W = u32:16; +const INST_AXI_ID_W = u32:4; +const INST_AXI_DEST_W = u32:4; +const INST_REGS_N = u32:16; +const INST_WINDOW_LOG_MAX = u32:30; +const INST_HB_ADDR_W = sequence_executor::ZSTD_RAM_ADDR_WIDTH; +const INST_HB_DATA_W = sequence_executor::RAM_DATA_WIDTH; +const INST_HB_NUM_PARTITIONS = sequence_executor::RAM_NUM_PARTITIONS; +const INST_HB_SIZE_KB = sequence_executor::ZSTD_HISTORY_BUFFER_SIZE_KB; + +const INST_LOG2_REGS_N = std::clog2(INST_REGS_N); +const INST_AXI_DATA_W_DIV8 = INST_AXI_DATA_W / u32:8; +const INST_HB_RAM_N = u32:8; + +proc ZstdDecoderInternalInst { + type State = ZstdDecoderInternalState; + type Fsm = ZstdDecoderInternalFsm; + + type CsrRdReq = csr_config::CsrRdReq; + type CsrRdResp = csr_config::CsrRdResp; + type CsrWrReq = csr_config::CsrWrReq; + type CsrWrResp = csr_config::CsrWrResp; + type CsrChange = csr_config::CsrChange; + + type FrameHeaderDecoderReq = frame_header_dec::FrameHeaderDecoderReq; + type FrameHeaderDecoderResp = frame_header_dec::FrameHeaderDecoderResp; + + type BlockHeaderDecoderReq = block_header_dec::BlockHeaderDecoderReq; + type BlockHeaderDecoderResp = block_header_dec::BlockHeaderDecoderResp; 
+ + type RawBlockDecoderReq = raw_block_dec::RawBlockDecoderReq; + type RawBlockDecoderResp = raw_block_dec::RawBlockDecoderResp; + + type RleBlockDecoderReq = rle_block_dec::RleBlockDecoderReq; + type RleBlockDecoderResp = rle_block_dec::RleBlockDecoderResp; + + type MemWriterReq = mem_writer::MemWriterReq; + type MemWriterResp = mem_writer::MemWriterResp; + + init { } - trace_fmt!("zstd_dec: next(): do_send: {:#x}, data_to_send: {:#x}, state: {:#x}", do_send, data_to_send, state); - let tok = send_if(tok, block_dec_in_s, do_send, data_to_send); + config( + csr_rd_req_s: chan out, + csr_rd_resp_r: chan in, + csr_wr_req_s: chan out, + csr_wr_resp_r: chan in, + csr_change_r: chan in, + + // MemReader + FameHeaderDecoder + fh_req_s: chan out, + fh_resp_r: chan in, + + // MemReader + BlockHeaderDecoder + bh_req_s: chan out, + bh_resp_r: chan in, + + // MemReader + RawBlockDecoder + raw_req_s: chan out, + raw_resp_r: chan in, + + // MemReader + RleBlockDecoder + rle_req_s: chan out, + rle_resp_r: chan in, + + // Output MemWriter + output_mem_wr_req_s: chan out, + output_mem_wr_resp_r: chan in, + + // IRQ + notify_s: chan<()> out, + reset_s: chan<()> out, + ) { + spawn ZstdDecoderInternal< + INST_AXI_DATA_W, INST_AXI_ADDR_W, INST_REGS_N, + > ( + csr_rd_req_s, csr_rd_resp_r, csr_wr_req_s, csr_wr_resp_r, csr_change_r, + fh_req_s, fh_resp_r, + bh_req_s, bh_resp_r, + raw_req_s, raw_resp_r, + rle_req_s, rle_resp_r, + output_mem_wr_req_s, output_mem_wr_resp_r, + notify_s, reset_s, + ); - state } + + next(state: ()) {} } -const TEST_RAM_SIZE = sequence_executor::ram_size(ZSTD_HISTORY_BUFFER_SIZE_KB); -const RAM_WORD_PARTITION_SIZE = sequence_executor::RAM_WORD_PARTITION_SIZE; -const TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR = sequence_executor::TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR; -const TEST_RAM_INITIALIZED = sequence_executor::TEST_RAM_INITIALIZED; -const TEST_RAM_ASSERT_VALID_READ:bool = {false}; +proc ZstdDecoderInst { + type CsrAxiAr = axi::AxiAr; + type CsrAxiR = axi::AxiR; + type CsrAxiAw = axi::AxiAw; + type CsrAxiW = axi::AxiW; + type CsrAxiB = axi::AxiB; -pub proc ZstdDecoderTest { - input_r: chan in; - output_s: chan out; + type MemAxiAr = axi::AxiAr; + type MemAxiR = axi::AxiR; + type MemAxiAw = axi::AxiAw; + type MemAxiW = axi::AxiW; + type MemAxiB = axi::AxiB; - init {()} + type RamRdReq = ram::ReadReq; + type RamRdResp = ram::ReadResp; + type RamWrReq = ram::WriteReq; + type RamWrResp = ram::WriteResp; - config ( - input_r: chan in, - output_s: chan out, + type ZstdDecodedPacket = common::ZstdDecodedPacket; + + init { } + + config( + // AXI Ctrl (subordinate) + csr_axi_aw_r: chan in, + csr_axi_w_r: chan in, + csr_axi_b_s: chan out, + csr_axi_ar_r: chan in, + csr_axi_r_s: chan out, + + // AXI Frame Header Decoder (manager) + fh_axi_ar_s: chan out, + fh_axi_r_r: chan in, + + // AXI Block Header Decoder (manager) + bh_axi_ar_s: chan out, + bh_axi_r_r: chan in, + + // AXI RAW Block Decoder (manager) + raw_axi_ar_s: chan out, + raw_axi_r_r: chan in, + + //// AXI Output Writer (manager) + output_axi_aw_s: chan out, + output_axi_w_s: chan out, + output_axi_b_r: chan in, + + // History Buffer + ram_rd_req_0_s: chan out, + ram_rd_req_1_s: chan out, + ram_rd_req_2_s: chan out, + ram_rd_req_3_s: chan out, + ram_rd_req_4_s: chan out, + ram_rd_req_5_s: chan out, + ram_rd_req_6_s: chan out, + ram_rd_req_7_s: chan out, + ram_rd_resp_0_r: chan in, + ram_rd_resp_1_r: chan in, + ram_rd_resp_2_r: chan in, + ram_rd_resp_3_r: chan in, + ram_rd_resp_4_r: chan in, + ram_rd_resp_5_r: chan in, + 
ram_rd_resp_6_r: chan in, + ram_rd_resp_7_r: chan in, + ram_wr_req_0_s: chan out, + ram_wr_req_1_s: chan out, + ram_wr_req_2_s: chan out, + ram_wr_req_3_s: chan out, + ram_wr_req_4_s: chan out, + ram_wr_req_5_s: chan out, + ram_wr_req_6_s: chan out, + ram_wr_req_7_s: chan out, + ram_wr_resp_0_r: chan in, + ram_wr_resp_1_r: chan in, + ram_wr_resp_2_r: chan in, + ram_wr_resp_3_r: chan in, + ram_wr_resp_4_r: chan in, + ram_wr_resp_5_r: chan in, + ram_wr_resp_6_r: chan in, + ram_wr_resp_7_r: chan in, + + notify_s: chan<()> out, + reset_s: chan<()> out, ) { - let (looped_channel_s, looped_channel_r) = chan("looped_channel"); - - let (ram_rd_req_0_s, ram_rd_req_0_r) = chan, u32:1>("ram_rd_req_0"); - let (ram_rd_req_1_s, ram_rd_req_1_r) = chan, u32:1>("ram_rd_req_1"); - let (ram_rd_req_2_s, ram_rd_req_2_r) = chan, u32:1>("ram_rd_req_2"); - let (ram_rd_req_3_s, ram_rd_req_3_r) = chan, u32:1>("ram_rd_req_3"); - let (ram_rd_req_4_s, ram_rd_req_4_r) = chan, u32:1>("ram_rd_req_4"); - let (ram_rd_req_5_s, ram_rd_req_5_r) = chan, u32:1>("ram_rd_req_5"); - let (ram_rd_req_6_s, ram_rd_req_6_r) = chan, u32:1>("ram_rd_req_6"); - let (ram_rd_req_7_s, ram_rd_req_7_r) = chan, u32:1>("ram_rd_req_7"); - - let (ram_rd_resp_0_s, ram_rd_resp_0_r) = chan, u32:1>("ram_rd_resp_0"); - let (ram_rd_resp_1_s, ram_rd_resp_1_r) = chan, u32:1>("ram_rd_resp_1"); - let (ram_rd_resp_2_s, ram_rd_resp_2_r) = chan, u32:1>("ram_rd_resp_2"); - let (ram_rd_resp_3_s, ram_rd_resp_3_r) = chan, u32:1>("ram_rd_resp_3"); - let (ram_rd_resp_4_s, ram_rd_resp_4_r) = chan, u32:1>("ram_rd_resp_4"); - let (ram_rd_resp_5_s, ram_rd_resp_5_r) = chan, u32:1>("ram_rd_resp_5"); - let (ram_rd_resp_6_s, ram_rd_resp_6_r) = chan, u32:1>("ram_rd_resp_6"); - let (ram_rd_resp_7_s, ram_rd_resp_7_r) = chan, u32:1>("ram_rd_resp_7"); - - let (ram_wr_req_0_s, ram_wr_req_0_r) = chan, u32:1>("ram_wr_req_0"); - let (ram_wr_req_1_s, ram_wr_req_1_r) = chan, u32:1>("ram_wr_req_1"); - let (ram_wr_req_2_s, ram_wr_req_2_r) = chan, u32:1>("ram_wr_req_2"); - let (ram_wr_req_3_s, ram_wr_req_3_r) = chan, u32:1>("ram_wr_req_3"); - let (ram_wr_req_4_s, ram_wr_req_4_r) = chan, u32:1>("ram_wr_req_4"); - let (ram_wr_req_5_s, ram_wr_req_5_r) = chan, u32:1>("ram_wr_req_5"); - let (ram_wr_req_6_s, ram_wr_req_6_r) = chan, u32:1>("ram_wr_req_6"); - let (ram_wr_req_7_s, ram_wr_req_7_r) = chan, u32:1>("ram_wr_req_7"); - - let (ram_wr_resp_0_s, ram_wr_resp_0_r) = chan("ram_wr_resp_0"); - let (ram_wr_resp_1_s, ram_wr_resp_1_r) = chan("ram_wr_resp_1"); - let (ram_wr_resp_2_s, ram_wr_resp_2_r) = chan("ram_wr_resp_2"); - let (ram_wr_resp_3_s, ram_wr_resp_3_r) = chan("ram_wr_resp_3"); - let (ram_wr_resp_4_s, ram_wr_resp_4_r) = chan("ram_wr_resp_4"); - let (ram_wr_resp_5_s, ram_wr_resp_5_r) = chan("ram_wr_resp_5"); - let (ram_wr_resp_6_s, ram_wr_resp_6_r) = chan("ram_wr_resp_6"); - let (ram_wr_resp_7_s, ram_wr_resp_7_r) = chan("ram_wr_resp_7"); - - spawn ZstdDecoder( - input_r, output_s, - looped_channel_r, looped_channel_s, - ram_rd_req_0_s, ram_rd_req_1_s, ram_rd_req_2_s, ram_rd_req_3_s, - ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, + spawn ZstdDecoder< + INST_AXI_DATA_W, INST_AXI_ADDR_W, INST_AXI_ID_W, INST_AXI_DEST_W, + INST_REGS_N, INST_WINDOW_LOG_MAX, + INST_HB_ADDR_W, INST_HB_DATA_W, INST_HB_NUM_PARTITIONS, INST_HB_SIZE_KB, + >( + csr_axi_aw_r, csr_axi_w_r, csr_axi_b_s, csr_axi_ar_r, csr_axi_r_s, + fh_axi_ar_s, fh_axi_r_r, + bh_axi_ar_s, bh_axi_r_r, + raw_axi_ar_s, raw_axi_r_r, + output_axi_aw_s, output_axi_w_s, output_axi_b_r, + ram_rd_req_0_s, ram_rd_req_1_s, 
ram_rd_req_2_s, ram_rd_req_3_s, + ram_rd_req_4_s, ram_rd_req_5_s, ram_rd_req_6_s, ram_rd_req_7_s, ram_rd_resp_0_r, ram_rd_resp_1_r, ram_rd_resp_2_r, ram_rd_resp_3_r, ram_rd_resp_4_r, ram_rd_resp_5_r, ram_rd_resp_6_r, ram_rd_resp_7_r, - ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, - ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, + ram_wr_req_0_s, ram_wr_req_1_s, ram_wr_req_2_s, ram_wr_req_3_s, + ram_wr_req_4_s, ram_wr_req_5_s, ram_wr_req_6_s, ram_wr_req_7_s, ram_wr_resp_0_r, ram_wr_resp_1_r, ram_wr_resp_2_r, ram_wr_resp_3_r, ram_wr_resp_4_r, ram_wr_resp_5_r, ram_wr_resp_6_r, ram_wr_resp_7_r, + notify_s, reset_s, ); - - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_0_r, ram_rd_resp_0_s, ram_wr_req_0_r, ram_wr_resp_0_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_1_r, ram_rd_resp_1_s, ram_wr_req_1_r, ram_wr_resp_1_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_2_r, ram_rd_resp_2_s, ram_wr_req_2_r, ram_wr_resp_2_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_3_r, ram_rd_resp_3_s, ram_wr_req_3_r, ram_wr_resp_3_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_4_r, ram_rd_resp_4_s, ram_wr_req_4_r, ram_wr_resp_4_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_5_r, ram_rd_resp_5_s, ram_wr_req_5_r, ram_wr_resp_5_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_6_r, ram_rd_resp_6_s, ram_wr_req_6_r, ram_wr_resp_6_s); - spawn ram::RamModel< - RAM_DATA_WIDTH, TEST_RAM_SIZE, RAM_WORD_PARTITION_SIZE, - TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, TEST_RAM_ASSERT_VALID_READ> - (ram_rd_req_7_r, ram_rd_resp_7_s, ram_wr_req_7_r, ram_wr_resp_7_s); - - (input_r, output_s) } next (state: ()) {} diff --git a/xls/modules/zstd/zstd_dec_cocotb_test.py b/xls/modules/zstd/zstd_dec_cocotb_test.py new file mode 100644 index 0000000000..618d0591f9 --- /dev/null +++ b/xls/modules/zstd/zstd_dec_cocotb_test.py @@ -0,0 +1,335 @@ +#!/usr/bin/env python +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
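+
+"""Cocotb-based simulation testbench for the ZSTD decoder wrapper.
+
+Overview of the flow exercised below: the tests drive the decoder's CSR
+interface through an AXI master, place a generated ZSTD frame in a simulated
+AXI RAM, program the input/output buffer addresses, write the Start CSR, wait
+for the `notify` channel, and finally compare the memory contents against the
+frame decompressed with the reference `zstandard` library.
+"""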
+ + +from enum import Enum +from pathlib import Path +import tempfile + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import ClockCycles, Event +from cocotb.binary import BinaryValue +from cocotb_bus.scoreboard import Scoreboard + +from cocotbext.axi.axi_master import AxiMaster +from cocotbext.axi.axi_channels import AxiAWBus, AxiWBus, AxiBBus, AxiWriteBus, AxiARBus, AxiRBus, AxiReadBus, AxiBus, AxiBTransaction, AxiBSource, AxiBSink, AxiBMonitor, AxiRTransaction, AxiRSource, AxiRSink, AxiRMonitor +from cocotbext.axi.axi_ram import AxiRam +from cocotbext.axi.sparse_memory import SparseMemory + +from xls.common import runfiles +from xls.modules.zstd.cocotb.channel import ( + XLSChannel, + XLSChannelDriver, + XLSChannelMonitor, +) +from xls.modules.zstd.cocotb.data_generator import GenerateFrame, DecompressFrame, BlockType +from xls.modules.zstd.cocotb.memory import init_axi_mem, AxiRamFromFile +from xls.modules.zstd.cocotb.utils import reset, run_test +from xls.modules.zstd.cocotb.xlsstruct import XLSStruct, xls_dataclass + +AXI_DATA_W = 64 +AXI_DATA_W_BYTES = AXI_DATA_W // 8 +MAX_ENCODED_FRAME_SIZE_B = 16384 +NOTIFY_CHANNEL = "notify" +RESET_CHANNEL = "reset" + +# Override default widths of AXI response signals +signal_widths = {"bresp": 3} +AxiBBus._signal_widths = signal_widths +AxiBTransaction._signal_widths = signal_widths +AxiBSource._signal_widths = signal_widths +AxiBSink._signal_widths = signal_widths +AxiBMonitor._signal_widths = signal_widths +signal_widths = {"rresp": 3, "rlast": 1} +AxiRBus._signal_widths = signal_widths +AxiRTransaction._signal_widths = signal_widths +AxiRSource._signal_widths = signal_widths +AxiRSink._signal_widths = signal_widths +AxiRMonitor._signal_widths = signal_widths + +@xls_dataclass +class NotifyStruct(XLSStruct): + pass + +@xls_dataclass +class ResetStruct(XLSStruct): + pass + +class CSR(Enum): + """ + Maps the offsets to the ZSTD Decoder registers + """ + Status = 0 + Start = 1 + Reset = 2 + InputBuffer = 3 + OutputBuffer = 4 + +class Status(Enum): + """ + Codes for the Status register + """ + IDLE = 0x0 + RUNNING = 0x1 + +def set_termination_event(monitor, event, transactions): + def terminate_cb(_): + if monitor.stats.received_transactions == transactions: + event.set() + monitor.add_callback(terminate_cb) + +def connect_axi_read_bus(dut, name=""): + AXI_AR = "axi_ar" + AXI_R = "axi_r" + + if name != "": + name += "_" + + bus_axi_ar = AxiARBus.from_prefix(dut, name + AXI_AR) + bus_axi_r = AxiRBus.from_prefix(dut, name + AXI_R) + + return AxiReadBus(bus_axi_ar, bus_axi_r) + +def connect_axi_write_bus(dut, name=""): + AXI_AW = "axi_aw" + AXI_W = "axi_w" + AXI_B = "axi_b" + + if name != "": + name += "_" + + bus_axi_aw = AxiAWBus.from_prefix(dut, name + AXI_AW) + bus_axi_w = AxiWBus.from_prefix(dut, name + AXI_W) + bus_axi_b = AxiBBus.from_prefix(dut, name + AXI_B) + + return AxiWriteBus(bus_axi_aw, bus_axi_w, bus_axi_b) + +def connect_axi_bus(dut, name=""): + bus_axi_read = connect_axi_read_bus(dut, name) + bus_axi_write = connect_axi_write_bus(dut, name) + + return AxiBus(bus_axi_write, bus_axi_read) + +async def csr_write(cpu, csr, data): + if type(data) is int: + data = data.to_bytes(AXI_DATA_W_BYTES, byteorder='little') + assert len(data) <= AXI_DATA_W_BYTES + await cpu.write(csr.value * AXI_DATA_W_BYTES, data) + +async def csr_read(cpu, csr): + return await cpu.read(csr.value * AXI_DATA_W_BYTES, AXI_DATA_W_BYTES) + +async def test_csr(dut): + + clock = Clock(dut.clk, 10, units="us") + cocotb.start_soon(clock.start()) + + 
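+  # Reset the DUT, then exercise a write/read round trip on every CSR except
+  # Reset, which is covered by its own test case below.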
await reset_dut(dut, 50) + + csr_bus = connect_axi_bus(dut, "csr") + + cpu = AxiMaster(csr_bus, dut.clk, dut.rst) + + await ClockCycles(dut.clk, 10) + i = 0 + for reg in CSR: + # Reset CSR tested in a separate test case + if (reg == CSR.Reset): + continue + expected_src = bytearray.fromhex("0DF0AD8BEFBEADDE") + assert len(expected_src) >= AXI_DATA_W_BYTES + expected = expected_src[-AXI_DATA_W_BYTES:] + expected[0] += i + await csr_write(cpu, reg, expected) + read = await csr_read(cpu, reg) + assert read.data == expected, "Expected data doesn't match contents of the {}".format(reg) + i += 1 + await ClockCycles(dut.clk, 10) + +async def test_reset(dut): + clock = Clock(dut.clk, 10, units="us") + cocotb.start_soon(clock.start()) + + await reset_dut(dut, 50) + + (reset_channel, reset_monitor) = connect_xls_channel(dut, RESET_CHANNEL, ResetStruct) + + csr_bus = connect_axi_bus(dut, "csr") + cpu = AxiMaster(csr_bus, dut.clk, dut.rst) + + scoreboard = Scoreboard(dut) + + rst_struct = ResetStruct() + # Expect single reset signal on reset output channel + expected_reset = [rst_struct] + scoreboard.add_interface(reset_monitor, expected_reset) + + await ClockCycles(dut.clk, 10) + await start_decoder(cpu) + timeout = 10 + status = await csr_read(cpu, CSR.Status) + while ((int.from_bytes(status.data, byteorder='little') == Status.IDLE.value) & (timeout != 0)): + status = await csr_read(cpu, CSR.Status) + timeout -= 1 + assert (timeout != 0) + + await csr_write(cpu, CSR.Reset, 0x1) + await wait_for_idle(cpu, 10) + + await ClockCycles(dut.clk, 10) + +async def configure_decoder(cpu, ibuf_addr, obuf_addr): + status = await csr_read(cpu, CSR.Status) + if int.from_bytes(status.data, byteorder='little') != Status.IDLE.value: + await csr_write(cpu, CSR.Reset, 0x1) + await csr_write(cpu, CSR.InputBuffer, ibuf_addr) + await csr_write(cpu, CSR.OutputBuffer, obuf_addr) + +async def start_decoder(cpu): + await csr_write(cpu, CSR.Start, 0x1) + +async def wait_for_idle(cpu, timeout=100): + status = await csr_read(cpu, CSR.Status) + while ((int.from_bytes(status.data, byteorder='little') != Status.IDLE.value) & (timeout != 0)): + status = await csr_read(cpu, CSR.Status) + timeout -= 1 + assert (timeout != 0) + +async def reset_dut(dut, rst_len=10): + dut.rst.setimmediatevalue(0) + await ClockCycles(dut.clk, rst_len) + dut.rst.setimmediatevalue(1) + await ClockCycles(dut.clk, rst_len) + dut.rst.setimmediatevalue(0) + +def connect_xls_channel(dut, channel_name, xls_struct): + channel = XLSChannel(dut, channel_name, dut.clk, start_now=True) + monitor = XLSChannelMonitor(dut, channel_name, dut.clk, xls_struct) + + return (channel, monitor) + +def prepare_test_environment(dut): + clock = Clock(dut.clk, 10, units="us") + cocotb.start_soon(clock.start()) + + memory_bus = connect_axi_bus(dut, "memory") + csr_bus = connect_axi_bus(dut, "csr") + axi_buses = { + "memory": memory_bus, + "csr": csr_bus + } + + cpu = AxiMaster(csr_bus, dut.clk, dut.rst) + + return (axi_buses, cpu) + +async def test_decoder(dut, seed, block_type, axi_buses, cpu): + memory_bus = axi_buses["memory"] + csr_bus = axi_buses["csr"] + + (notify_channel, notify_monitor) = connect_xls_channel(dut, NOTIFY_CHANNEL, NotifyStruct) + assert_notify = Event() + set_termination_event(notify_monitor, assert_notify, 1) + + mem_size = MAX_ENCODED_FRAME_SIZE_B + ibuf_addr = 0x0 + obuf_addr = mem_size // 2 + + #FIXME: use delete_on_close=False after moving to python 3.12 + with tempfile.NamedTemporaryFile(delete=False) as encoded: + await reset_dut(dut, 50) + + # 
Generate ZSTD frame to temporary file + GenerateFrame(seed, block_type, encoded.name) + + expected_decoded_frame = DecompressFrame(encoded.read()) + encoded.close() + reference_memory = SparseMemory(mem_size) + reference_memory.write(obuf_addr, expected_decoded_frame) + + # Initialise testbench memory with generated ZSTD frame + memory = AxiRamFromFile(bus=memory_bus, clock=dut.clk, reset=dut.rst, path=encoded.name, size=mem_size) + + await configure_decoder(cpu, ibuf_addr, obuf_addr) + await start_decoder(cpu) + await assert_notify.wait() + await wait_for_idle(cpu) + # Read decoded frame in chunks of AXI_DATA_W length + # Compare against frame decompressed with the reference library + for read_op in range(0, ((len(expected_decoded_frame) + (AXI_DATA_W_BYTES - 1)) // AXI_DATA_W_BYTES)): + addr = obuf_addr + (read_op * AXI_DATA_W_BYTES) + mem_contents = memory.read(addr, AXI_DATA_W_BYTES) + exp_mem_contents = reference_memory.read(addr, AXI_DATA_W_BYTES) + assert mem_contents == exp_mem_contents, "{} bytes of memory contents at address {} don't match the expected contents:\n{}\nvs\n{}".format(AXI_DATA_W_BYTES, hex(addr), hex(int.from_bytes(mem_contents, byteorder='little')), hex(int.from_bytes(exp_mem_contents, byteorder='little'))) + + await ClockCycles(dut.clk, 20) + +async def testing_routine(dut, test_cases=1, block_type=BlockType.RANDOM): + (axi_buses, cpu) = prepare_test_environment(dut) + for test_case in range(test_cases): + await test_decoder(dut, test_case, block_type, axi_buses, cpu) + print("Decoding {} ZSTD frames done".format(block_type.name)) + +@cocotb.test(timeout_time=50, timeout_unit="ms") +async def zstd_csr_test(dut): + await test_csr(dut) + +@cocotb.test(timeout_time=50, timeout_unit="ms") +async def zstd_reset_test(dut): + await test_reset(dut) + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def zstd_raw_frames_test(dut): + test_cases = 5 + block_type = BlockType.RAW + await testing_routine(dut, test_cases, block_type) + +@cocotb.test(timeout_time=500, timeout_unit="ms") +async def zstd_rle_frames_test(dut): + test_cases = 5 + block_type = BlockType.RLE + await testing_routine(dut, test_cases, block_type) + +#@cocotb.test(timeout_time=1000, timeout_unit="ms") +#async def zstd_compressed_frames_test(dut): +# test_cases = 1 +# block_type = BlockType.COMPRESSED +# await testing_routine(dut, test_cases, block_type) + +#@cocotb.test(timeout_time=1000, timeout_unit="ms") +#async def zstd_random_frames_test(dut): +# test_cases = 1 +# block_type = BlockType.RANDOM +# await testing_routine(dut, test_cases, block_type) + +if __name__ == "__main__": + toplevel = "zstd_dec_wrapper" + verilog_sources = [ + "xls/modules/zstd/zstd_dec.v", + "xls/modules/zstd/xls_fifo_wrapper.v", + "xls/modules/zstd/zstd_dec_wrapper.v", + "xls/modules/zstd/external/axi_crossbar_wrapper.v", + "xls/modules/zstd/external/axi_crossbar.v", + "xls/modules/zstd/external/axi_crossbar_rd.v", + "xls/modules/zstd/external/axi_crossbar_wr.v", + "xls/modules/zstd/external/axi_crossbar_addr.v", + "xls/modules/zstd/external/axi_register_rd.v", + "xls/modules/zstd/external/axi_register_wr.v", + "xls/modules/zstd/external/arbiter.v", + "xls/modules/zstd/external/priority_encoder.v", + ] + test_module=[Path(__file__).stem] + run_test(toplevel, test_module, verilog_sources) diff --git a/xls/modules/zstd/zstd_dec_test.cc b/xls/modules/zstd/zstd_dec_test.cc deleted file mode 100644 index 0a6679a11d..0000000000 --- a/xls/modules/zstd/zstd_dec_test.cc +++ /dev/null @@ -1,297 +0,0 @@ -// Copyright 2024 The XLS 
Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// - -#include -#include -#include -#include -#include -#include -#include // NOLINT -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "absl/container/flat_hash_map.h" -#include "absl/log/log.h" -#include "absl/status/statusor.h" -#include "absl/types/span.h" -#include "xls/common/file/filesystem.h" -#include "xls/common/file/get_runfile_path.h" -#include "xls/common/status/matchers.h" -#include "xls/common/status/ret_check.h" -#include "xls/interpreter/channel_queue.h" -#include "xls/interpreter/serial_proc_runtime.h" -#include "xls/ir/bits.h" -#include "xls/ir/channel.h" -#include "xls/ir/events.h" -#include "xls/ir/ir_parser.h" -#include "xls/ir/package.h" -#include "xls/ir/proc.h" -#include "xls/ir/value.h" -#include "xls/jit/jit_proc_runtime.h" -#include "xls/modules/zstd/data_generator.h" -#include "external/zstd/lib/zstd.h" - -namespace xls { -namespace { - -class ZstdDecodedPacket { - public: - static absl::StatusOr MakeZstdDecodedPacket( - const Value& packet) { - // Expect tuple - XLS_RET_CHECK(packet.IsTuple()); - // Expect exactly 3 fields - XLS_RET_CHECK(packet.size() == 3); - for (int i = 0; i < 3; i++) { - // Expect fields to be Bits - XLS_RET_CHECK(packet.element(i).IsBits()); - // All fields must fit in 64 bits - XLS_RET_CHECK(packet.element(i).bits().FitsInUint64()); - } - - std::vector data = packet.element(0).bits().ToBytes(); - absl::StatusOr len = packet.element(1).bits().ToUint64(); - XLS_RET_CHECK(len.ok()); - uint64_t length = *len; - bool last = packet.element(2).bits().IsOne(); - - return ZstdDecodedPacket(data, length, last); - } - - std::vector& GetData() { return data; } - - uint64_t GetLength() { return length; } - - bool IsLast() { return last; } - - std::string ToString() const { - std::stringstream s; - for (int j = 0; j < sizeof(uint64_t) && j < data.size(); j++) { - s << "0x" << std::setw(2) << std::setfill('0') << std::right << std::hex - << static_cast(data[j]) << std::dec << ", "; - } - return s.str(); - } - - private: - ZstdDecodedPacket(std::vector data, uint64_t length, bool last) - : data(std::move(data)), length(length), last(last) {} - - std::vector data; - uint64_t length; - bool last; -}; - -class ZstdDecoderTest : public ::testing::Test { - public: - void SetUp() override { - XLS_ASSERT_OK_AND_ASSIGN(std::filesystem::path ir_path, - xls::GetXlsRunfilePath(this->kIrFile)); - XLS_ASSERT_OK_AND_ASSIGN(std::string ir_text, - xls::GetFileContents(ir_path)); - XLS_ASSERT_OK_AND_ASSIGN(this->package, xls::Parser::ParsePackage(ir_text)); - XLS_ASSERT_OK_AND_ASSIGN(this->interpreter, - CreateJitSerialProcRuntime(this->package.get())); - - auto& queue_manager = this->interpreter->queue_manager(); - XLS_ASSERT_OK_AND_ASSIGN( - this->recv_queue, queue_manager.GetQueueByName(this->kRecvChannelName)); - XLS_ASSERT_OK_AND_ASSIGN( - this->send_queue, 
queue_manager.GetQueueByName(this->kSendChannelName)); - } - - void PrintTraceMessages(const std::string& pname) { - XLS_ASSERT_OK_AND_ASSIGN(Proc * proc, this->package->GetProc(pname)); - const InterpreterEvents& events = - this->interpreter->GetInterpreterEvents(proc); - - if (!events.trace_msgs.empty()) { - for (const auto& tm : events.trace_msgs) { - LOG(INFO) << "[TRACE] " << tm.message << "\n"; - } - } - } - - const std::string_view kProcName = "__zstd_dec__ZstdDecoderTest_0_next"; - const std::string_view kRecvChannelName = "zstd_dec__output_s"; - const std::string_view kSendChannelName = "zstd_dec__input_r"; - - const std::string_view kIrFile = "xls/modules/zstd/zstd_dec_test.ir"; - - std::unique_ptr package; - std::unique_ptr interpreter; - ChannelQueue *recv_queue, *send_queue; - - void PrintVector(absl::Span vec) { - for (int i = 0; i < vec.size(); i += 8) { - LOG(INFO) << "0x" << std::hex << std::setw(3) << std::left << i - << std::dec << ": "; - for (int j = 0; j < sizeof(uint64_t) && (i + j) < vec.size(); j++) { - LOG(INFO) << std::setfill('0') << std::setw(2) << std::hex - << static_cast(vec[i + j]) << std::dec << " "; - } - LOG(INFO) << "\n"; - } - } - - void DecompressWithLibZSTD(std::vector encoded_frame, - std::vector& decoded_frame) { - size_t buff_out_size = ZSTD_DStreamOutSize(); - uint8_t* const buff_out = new uint8_t[buff_out_size]; - - ZSTD_DCtx* const dctx = ZSTD_createDCtx(); - EXPECT_FALSE(dctx == nullptr); - - void* const frame = static_cast(encoded_frame.data()); - size_t const frame_size = encoded_frame.size(); - // Put the whole frame in the buffer - ZSTD_inBuffer input_buffer = {frame, frame_size, 0}; - - while (input_buffer.pos < input_buffer.size) { - ZSTD_outBuffer output_buffer = {buff_out, buff_out_size, 0}; - size_t decomp_result = - ZSTD_decompressStream(dctx, &output_buffer, &input_buffer); - bool decomp_success = ZSTD_isError(decomp_result) != 0u; - EXPECT_FALSE(decomp_success); - - // Append output buffer contents to output vector - decoded_frame.insert( - decoded_frame.end(), static_cast(output_buffer.dst), - (static_cast(output_buffer.dst) + output_buffer.pos)); - - EXPECT_TRUE(decomp_result == 0 && output_buffer.pos < output_buffer.size); - } - - ZSTD_freeDCtx(dctx); - delete[] buff_out; - } - - void ParseAndCompareWithZstd(std::vector frame) { - std::vector lib_decomp; - DecompressWithLibZSTD(frame, lib_decomp); - size_t lib_decomp_size = lib_decomp.size(); - std::cerr << "lib_decomp_size: " << lib_decomp_size << "\n"; - - std::vector sim_decomp; - size_t sim_decomp_size_words = - (lib_decomp_size + sizeof(uint64_t) - 1) / sizeof(uint64_t); - size_t sim_decomp_size_bytes = - (lib_decomp_size + sizeof(uint64_t) - 1) * sizeof(uint64_t); - sim_decomp.reserve(sim_decomp_size_bytes); - - // Send compressed frame to decoder simulation - for (int i = 0; i < frame.size(); i += 8) { - // Pad packet w/ zeros to match the frame size expected by the design. - std::array packet_data = {}; - auto frame_packet_begin = frame.begin() + i; - auto frame_packet_end = frame_packet_begin + 8 < frame.end() - ? 
frame_packet_begin + 8 - : frame.end(); - std::copy(frame_packet_begin, frame_packet_end, packet_data.begin()); - auto span = absl::MakeSpan(packet_data.data(), 8); - auto value = Value(Bits::FromBytes(span, 64)); - XLS_EXPECT_OK(this->send_queue->Write(value)); - XLS_EXPECT_OK(this->interpreter->Tick()); - } - PrintTraceMessages("__zstd_dec__ZstdDecoderTest_0_next"); - - // Tick decoder simulation until we get expected amount of output data - // batches on output channel queue - std::optional ticks_timeout = std::nullopt; - absl::flat_hash_map output_counts = { - {this->recv_queue->channel(), sim_decomp_size_words}}; - XLS_EXPECT_OK( - this->interpreter->TickUntilOutput(output_counts, ticks_timeout)); - - // Read decompressed data from output channel queue - for (int i = 0; i < sim_decomp_size_words; i++) { - auto read_value = this->recv_queue->Read(); - EXPECT_EQ(read_value.has_value(), true); - auto packet = - ZstdDecodedPacket::MakeZstdDecodedPacket(read_value.value()); - XLS_EXPECT_OK(packet); - auto word_vec = packet->GetData(); - auto valid_length = packet->GetLength() / CHAR_BIT; - std::copy(begin(word_vec), begin(word_vec) + valid_length, - back_inserter(sim_decomp)); - } - - EXPECT_EQ(lib_decomp_size, sim_decomp.size()); - for (int i = 0; i < lib_decomp_size; i++) { - EXPECT_EQ(lib_decomp[i], sim_decomp[i]); - } - } -}; - -/* TESTS */ - -TEST_F(ZstdDecoderTest, ParseFrameWithRawBlocks) { - int seed = 3; // Arbitrary seed value for small ZSTD frame - auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RAW); - EXPECT_TRUE(frame.ok()); - this->ParseAndCompareWithZstd(frame.value()); -} - -TEST_F(ZstdDecoderTest, ParseFrameWithRleBlocks) { - int seed = 3; // Arbitrary seed value for small ZSTD frame - auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RLE); - EXPECT_TRUE(frame.ok()); - this->ParseAndCompareWithZstd(frame.value()); -} - -class ZstdDecoderSeededTest : public ZstdDecoderTest, - public ::testing::WithParamInterface { - public: - static const uint32_t seed_generator_start = 0; - static const uint32_t random_frames_count = 100; -}; - -// Test `random_frames_count` instances of randomly generated valid -// frames, generated with `decodecorpus` tool. - -TEST_P(ZstdDecoderSeededTest, ParseMultipleFramesWithRawBlocks) { - auto seed = GetParam(); - auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RAW); - EXPECT_TRUE(frame.ok()); - this->ParseAndCompareWithZstd(frame.value()); -} - -TEST_P(ZstdDecoderSeededTest, ParseMultipleFramesWithRleBlocks) { - auto seed = GetParam(); - auto frame = zstd::GenerateFrame(seed, zstd::BlockType::RLE); - EXPECT_TRUE(frame.ok()); - this->ParseAndCompareWithZstd(frame.value()); -} - -INSTANTIATE_TEST_SUITE_P( - ZstdDecoderSeededTest, ZstdDecoderSeededTest, - ::testing::Range(ZstdDecoderSeededTest::seed_generator_start, - ZstdDecoderSeededTest::seed_generator_start + - ZstdDecoderSeededTest::random_frames_count)); - -} // namespace -} // namespace xls diff --git a/xls/modules/zstd/zstd_dec_test.x b/xls/modules/zstd/zstd_dec_test.x new file mode 100644 index 0000000000..bd90210ef1 --- /dev/null +++ b/xls/modules/zstd/zstd_dec_test.x @@ -0,0 +1,437 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +import std; +import xls.examples.ram; +import xls.modules.zstd.common; +import xls.modules.zstd.memory.axi; +import xls.modules.zstd.csr_config; +import xls.modules.zstd.sequence_executor; +import xls.modules.zstd.zstd_frame_testcases; +import xls.modules.zstd.memory.axi_ram; +import xls.modules.zstd.zstd_dec; + +const TEST_WINDOW_LOG_MAX = u32:30; + +const TEST_AXI_DATA_W = u32:64; +const TEST_AXI_ADDR_W = u32:32; +const TEST_AXI_ID_W = u32:8; +const TEST_AXI_DEST_W = u32:8; +const TEST_AXI_DATA_W_DIV8 = TEST_AXI_DATA_W / u32:8; + +const TEST_REGS_N = u32:5; +const TEST_LOG2_REGS_N = std::clog2(TEST_REGS_N); + +const TEST_HB_RAM_N = u32:8; +const TEST_HB_ADDR_W = sequence_executor::ZSTD_RAM_ADDR_WIDTH; +const TEST_HB_DATA_W = sequence_executor::RAM_DATA_WIDTH; +const TEST_HB_NUM_PARTITIONS = sequence_executor::RAM_NUM_PARTITIONS; +const TEST_HB_SIZE_KB = sequence_executor::ZSTD_HISTORY_BUFFER_SIZE_KB; +const TEST_HB_RAM_SIZE = sequence_executor::ZSTD_RAM_SIZE; +const TEST_HB_RAM_WORD_PARTITION_SIZE = sequence_executor::RAM_WORD_PARTITION_SIZE; +const TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR = sequence_executor::TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR; +const TEST_HB_RAM_INITIALIZED = sequence_executor::TEST_RAM_INITIALIZED; +const TEST_HB_RAM_ASSERT_VALID_READ:bool = false; + +const TEST_RAM_DATA_W:u32 = TEST_AXI_DATA_W; +const TEST_RAM_SIZE:u32 = u32:16384; +const TEST_RAM_ADDR_W:u32 = std::clog2(TEST_RAM_SIZE); +const TEST_RAM_WORD_PARTITION_SIZE:u32 = u32:8; +const TEST_RAM_NUM_PARTITIONS:u32 = ram::num_partitions(TEST_RAM_WORD_PARTITION_SIZE, TEST_RAM_DATA_W); +const TEST_RAM_BASE_ADDR:u32 = u32:0; +const TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR = ram::SimultaneousReadWriteBehavior::READ_BEFORE_WRITE; +const TEST_RAM_INITIALIZED = true; + +const TEST_MOCK_OUTPUT_RAM_SIZE:u32 = TEST_RAM_SIZE / TEST_AXI_DATA_W_DIV8; + +fn csr_addr(c: zstd_dec::Csr) -> uN[TEST_AXI_ADDR_W] { + (c as uN[TEST_AXI_ADDR_W]) << 3 +} + +#[test_proc] +proc ZstdDecoderTest { + type CsrAxiAr = axi::AxiAr; + type CsrAxiR = axi::AxiR; + type CsrAxiAw = axi::AxiAw; + type CsrAxiW = axi::AxiW; + type CsrAxiB = axi::AxiB; + + type CsrRdReq = csr_config::CsrRdReq; + type CsrRdResp = csr_config::CsrRdResp; + type CsrWrReq = csr_config::CsrWrReq; + type CsrWrResp = csr_config::CsrWrResp; + type CsrChange = csr_config::CsrChange; + + type MemAxiAr = axi::AxiAr; + type MemAxiR = axi::AxiR; + type MemAxiAw = axi::AxiAw; + type MemAxiW = axi::AxiW; + type MemAxiB = axi::AxiB; + + type RamRdReqHB = ram::ReadReq; + type RamRdRespHB = ram::ReadResp; + type RamWrReqHB = ram::WriteReq; + type RamWrRespHB = ram::WriteResp; + + type RamRdReq = ram::ReadReq; + type RamRdResp = ram::ReadResp; + type RamWrReq = ram::WriteReq; + type RamWrResp = ram::WriteResp; + + type ZstdDecodedPacket = common::ZstdDecodedPacket; + terminator: chan out; + csr_axi_aw_s: chan out; + csr_axi_w_s: chan out; + csr_axi_b_r: chan in; + csr_axi_ar_s: chan out; + csr_axi_r_r: chan in; + fh_axi_ar_r: chan in; + fh_axi_r_s: chan out; + bh_axi_ar_r: chan in; + bh_axi_r_s: chan out; + raw_axi_ar_r: chan in; + raw_axi_r_s: chan out; + 
output_axi_aw_r: chan in; + output_axi_w_r: chan in; + output_axi_b_s: chan out; + + ram_rd_req_r: chan[8] in; + ram_rd_resp_s: chan[8] out; + ram_wr_req_r: chan[8] in; + ram_wr_resp_s: chan[8] out; + + ram_wr_req_fh_s: chan out; + ram_wr_req_bh_s: chan out; + ram_wr_req_raw_s: chan out; + raw_wr_resp_fh_r: chan in; + raw_wr_resp_bh_r: chan in; + raw_wr_resp_raw_r: chan in; + + notify_r: chan<()> in; + reset_r: chan<()> in; + + init {} + + config(terminator: chan out) { + + let (csr_axi_aw_s, csr_axi_aw_r) = chan("csr_axi_aw"); + let (csr_axi_w_s, csr_axi_w_r) = chan("csr_axi_w"); + let (csr_axi_b_s, csr_axi_b_r) = chan("csr_axi_b"); + let (csr_axi_ar_s, csr_axi_ar_r) = chan("csr_axi_ar"); + let (csr_axi_r_s, csr_axi_r_r) = chan("csr_axi_r"); + + let (fh_axi_ar_s, fh_axi_ar_r) = chan("fh_axi_ar"); + let (fh_axi_r_s, fh_axi_r_r) = chan("fh_axi_r"); + + let (bh_axi_ar_s, bh_axi_ar_r) = chan("bh_axi_ar"); + let (bh_axi_r_s, bh_axi_r_r) = chan("bh_axi_r"); + + let (raw_axi_ar_s, raw_axi_ar_r) = chan("raw_axi_ar"); + let (raw_axi_r_s, raw_axi_r_r) = chan("raw_axi_r"); + + let (output_axi_aw_s, output_axi_aw_r) = chan("output_axi_aw"); + let (output_axi_w_s, output_axi_w_r) = chan("output_axi_w"); + let (output_axi_b_s, output_axi_b_r) = chan("output_axi_b"); + + let (ram_rd_req_s, ram_rd_req_r) = chan[8]("ram_rd_req"); + let (ram_rd_resp_s, ram_rd_resp_r) = chan[8]("ram_rd_resp"); + let (ram_wr_req_s, ram_wr_req_r) = chan[8]("ram_wr_req"); + let (ram_wr_resp_s, ram_wr_resp_r) = chan[8]("ram_wr_resp"); + + let (ram_rd_req_fh_s, ram_rd_req_fh_r) = chan("ram_rd_req_fh"); + let (ram_rd_req_bh_s, ram_rd_req_bh_r) = chan("ram_rd_req_bh"); + let (ram_rd_req_raw_s, ram_rd_req_raw_r) = chan("ram_rd_req_raw"); + let (ram_rd_resp_fh_s, ram_rd_resp_fh_r) = chan("ram_rd_resp_fh"); + let (ram_rd_resp_bh_s, ram_rd_resp_bh_r) = chan("ram_rd_resp_bh"); + let (ram_rd_resp_raw_s, ram_rd_resp_raw_r) = chan("ram_rd_resp_raw"); + + let (ram_wr_req_fh_s, ram_wr_req_fh_r) = chan("ram_wr_req_fh"); + let (ram_wr_req_bh_s, ram_wr_req_bh_r) = chan("ram_wr_req_bh"); + let (ram_wr_req_raw_s, ram_wr_req_raw_r) = chan("ram_wr_req_raw"); + let (ram_wr_resp_fh_s, ram_wr_resp_fh_r) = chan("ram_wr_resp_fh"); + let (ram_wr_resp_bh_s, ram_wr_resp_bh_r) = chan("ram_wr_resp_bh"); + let (ram_wr_resp_raw_s, ram_wr_resp_raw_r) = chan("ram_wr_resp_raw"); + + let (notify_s, notify_r) = chan<()>("notify"); + let (reset_s, reset_r) = chan<()>("reset"); + + spawn zstd_dec::ZstdDecoder< + TEST_AXI_DATA_W, TEST_AXI_ADDR_W, TEST_AXI_ID_W, TEST_AXI_DEST_W, + TEST_REGS_N, TEST_WINDOW_LOG_MAX, + TEST_HB_ADDR_W, TEST_HB_DATA_W, TEST_HB_NUM_PARTITIONS, TEST_HB_SIZE_KB, + >( + csr_axi_aw_r, csr_axi_w_r, csr_axi_b_s, csr_axi_ar_r, csr_axi_r_s, + fh_axi_ar_s, fh_axi_r_r, + bh_axi_ar_s, bh_axi_r_r, + raw_axi_ar_s, raw_axi_r_r, + output_axi_aw_s, output_axi_w_s, output_axi_b_r, + ram_rd_req_s[0], ram_rd_req_s[1], ram_rd_req_s[2], ram_rd_req_s[3], + ram_rd_req_s[4], ram_rd_req_s[5], ram_rd_req_s[6], ram_rd_req_s[7], + ram_rd_resp_r[0], ram_rd_resp_r[1], ram_rd_resp_r[2], ram_rd_resp_r[3], + ram_rd_resp_r[4], ram_rd_resp_r[5], ram_rd_resp_r[6], ram_rd_resp_r[7], + ram_wr_req_s[0], ram_wr_req_s[1], ram_wr_req_s[2], ram_wr_req_s[3], + ram_wr_req_s[4], ram_wr_req_s[5], ram_wr_req_s[6], ram_wr_req_s[7], + ram_wr_resp_r[0], ram_wr_resp_r[1], ram_wr_resp_r[2], ram_wr_resp_r[3], + ram_wr_resp_r[4], ram_wr_resp_r[5], ram_wr_resp_r[6], ram_wr_resp_r[7], + notify_s, reset_s, + ); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, 
TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[0], ram_rd_resp_s[0], ram_wr_req_r[0], ram_wr_resp_s[0]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[1], ram_rd_resp_s[1], ram_wr_req_r[1], ram_wr_resp_s[1]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[2], ram_rd_resp_s[2], ram_wr_req_r[2], ram_wr_resp_s[2]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[3], ram_rd_resp_s[3], ram_wr_req_r[3], ram_wr_resp_s[3]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[4], ram_rd_resp_s[4], ram_wr_req_r[4], ram_wr_resp_s[4]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[5], ram_rd_resp_s[5], ram_wr_req_r[5], ram_wr_resp_s[5]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[6], ram_rd_resp_s[6], ram_wr_req_r[6], ram_wr_resp_s[6]); + + spawn ram::RamModel< + TEST_HB_DATA_W, TEST_HB_RAM_SIZE, TEST_HB_RAM_WORD_PARTITION_SIZE, + TEST_HB_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_HB_RAM_INITIALIZED, + TEST_HB_RAM_ASSERT_VALID_READ + > (ram_rd_req_r[7], ram_rd_resp_s[7], ram_wr_req_r[7], ram_wr_resp_s[7]); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, + > (ram_rd_req_fh_r, ram_rd_resp_fh_s, ram_wr_req_fh_r, ram_wr_resp_fh_s); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, + > (ram_rd_req_bh_r, ram_rd_resp_bh_s, ram_wr_req_bh_r, ram_wr_resp_bh_s); + + spawn ram::RamModel< + TEST_RAM_DATA_W, TEST_RAM_SIZE, TEST_RAM_WORD_PARTITION_SIZE, + TEST_RAM_SIMULTANEOUS_READ_WRITE_BEHAVIOR, TEST_RAM_INITIALIZED, + > (ram_rd_req_raw_r, ram_rd_resp_raw_s, ram_wr_req_raw_r, ram_wr_resp_raw_s); + + spawn axi_ram::AxiRamReader< + TEST_AXI_ADDR_W, TEST_AXI_DATA_W, TEST_AXI_DEST_W, TEST_AXI_ID_W, + TEST_RAM_SIZE, TEST_RAM_BASE_ADDR, TEST_RAM_DATA_W, TEST_RAM_ADDR_W, + >(fh_axi_ar_r, fh_axi_r_s, ram_rd_req_fh_s, ram_rd_resp_fh_r); + + spawn axi_ram::AxiRamReader< + TEST_AXI_ADDR_W, TEST_AXI_DATA_W, TEST_AXI_DEST_W, TEST_AXI_ID_W, + TEST_RAM_SIZE, TEST_RAM_BASE_ADDR, TEST_RAM_DATA_W, TEST_RAM_ADDR_W, + >(bh_axi_ar_r, bh_axi_r_s, ram_rd_req_bh_s, ram_rd_resp_bh_r); + + spawn axi_ram::AxiRamReader< + TEST_AXI_ADDR_W, TEST_AXI_DATA_W, TEST_AXI_DEST_W, TEST_AXI_ID_W, + TEST_RAM_SIZE, TEST_RAM_BASE_ADDR, TEST_RAM_DATA_W, TEST_RAM_ADDR_W, + >(raw_axi_ar_r, raw_axi_r_s, ram_rd_req_raw_s, ram_rd_resp_raw_r); 
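+        // Each AxiRamReader above serves one of the decoder's AXI read interfaces
+        // (frame header, block header, raw block); next() preloads all three backing
+        // RamModels with the same encoded test frame.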
+ + ( + terminator, + csr_axi_aw_s, csr_axi_w_s, csr_axi_b_r, csr_axi_ar_s, csr_axi_r_r, + fh_axi_ar_r, fh_axi_r_s, + bh_axi_ar_r, bh_axi_r_s, + raw_axi_ar_r, raw_axi_r_s, + output_axi_aw_r, output_axi_w_r, output_axi_b_s, + ram_rd_req_r, ram_rd_resp_s, ram_wr_req_r, ram_wr_resp_s, + ram_wr_req_fh_s, ram_wr_req_bh_s, ram_wr_req_raw_s, + ram_wr_resp_fh_r, ram_wr_resp_bh_r, ram_wr_resp_raw_r, + notify_r, reset_r, + ) + } + + next (state: ()) { + trace_fmt!("Test start"); + let frames_count = array_size(zstd_frame_testcases::FRAMES); + + let tok = join(); + let tok = unroll_for! (test_i, tok): (u32, token) in range(u32:0, frames_count) { + trace_fmt!("Loading testcase {:x}", test_i + u32:1); + let frame = zstd_frame_testcases::FRAMES[test_i]; + let tok = for (i, tok): (u32, token) in range(u32:0, frame.array_length) { + let req = RamWrReq { + addr: i as uN[TEST_RAM_ADDR_W], + data: frame.data[i] as uN[TEST_RAM_DATA_W], + mask: uN[TEST_RAM_NUM_PARTITIONS]:0xFF + }; + let tok = send(tok, ram_wr_req_fh_s, req); + let tok = send(tok, ram_wr_req_bh_s, req); + let tok = send(tok, ram_wr_req_raw_s, req); + tok + }(tok); + + trace_fmt!("Running decoder on testcase {:x}", test_i + u32:1); + let addr_req = axi::AxiAw { + id: uN[TEST_AXI_ID_W]:0, + addr: uN[TEST_AXI_ADDR_W]:0, + size: axi::AxiAxSize::MAX_4B_TRANSFER, + len: u8:0, + burst: axi::AxiAxBurst::FIXED, + }; + let data_req = axi::AxiW { + data: uN[TEST_AXI_DATA_W]:0, + strb: uN[TEST_AXI_DATA_W_DIV8]:0xFF, + last: u1:1, + }; + + // reset the decoder + trace_fmt!("Sending reset"); + let tok = send(tok, csr_axi_aw_s, axi::AxiAw { + addr: csr_addr(zstd_dec::Csr::RESET), + ..addr_req + }); + let tok = send(tok, csr_axi_w_s, axi::AxiW { + data: uN[TEST_AXI_DATA_W]:0x1, + ..data_req + }); + trace_fmt!("Sent reset"); + let (tok, _) = recv(tok, csr_axi_b_r); + // Wait for the reset notification before issuing further CSR writes + let (tok, _) = recv(tok, reset_r); + // configure input buffer address + let tok = send(tok, csr_axi_aw_s, axi::AxiAw { + addr: csr_addr(zstd_dec::Csr::INPUT_BUFFER), + ..addr_req + }); + let tok = send(tok, csr_axi_w_s, axi::AxiW { + data: uN[TEST_AXI_DATA_W]:0x0, + ..data_req + }); + let (tok, _) = recv(tok, csr_axi_b_r); + // configure output buffer address + let tok = send(tok, csr_axi_aw_s, axi::AxiAw { + addr: csr_addr(zstd_dec::Csr::OUTPUT_BUFFER), + ..addr_req + }); + let tok = send(tok, csr_axi_w_s, axi::AxiW { + data: uN[TEST_AXI_DATA_W]:0x1000, + ..data_req + }); + let (tok, _) = recv(tok, csr_axi_b_r); + // start decoder + let tok = send(tok, csr_axi_aw_s, axi::AxiAw { + addr: csr_addr(zstd_dec::Csr::START), + ..addr_req + }); + let tok = send(tok, csr_axi_w_s, axi::AxiW { + data: uN[TEST_AXI_DATA_W]:0x1, + ..data_req + }); + let (tok, _) = recv(tok, csr_axi_b_r); + + let decomp_frame = zstd_frame_testcases::DECOMPRESSED_FRAMES[test_i]; + // Test the ZstdDecoder memory output interface. + // The output memory buffer is mocked as a DSLX array: the mock has to handle AXI + // write transactions and store the incoming data in the DSLX array. + // The number of AXI transactions is not known beforehand because it depends on the + // length of the decoded data and the address of the output buffer. The same goes + // for the lengths of the particular AXI burst transactions (the number of transfers). + // Because of that, we cannot write for loops that handle the AXI transactions dynamically.
+ // As a workaround, the loops are constrained with upper bounds: the number of AXI + // transactions required to write the maximal supported payload, and the maximal + // possible burst transfer size. + + // It is possible to decode payloads of up to 16 kB. + // The smallest possible AXI transaction transfers 1 byte of data. + let MAX_AXI_TRANSACTIONS = u32:16384; + // The maximal number of beats in an AXI burst transaction. + let MAX_AXI_TRANSFERS = u32:256; + // Actual size of the decompressed payload for the current test + let DECOMPRESSED_BYTES = zstd_frame_testcases::DECOMPRESSED_FRAMES[test_i].length; + trace_fmt!("ZstdDecTest: Start receiving output"); + let (tok, final_output_memory, final_output_memory_id, final_transfered_bytes) = + for (axi_transaction, (tok, output_memory, output_memory_id, transfered_bytes)): + (u32, (token, uN[TEST_AXI_DATA_W][TEST_MOCK_OUTPUT_RAM_SIZE], u32, u32)) + in range(u32:0, MAX_AXI_TRANSACTIONS) { + if (transfered_bytes < DECOMPRESSED_BYTES) { + trace_fmt!("ZstdDecTest: Handle AXI Write transaction #{}", axi_transaction); + let (tok, axi_aw) = recv(tok, output_axi_aw_r); + trace_fmt!("ZstdDecTest: Received AXI AW: {:#x}", axi_aw); + let (tok, internal_output_memory, internal_output_memory_id, internal_transfered_bytes) = + for (axi_transfer, (tok, out_mem, out_mem_id, transf_bytes)): + (u32, (token, uN[TEST_AXI_DATA_W][TEST_MOCK_OUTPUT_RAM_SIZE], u32, u32)) + in range(u32:0, MAX_AXI_TRANSFERS) { + if (axi_transfer as u8 <= axi_aw.len) { + // Receive AXI burst beat transfers + let (tok, axi_w) = recv(tok, output_axi_w_r); + trace_fmt!("ZstdDecTest: Received AXI W #{}: {:#x}", axi_transfer, axi_w); + let strobe_cnt = std::popcount(axi_w.strb) as u32; + // Assume continuous strobe, e.g.: 0b1111; 0b0111; 0b0011; 0b0001; 0b0000 + let strobe_mask = (uN[TEST_AXI_DATA_W]:1 << (strobe_cnt * u32:8) as uN[TEST_AXI_DATA_W]) - uN[TEST_AXI_DATA_W]:1; + let strobed_data = axi_w.data & strobe_mask; + trace_fmt!("ZstdDecTest: write out_mem[{}] = {:#x}", out_mem_id, strobed_data); + let mem = update(out_mem, out_mem_id, (out_mem[out_mem_id] & !strobe_mask) | strobed_data); + let id = out_mem_id + u32:1; + let bytes_written = transf_bytes + strobe_cnt; + trace_fmt!("ZstdDecTest: bytes written: {}", bytes_written); + (tok, mem, id, bytes_written) + } else { + (tok, out_mem, out_mem_id, transf_bytes) + } + // Pass outer loop accumulator as initial accumulator for inner loop + }((tok, output_memory, output_memory_id, transfered_bytes)); + let axi_b = axi::AxiB{resp: axi::AxiWriteResp::OKAY, id: axi_aw.id}; + let tok = send(tok, output_axi_b_s, axi_b); + trace_fmt!("ZstdDecTest: Sent AXI B #{}: {:#x}", axi_transaction, axi_b); + (tok, internal_output_memory, internal_output_memory_id, internal_transfered_bytes) + } else { + (tok, output_memory, output_memory_id, transfered_bytes) + } + }((tok, uN[TEST_AXI_DATA_W][TEST_MOCK_OUTPUT_RAM_SIZE]:[uN[TEST_AXI_DATA_W]:0, ...], u32:0, u32:0)); + trace_fmt!("ZstdDecTest: Finished receiving output"); + + assert_eq(final_transfered_bytes, DECOMPRESSED_BYTES); + assert_eq(final_output_memory_id, decomp_frame.array_length); + for (memory_id, _): (u32, ()) in range(u32:0, decomp_frame.array_length) { + assert_eq(final_output_memory[memory_id], decomp_frame.data[memory_id]); + }(()); + + let (tok, ()) = recv(tok, notify_r); + trace_fmt!("Finished decoding testcase {:x} correctly", test_i + u32:1); + tok + }(tok); + + send(tok, terminator, true); + } +} + diff --git a/xls/modules/zstd/zstd_dec_wrapper.v b/xls/modules/zstd/zstd_dec_wrapper.v new file mode 100644 index
0000000000..45ff86d91a --- /dev/null +++ b/xls/modules/zstd/zstd_dec_wrapper.v @@ -0,0 +1,845 @@ +// Copyright 2024 The XLS Authors +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +`default_nettype none + +module zstd_dec_wrapper #( + parameter AXI_DATA_W = 64, + parameter AXI_ADDR_W = 16, + parameter S_AXI_ID_W = 4, + parameter M_AXI_ID_W = 6, + parameter AXI_STRB_W = 8, + parameter AWUSER_WIDTH = 1, + parameter WUSER_WIDTH = 1, + parameter BUSER_WIDTH = 1, + parameter ARUSER_WIDTH = 1, + parameter RUSER_WIDTH = 1 +) ( + input wire clk, + input wire rst, + + // AXI Master interface for the memory connection + output wire [M_AXI_ID_W-1:0] memory_axi_aw_awid, + output wire [AXI_ADDR_W-1:0] memory_axi_aw_awaddr, + output wire [7:0] memory_axi_aw_awlen, + output wire [2:0] memory_axi_aw_awsize, + output wire [1:0] memory_axi_aw_awburst, + output wire memory_axi_aw_awlock, + output wire [3:0] memory_axi_aw_awcache, + output wire [2:0] memory_axi_aw_awprot, + output wire [3:0] memory_axi_aw_awqos, + output wire [3:0] memory_axi_aw_awregion, + output wire [AWUSER_WIDTH-1:0] memory_axi_aw_awuser, + output wire memory_axi_aw_awvalid, + input wire memory_axi_aw_awready, + output wire [AXI_DATA_W-1:0] memory_axi_w_wdata, + output wire [AXI_STRB_W-1:0] memory_axi_w_wstrb, + output wire memory_axi_w_wlast, + output wire [WUSER_WIDTH-1:0] memory_axi_w_wuser, + output wire memory_axi_w_wvalid, + input wire memory_axi_w_wready, + input wire [M_AXI_ID_W-1:0] memory_axi_b_bid, + input wire [2:0] memory_axi_b_bresp, + input wire [BUSER_WIDTH-1:0] memory_axi_b_buser, + input wire memory_axi_b_bvalid, + output wire memory_axi_b_bready, + output wire [M_AXI_ID_W-1:0] memory_axi_ar_arid, + output wire [AXI_ADDR_W-1:0] memory_axi_ar_araddr, + output wire [7:0] memory_axi_ar_arlen, + output wire [2:0] memory_axi_ar_arsize, + output wire [1:0] memory_axi_ar_arburst, + output wire memory_axi_ar_arlock, + output wire [3:0] memory_axi_ar_arcache, + output wire [2:0] memory_axi_ar_arprot, + output wire [3:0] memory_axi_ar_arqos, + output wire [3:0] memory_axi_ar_arregion, + output wire [ARUSER_WIDTH-1:0] memory_axi_ar_aruser, + output wire memory_axi_ar_arvalid, + input wire memory_axi_ar_arready, + input wire [M_AXI_ID_W-1:0] memory_axi_r_rid, + input wire [AXI_DATA_W-1:0] memory_axi_r_rdata, + input wire [2:0] memory_axi_r_rresp, + input wire memory_axi_r_rlast, + input wire [RUSER_WIDTH-1:0] memory_axi_r_ruser, + input wire memory_axi_r_rvalid, + output wire memory_axi_r_rready, + + // AXI Slave interface for the CSR access + input wire [S_AXI_ID_W-1:0] csr_axi_aw_awid, + input wire [AXI_ADDR_W-1:0] csr_axi_aw_awaddr, + input wire [7:0] csr_axi_aw_awlen, + input wire [2:0] csr_axi_aw_awsize, + input wire [1:0] csr_axi_aw_awburst, + input wire csr_axi_aw_awlock, + input wire [3:0] csr_axi_aw_awcache, + input wire [2:0] csr_axi_aw_awprot, + input wire [3:0] csr_axi_aw_awqos, + input wire [3:0] csr_axi_aw_awregion, + input wire [AWUSER_WIDTH-1:0] csr_axi_aw_awuser, + input wire csr_axi_aw_awvalid, + output 
wire csr_axi_aw_awready, + input wire [AXI_DATA_W-1:0] csr_axi_w_wdata, + input wire [AXI_STRB_W-1:0] csr_axi_w_wstrb, + input wire csr_axi_w_wlast, + input wire [WUSER_WIDTH-1:0] csr_axi_w_wuser, + input wire csr_axi_w_wvalid, + output wire csr_axi_w_wready, + output wire [S_AXI_ID_W-1:0] csr_axi_b_bid, + output wire [2:0] csr_axi_b_bresp, + output wire [BUSER_WIDTH-1:0] csr_axi_b_buser, + output wire csr_axi_b_bvalid, + input wire csr_axi_b_bready, + input wire [S_AXI_ID_W-1:0] csr_axi_ar_arid, + input wire [AXI_ADDR_W-1:0] csr_axi_ar_araddr, + input wire [7:0] csr_axi_ar_arlen, + input wire [2:0] csr_axi_ar_arsize, + input wire [1:0] csr_axi_ar_arburst, + input wire csr_axi_ar_arlock, + input wire [3:0] csr_axi_ar_arcache, + input wire [2:0] csr_axi_ar_arprot, + input wire [3:0] csr_axi_ar_arqos, + input wire [3:0] csr_axi_ar_arregion, + input wire [ARUSER_WIDTH-1:0] csr_axi_ar_aruser, + input wire csr_axi_ar_arvalid, + output wire csr_axi_ar_arready, + output wire [S_AXI_ID_W-1:0] csr_axi_r_rid, + output wire [AXI_DATA_W-1:0] csr_axi_r_rdata, + output wire [2:0] csr_axi_r_rresp, + output wire csr_axi_r_rlast, + output wire [RUSER_WIDTH-1:0] csr_axi_r_ruser, + output wire csr_axi_r_rvalid, + input wire csr_axi_r_rready, + + output wire notify_data, + output wire notify_vld, + input wire notify_rdy +); + + /* + * Reset loopback + */ + wire reset_vld; + wire reset_rdy; + // Required for monitoring simple XLS channel in cocotb + wire reset_data; + // OR-ed generic reset and loopback reset in response to write to RESET CSR + wire reset; + + /* + * MemReader AXI interfaces + */ + // RawBlockDecoder + wire raw_block_decoder_axi_ar_arvalid; + wire raw_block_decoder_axi_ar_arready; + wire [S_AXI_ID_W-1:0] raw_block_decoder_axi_ar_arid; + wire [AXI_ADDR_W-1:0] raw_block_decoder_axi_ar_araddr; + wire [ 3:0] raw_block_decoder_axi_ar_arregion; + wire [ 7:0] raw_block_decoder_axi_ar_arlen; + wire [ 2:0] raw_block_decoder_axi_ar_arsize; + wire [ 1:0] raw_block_decoder_axi_ar_arburst; + wire [ 3:0] raw_block_decoder_axi_ar_arcache; + wire [ 2:0] raw_block_decoder_axi_ar_arprot; + wire [ 3:0] raw_block_decoder_axi_ar_arqos; + + wire raw_block_decoder_axi_r_rvalid; + wire raw_block_decoder_axi_r_rready; + wire [S_AXI_ID_W-1:0] raw_block_decoder_axi_r_rid; + wire [AXI_DATA_W-1:0] raw_block_decoder_axi_r_rdata; + wire [ 2:0] raw_block_decoder_axi_r_rresp; + wire raw_block_decoder_axi_r_rlast; + + + // BlockHeaderDecoder + wire block_header_decoder_axi_ar_arvalid; + wire block_header_decoder_axi_ar_arready; + wire [S_AXI_ID_W-1:0] block_header_decoder_axi_ar_arid; + wire [AXI_ADDR_W-1:0] block_header_decoder_axi_ar_araddr; + wire [ 3:0] block_header_decoder_axi_ar_arregion; + wire [ 7:0] block_header_decoder_axi_ar_arlen; + wire [ 2:0] block_header_decoder_axi_ar_arsize; + wire [ 1:0] block_header_decoder_axi_ar_arburst; + wire [ 3:0] block_header_decoder_axi_ar_arcache; + wire [ 2:0] block_header_decoder_axi_ar_arprot; + wire [ 3:0] block_header_decoder_axi_ar_arqos; + + wire block_header_decoder_axi_r_rvalid; + wire block_header_decoder_axi_r_rready; + wire [S_AXI_ID_W-1:0] block_header_decoder_axi_r_rid; + wire [AXI_DATA_W-1:0] block_header_decoder_axi_r_rdata; + wire [ 2:0] block_header_decoder_axi_r_rresp; + wire block_header_decoder_axi_r_rlast; + + + // FrameHeaderDecoder + wire frame_header_decoder_axi_ar_arvalid; + wire frame_header_decoder_axi_ar_arready; + wire [S_AXI_ID_W-1:0] frame_header_decoder_axi_ar_arid; + wire [AXI_ADDR_W-1:0] frame_header_decoder_axi_ar_araddr; + wire [ 3:0] 
frame_header_decoder_axi_ar_arregion; + wire [ 7:0] frame_header_decoder_axi_ar_arlen; + wire [ 2:0] frame_header_decoder_axi_ar_arsize; + wire [ 1:0] frame_header_decoder_axi_ar_arburst; + wire [ 3:0] frame_header_decoder_axi_ar_arcache; + wire [ 2:0] frame_header_decoder_axi_ar_arprot; + wire [ 3:0] frame_header_decoder_axi_ar_arqos; + + wire frame_header_decoder_axi_r_rvalid; + wire frame_header_decoder_axi_r_rready; + wire [S_AXI_ID_W-1:0] frame_header_decoder_axi_r_rid; + wire [AXI_DATA_W-1:0] frame_header_decoder_axi_r_rdata; + wire [ 2:0] frame_header_decoder_axi_r_rresp; + wire frame_header_decoder_axi_r_rlast; + + + /* + * MemWriter AXI interfaces + */ + + // Output Writer + wire [S_AXI_ID_W-1:0] output_axi_aw_awid; + wire [AXI_ADDR_W-1:0] output_axi_aw_awaddr; + wire [ 2:0] output_axi_aw_awsize; + wire [ 7:0] output_axi_aw_awlen; + wire [ 1:0] output_axi_aw_awburst; + wire output_axi_aw_awvalid; + wire output_axi_aw_awready; + + wire [AXI_DATA_W-1:0] output_axi_w_wdata; + wire [AXI_STRB_W-1:0] output_axi_w_wstrb; + wire output_axi_w_wlast; + wire output_axi_w_wvalid; + wire output_axi_w_wready; + + wire [S_AXI_ID_W-1:0] output_axi_b_bid; + wire [ 2:0] output_axi_b_bresp; + wire output_axi_b_bvalid; + wire output_axi_b_bready; + + /* + * XLS Channels representing AXI interfaces + */ + + localparam XLS_AXI_AW_W = AXI_ADDR_W + S_AXI_ID_W + 3 + 2 + 8; + localparam XLS_AXI_W_W = AXI_DATA_W + AXI_STRB_W + 1; + localparam XLS_AXI_B_W = 3 + S_AXI_ID_W; + localparam XLS_AXI_AR_W = S_AXI_ID_W + AXI_ADDR_W + 4 + 8 + 3 + 2 + 4 + 3 + 4; + localparam XLS_AXI_R_W = S_AXI_ID_W + AXI_DATA_W + 3 + 1; + // CSR + wire [XLS_AXI_AW_W-1:0] zstd_dec__csr_axi_aw; + wire zstd_dec__csr_axi_aw_rdy; + wire zstd_dec__csr_axi_aw_vld; + wire [XLS_AXI_W_W-1:0] zstd_dec__csr_axi_w; + wire zstd_dec__csr_axi_w_rdy; + wire zstd_dec__csr_axi_w_vld; + wire [ XLS_AXI_B_W-1:0] zstd_dec__csr_axi_b; + wire zstd_dec__csr_axi_b_rdy; + wire zstd_dec__csr_axi_b_vld; + wire [XLS_AXI_AR_W-1:0] zstd_dec__csr_axi_ar; + wire zstd_dec__csr_axi_ar_rdy; + wire zstd_dec__csr_axi_ar_vld; + wire [ XLS_AXI_R_W-1:0] zstd_dec__csr_axi_r; + wire zstd_dec__csr_axi_r_rdy; + wire zstd_dec__csr_axi_r_vld; + + // Frame Header Decoder + wire [XLS_AXI_AR_W-1:0] zstd_dec__fh_axi_ar; + wire zstd_dec__fh_axi_ar_rdy; + wire zstd_dec__fh_axi_ar_vld; + wire [ XLS_AXI_R_W-1:0] zstd_dec__fh_axi_r; + wire zstd_dec__fh_axi_r_rdy; + wire zstd_dec__fh_axi_r_vld; + + // Block Header Decoder + wire [XLS_AXI_AR_W-1:0] zstd_dec__bh_axi_ar; + wire zstd_dec__bh_axi_ar_rdy; + wire zstd_dec__bh_axi_ar_vld; + wire [ XLS_AXI_R_W-1:0] zstd_dec__bh_axi_r; + wire zstd_dec__bh_axi_r_rdy; + wire zstd_dec__bh_axi_r_vld; + + // Raw Block Decoder + wire [XLS_AXI_AR_W-1:0] zstd_dec__raw_axi_ar; + wire zstd_dec__raw_axi_ar_rdy; + wire zstd_dec__raw_axi_ar_vld; + wire [ XLS_AXI_R_W-1:0] zstd_dec__raw_axi_r; + wire zstd_dec__raw_axi_r_rdy; + wire zstd_dec__raw_axi_r_vld; + + // Output Memory Interface + wire [XLS_AXI_AW_W-1:0] zstd_dec__output_axi_aw; + wire zstd_dec__output_axi_aw_rdy; + wire zstd_dec__output_axi_aw_vld; + wire [XLS_AXI_W_W-1:0] zstd_dec__output_axi_w; + wire zstd_dec__output_axi_w_rdy; + wire zstd_dec__output_axi_w_vld; + wire [XLS_AXI_B_W-1:0] zstd_dec__output_axi_b; + wire zstd_dec__output_axi_b_rdy; + wire zstd_dec__output_axi_b_vld; + + /* + * Mapping XLS Channels to AXI channels fields + */ + + // CSR + assign zstd_dec__csr_axi_aw = { + csr_axi_aw_awid, + csr_axi_aw_awaddr, + csr_axi_aw_awsize, + csr_axi_aw_awlen, + csr_axi_aw_awburst + }; + assign 
zstd_dec__csr_axi_aw_vld = csr_axi_aw_awvalid; + assign csr_axi_aw_awready = zstd_dec__csr_axi_aw_rdy; + assign zstd_dec__csr_axi_w = { + csr_axi_w_wdata, + csr_axi_w_wstrb, + csr_axi_w_wlast + }; + assign zstd_dec__csr_axi_w_vld = csr_axi_w_wvalid; + assign csr_axi_w_wready = zstd_dec__csr_axi_w_rdy; + assign { + csr_axi_b_bresp, + csr_axi_b_bid + } = zstd_dec__csr_axi_b; + assign csr_axi_b_bvalid = zstd_dec__csr_axi_b_vld; + assign zstd_dec__csr_axi_b_rdy = csr_axi_b_bready; + assign zstd_dec__csr_axi_ar = { + csr_axi_ar_arid, + csr_axi_ar_araddr, + csr_axi_ar_arregion, + csr_axi_ar_arlen, + csr_axi_ar_arsize, + csr_axi_ar_arburst, + csr_axi_ar_arcache, + csr_axi_ar_arprot, + csr_axi_ar_arqos + }; + assign zstd_dec__csr_axi_ar_vld = csr_axi_ar_arvalid; + assign csr_axi_ar_arready = zstd_dec__csr_axi_ar_rdy; + assign { + csr_axi_r_rid, + csr_axi_r_rdata, + csr_axi_r_rresp, + csr_axi_r_rlast + } = zstd_dec__csr_axi_r; + assign csr_axi_r_rvalid = zstd_dec__csr_axi_r_vld; + assign zstd_dec__csr_axi_r_rdy = csr_axi_r_rready; + + // Frame Header Decoder + assign { + frame_header_decoder_axi_ar_arid, + frame_header_decoder_axi_ar_araddr, + frame_header_decoder_axi_ar_arregion, + frame_header_decoder_axi_ar_arlen, + frame_header_decoder_axi_ar_arsize, + frame_header_decoder_axi_ar_arburst, + frame_header_decoder_axi_ar_arcache, + frame_header_decoder_axi_ar_arprot, + frame_header_decoder_axi_ar_arqos + } = zstd_dec__fh_axi_ar; + assign frame_header_decoder_axi_ar_arvalid = zstd_dec__fh_axi_ar_vld; + assign zstd_dec__fh_axi_ar_rdy = frame_header_decoder_axi_ar_arready; + assign zstd_dec__fh_axi_r = { + frame_header_decoder_axi_r_rid, + frame_header_decoder_axi_r_rdata, + frame_header_decoder_axi_r_rresp, + frame_header_decoder_axi_r_rlast}; + assign zstd_dec__fh_axi_r_vld = frame_header_decoder_axi_r_rvalid; + assign frame_header_decoder_axi_r_rready = zstd_dec__fh_axi_r_rdy; + + // Block Header Decoder + assign { + block_header_decoder_axi_ar_arid, + block_header_decoder_axi_ar_araddr, + block_header_decoder_axi_ar_arregion, + block_header_decoder_axi_ar_arlen, + block_header_decoder_axi_ar_arsize, + block_header_decoder_axi_ar_arburst, + block_header_decoder_axi_ar_arcache, + block_header_decoder_axi_ar_arprot, + block_header_decoder_axi_ar_arqos + } = zstd_dec__bh_axi_ar; + assign block_header_decoder_axi_ar_arvalid = zstd_dec__bh_axi_ar_vld; + assign zstd_dec__bh_axi_ar_rdy = block_header_decoder_axi_ar_arready; + assign zstd_dec__bh_axi_r = { + block_header_decoder_axi_r_rid, + block_header_decoder_axi_r_rdata, + block_header_decoder_axi_r_rresp, + block_header_decoder_axi_r_rlast}; + assign zstd_dec__bh_axi_r_vld = block_header_decoder_axi_r_rvalid; + assign block_header_decoder_axi_r_rready = zstd_dec__bh_axi_r_rdy; + + // Raw Block Decoder + assign { + raw_block_decoder_axi_ar_arid, + raw_block_decoder_axi_ar_araddr, + raw_block_decoder_axi_ar_arregion, + raw_block_decoder_axi_ar_arlen, + raw_block_decoder_axi_ar_arsize, + raw_block_decoder_axi_ar_arburst, + raw_block_decoder_axi_ar_arcache, + raw_block_decoder_axi_ar_arprot, + raw_block_decoder_axi_ar_arqos + } = zstd_dec__raw_axi_ar; + assign raw_block_decoder_axi_ar_arvalid = zstd_dec__raw_axi_ar_vld; + assign zstd_dec__raw_axi_ar_rdy = raw_block_decoder_axi_ar_arready; + assign zstd_dec__raw_axi_r = { + raw_block_decoder_axi_r_rid, + raw_block_decoder_axi_r_rdata, + raw_block_decoder_axi_r_rresp, + raw_block_decoder_axi_r_rlast}; + assign zstd_dec__raw_axi_r_vld = raw_block_decoder_axi_r_rvalid; + assign raw_block_decoder_axi_r_rready 
= zstd_dec__raw_axi_r_rdy; + + // Output Writer + assign { + output_axi_aw_awid, + output_axi_aw_awaddr, + output_axi_aw_awsize, + output_axi_aw_awlen, + output_axi_aw_awburst + } = zstd_dec__output_axi_aw; + assign output_axi_aw_awvalid = zstd_dec__output_axi_aw_vld; + assign zstd_dec__output_axi_aw_rdy = output_axi_aw_awready; + assign { + output_axi_w_wdata, + output_axi_w_wstrb, + output_axi_w_wlast + } = zstd_dec__output_axi_w; + assign output_axi_w_wvalid = zstd_dec__output_axi_w_vld; + assign zstd_dec__output_axi_w_rdy = output_axi_w_wready; + assign zstd_dec__output_axi_b = { + output_axi_b_bresp, + output_axi_b_bid + }; + assign zstd_dec__output_axi_b_vld = output_axi_b_bvalid; + assign output_axi_b_bready = zstd_dec__output_axi_b_rdy; + + assign csr_axi_b_buser = 1'b0; + assign csr_axi_r_ruser = 1'b0; + assign notify_data = notify_vld; + assign reset_data = reset_vld; + assign reset = reset_vld | rst; + + /* + * ZSTD Decoder instance + */ + ZstdDecoder ZstdDecoder ( + .clk(clk), + .rst(reset), + + // CSR Interface + .zstd_dec__csr_axi_aw_r(zstd_dec__csr_axi_aw), + .zstd_dec__csr_axi_aw_r_vld(zstd_dec__csr_axi_aw_vld), + .zstd_dec__csr_axi_aw_r_rdy(zstd_dec__csr_axi_aw_rdy), + .zstd_dec__csr_axi_w_r(zstd_dec__csr_axi_w), + .zstd_dec__csr_axi_w_r_vld(zstd_dec__csr_axi_w_vld), + .zstd_dec__csr_axi_w_r_rdy(zstd_dec__csr_axi_w_rdy), + .zstd_dec__csr_axi_b_s(zstd_dec__csr_axi_b), + .zstd_dec__csr_axi_b_s_vld(zstd_dec__csr_axi_b_vld), + .zstd_dec__csr_axi_b_s_rdy(zstd_dec__csr_axi_b_rdy), + .zstd_dec__csr_axi_ar_r(zstd_dec__csr_axi_ar), + .zstd_dec__csr_axi_ar_r_vld(zstd_dec__csr_axi_ar_vld), + .zstd_dec__csr_axi_ar_r_rdy(zstd_dec__csr_axi_ar_rdy), + .zstd_dec__csr_axi_r_s(zstd_dec__csr_axi_r), + .zstd_dec__csr_axi_r_s_vld(zstd_dec__csr_axi_r_vld), + .zstd_dec__csr_axi_r_s_rdy(zstd_dec__csr_axi_r_rdy), + + // FrameHeaderDecoder + .zstd_dec__fh_axi_ar_s(zstd_dec__fh_axi_ar), + .zstd_dec__fh_axi_ar_s_vld(zstd_dec__fh_axi_ar_vld), + .zstd_dec__fh_axi_ar_s_rdy(zstd_dec__fh_axi_ar_rdy), + .zstd_dec__fh_axi_r_r(zstd_dec__fh_axi_r), + .zstd_dec__fh_axi_r_r_vld(zstd_dec__fh_axi_r_vld), + .zstd_dec__fh_axi_r_r_rdy(zstd_dec__fh_axi_r_rdy), + + // BlockHeaderDecoder + .zstd_dec__bh_axi_ar_s(zstd_dec__bh_axi_ar), + .zstd_dec__bh_axi_ar_s_vld(zstd_dec__bh_axi_ar_vld), + .zstd_dec__bh_axi_ar_s_rdy(zstd_dec__bh_axi_ar_rdy), + .zstd_dec__bh_axi_r_r(zstd_dec__bh_axi_r), + .zstd_dec__bh_axi_r_r_vld(zstd_dec__bh_axi_r_vld), + .zstd_dec__bh_axi_r_r_rdy(zstd_dec__bh_axi_r_rdy), + + // RawBlockDecoder + .zstd_dec__raw_axi_ar_s(zstd_dec__raw_axi_ar), + .zstd_dec__raw_axi_ar_s_vld(zstd_dec__raw_axi_ar_vld), + .zstd_dec__raw_axi_ar_s_rdy(zstd_dec__raw_axi_ar_rdy), + .zstd_dec__raw_axi_r_r(zstd_dec__raw_axi_r), + .zstd_dec__raw_axi_r_r_vld(zstd_dec__raw_axi_r_vld), + .zstd_dec__raw_axi_r_r_rdy(zstd_dec__raw_axi_r_rdy), + + // Output Writer + .zstd_dec__output_axi_aw_s(zstd_dec__output_axi_aw), + .zstd_dec__output_axi_aw_s_vld(zstd_dec__output_axi_aw_vld), + .zstd_dec__output_axi_aw_s_rdy(zstd_dec__output_axi_aw_rdy), + .zstd_dec__output_axi_w_s(zstd_dec__output_axi_w), + .zstd_dec__output_axi_w_s_vld(zstd_dec__output_axi_w_vld), + .zstd_dec__output_axi_w_s_rdy(zstd_dec__output_axi_w_rdy), + .zstd_dec__output_axi_b_r(zstd_dec__output_axi_b), + .zstd_dec__output_axi_b_r_vld(zstd_dec__output_axi_b_vld), + .zstd_dec__output_axi_b_r_rdy(zstd_dec__output_axi_b_rdy), + + // Other ports + .zstd_dec__notify_s_vld(notify_vld), + .zstd_dec__notify_s_rdy(notify_rdy), + // Reset loopback - response for write to RESET CSR + 
// Should be looped back to generic reset input + .zstd_dec__reset_s_vld(reset_vld), + .zstd_dec__reset_s_rdy(reset_rdy), + + .zstd_dec__ram_rd_req_0_s(), + .zstd_dec__ram_rd_req_1_s(), + .zstd_dec__ram_rd_req_2_s(), + .zstd_dec__ram_rd_req_3_s(), + .zstd_dec__ram_rd_req_4_s(), + .zstd_dec__ram_rd_req_5_s(), + .zstd_dec__ram_rd_req_6_s(), + .zstd_dec__ram_rd_req_7_s(), + .zstd_dec__ram_rd_req_0_s_vld(), + .zstd_dec__ram_rd_req_1_s_vld(), + .zstd_dec__ram_rd_req_2_s_vld(), + .zstd_dec__ram_rd_req_3_s_vld(), + .zstd_dec__ram_rd_req_4_s_vld(), + .zstd_dec__ram_rd_req_5_s_vld(), + .zstd_dec__ram_rd_req_6_s_vld(), + .zstd_dec__ram_rd_req_7_s_vld(), + .zstd_dec__ram_rd_req_0_s_rdy('1), + .zstd_dec__ram_rd_req_1_s_rdy('1), + .zstd_dec__ram_rd_req_2_s_rdy('1), + .zstd_dec__ram_rd_req_3_s_rdy('1), + .zstd_dec__ram_rd_req_4_s_rdy('1), + .zstd_dec__ram_rd_req_5_s_rdy('1), + .zstd_dec__ram_rd_req_6_s_rdy('1), + .zstd_dec__ram_rd_req_7_s_rdy('1), + + .zstd_dec__ram_rd_resp_0_r('0), + .zstd_dec__ram_rd_resp_1_r('0), + .zstd_dec__ram_rd_resp_2_r('0), + .zstd_dec__ram_rd_resp_3_r('0), + .zstd_dec__ram_rd_resp_4_r('0), + .zstd_dec__ram_rd_resp_5_r('0), + .zstd_dec__ram_rd_resp_6_r('0), + .zstd_dec__ram_rd_resp_7_r('0), + .zstd_dec__ram_rd_resp_0_r_vld('1), + .zstd_dec__ram_rd_resp_1_r_vld('1), + .zstd_dec__ram_rd_resp_2_r_vld('1), + .zstd_dec__ram_rd_resp_3_r_vld('1), + .zstd_dec__ram_rd_resp_4_r_vld('1), + .zstd_dec__ram_rd_resp_5_r_vld('1), + .zstd_dec__ram_rd_resp_6_r_vld('1), + .zstd_dec__ram_rd_resp_7_r_vld('1), + .zstd_dec__ram_rd_resp_0_r_rdy(), + .zstd_dec__ram_rd_resp_1_r_rdy(), + .zstd_dec__ram_rd_resp_2_r_rdy(), + .zstd_dec__ram_rd_resp_3_r_rdy(), + .zstd_dec__ram_rd_resp_4_r_rdy(), + .zstd_dec__ram_rd_resp_5_r_rdy(), + .zstd_dec__ram_rd_resp_6_r_rdy(), + .zstd_dec__ram_rd_resp_7_r_rdy(), + + .zstd_dec__ram_wr_req_0_s(), + .zstd_dec__ram_wr_req_1_s(), + .zstd_dec__ram_wr_req_2_s(), + .zstd_dec__ram_wr_req_3_s(), + .zstd_dec__ram_wr_req_4_s(), + .zstd_dec__ram_wr_req_5_s(), + .zstd_dec__ram_wr_req_6_s(), + .zstd_dec__ram_wr_req_7_s(), + .zstd_dec__ram_wr_req_0_s_vld(), + .zstd_dec__ram_wr_req_1_s_vld(), + .zstd_dec__ram_wr_req_2_s_vld(), + .zstd_dec__ram_wr_req_3_s_vld(), + .zstd_dec__ram_wr_req_4_s_vld(), + .zstd_dec__ram_wr_req_5_s_vld(), + .zstd_dec__ram_wr_req_6_s_vld(), + .zstd_dec__ram_wr_req_7_s_vld(), + .zstd_dec__ram_wr_req_0_s_rdy('1), + .zstd_dec__ram_wr_req_1_s_rdy('1), + .zstd_dec__ram_wr_req_2_s_rdy('1), + .zstd_dec__ram_wr_req_3_s_rdy('1), + .zstd_dec__ram_wr_req_4_s_rdy('1), + .zstd_dec__ram_wr_req_5_s_rdy('1), + .zstd_dec__ram_wr_req_6_s_rdy('1), + .zstd_dec__ram_wr_req_7_s_rdy('1), + + .zstd_dec__ram_wr_resp_0_r_vld('1), + .zstd_dec__ram_wr_resp_1_r_vld('1), + .zstd_dec__ram_wr_resp_2_r_vld('1), + .zstd_dec__ram_wr_resp_3_r_vld('1), + .zstd_dec__ram_wr_resp_4_r_vld('1), + .zstd_dec__ram_wr_resp_5_r_vld('1), + .zstd_dec__ram_wr_resp_6_r_vld('1), + .zstd_dec__ram_wr_resp_7_r_vld('1), + .zstd_dec__ram_wr_resp_0_r_rdy(), + .zstd_dec__ram_wr_resp_1_r_rdy(), + .zstd_dec__ram_wr_resp_2_r_rdy(), + .zstd_dec__ram_wr_resp_3_r_rdy(), + .zstd_dec__ram_wr_resp_4_r_rdy(), + .zstd_dec__ram_wr_resp_5_r_rdy(), + .zstd_dec__ram_wr_resp_6_r_rdy(), + .zstd_dec__ram_wr_resp_7_r_rdy() + ); + + assign frame_header_decoder_axi_r_rresp[2] = '0; + assign block_header_decoder_axi_r_rresp[2] = '0; + assign raw_block_decoder_axi_r_rresp[2] = '0; + assign output_axi_b_bresp[2] = '0; + assign memory_axi_b_bresp[2] = '0; + assign memory_axi_r_rresp[2] = '0; + /* + * AXI Interconnect + */ + axi_crossbar_wrapper 
#( + .DATA_WIDTH(AXI_DATA_W), + .ADDR_WIDTH(AXI_ADDR_W), + .M00_ADDR_WIDTH(AXI_ADDR_W), + .M00_BASE_ADDR(32'd0), + .STRB_WIDTH(AXI_STRB_W), + .S_ID_WIDTH(S_AXI_ID_W), + .M_ID_WIDTH(M_AXI_ID_W) + ) axi_memory_interconnect ( + .clk(clk), + .rst(rst), + + /* + * AXI slave interfaces + */ + // FrameHeaderDecoder + .s00_axi_awid('0), + .s00_axi_awaddr('0), + .s00_axi_awlen('0), + .s00_axi_awsize('0), + .s00_axi_awburst('0), + .s00_axi_awlock('0), + .s00_axi_awcache('0), + .s00_axi_awprot('0), + .s00_axi_awqos('0), + .s00_axi_awuser('0), + .s00_axi_awvalid('0), + .s00_axi_awready(), + .s00_axi_wdata('0), + .s00_axi_wstrb('0), + .s00_axi_wlast('0), + .s00_axi_wuser('0), + .s00_axi_wvalid(), + .s00_axi_wready(), + .s00_axi_bid(), + .s00_axi_bresp(), + .s00_axi_buser(), + .s00_axi_bvalid(), + .s00_axi_bready('0), + .s00_axi_arid(frame_header_decoder_axi_ar_arid), + .s00_axi_araddr(frame_header_decoder_axi_ar_araddr), + .s00_axi_arlen(frame_header_decoder_axi_ar_arlen), + .s00_axi_arsize(frame_header_decoder_axi_ar_arsize), + .s00_axi_arburst(frame_header_decoder_axi_ar_arburst), + .s00_axi_arlock('0), + .s00_axi_arcache(frame_header_decoder_axi_ar_arcache), + .s00_axi_arprot(frame_header_decoder_axi_ar_arprot), + .s00_axi_arqos(frame_header_decoder_axi_ar_arqos), + .s00_axi_aruser('0), + .s00_axi_arvalid(frame_header_decoder_axi_ar_arvalid), + .s00_axi_arready(frame_header_decoder_axi_ar_arready), + .s00_axi_rid(frame_header_decoder_axi_r_rid), + .s00_axi_rdata(frame_header_decoder_axi_r_rdata), + .s00_axi_rresp(frame_header_decoder_axi_r_rresp[1:0]), + .s00_axi_rlast(frame_header_decoder_axi_r_rlast), + .s00_axi_ruser(), + .s00_axi_rvalid(frame_header_decoder_axi_r_rvalid), + .s00_axi_rready(frame_header_decoder_axi_r_rready), + + // BlockHeaderDecoder + .s01_axi_awid('0), + .s01_axi_awaddr('0), + .s01_axi_awlen('0), + .s01_axi_awsize('0), + .s01_axi_awburst('0), + .s01_axi_awlock('0), + .s01_axi_awcache('0), + .s01_axi_awprot('0), + .s01_axi_awqos('0), + .s01_axi_awuser('0), + .s01_axi_awvalid('0), + .s01_axi_awready(), + .s01_axi_wdata('0), + .s01_axi_wstrb('0), + .s01_axi_wlast('0), + .s01_axi_wuser('0), + .s01_axi_wvalid(), + .s01_axi_wready(), + .s01_axi_bid(), + .s01_axi_bresp(), + .s01_axi_buser(), + .s01_axi_bvalid(), + .s01_axi_bready('0), + .s01_axi_arid(block_header_decoder_axi_ar_arid), + .s01_axi_araddr(block_header_decoder_axi_ar_araddr), + .s01_axi_arlen(block_header_decoder_axi_ar_arlen), + .s01_axi_arsize(block_header_decoder_axi_ar_arsize), + .s01_axi_arburst(block_header_decoder_axi_ar_arburst), + .s01_axi_arlock('0), + .s01_axi_arcache(block_header_decoder_axi_ar_arcache), + .s01_axi_arprot(block_header_decoder_axi_ar_arprot), + .s01_axi_arqos(block_header_decoder_axi_ar_arqos), + .s01_axi_aruser('0), + .s01_axi_arvalid(block_header_decoder_axi_ar_arvalid), + .s01_axi_arready(block_header_decoder_axi_ar_arready), + .s01_axi_rid(block_header_decoder_axi_r_rid), + .s01_axi_rdata(block_header_decoder_axi_r_rdata), + .s01_axi_rresp(block_header_decoder_axi_r_rresp[1:0]), + .s01_axi_rlast(block_header_decoder_axi_r_rlast), + .s01_axi_ruser(), + .s01_axi_rvalid(block_header_decoder_axi_r_rvalid), + .s01_axi_rready(block_header_decoder_axi_r_rready), + + // RawBlockDecoder + .s02_axi_awid('0), + .s02_axi_awaddr('0), + .s02_axi_awlen('0), + .s02_axi_awsize('0), + .s02_axi_awburst('0), + .s02_axi_awlock('0), + .s02_axi_awcache('0), + .s02_axi_awprot('0), + .s02_axi_awqos('0), + .s02_axi_awuser('0), + .s02_axi_awvalid('0), + .s02_axi_awready(), + .s02_axi_wdata('0), + .s02_axi_wstrb('0), 
+ .s02_axi_wlast('0), + .s02_axi_wuser('0), + .s02_axi_wvalid(), + .s02_axi_wready(), + .s02_axi_bid(), + .s02_axi_bresp(), + .s02_axi_buser(), + .s02_axi_bvalid(), + .s02_axi_bready('0), + .s02_axi_arid(raw_block_decoder_axi_ar_arid), + .s02_axi_araddr(raw_block_decoder_axi_ar_araddr), + .s02_axi_arlen(raw_block_decoder_axi_ar_arlen), + .s02_axi_arsize(raw_block_decoder_axi_ar_arsize), + .s02_axi_arburst(raw_block_decoder_axi_ar_arburst), + .s02_axi_arlock('0), + .s02_axi_arcache(raw_block_decoder_axi_ar_arcache), + .s02_axi_arprot(raw_block_decoder_axi_ar_arprot), + .s02_axi_arqos(raw_block_decoder_axi_ar_arqos), + .s02_axi_aruser('0), + .s02_axi_arvalid(raw_block_decoder_axi_ar_arvalid), + .s02_axi_arready(raw_block_decoder_axi_ar_arready), + .s02_axi_rid(raw_block_decoder_axi_r_rid), + .s02_axi_rdata(raw_block_decoder_axi_r_rdata), + .s02_axi_rresp(raw_block_decoder_axi_r_rresp[1:0]), + .s02_axi_rlast(raw_block_decoder_axi_r_rlast), + .s02_axi_ruser(), + .s02_axi_rvalid(raw_block_decoder_axi_r_rvalid), + .s02_axi_rready(raw_block_decoder_axi_r_rready), + + // SequenceExecutor + .s03_axi_awid(output_axi_aw_awid), + .s03_axi_awaddr(output_axi_aw_awaddr), + .s03_axi_awlen(output_axi_aw_awlen), + .s03_axi_awsize(output_axi_aw_awsize), + .s03_axi_awburst(output_axi_aw_awburst), + .s03_axi_awlock('0), + .s03_axi_awcache('0), + .s03_axi_awprot('0), + .s03_axi_awqos('0), + .s03_axi_awuser('0), + .s03_axi_awvalid(output_axi_aw_awvalid), + .s03_axi_awready(output_axi_aw_awready), + .s03_axi_wdata(output_axi_w_wdata), + .s03_axi_wstrb(output_axi_w_wstrb), + .s03_axi_wlast(output_axi_w_wlast), + .s03_axi_wuser('0), + .s03_axi_wvalid(output_axi_w_wvalid), + .s03_axi_wready(output_axi_w_wready), + .s03_axi_bid(output_axi_b_bid), + .s03_axi_bresp(output_axi_b_bresp), + .s03_axi_buser(), + .s03_axi_bvalid(output_axi_b_bvalid), + .s03_axi_bready(output_axi_b_bready), + .s03_axi_arid('0), + .s03_axi_araddr('0), + .s03_axi_arlen('0), + .s03_axi_arsize('0), + .s03_axi_arburst('0), + .s03_axi_arlock('0), + .s03_axi_arcache('0), + .s03_axi_arprot('0), + .s03_axi_arqos('0), + .s03_axi_aruser('0), + .s03_axi_arvalid('0), + .s03_axi_arready(), + .s03_axi_rid(), + .s03_axi_rdata(), + .s03_axi_rresp(), + .s03_axi_rlast(), + .s03_axi_ruser(), + .s03_axi_rvalid(), + .s03_axi_rready('0), + + /* + * AXI master interface + */ + // Outside-facing AXI interface of the ZSTD Decoder + .m00_axi_awid(memory_axi_aw_awid), + .m00_axi_awaddr(memory_axi_aw_awaddr), + .m00_axi_awlen(memory_axi_aw_awlen), + .m00_axi_awsize(memory_axi_aw_awsize), + .m00_axi_awburst(memory_axi_aw_awburst), + .m00_axi_awlock(memory_axi_aw_awlock), + .m00_axi_awcache(memory_axi_aw_awcache), + .m00_axi_awprot(memory_axi_aw_awprot), + .m00_axi_awqos(memory_axi_aw_awqos), + .m00_axi_awregion(memory_axi_aw_awregion), + .m00_axi_awuser(memory_axi_aw_awuser), + .m00_axi_awvalid(memory_axi_aw_awvalid), + .m00_axi_awready(memory_axi_aw_awready), + .m00_axi_wdata(memory_axi_w_wdata), + .m00_axi_wstrb(memory_axi_w_wstrb), + .m00_axi_wlast(memory_axi_w_wlast), + .m00_axi_wuser(memory_axi_w_wuser), + .m00_axi_wvalid(memory_axi_w_wvalid), + .m00_axi_wready(memory_axi_w_wready), + .m00_axi_bid(memory_axi_b_bid), + .m00_axi_bresp(memory_axi_b_bresp[1:0]), + .m00_axi_buser(memory_axi_b_buser), + .m00_axi_bvalid(memory_axi_b_bvalid), + .m00_axi_bready(memory_axi_b_bready), + .m00_axi_arid(memory_axi_ar_arid), + .m00_axi_araddr(memory_axi_ar_araddr), + .m00_axi_arlen(memory_axi_ar_arlen), + .m00_axi_arsize(memory_axi_ar_arsize), + 
.m00_axi_arburst(memory_axi_ar_arburst), + .m00_axi_arlock(memory_axi_ar_arlock), + .m00_axi_arcache(memory_axi_ar_arcache), + .m00_axi_arprot(memory_axi_ar_arprot), + .m00_axi_arqos(memory_axi_ar_arqos), + .m00_axi_arregion(memory_axi_ar_arregion), + .m00_axi_aruser(memory_axi_ar_aruser), + .m00_axi_arvalid(memory_axi_ar_arvalid), + .m00_axi_arready(memory_axi_ar_arready), + .m00_axi_rid(memory_axi_r_rid), + .m00_axi_rdata(memory_axi_r_rdata), + .m00_axi_rresp(memory_axi_r_rresp[1:0]), + .m00_axi_rlast(memory_axi_r_rlast), + .m00_axi_ruser(memory_axi_r_ruser), + .m00_axi_rvalid(memory_axi_r_rvalid), + .m00_axi_rready(memory_axi_r_rready) + ); + +endmodule : zstd_dec_wrapper diff --git a/xls/modules/zstd/zstd_frame_dslx.py b/xls/modules/zstd/zstd_frame_dslx.py new file mode 100644 index 0000000000..6e36f6c563 --- /dev/null +++ b/xls/modules/zstd/zstd_frame_dslx.py @@ -0,0 +1,134 @@ +# Copyright 2024 The XLS Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import argparse +import math +import random +import tempfile +from pathlib import Path + +from xls.modules.zstd.cocotb.data_generator import ( + BlockType, + DecompressFrame, + GenerateFrame, +) + + +def GenerateTestData(seed, btype): + with tempfile.NamedTemporaryFile() as tmp: + GenerateFrame(seed, btype, tmp.name) + tmp.seek(0) + return tmp.read() + + +def Bytes2DSLX(frames, bytes_per_word, array_name): + frames_hex = [] + maxlen = max(len(frame) for frame in frames) + maxlen_size = math.ceil(maxlen / bytes_per_word) + bits_per_word = bytes_per_word * 8 + for i, frame in enumerate(frames): + frame_hex = [] + for i in range(0, len(frame), bytes_per_word): + # reverse byte order to make them little endian + word = bytes(reversed(frame[i : i + bytes_per_word])).hex() + frame_hex.append(f"uN[{bits_per_word}]:0x{word}") + + array_length = len(frame_hex) + if len(frame) < maxlen: + frame_hex += [f"uN[{bits_per_word}]:0x0", "..."] + + frame_array = ( + f"DataArray<{bits_per_word}, {maxlen_size}>{{\n" + f" length: u32:{len(frame)},\n" + f" array_length: u32:{array_length},\n" + f" data: uN[{bits_per_word}][{maxlen_size}]:[{', '.join(frame_hex)}]\n" + f"}}" + ) + frames_hex.append(frame_array) + + frames_str = ",\n".join(frames_hex) + frames_array = ( + f"pub const {array_name}:DataArray<\n" + f" u32:{bits_per_word},\n" + f" u32:{maxlen_size}\n" + f">[{len(frames_hex)}] = [{frames_str}];\n" + ) + return frames_array + + +def GenerateDataStruct(): + return ( + f"pub struct DataArray{{\n" + f" data: uN[BITS_PER_WORD][LENGTH],\n" + f" length: u32,\n" + f" array_length: u32\n" + f"}}\n" + ) + + +def main(): + parser = argparse.ArgumentParser() + parser.add_argument( + "-n", help="Number of testcases to generate", type=int, default=1 + ) + parser.add_argument( + "--seed", help="Seed for the testcases generator", type=int, default=0 + ) + parser.add_argument( + "--btype", + help=( + "Block types allowed in the generated testcases. 
If multiple block types " + "are supplied, generated testcases will cycle through them" ), + type=BlockType.from_string, + choices=list(BlockType), + # Wrap the default in a list so args.btype is always a list, matching nargs="+" + default=[BlockType.RANDOM], + nargs="+", + ) + parser.add_argument( + "-o", + "--output", + help="Filename of the DSLX output file", + type=Path, + default=Path("frames_test_data.x"), + ) + parser.add_argument( + "--bytes-per-word", + help="Width of a word in memory, in bytes", + type=int, + default=8, + ) + args = parser.parse_args() + + random.seed(args.seed) + byte_frames = [ + GenerateTestData(random.randrange(2**32), args.btype[i % len(args.btype)]) + for i in range(args.n) + ] + with open(args.output, "w") as dslx_output: + dslx_output.write(GenerateDataStruct()) + + dslx_frames = Bytes2DSLX(byte_frames, args.bytes_per_word, "FRAMES") + dslx_output.write(dslx_frames) + + byte_frames_decompressed = list(map(DecompressFrame, byte_frames)) + dslx_frames_decompressed = Bytes2DSLX( + byte_frames_decompressed, args.bytes_per_word, "DECOMPRESSED_FRAMES" + ) + dslx_output.write(dslx_frames_decompressed) + + +if __name__ == "__main__": + main()
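+
+# Hypothetical example invocation (block type names and the output path are
+# illustrative assumptions, not verified against the BlockType enum):
+#   python zstd_frame_dslx.py -n 3 --seed 7 --btype RAW RLE \
+#     --bytes-per-word 8 -o zstd_frame_testcases.x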