RasaHQ · rasabot · Jul 12, 2023 · Jul 12, 2023 · Nov 16, 2023 · Nov 17, 2023
@@ -287,6 +287,13 @@ jobs:
         run: |
           poetry self add setuptools
 
+      - name: Prevent race condition in poetry build
+        # More context about race condition during poetry build can be found here:
+        # https://github.com/python-poetry/poetry/issues/7611#issuecomment-1747836233
+        if: needs.changes.outputs.backend == 'true'
+        run: |
+          poetry config installer.max-workers 1
+
       - name: Load Poetry Cached Libraries ⬇
         id: cache-poetry
         if: needs.changes.outputs.backend == 'true'

@@ -129,24 +129,3 @@ jobs:
       - name: Run Bandit 🔪
         if: needs.changes.outputs.backend == 'true'
         run: make lint-security
-
-  snyk:
-    runs-on: ubuntu-22.04
-    steps:
-      - uses: actions/checkout@ac593985615ec2ede58e132d2e21d2b1cbd6127c
-      - name: Run Snyk Open Source to check for Python vulnerabilities
-        uses: snyk/actions/python-3.8@master
-        continue-on-error: true
-        env:
-          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
-        with:
-          command: monitor
-          args: --all-projects --org=rasa --skip-unresolved
-      - name: Run Snyk Open Source to check for JS vulnerabilities
-        uses: snyk/actions/node@master
-        continue-on-error: true
-        env:
-          SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }}
-        with:
-          command: monitor
-          args: --org=rasa --yarn-workspaces --strict-out-of-sync=false --prune-repeated-subdependencies
diff --git a/CHANGELOG.mdx b/CHANGELOG.mdx
@@ -16,6 +16,20 @@ https://github.com/RasaHQ/rasa/tree/main/changelog/ . -->
 
 <!-- TOWNCRIER -->
 
+## [3.6.15] - 2023-11-30
+
+Rasa 3.6.15 (2023-11-30)                          
+### Bugfixes
+- [#12965](https://github.com/rasahq/rasa/issues/12965): Fixed connection timeout to action server by setting KEEP_ALIVE_TIMEOUT to 120, and reverting changes introduced in #12886.
+
+
+## [3.6.14] - 2023-11-17
+
+Rasa 3.6.14 (2023-11-17)                         
+### Bugfixes
+- [#12948](https://github.com/rasahq/rasa/issues/12948): Fixed UnexpecTEDIntentlessPolicy training errors that resulted from a change to batching behavior. Changed the batching behavior back to the original for all components. Made the changed batching behavior accessible in DietClassifier using `drop_small_last_batch: True`.
+
+
 ## [3.6.13] - 2023-10-23
 
 Rasa 3.6.13 (2023-10-23)                         

diff --git a/Makefile b/Makefile
@@ -136,7 +136,7 @@ prepare-tests-windows:
 # It will retry the installation 5 times if it fails
 # See: https://github.com/actions/virtual-environments/blob/main/images/win/scripts/ImageHelpers/ChocoHelpers.ps1
 prepare-tests-windows-gha:
-	powershell -command "Choco-Install wget graphviz"
+	powershell -command "Install-ChocoPackage wget graphviz"
 
 test: clean
 	# OMP_NUM_THREADS can improve overall performance using one thread by process (on tensorflow), avoiding overload

diff --git a/changelog/12983.bugfix.md b/changelog/12983.bugfix.md
@@ -0,0 +1 @@
+Upgrade Cryptography to fix improper certificate validation.
diff --git a/changelog/712.misc.md b/changelog/712.misc.md
@@ -0,0 +1 @@
+Prevent a race condition in the poetry build to fix dependency install failures on Windows.
diff --git a/docs/docs/monitoring/load-testing-guidelines.mdx b/docs/docs/monitoring/load-testing-guidelines.mdx
@@ -12,12 +12,26 @@ In order to gather metrics on our system's ability to handle increased loads and
 In each test case we spawned the following number of concurrent users at peak concurrency using a [spawn rate](https://docs.locust.io/en/1.5.0/configuration.html#all-available-configuration-options) of 1000 users per second.
 In our tests we used the Rasa [HTTP-API](https://rasa.com/docs/rasa/pages/http-api) and the [Locust](https://locust.io/) open source load testing tool.
 
+
 |        Users             |               CPU                            |      Memory   |
 |--------------------------|----------------------------------------------|---------------|
 | Up to 50,000             |         6vCPU                                |      16 GB    |
 | Up to 80,000             |         6vCPU, with almost 90% CPU usage     |      16 GB    |
 
 
+### Some recommendations to improve latency
+- Sanic Workers must be mapped 1:1 to CPU for both Rasa Pro and Rasa Action Server
+- Create `async` actions to avoid any blocking I/O
+- `enable_selective_domain: true`: The domain is only sent for actions that need it. This massively trims the payload between the Rasa Pro and Rasa Action Server pods.
+- Consider using compute-optimized cloud machines that are designed for high-performance computing, such as the C5 instances on AWS.
+  However, because these machines are low on memory, the trained models need to be lightweight.
+
+
+|        Machine                 |               Rasa Pro                         |      Rasa Action Server                          |
+|--------------------------------|------------------------------------------------|--------------------------------------------------|
+| AWS C5 or Azure F or Gcloud C2 |   3-7vCPU, 10-16GB Memory, 3-7 Sanic Threads   |    3-7vCPU, 2-12GB Memory, 3-7 Sanic Threads     |
+
+
 ### Debugging bot related issues while scaling up
 
 To test the Rasa [HTTP-API](https://rasa.com/docs/rasa/pages/http-api) ability to handle a large number of concurrent user activity we used the Rasa Pro [tracing](./tracing.mdx) capability

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -156,7 +156,7 @@ structlog-sentry = "^2.0.2"
 dnspython = "2.3.0"
 wheel = ">=0.38.1"
 certifi = ">=2023.7.22"
-cryptography = ">=41.0.2"
+cryptography = ">=41.0.7"
 [[tool.poetry.dependencies.tensorflow-io-gcs-filesystem]]
 version = "==0.31"
 markers = "sys_platform == 'win32'"

@@ -112,53 +112,59 @@ async def _pull_model_and_fingerprint(
 
     logger.debug(f"Requesting model from server {model_server.url}...")
 
-    try:
-        params = model_server.combine_parameters()
-        async with model_server.session.request(
-            "GET",
-            model_server.url,
-            timeout=DEFAULT_REQUEST_TIMEOUT,
-            headers=headers,
-            params=params,
-        ) as resp:
-            if resp.status in [204, 304]:
-                logger.debug(
-                    "Model server returned {} status code, "
-                    "indicating that no new model is available. "
-                    "Current fingerprint: {}"
-                    "".format(resp.status, fingerprint)
-                )
-                return None
-            elif resp.status == 404:
-                logger.debug(
-                    "Model server could not find a model at the requested "
-                    "endpoint '{}'. It's possible that no model has been "
-                    "trained, or that the requested tag hasn't been "
-                    "assigned.".format(model_server.url)
-                )
-                return None
-            elif resp.status != 200:
-                logger.debug(
-                    "Tried to fetch model from server, but server response "
-                    "status code is {}. We'll retry later..."
-                    "".format(resp.status)
+    async with model_server.session() as session:
+        try:
+            params = model_server.combine_parameters()
+            async with session.request(
+                "GET",
+                model_server.url,
+                timeout=DEFAULT_REQUEST_TIMEOUT,
+                headers=headers,
+                params=params,
+            ) as resp:
+
+                if resp.status in [204, 304]:
+                    logger.debug(
+                        "Model server returned {} status code, "
+                        "indicating that no new model is available. "
+                        "Current fingerprint: {}"
+                        "".format(resp.status, fingerprint)
+                    )
+                    return None
+                elif resp.status == 404:
+                    logger.debug(
+                        "Model server could not find a model at the requested "
+                        "endpoint '{}'. It's possible that no model has been "
+                        "trained, or that the requested tag hasn't been "
+                        "assigned.".format(model_server.url)
+                    )
+                    return None
+                elif resp.status != 200:
+                    logger.debug(
+                        "Tried to fetch model from server, but server response "
+                        "status code is {}. We'll retry later..."
+                        "".format(resp.status)
+                    )
+                    return None
+
+                model_path = Path(model_directory) / resp.headers.get(
+                    "filename", "model.tar.gz"
                 )
-                return None
-            model_path = Path(model_directory) / resp.headers.get(
-                "filename", "model.tar.gz"
+                with open(model_path, "wb") as file:
+                    file.write(await resp.read())
+
+                logger.debug("Saved model to '{}'".format(os.path.abspath(model_path)))
+
+                # return the new fingerprint
+                return resp.headers.get("ETag")
+
+        except aiohttp.ClientError as e:
+            logger.debug(
+                "Tried to fetch model from server, but "
+                "couldn't reach server. We'll retry later... "
+                "Error: {}.".format(e)
             )
-            with open(model_path, "wb") as file:
-                file.write(await resp.read())
-            logger.debug("Saved model to '{}'".format(os.path.abspath(model_path)))
-            # return the new fingerprint
-            return resp.headers.get("ETag")
-    except aiohttp.ClientError as e:
-        logger.debug(
-            "Tried to fetch model from server, but "
-            "couldn't reach server. We'll retry later... "
-            "Error: {}.".format(e)
-        )
-        return None
+            return None
 
 
 async def _run_model_pulling_worker(model_server: EndpointConfig, agent: Agent) -> None:

@@ -24,6 +24,8 @@
 
 DEFAULT_LOCK_LIFETIME = 60  # in seconds
 
+DEFAULT_KEEP_ALIVE_TIMEOUT = 120  # in seconds
+
 BEARER_TOKEN_PREFIX = "Bearer "
 
 # The lowest priority is intended to be used by machine learning policies.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Upgrade Cryptography to fix improper certificate validation.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		Prevent a race condition in the poetry build to fix dependency install failures on Windows.