diff --git a/.backportrc.json b/.backportrc.json
index f20034bca..32140b880 100644
--- a/.backportrc.json
+++ b/.backportrc.json
@@ -3,8 +3,7 @@
     { "name": "main", "checked": true },
     "8.x",
     "8.17",
-    "8.16",
-    "8.15"
+    "8.16"
   ],
   "fork": false,
   "targetPRLabels": ["backport"],
diff --git a/.buildkite/publish/dra/init_dra_publishing.sh b/.buildkite/publish/dra/init_dra_publishing.sh
index 4007cbe0d..a15aa67fe 100755
--- a/.buildkite/publish/dra/init_dra_publishing.sh
+++ b/.buildkite/publish/dra/init_dra_publishing.sh
@@ -57,6 +57,10 @@ fi
 if [[ "${BUILDKITE_BRANCH:-}" =~ ([0-9]\.[0-9x]*$) ]]; then
   export PUBLISH_STAGING="true"
 fi
+if [ -n "${VERSION_QUALIFIER:-}" ]; then
+  # this is a special case where we will release a pre-release artifact, regardless of branch
+  export PUBLISH_STAGING="true"
+fi
 
 # Sanity check in the logs to list the downloaded artifacts
 chmod -R a+rw "${RELEASE_DIR}/dist"
@@ -149,12 +153,21 @@ fi
 
 # generate the dependency report and publish STAGING artifacts
 if [[ "${PUBLISH_STAGING:-}" == "true" ]]; then
-  dependencyReportName="dependencies-${VERSION}.csv";
+  if [ -n "${VERSION_QUALIFIER:-}" ]; then
+    dependencyReportName="dependencies-${VERSION}-${VERSION_QUALIFIER}.csv";
+    zip_artifact_name="connectors-${VERSION}-${VERSION_QUALIFIER}.zip"
+    cp $DRA_ARTIFACTS_DIR/$PROJECT_NAME-$VERSION-docker-image-linux-amd64.tar.gz $DRA_ARTIFACTS_DIR/$PROJECT_NAME-$VERSION-$VERSION_QUALIFIER-docker-image-linux-amd64.tar.gz
+    cp $DRA_ARTIFACTS_DIR/$PROJECT_NAME-$VERSION-docker-image-linux-arm64.tar.gz $DRA_ARTIFACTS_DIR/$PROJECT_NAME-$VERSION-$VERSION_QUALIFIER-docker-image-linux-arm64.tar.gz
+  else
+    dependencyReportName="dependencies-${VERSION}.csv";
+    zip_artifact_name="connectors-${VERSION}.zip"
+  fi
+
   echo "-------- Generating STAGING dependency report: ${dependencyReportName}"
   generateDependencyReport $DEPENDENCIES_REPORTS_DIR/$dependencyReportName
 
   echo "-------- Publishing STAGING DRA Artifacts"
-  cp $RELEASE_DIR/dist/elasticsearch_connectors-${VERSION}.zip $DRA_ARTIFACTS_DIR/connectors-${VERSION}.zip
+  cp $RELEASE_DIR/dist/elasticsearch_connectors-${VERSION}.zip $DRA_ARTIFACTS_DIR/${zip_artifact_name}
 
   setDraVaultCredentials
   export WORKFLOW="staging"
diff --git a/.buildkite/publish/dra/publish-daily-release-artifact.sh b/.buildkite/publish/dra/publish-daily-release-artifact.sh
index 13f11f413..378d1b981 100755
--- a/.buildkite/publish/dra/publish-daily-release-artifact.sh
+++ b/.buildkite/publish/dra/publish-daily-release-artifact.sh
@@ -55,6 +55,14 @@ if [[ "${WORKFLOW:-}" != "staging" && "${WORKFLOW:-}" != "snapshot" ]]; then
   exit 2
 fi
 
+# snapshot workflows do not use qualifiers
+if [[ "${WORKFLOW:-}" == "snapshot" ]]; then
+  echo "SNAPSHOT workflows ignore version qualifier"
+  version_qualifier=""
+else
+  version_qualifier="${VERSION_QUALIFIER:-}"
+fi
+
 # Version. This is pulled from config/product_version.
 if [[ "${VERSION:-}" == "" ]]; then
   echo "ERROR: VERSION required!"
@@ -87,4 +95,5 @@ docker run --rm \
   --commit "${REVISION}" \
   --workflow "${WORKFLOW}" \
   --version "${VERSION}" \
+  --qualifier "${version_qualifier:-}" \
   --artifact-set main
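Taken together, the DRA script changes above reduce to a naming rule: a non-empty `VERSION_QUALIFIER` forces a staging publish and is spliced into the artifact names. A minimal sketch of that rule, for illustration only — the helper name is mine, and the real logic lives in the bash scripts above:

```python
# Sketch of the staging naming rule from init_dra_publishing.sh (not shipped code).
def staging_artifact_names(version: str, qualifier: str = "") -> dict[str, str]:
    # A non-empty qualifier is spliced between the version and the suffix.
    v = f"{version}-{qualifier}" if qualifier else version
    return {
        "zip": f"connectors-{v}.zip",
        "dependency_report": f"dependencies-{v}.csv",
    }

# With VERSION=9.0.0 and VERSION_QUALIFIER=BC1:
assert staging_artifact_names("9.0.0", "BC1")["zip"] == "connectors-9.0.0-BC1.zip"
assert staging_artifact_names("9.0.0")["zip"] == "connectors-9.0.0.zip"
```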
diff --git a/.buildkite/run_linter.sh b/.buildkite/run_linter.sh
index efbe60572..5a8e5e83c 100755
--- a/.buildkite/run_linter.sh
+++ b/.buildkite/run_linter.sh
@@ -9,6 +9,10 @@ init_python
 
 if is_pr && ! is_fork; then
   echo "We're on PR, running autoformat"
+
+  export GH_TOKEN="$VAULT_GITHUB_TOKEN"
+  source .buildkite/publish/git-setup.sh
+
   if ! make autoformat ; then
     echo "make autoformat ran with errors, exiting"
     exit 1
@@ -18,10 +22,9 @@ if is_pr && ! is_fork; then
     echo "Nothing to be fixed by autoformat"
     exit 0
   else
-    source .buildkite/publish/git-setup.sh
+    git --no-pager diff
 
     echo "linting errors are fixed, pushing the diff"
-    export GH_TOKEN="$VAULT_GITHUB_TOKEN"
 
     git add .
     git commit -m"make autoformat"
diff --git a/.buildkite/run_notice_check.sh b/.buildkite/run_notice_check.sh
index 753854027..cf0bbf0da 100755
--- a/.buildkite/run_notice_check.sh
+++ b/.buildkite/run_notice_check.sh
@@ -7,28 +7,28 @@ source .buildkite/shared.sh
 
 init_python
 
-make notice
+if is_pr && ! is_fork; then
+  echo 'Running on a PR that is not a fork, will commit changes'
 
-if [ -z "$(git status --porcelain | grep NOTICE.txt)" ]; then
-  echo 'Nothing changed'
-  exit 0
-else
-  echo 'New changes to NOTICE.txt:'
-  git --no-pager diff
-  if is_pr && ! is_fork; then
-    echo 'Running on a PR that is not a fork, will commit changes'
-    source .buildkite/publish/git-setup.sh
-    export GH_TOKEN="$VAULT_GITHUB_TOKEN"
+  export GH_TOKEN="$VAULT_GITHUB_TOKEN"
+  source .buildkite/publish/git-setup.sh
+  make notice
+
+  if [ -z "$(git status --porcelain | grep NOTICE.txt)" ]; then
+    echo 'Nothing changed'
+    exit 0
+  else
+    echo 'New changes to NOTICE.txt:'
+    git --no-pager diff
     git add NOTICE.txt
     git commit -m"Update NOTICE.txt"
     git push
-    sleep 15
-  else
-    echo 'Running against a fork or a non-PR change, skipping pushing changes and just failing instead'
-  fi
-  exit 1
+    exit 1
+  fi
+else
+  echo 'Skipping autofix'
+  make notice
+  exit 0
 fi
-
-exit 0
diff --git a/Dockerfile.agent b/Dockerfile.agent
index e6732406a..b9bdef480 100644
--- a/Dockerfile.agent
+++ b/Dockerfile.agent
@@ -5,16 +5,16 @@ FROM docker.elastic.co/elastic-agent/elastic-agent:9.0.0-SNAPSHOT
 
 USER root
 
-# Install apt-get dependencies
-RUN apt-get update && apt-get install -y \
-    software-properties-common \
+# Install basic dependencies
+RUN microdnf update && microdnf install -y \
     vim \
     wget \
     git \
     make \
-    && add-apt-repository ppa:deadsnakes/ppa \
-    && apt-get update && apt-get install -y python3.11 python3.11-venv \
-    && apt-get clean && rm -rf /var/lib/apt/lists/*
+    python3.11 \
+    python3.11-pip \
+    && microdnf clean all
+
 
 # Install Go-based yq separately
 RUN wget https://github.com/mikefarah/yq/releases/latest/download/yq_linux_amd64 -O /usr/bin/yq && \
diff --git a/Dockerfile.ftest.wolfi b/Dockerfile.ftest.wolfi
index 95b7ab499..dd4acc351 100644
--- a/Dockerfile.ftest.wolfi
+++ b/Dockerfile.ftest.wolfi
@@ -1,4 +1,4 @@
-FROM docker.elastic.co/wolfi/python:3.11-dev@sha256:c196966593fe640ccf8ac044d4b766a4e22a78809d03aaceff58cda3494141c5
+FROM docker.elastic.co/wolfi/python:3.11-dev@sha256:337643dccf2afac53c59955b9244f51ea315541a1641fb72b35d7e9ae2bca67d
 USER root
 COPY . /connectors
 WORKDIR /connectors
diff --git a/Dockerfile.wolfi b/Dockerfile.wolfi
index 5d0fada85..f4a46740c 100644
--- a/Dockerfile.wolfi
+++ b/Dockerfile.wolfi
@@ -1,4 +1,4 @@
-FROM docker.elastic.co/wolfi/python:3.11-dev@sha256:c196966593fe640ccf8ac044d4b766a4e22a78809d03aaceff58cda3494141c5
+FROM docker.elastic.co/wolfi/python:3.11-dev@sha256:337643dccf2afac53c59955b9244f51ea315541a1641fb72b35d7e9ae2bca67d
 USER root
 COPY . /app
 WORKDIR /app
diff --git a/connectors/agent/connector_record_manager.py b/connectors/agent/connector_record_manager.py
index 686025ecc..fe231dddd 100644
--- a/connectors/agent/connector_record_manager.py
+++ b/connectors/agent/connector_record_manager.py
@@ -50,7 +50,9 @@ async def ensure_connector_records_exist(self, agent_config, connector_name=None):
             random_connector_name_id = generate_random_id(length=4)
             connector_name = f"[Elastic-managed] {service_type} connector {random_connector_name_id}"
 
-        if not await self.connector_index.connector_exists(connector_id):
+        if not await self.connector_index.connector_exists(
+            connector_id, include_deleted=True
+        ):
             try:
                 await self.connector_index.connector_put(
                     connector_id=connector_id,
@@ -64,6 +66,10 @@ async def ensure_connector_records_exist(self, agent_config, connector_name=None):
                     f"Failed to create connector record for {connector_id}: {e}"
                 )
                 raise e
+        else:
+            logger.debug(
+                f"Skipping connector creation. Connector record for {connector_id} already exists."
+            )
 
     def _check_agent_config_ready(self, agent_config):
         """
diff --git a/connectors/es/index.py b/connectors/es/index.py
index 4e6b753a2..318f282d2 100644
--- a/connectors/es/index.py
+++ b/connectors/es/index.py
@@ -27,6 +27,14 @@ class TemporaryConnectorApiWrapper(ESClient):
     def __init__(self, elastic_config):
         super().__init__(elastic_config)
 
+    async def connector_get(self, connector_id, include_deleted):
+        return await self.client.perform_request(
+            "GET",
+            f"/_connector/{connector_id}",
+            headers={"accept": "application/json"},
+            params={"include_deleted": include_deleted},
+        )
+
     async def connector_check_in(self, connector_id):
         return await self.client.perform_request(
             "PUT",
@@ -98,6 +106,11 @@ async def connector_check_in(self, connector_id):
             partial(self._api_wrapper.connector_check_in, connector_id)
         )
 
+    async def connector_get(self, connector_id, include_deleted=False):
+        return await self._retrier.execute_with_retry(
+            partial(self._api_wrapper.connector_get, connector_id, include_deleted)
+        )
+
     async def connector_put(
         self, connector_id, service_type, connector_name, index_name, is_native
     ):
diff --git a/connectors/protocol/connectors.py b/connectors/protocol/connectors.py
index c69cd9dea..c4614c51d 100644
--- a/connectors/protocol/connectors.py
+++ b/connectors/protocol/connectors.py
@@ -19,11 +19,15 @@
 from datetime import datetime, timezone
 from enum import Enum
 
-from elasticsearch import ApiError
+from elasticsearch import (
+    ApiError,
+)
+from elasticsearch import (
+    NotFoundError as ElasticNotFoundError,
+)
 
 from connectors.es import ESDocument, ESIndex
 from connectors.es.client import with_concurrency_control
-from connectors.es.index import DocumentNotFoundError
 from connectors.filtering.validation import (
     FilteringValidationState,
     InvalidFilteringError,
@@ -179,11 +183,13 @@ async def connector_put(
             is_native=is_native,
         )
 
-    async def connector_exists(self, connector_id):
+    async def connector_exists(self, connector_id, include_deleted=False):
         try:
-            doc = await self.fetch_by_id(connector_id)
+            doc = await self.api.connector_get(
+                connector_id=connector_id, include_deleted=include_deleted
+            )
             return doc is not None
-        except DocumentNotFoundError:
+        except ElasticNotFoundError:
             return False
         except Exception as e:
             logger.error(
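The three Python files above change how existence is decided: `connector_exists()` now issues a real `GET /_connector/<id>` with `include_deleted` and maps a 404 to `False`, so a soft-deleted connector record still counts as existing and is not recreated by the record manager. A self-contained sketch of that control flow — stand-in names, not the shipped classes:

```python
# Illustrative sketch of the lookup flow; ElasticNotFoundError stands in for
# elasticsearch.NotFoundError, and fake_connector_get for the _connector API.
import asyncio


class ElasticNotFoundError(Exception):
    """Stand-in for elasticsearch.NotFoundError in this sketch."""


async def connector_exists(api_get, connector_id, include_deleted=False):
    # Mirrors ConnectorIndex.connector_exists above: a 404 from the
    # _connector API means "does not exist"; anything else bubbles up.
    try:
        doc = await api_get(connector_id, include_deleted)
        return doc is not None
    except ElasticNotFoundError:
        return False


async def fake_connector_get(connector_id, include_deleted):
    # Pretend record "1" is soft-deleted: visible only with include_deleted.
    if connector_id == "1" and include_deleted:
        return {"id": "1", "deleted": True}
    raise ElasticNotFoundError(connector_id)


async def main():
    assert await connector_exists(fake_connector_get, "1", include_deleted=True)
    assert not await connector_exists(fake_connector_get, "1")


asyncio.run(main())
```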
diff --git a/connectors/source.py b/connectors/source.py
index b714e6387..8730b5dbf 100644
--- a/connectors/source.py
+++ b/connectors/source.py
@@ -141,7 +141,17 @@ def _convert(self, value, field_type_):
         # list requires special type casting
         if cast_type is list:
             if isinstance(value, str):
-                return [item.strip() for item in value.split(",")] if value else []
+                items = []
+                if value:
+                    for item in value.split(","):
+                        item = item.strip()
+                        if not item:
+                            logger.debug(
+                                "Empty string detected in the comma-separated list. It will be skipped."
+                            )
+                        else:
+                            items.append(item)
+                return items
             elif isinstance(value, int):
                 return [value]
             elif isinstance(value, set):
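The behavior change above, extracted into a standalone function for clarity — the name `to_list` is mine; in the shipped code this lives inside `Field._convert`. Stray commas now produce debug logs and are dropped instead of yielding empty-string items:

```python
# Standalone rendering of the new list casting (illustrative, not the module API).
def to_list(value: str) -> list[str]:
    items = []
    for item in value.split(","):
        item = item.strip()
        if item:  # empty segments from leading/trailing/double commas are skipped
            items.append(item)
    return items

assert to_list("1,2,3,") == ["1", "2", "3"]
assert to_list(",,,1,2,3") == ["1", "2", "3"]
assert to_list("1, 2 ,3 ") == ["1", "2", "3"]
```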
diff --git a/docs/CLI.md b/docs/CLI.md
index e6a0c7fdc..fc4022b03 100644
--- a/docs/CLI.md
+++ b/docs/CLI.md
@@ -16,23 +16,23 @@ Connectors CLI helps with Elastic Connectors managing connectors and running syn
 ## Installation
 1. Clone the repository `git clone https://github.com/elastic/connectors.git`
 2. Run `make clean install` to install dependencies and create executable files.
-3. Connectors CLI is available via `./bin/connectors`
+3. Connectors CLI is available via `.venv/bin/connectors`
 
 ## Configuration
 **Note:** Make sure your Elasticsearch instance is up and running.
-1. Run `./bin/connectors login` to authenticate the CLI with an Elasticsearch instance.
+1. Run `.venv/bin/connectors login` to authenticate the CLI with an Elasticsearch instance.
 2. Provide credentials
 3. The command will create or ask to rewrite an existing configuration file in `./cli/config.yml`
 
 By default, the CLI uses basic authentication method (username, password) however an API key can be used too.
-Run `./bin/connectors login --method apikey` to authenticate the CLI via your API key.
+Run `.venv/bin/connectors login --method apikey` to authenticate the CLI via your API key.
 
 When you run any command you can specify a configuration file using `-c` argument.
 
 Example:
 
 ```bash
-./bin/connectors -c connector list
+.venv/bin/connectors -c connector list
 ```
 
 ## Available commands
@@ -41,7 +41,7 @@ Connectors CLI provides a `--help`/`-h` argument that can be used with any comma
 For example:
 
 ```bash
-./bin/connectors --help
+.venv/bin/connectors --help
 
 Usage: connectors [OPTIONS] COMMAND [ARGS]...
 
@@ -76,7 +76,7 @@ To bypass interactive mode you can use the `--from-file` argument, pointing to a
 Examples:
 
 ```console
-./bin/connectors connector create \
+.venv/bin/connectors connector create \
   --index-name my-index \
   --service-type sharepoint_online \
   --index-language en \
@@ -95,7 +95,7 @@ Lists all the existing connectors
 Examples:
 
 ```console
-./bin/connectors connector list
+.venv/bin/connectors connector list
 ```
 
 This will display all existing connectors and the associated indices.
@@ -105,7 +105,7 @@ Lists all jobs and their stats.
 Examples
 
 ```console
-./bin/connectors job list --
+.venv/bin/connectors job list --
 ```
 
 This will display all sync jobs including information like job status, number of indexed documents and index data volume associated with `connector_id`.
@@ -116,7 +116,7 @@ Marks the job as `cancelling` to let Connector services know that the job has to
 Examples:
 
 ```console
-./bin/connectors job cancel --
+.venv/bin/connectors job cancel --
 ```
 
 #### `connectors job start`
@@ -125,7 +125,7 @@ Schedules a new sync job and lets Connector service pick it up.
 Examples:
 
 ```console
-./bin/connectors job start -- \
+.venv/bin/connectors job start -- \
   -i \
   -t \
   -o
@@ -139,7 +139,7 @@ Shows information about a sync job.
 Examples:
 
 ```console
-./bin/connectors job view -- -o
+.venv/bin/connectors job view -- -o
 ```
diff --git a/docs/RELEASING.md b/docs/RELEASING.md
--- a/docs/RELEASING.md
+++ b/docs/RELEASING.md
(>= 8.16)
@@ -26,6 +27,15 @@ On the day of the release, `#mission-control` will notify the release manager that
 
 The Unified Release build will take care of producing git tags and official artifacts from our most recent DRA artifacts.
 
+### Pre-release artifacts
+
+If `#mission-control` asks for a pre-release artifact to be built, trigger the build pipeline from the relevant branch
+and add an Environment Variable for `VERSION_QUALIFIER` with the value of the pre-release.
+
+For example, to release 9.0.0-BC1, you would set `VERSION_QUALIFIER=BC1` for this build.
+
+Note that the qualified artifacts will only show up in DRA "staging" but not "snapshot" reports.
+
 ### In-Between releases
 
 Sometimes, we need to release Connectors independently of the Elastic unified-release.
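Why the note above says qualified artifacts appear only in "staging" reports: the snapshot workflow blanks the qualifier before invoking the release-manager container. The gate, restated in Python for illustration — the real check is in publish-daily-release-artifact.sh:

```python
def effective_qualifier(workflow: str, version_qualifier: str) -> str:
    # Mirrors publish-daily-release-artifact.sh: snapshot builds never
    # carry a qualifier; staging passes VERSION_QUALIFIER through.
    return "" if workflow == "snapshot" else version_qualifier

assert effective_qualifier("snapshot", "BC1") == ""
assert effective_qualifier("staging", "BC1") == "BC1"
```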
diff --git a/scripts/testing/README.md b/scripts/testing/README.md
index d1d4fb612..e6bf6b79b 100644
--- a/scripts/testing/README.md
+++ b/scripts/testing/README.md
@@ -19,18 +19,18 @@ Run `make clean install` to generate executable files in `./bin` folder
 
 ### Usage
 
-Run `./bin/test-connectors --help` or `./bin/test-connectors {command name} --help` to get more information about the cli.
+Run `.venv/bin/test-connectors --help` or `.venv/bin/test-connectors {command name} --help` to get more information about the cli.
 
 #### Running test with Elastic cloud deployment
 
 If you want to run your test suite using a cloud Elasticsearch deployment follow the next steps:
 1. Create a cloud deployment
 2. Download a credentials file (or create a new user)
-3. Run `./bin/test-connectors run-test my-testing-environment-name --es-host {host} --es-username {user name} --es-password {password} --test-case {path to the test case file}`
+3. Run `.venv/bin/test-connectors run-test my-testing-environment-name --es-host {host} --es-username {user name} --es-password {password} --test-case {path to the test case file}`
 
 #### Running test with local Elasticsearch
 
 If you want to run your tests with local Elasticsearch you need to specify `--es-version` option. Like
-`./bin/test-connectors run-test my-testing-environment-name --es-version 8.12-SNAPSHOT --test-case {path to the test case file}`
+`.venv/bin/test-connectors run-test my-testing-environment-name --es-version 8.12-SNAPSHOT --test-case {path to the test case file}`
 
 In this case, the cli will deploy an Elasticsearch instance in the same VM where the connector service will be running.
@@ -38,7 +38,7 @@
 You can use any git reference such as commit sha, a tag, or a branch name. The cli will pull the defined git reference and run `make clean install`.
 
-Example: `./bin/test-connectors run-test my-testing-environment-name --es-version 8.12-SNAPSHOT --connectors-ref 8.12 --test-case {path to the test case file}`
+Example: `.venv/bin/test-connectors run-test my-testing-environment-name --es-version 8.12-SNAPSHOT --connectors-ref 8.12 --test-case {path to the test case file}`
 
 #### Keeping your VM running when the tests passed
 Sometimes it's useful to get access to the logs or make some changes in the code and run tests again. The CLI will print a list of useful commands you can use to access the VM resources like:
@@ -47,7 +47,7 @@ Sometimes it's useful to get access to the logs or make some changes in the code
 To automatically delete the VM you need to use `--delete` option.
 
-`./bin/test-connectors run-test my-testing-environment-name --es-version 8.12-SNAPSHOT --connectors-ref 8.12 --test-case {path to the test case file} --delete`
+`.venv/bin/test-connectors run-test my-testing-environment-name --es-version 8.12-SNAPSHOT --connectors-ref 8.12 --test-case {path to the test case file} --delete`
 
 #### Using different machine type
 All new VMs are based on a predefined image which is in turn based on `ubuntu-2204-lts` with python3 and docker installed. Custome images are not supported. You can change a machine type by providing `--vm-type`. Visit [the official GCP documentation](https://cloud.google.com/compute/docs/general-purpose-machines) to get more information.
diff --git a/tests/protocol/test_connectors.py b/tests/protocol/test_connectors.py
index d77f762c5..54e4bae4b 100644
--- a/tests/protocol/test_connectors.py
+++ b/tests/protocol/test_connectors.py
@@ -10,10 +10,9 @@
 from unittest.mock import ANY, AsyncMock, Mock, patch
 
 import pytest
-from elasticsearch import ApiError, ConflictError
+from elasticsearch import ApiError, ConflictError, NotFoundError
 
 from connectors.config import load_config
-from connectors.es.index import DocumentNotFoundError
 from connectors.filtering.validation import (
     FilteringValidationResult,
     FilteringValidationState,
@@ -1656,7 +1655,7 @@ async def test_connector_exists_returns_true_when_found():
     }
 
     index = ConnectorIndex(config)
-    index.fetch_by_id = AsyncMock(return_value={"id": "1"})
+    index.api.connector_get = AsyncMock(return_value={"id": "1"})
 
     exists = await index.connector_exists("1")
     assert exists is True
@@ -1671,7 +1670,14 @@ async def test_connector_exists_returns_false_when_not_found():
     }
 
     index = ConnectorIndex(config)
-    index.fetch_by_id = AsyncMock(side_effect=DocumentNotFoundError)
+    api_meta = Mock()
+    api_meta.status = 404
+    error_body = {"error": {"reason": "mocked test failure"}}
+    index.api.connector_get = AsyncMock(
+        side_effect=NotFoundError(
+            message="this is an error message", body=error_body, meta=api_meta
+        )
+    )
 
     exists = await index.connector_exists("1")
     assert exists is False
@@ -1686,7 +1692,7 @@ async def test_connector_exists_raises_non_404_exception():
     }
 
     index = ConnectorIndex(config)
-    index.fetch_by_id = AsyncMock(side_effect=Exception("Fetch error"))
+    index.api.connector_get = AsyncMock(side_effect=Exception("Fetch error"))
 
     with pytest.raises(Exception, match="Fetch error"):
         await index.connector_exists("1")
diff --git a/tests/test_source.py b/tests/test_source.py
index f1f1e971a..6e5e48b48 100644
--- a/tests/test_source.py
+++ b/tests/test_source.py
@@ -101,6 +101,9 @@ def test_field_convert():
     assert Field("name", field_type="list").value == []
     assert Field("name", value="1", field_type="list").value == ["1"]
+    assert Field("name", value="1,2,3,", field_type="list").value == ["1", "2", "3"]
+    assert Field("name", value="1,2,3, ", field_type="list").value == ["1", "2", "3"]
+    assert Field("name", value=",,,1,2,3", field_type="list").value == ["1", "2", "3"]
     assert Field("name", value="1,2,3", field_type="list").value == ["1", "2", "3"]
     assert Field("name", value=[1, 2], field_type="list").value == [1, 2]
     assert Field("name", value=0, field_type="list").value == [0]