# [Backport 2.18] fix model stuck in deploying state during node crash/cluster restart #7782
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
name: Build and Test ml-commons

# Triggered on pushes to every branch except the automation branches listed
# under `branches-ignore`, and on pull requests when they are opened, updated
# (synchronize) or reopened.
on:
  push:
    branches-ignore:
      - 'backport/**'
      - 'create-pull-request/**'
      - 'dependabot/**'
  # `pull_request_target` runs the workflow in the context of the base
  # repository, so repository secrets are available even for PRs from forks.
  pull_request_target:
    types: [opened, synchronize, reopened]

# Token permissions for every job in this workflow.
# NOTE(review): `id-token: write` is presumably required for the OIDC role
# assumption done by aws-actions/configure-aws-credentials - confirm.
permissions:
  id-token: write
  contents: read
jobs:
  # Reusable workflow whose `is-require-approval` output is used below as the
  # deployment environment name of every build/test job.
  Get-Require-Approval:
    uses: ./.github/workflows/require-approval.yml

  # Reusable workflow from opensearch-build that outputs the CI container image
  # tag (`ci-image-version-linux`) consumed by the Linux build job.
  Get-CI-Image-Tag:
    uses: opensearch-project/opensearch-build/.github/workflows/get-ci-image-tag.yml@main
    with:
      product: opensearch

  # Builds the plugin inside the CI container, runs unit and multi-node
  # integration tests, and publishes the plugin zip as a workflow artifact.
  Build-ml-linux:
    needs: [Get-Require-Approval, Get-CI-Image-Tag]
    strategy:
      matrix:
        java: [11, 17, 21]
    env:
      ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION: true
    name: Build and Test MLCommons Plugin on linux
    if: github.repository == 'opensearch-project/ml-commons'
    environment: ${{ needs.Get-Require-Approval.outputs.is-require-approval }}
    outputs:
      # File name of the plugin zip, written to $GITHUB_OUTPUT by the build step.
      build-test-linux: ${{ steps.step-build-test-linux.outputs.build-test-linux }}
    runs-on: ubuntu-latest
    container:
      # using the same image which is used by opensearch-build team to build the OpenSearch Distribution
      # this image tag is subject to change as more dependencies and updates will arrive over time
      image: ${{ needs.Get-CI-Image-Tag.outputs.ci-image-version-linux }}
      # need to switch to root so that github actions can install runner binary on container without permission issues.
      options: --user root
    steps:
      - name: Setup Java ${{ matrix.java }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}

      - uses: aws-actions/configure-aws-credentials@v2
        with:
          role-to-assume: ${{ secrets.ML_ROLE }}
          aws-region: us-west-2

      - name: Checkout MLCommons
        uses: actions/checkout@v3
        with:
          # Under pull_request_target the default ref is the base branch, so
          # the PR head commit is checked out explicitly.
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Build and Run Tests
        id: step-build-test-linux
        # The container runs as root (see `options` above); the workspace is
        # handed to uid 1000 and the Gradle tasks run as that user via `su`.
        # The API keys are fetched from AWS Secrets Manager and masked in logs.
        run: |
          chown -R 1000:1000 `pwd`
          su `id -un 1000` -c 'whoami && java -version &&
            export OPENAI_KEY=`aws secretsmanager get-secret-value --secret-id github_openai_key --query SecretString --output text` &&
            export COHERE_KEY=`aws secretsmanager get-secret-value --secret-id github_cohere_key --query SecretString --output text` &&
            echo "::add-mask::$OPENAI_KEY" &&
            echo "::add-mask::$COHERE_KEY" &&
            echo "build and run tests" && ./gradlew build &&
            echo "Publish to Maven Local" && ./gradlew publishToMavenLocal &&
            echo "Multi Nodes Integration Testing" && ./gradlew integTest -PnumNodes=3'
          plugin=`basename $(ls plugin/build/distributions/*.zip)`
          echo $plugin
          mv -v plugin/build/distributions/$plugin ./
          echo "build-test-linux=$plugin" >> $GITHUB_OUTPUT

      - name: Upload Coverage Report
        # Same major version as the docker test job; v1 is long deprecated.
        uses: codecov/codecov-action@v3
        with:
          flags: ml-commons
          token: ${{ secrets.CODECOV_TOKEN }}

      # upload-artifact v3 has been retired on github.com and its artifacts
      # cannot be read by download-artifact v4, so both ends use v4.
      # NOTE(review): v4 runs on Node 20 - confirm the CI container image can
      # run it (ACTIONS_ALLOW_USE_UNSECURE_NODE_VERSION above suggests the
      # image needed older Node runtimes).
      - uses: actions/upload-artifact@v4
        with:
          name: ml-plugin-linux-${{ matrix.java }}
          path: ${{ steps.step-build-test-linux.outputs.build-test-linux }}
          if-no-files-found: error

  # Installs the freshly built plugin into the matching opensearchstaging
  # docker image (when one exists) and runs the integration tests against it.
  Test-ml-linux-docker:
    needs: [Get-Require-Approval, Build-ml-linux]
    strategy:
      matrix:
        java: [11, 17, 21]
    name: Test MLCommons Plugin on linux docker
    if: github.repository == 'opensearch-project/ml-commons'
    environment: ${{ needs.Get-Require-Approval.outputs.is-require-approval }}
    runs-on: ubuntu-latest
    steps:
      - uses: aws-actions/configure-aws-credentials@v2
        with:
          role-to-assume: ${{ secrets.ML_ROLE }}
          aws-region: us-west-2

      - name: Checkout MLCommons
        uses: actions/checkout@v3
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      # Must stay on the same major version as the upload step in
      # Build-ml-linux; v3 and v4 artifacts are not interchangeable.
      - uses: actions/download-artifact@v4
        with:
          name: ml-plugin-linux-${{ matrix.java }}

      - name: Pull and Run Docker
        env:
          # Passed through the environment instead of being spliced into the
          # script, so the file name is never re-parsed as shell code.
          PLUGIN_ZIP: ${{ needs.Build-ml-linux.outputs.build-test-linux }}
        run: |
          plugin="$PLUGIN_ZIP"
          # The third '-'-separated field of the zip name carries the version;
          # the fourth carries an optional qualifier (SNAPSHOT is ignored).
          version=`echo $plugin|awk -F- '{print $3}'| cut -d. -f 1-3`
          plugin_version=`echo $plugin|awk -F- '{print $3}'| cut -d. -f 1-4`
          qualifier=`echo $plugin|awk -F- '{print $4}'| cut -d. -f 1-1`
          if [ -n "$qualifier" ] && [ "$qualifier" != "SNAPSHOT" ]; then
            qualifier=-${qualifier}
          else
            qualifier=""
          fi
          docker_version=$version$qualifier
          echo plugin version plugin_version qualifier docker_version
          echo "($plugin) ($version) ($plugin_version) ($qualifier) ($docker_version)"
          pwd && ls -l ./$plugin
          if docker pull opensearchstaging/opensearch:$docker_version
          then
            # First line truncates so the generated Dockerfile always starts with FROM.
            echo "FROM opensearchstaging/opensearch:$docker_version" > Dockerfile
            echo "RUN if [ -d /usr/share/opensearch/plugins/opensearch-skills ]; then /usr/share/opensearch/bin/opensearch-plugin remove opensearch-skills; fi" >> Dockerfile
            echo "RUN if [ -d /usr/share/opensearch/plugins/opensearch-ml ]; then /usr/share/opensearch/bin/opensearch-plugin remove opensearch-ml; fi" >> Dockerfile
            echo "COPY $plugin /tmp/" >> Dockerfile
            echo "RUN /usr/share/opensearch/bin/opensearch-plugin install --batch file:/tmp/$plugin" >> Dockerfile
            docker build -t opensearch-ml:test .
            echo "imagePresent=true" >> $GITHUB_ENV
          else
            # No staging image for this version: the later steps are skipped.
            echo "imagePresent=false" >> $GITHUB_ENV
          fi

      - name: Generate Password For Admin
        id: genpass
        # NOTE(review): `{$PASSWORD}` is not `${PASSWORD}` - the literal braces
        # become part of the stored value. Left unchanged because every
        # consumer below reads the same step output.
        run: |
          PASSWORD=$(openssl rand -base64 20 | tr -dc 'A-Za-z0-9!@#$%^&*()_+=-')
          echo "password={$PASSWORD}" >> $GITHUB_OUTPUT

      - name: Run Docker Image
        if: env.imagePresent == 'true'
        env:
          ADMIN_PASSWORD: ${{ steps.genpass.outputs.password }}
        run: |
          cd ..
          docker run -p 9200:9200 -d -p 9600:9600 -e "discovery.type=single-node" -e OPENSEARCH_INITIAL_ADMIN_PASSWORD="$ADMIN_PASSWORD" opensearch-ml:test
          # Fixed wait for the node to come up before the tests start.
          sleep 90

      - name: Run MLCommons Test
        if: env.imagePresent == 'true'
        env:
          ADMIN_PASSWORD: ${{ steps.genpass.outputs.password }}
        run: |
          # Counts the lines of the plugin listing that mention the security
          # plugin; a non-zero count selects the HTTPS + basic-auth test run.
          security=`curl -XGET "https://localhost:9200/_cat/plugins?v" -u "admin:$ADMIN_PASSWORD" --insecure |grep opensearch-security|wc -l`
          export OPENAI_KEY=$(aws secretsmanager get-secret-value --secret-id github_openai_key --query SecretString --output text)
          export COHERE_KEY=$(aws secretsmanager get-secret-value --secret-id github_cohere_key --query SecretString --output text)
          echo "::add-mask::$OPENAI_KEY"
          echo "::add-mask::$COHERE_KEY"
          if [ "$security" -gt 0 ]
          then
            echo "Security plugin is available"
            ./gradlew integTest -Dtests.rest.cluster=localhost:9200 -Dtests.cluster=localhost:9200 -Dtests.clustername="docker-cluster" -Dhttps=true -Duser=admin -Dpassword="$ADMIN_PASSWORD"
          else
            echo "Security plugin is NOT available"
            ./gradlew integTest -Dtests.rest.cluster=localhost:9200 -Dtests.cluster=localhost:9200 -Dtests.clustername="docker-cluster"
          fi

      - name: Upload Coverage Report
        uses: codecov/codecov-action@v3
        with:
          flags: ml-commons
          token: ${{ secrets.CODECOV_TOKEN }}

  # Builds and unit-tests the plugin on a Windows runner; the multi-node
  # integration test step is currently commented out.
  Build-ml-windows:
    strategy:
      matrix:
        java: [11, 17, 21]
    name: Build and Test MLCommons Plugin on Windows
    if: github.repository == 'opensearch-project/ml-commons'
    needs: [Get-Require-Approval]
    environment: ${{ needs.Get-Require-Approval.outputs.is-require-approval }}
    runs-on: windows-latest
    steps:
      - name: Setup Java ${{ matrix.java }}
        uses: actions/setup-java@v1
        with:
          java-version: ${{ matrix.java }}

      - uses: aws-actions/configure-aws-credentials@v2
        with:
          role-to-assume: ${{ secrets.ML_ROLE }}
          aws-region: us-west-2

      # ml-commons
      - name: Checkout MLCommons
        uses: actions/checkout@v3
        with:
          ref: ${{ github.event.pull_request.head.sha }}

      - name: Build and Run Tests
        shell: bash
        run: |
          export OPENAI_KEY=$(aws secretsmanager get-secret-value --secret-id github_openai_key --query SecretString --output text)
          export COHERE_KEY=$(aws secretsmanager get-secret-value --secret-id github_cohere_key --query SecretString --output text)
          echo "::add-mask::$OPENAI_KEY"
          echo "::add-mask::$COHERE_KEY"
          ./gradlew.bat build

      - name: Publish to Maven Local
        run: |
          ./gradlew publishToMavenLocal

#      - name: Multi Nodes Integration Testing
#        shell: bash
#        run: |
#          export OPENAI_KEY=$(aws secretsmanager get-secret-value --secret-id github_openai_key --query SecretString --output text)
#          export COHERE_KEY=$(aws secretsmanager get-secret-value --secret-id github_cohere_key --query SecretString --output text)
#          echo "::add-mask::$OPENAI_KEY"
#          echo "::add-mask::$COHERE_KEY"
#          ./gradlew integTest -PnumNodes=3