Skip to content

Commit

Permalink
handle empty agent config in Go client and fix build failures (#1338)
Browse files Browse the repository at this point in the history
* handle empty agent config in Go client

* handle empty agent config in Go client

* add retrycount on task failure for build

* add backup trivy db and retry

* use cached buildx image

* add env variables for trivy backup dbs

* add env variables for trivy backup dbs

* implement trivy db and javadb fallback mechanism

* fix windows image flaky build failures

* fix windows image flaky build failures

* fix windows image flaky build failures

* fix windows image flaky build failures

* fix windows image flaky build failures

* revert changes
  • Loading branch information
ganga1980 authored Dec 16, 2024
1 parent 2b37fbb commit f756bad
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 19 deletions.
74 changes: 64 additions & 10 deletions .pipelines/azure_pipeline_mergedbranches.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ jobs:
cd $(Build.SourcesDirectory)/deployment/arc-k8s-extension/ServiceGroupRoot/Scripts
tar -czvf ../artifacts.tar.gz ../../../../charts/azuremonitor-containers/ pushChartToAcr.sh
cd $(Build.SourcesDirectory)/deployment/arc-k8s-extension-release-v2/ServiceGroupRoot/Scripts
tar -czvf ../artifacts.tar.gz arcExtensionRelease.sh
Expand All @@ -95,7 +95,7 @@ jobs:
- task: CredScan@3
displayName: "SDL : Run credscan"

- task: CopyFiles@2
displayName: "Copy ev2 deployment artifacts"
inputs:
Expand Down Expand Up @@ -140,7 +140,7 @@ jobs:
inputs:
pathToPublish: '$(Build.ArtifactStagingDirectory)'
artifactName: drop

- task: Armory@2
displayName: 'Run ARMory'
inputs:
Expand Down Expand Up @@ -175,6 +175,7 @@ jobs:
sudo apt-get update && sudo apt-get -y install qemu binfmt-support qemu-user-static
docker system prune --all -f
docker images -q --filter "dangling=true" | xargs docker rmi
docker run --rm --privileged multiarch/qemu-user-static --reset -p yes
docker buildx create --name testbuilder
Expand All @@ -185,6 +186,11 @@ jobs:
az account set -s ${{ variables.subscription }}
az acr login -n ${{ variables.containerRegistry }}
# NOTE: Using the prometheus-collector team's cached buildx image since moby/buildkit:buildx-stable-1 getting throttled
docker pull mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:buildx-stable-1
docker buildx create --name dockerbuilder --driver docker-container --driver-opt image=mcr.microsoft.com/azuremonitor/containerinsights/cidev/prometheus-collector/images:buildx-stable-1 --use
docker buildx inspect --bootstrap
if [ "$(Build.Reason)" != "PullRequest" ]; then
docker buildx build --platform $(BUILD_PLATFORMS) --tag ${{ variables.repoImageName }}:$(linuxImagetag) -f kubernetes/linux/Dockerfile.multiarch --metadata-file $(Build.ArtifactStagingDirectory)/linux/metadata.json --build-arg IMAGE_TAG=$(linuxTelemetryTag) --build-arg GOLANG_BASE_IMAGE=$(GOLANG_BASE_IMAGE) --build-arg CI_BASE_IMAGE=$(CI_BASE_IMAGE) --push --provenance=false .
Expand Down Expand Up @@ -274,8 +280,54 @@ jobs:
scriptLocation: inlineScript
inlineScript: |
curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin
PRIMARY_TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db"
SECONDARY_TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db"
PRIMARY_TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db"
SECONDARY_TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db"
# Set initial repositories to primary
export TRIVY_DB_REPOSITORY=$PRIMARY_TRIVY_DB_REPOSITORY
export TRIVY_JAVA_DB_REPOSITORY=$PRIMARY_TRIVY_JAVA_DB_REPOSITORY
# Function to run Trivy scan and handle output
run_trivy_scan() {
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM "${{ variables.repoImageName }}:$(linuxImagetag)" > trivy_output.log 2>&1
return $?
}
trivy image --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --exit-code 1 ${{ variables.repoImageName }}:$(linuxImagetag)
# Attempt scan up to 5 times with repository fallback
for i in {1..5}; do
echo "Running Trivy scan attempt $i..."
# Run the Trivy scan and capture exit code
run_trivy_scan
TRIVY_EXIT_CODE=$?
# Check if scan was successful
if [ $TRIVY_EXIT_CODE -eq 0 ]; then
echo "Trivy scan succeeded."
cat trivy_output.log
break
fi
# If the first attempt fails, switch to secondary repositories
if [ $i -eq 1 ]; then
echo "Primary repositories failed with an error. Switching to secondary repositories."
export TRIVY_DB_REPOSITORY=$SECONDARY_TRIVY_DB_REPOSITORY
export TRIVY_JAVA_DB_REPOSITORY=$SECONDARY_TRIVY_JAVA_DB_REPOSITORY
fi
# Log and wait before retrying if an error occurred
echo "Error: Trivy scan attempt $i failed. Retrying... ($i/5)"
cat trivy_output.log
sleep 5 # Wait 5 seconds before retrying
done
# Final check: if still failing after 5 attempts, exit with error
if [ $TRIVY_EXIT_CODE -ne 0 ]; then
echo "Error: Trivy scan failed after 5 retries."
exit 1
fi
# Find in cache or download a specific version of Go and add it to the PATH.
- task: GoTool@0
Expand All @@ -287,7 +339,7 @@ jobs:
ls
make
displayName: 'Execute Makefile for Linux Build'
- task: CodeQL3000Finalize@0
condition: eq(variables.IS_MAIN_BRANCH, true)

Expand Down Expand Up @@ -355,6 +407,7 @@ jobs:
azureSubscription: ${{ variables.armServiceConnectionName }}
scriptType: ps
scriptLocation: inlineScript
retryCountOnTaskFailure: 2
inlineScript: |
mkdir -p $(Build.ArtifactStagingDirectory)/windows
cd kubernetes/windows
Expand Down Expand Up @@ -398,7 +451,7 @@ jobs:
echo "Extract fluent-bit"
docker cp signingContainer:C:\opt\fluent-bit .
echo "Extract Ruby"
docker cp signingContainer:C:\ruby31 .
Expand Down Expand Up @@ -525,7 +578,7 @@ jobs:
targetType: 'inline'
script: |
docker create --name pushContainer ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2019BaseImageVersion)-unsigned
echo "Copy Signed binaries/folders back to docker image..."
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.exe pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.exe
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.dll pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.dll
Expand Down Expand Up @@ -588,6 +641,7 @@ jobs:
azureSubscription: ${{ variables.armServiceConnectionName }}
scriptType: ps
scriptLocation: inlineScript
retryCountOnTaskFailure: 2
inlineScript: |
mkdir -p $(Build.ArtifactStagingDirectory)/windows
cd kubernetes/windows
Expand Down Expand Up @@ -631,7 +685,7 @@ jobs:
echo "Extract fluent-bit"
docker cp signingContainer:C:\opt\fluent-bit .
echo "Extract Ruby"
docker cp signingContainer:C:\ruby31 .
Expand Down Expand Up @@ -758,7 +812,7 @@ jobs:
targetType: 'inline'
script: |
docker create --name pushContainer ${{ variables.repoImageName }}:$(windowsImageTag)-$(windows2022BaseImageVersion)-unsigned
echo "Copy Signed binaries/folders back to docker image..."
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.exe pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.exe
docker cp $(Build.ArtifactStagingDirectory)/fpSigning/CertificateGenerator.dll pushContainer:C:\opt\amalogswindows\certgenerator\CertificateGenerator.dll
Expand Down Expand Up @@ -907,7 +961,7 @@ jobs:
inputs:
pathToPublish: '$(Build.ArtifactStagingDirectory)'
artifactName: drop

- task: AntiMalware@4
displayName: 'Run MpCmdRun.exe'
inputs:
Expand Down
23 changes: 23 additions & 0 deletions kubernetes/linux/Dockerfile.multiarch
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,29 @@ RUN ln -s /lib/libnssckbi.so /lib/p11-kit-trust.so
FROM distroless_image AS vulnscan
COPY .trivyignore .trivyignore
RUN ["/bin/bash", "-c", "curl -sfL https://raw.githubusercontent.com/aquasecurity/trivy/main/contrib/install.sh | sh -s -- -b /usr/local/bin v0.39.0"]

# Set up primary and secondary repository URLs
ENV PRIMARY_TRIVY_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-db"
ENV SECONDARY_TRIVY_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-db"
ENV PRIMARY_TRIVY_JAVA_DB_REPOSITORY="ghcr.io/aquasecurity/trivy-java-db"
ENV SECONDARY_TRIVY_JAVA_DB_REPOSITORY="public.ecr.aws/aquasecurity/trivy-java-db"

# Download Trivy main database with a fallback mechanism
RUN export TRIVY_DB_REPOSITORY=$PRIMARY_TRIVY_DB_REPOSITORY && \
trivy image --download-db-only || \
(echo "Primary TRIVY_DB_REPOSITORY failed, trying secondary." && \
export TRIVY_DB_REPOSITORY=$SECONDARY_TRIVY_DB_REPOSITORY && \
trivy image --download-db-only) || \
(echo "Both TRIVY_DB_REPOSITORY sources failed." && exit 1)

# Download Trivy Java database with a fallback mechanism
RUN export TRIVY_JAVA_DB_REPOSITORY=$PRIMARY_TRIVY_JAVA_DB_REPOSITORY && \
trivy fs --scanners vuln --vuln-type library --download-java-db-only || \
(echo "Primary TRIVY_JAVA_DB_REPOSITORY failed, trying secondary." && \
export TRIVY_JAVA_DB_REPOSITORY=$SECONDARY_TRIVY_JAVA_DB_REPOSITORY && \
trivy fs --scanners vuln --vuln-type library --download-java-db-only) || \
(echo "Both TRIVY_JAVA_DB_REPOSITORY sources failed." && exit 1)

RUN ["/bin/bash", "-c", "trivy rootfs --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --skip-files \"/usr/local/bin/trivy\" /"]
RUN ["/bin/bash", "-c", "trivy rootfs --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM /usr/lib"]
RUN ["/bin/bash", "-c", "trivy rootfs --exit-code 1 --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM --skip-files \"/usr/local/bin/trivy\" / > /dev/null 2>&1 && trivy rootfs --exit-code 1 --ignore-unfixed --no-progress --severity HIGH,CRITICAL,MEDIUM /usr/lib > /dev/null 2>&1"]
Expand Down
36 changes: 27 additions & 9 deletions source/plugins/go/src/ingestion_token_utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -444,17 +444,35 @@ func getAgentConfiguration(imdsAccessToken string) (configurationId string, chan
return configurationId, channelId, err
}

if len(agentConfiguration.Configurations[0].Content.Channels) == 0 {
message := "getAgentConfiguration: Received empty agentConfiguration.Configurations[0].Content.Channels"
Log(message)
SendException(message)
return configurationId, channelId, err
for _, config := range agentConfiguration.Configurations {
if len(config.Content.Channels) == 0 {
// this is expected because AMCS will return agent config based on OS Type. For example, syslog is not supported on windows hence config will not have channels and data sources
message := "getAgentConfiguration: Received empty config.Content.Channels"
Log(message)
continue
}

configurationId = config.Configurationid
for _, channel := range config.Content.Channels {
if channel.ID != "" {
channelId = channel.ID
break
}
}

if !ContainerLogV2ConfigMap && len(config.Content.Extensionconfigurations.Containerinsights) > 0 {
for _, ciExtensionInstance := range config.Content.Extensionconfigurations.Containerinsights {
ContainerLogSchemaV2 = ciExtensionInstance.Extensionsettings.DataCollectionSettings.EnableContainerLogV2
}
}
break
}

configurationId = agentConfiguration.Configurations[0].Configurationid
channelId = agentConfiguration.Configurations[0].Content.Channels[0].ID
if !ContainerLogV2ConfigMap && len(agentConfiguration.Configurations[0].Content.Extensionconfigurations.Containerinsights) > 0 {
ContainerLogSchemaV2 = agentConfiguration.Configurations[0].Content.Extensionconfigurations.Containerinsights[0].Extensionsettings.DataCollectionSettings.EnableContainerLogV2
if configurationId == "" || channelId == "" {
message := "getAgentConfiguration: Failed to obtain configurationId or channelId"
Log(message)
SendException(message)
return configurationId, channelId, errors.New(message)
}
Log("getAgentConfiguration: obtained configurationId: %s, channelId: %s", configurationId, channelId)
Log("Info getAgentConfiguration: end")
Expand Down

0 comments on commit f756bad

Please sign in to comment.