From 672f91c3f9997a0fd666317b658ecd7364d8a921 Mon Sep 17 00:00:00 2001 From: Jay Clifford <45856600+Jayclifford345@users.noreply.github.com> Date: Wed, 8 Jan 2025 13:18:41 +0000 Subject: [PATCH 1/9] docs: Cloud Deployment Azure (#15397) **What this PR does / why we need it**: Continuation of the cloud deployment guides. This teaches the user how to deploy Loki on Azure using the helm. Including: * Azure Setup * Helm values config * Testing --- docs/sources/setup/install/helm/_index.md | 10 + .../install/helm/deployment-guides/_index.md | 3 +- .../install/helm/deployment-guides/aws.md | 3 - .../install/helm/deployment-guides/azure.md | 590 ++++++++++++++++++ .../helm/install-microservices/_index.md | 3 +- 5 files changed, 604 insertions(+), 5 deletions(-) create mode 100644 docs/sources/setup/install/helm/deployment-guides/azure.md diff --git a/docs/sources/setup/install/helm/_index.md b/docs/sources/setup/install/helm/_index.md index 5392838e2ca74..05def151c5ca7 100644 --- a/docs/sources/setup/install/helm/_index.md +++ b/docs/sources/setup/install/helm/_index.md @@ -22,12 +22,22 @@ This guide references the Loki Helm chart version 3.0 or greater and contains th If you are installing Grafana Enterprise Logs, follow the [GEL Helm installation](https://grafana.com/docs/enterprise-logs//setup/helm/). +## Deployment Recommendations + +Loki is designed to be run in two states: +* [Monolithic](https://grafana.com/docs/loki//get-started/deployment-modes/#monolithic-mode): Recommended when you are running Loki as part of a small meta monitoring stack. +* [Microservices](https://grafana.com/docs/loki//get-started/deployment-modes/#microservices-mode): For workloads that require high availability and scalability. Loki is deployed in this mode internally at Grafana Labs. + +{{< admonition type="tip" >}} +Loki can also be deployed in [Simple Scalable mode](https://grafana.com/docs/loki//get-started/deployment-modes/#simple-scalable). For the best possible experience in production, we recommend deploying Loki in *microservices* mode. +{{< /admonition >}} ## Cloud Deployment Guides The following guides provide step-by-step instructions for deploying Loki on cloud providers: - [Amazon EKS](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/aws/) +- [Azure AKS](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/azure/) ## Reference diff --git a/docs/sources/setup/install/helm/deployment-guides/_index.md b/docs/sources/setup/install/helm/deployment-guides/_index.md index 4ff7d5dcaa983..3e408257f9afe 100644 --- a/docs/sources/setup/install/helm/deployment-guides/_index.md +++ b/docs/sources/setup/install/helm/deployment-guides/_index.md @@ -10,4 +10,5 @@ keywords: The following guides provide step-by-step instructions for deploying Loki on cloud providers: -- [Deploy Loki on AWS](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/aws/) \ No newline at end of file +- [Deploy Loki on AWS](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/aws/) +- [Deploy Loki on Azure](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/azure/) \ No newline at end of file diff --git a/docs/sources/setup/install/helm/deployment-guides/aws.md b/docs/sources/setup/install/helm/deployment-guides/aws.md index 82123318f8bb4..29ead8dfbfcd4 100644 --- a/docs/sources/setup/install/helm/deployment-guides/aws.md +++ b/docs/sources/setup/install/helm/deployment-guides/aws.md @@ -531,9 +531,6 @@ k6 is one of the fastest ways to test your Loki deployment. 
This will allow you iterations: 10, }; - **It is important to create a namespace called `loki` as our trust policy is set to allow the IAM role to be used by the `loki` service account in the `loki` namespace. This is configurable but make sure to update your service account.** - * "main" function for each VU iteration - */ export default () => { // Push request with 10 streams and uncompressed logs between 800KB and 2MB var res = client.pushParameterized(10, 800 * KB, 2 * MB); diff --git a/docs/sources/setup/install/helm/deployment-guides/azure.md b/docs/sources/setup/install/helm/deployment-guides/azure.md new file mode 100644 index 0000000000000..7f4a2fdd85095 --- /dev/null +++ b/docs/sources/setup/install/helm/deployment-guides/azure.md @@ -0,0 +1,590 @@ +--- +title: Deploy the Loki Helm chart on Azure +menuTitle: Deploy on Azure +description: Installing the Loki Helm chart on Azure. +keywords: +--- + +# Deploy the Loki Helm chart on Azure + +This guide shows how to deploy a minimally viable Loki in **microservice** mode on Azure using the Helm chart. In order to successfully complete this guide, you must have the necessary tools and permissions to deploy resources on Azure, such as: + +- Full access to AKS (Azure Kubernetes Service) +- Full access to Azure Blob Storage +- Sufficient permissions to create federated credentials and roles in Azure AD (Active Directory) + +There are three primary ways to authenticate Loki with Azure: + +- Hard coding a connection string - this is the simplest method but is not recommended for production environments. +- Manged identity +- Federated token + +In this guide, we will use the federated token method to deploy Loki on Azure. This method is more secure than hard coding a connection string and is more suitable for production environments. + +## Considerations + +{{< admonition type="caution" >}} +This guide was accurate at the time it was last updated on **8th of January, 2025**. As cloud providers frequently update their services and offerings, as a best practice, you should refer to the [Azure documentation](https://learn.microsoft.com/en-us/azure/) before creating your storage account and assigning roles. +{{< /admonition >}} + +- **AD Role:** In this tutorial we will create a role in Azure Active Directory (Azure AD) to allow Loki to read and write from Azure Blob Storage. This role will be assigned to the Loki service account. You may want to adjust the permissions based on your requirements. + +- **Authentication:** Grafana Loki comes with a basic authentication layer. The Loki gateway (NGINX) is exposed to the internet using basic authentication in this example. NGINX can also be replaced with other open-source reverse proxies. Refer to [Authentication](https://grafana.com/docs/loki//operations/authentication/) for more information. + +- **Retention:** The retention period is set to 28 days in the `values.yaml` file. You may wish to adjust this based on your requirements. + +- **Costs:** Running Loki on Azure will incur costs. Make sure to monitor your usage and costs to avoid any unexpected bills. In this guide we have used a simple AKS cluster with 3 nodes and `Standard_E2ds_v5` instances. You may wish to adjust the instance types and number of nodes based on your workload. + +## Prerequisites + +- Helm 3 or above. Refer to [Installing Helm](https://helm.sh/docs/intro/install/). This should be installed on your local machine. +- Kubectl installed on your local machine. 
Refer to [Install and Set Up kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/). +- Azure CLI installed on your local machine. Refer to [Installing the Azure CLI](https://learn.microsoft.com/en-us/cli/azure/install-azure-cli). This is a requirement for following this guide as all resources will be created using the Azure CLI. + +### AKS minimum requirements + +Before creating an AKS cluster in Azure you need to create a resource group. You can create a resource group using the Azure CLI: + +```bash +az group create --name -location +``` + +{{< admonition type="caution" >}} +These AKS requirements are the minimum specification needed to deploy Loki using this guide. You may wish to adjust the instance types based on your Azure environment and workload. **If you choose to do so, we cannot guarantee that this sample configuration will still meet your needs.** + +In this guide, we deploy Loki using `Standard_E2ds_v5` instances. This is to make sure we remain within the free tier limits for Azure. Which allows us to deploy up to 10 vCPUs within a region. We recommend for large production workloads to scale these nodes up to `Standard_D4_v5`. +{{< /admonition >}} + +The minimum requirements for deploying Loki on AKS are: + +- Kubernetes version `1.30` or above. +- `3` nodes for the AKS cluster. +- Instance type depends on your workload. A good starting point for a production cluster in the free tier is `Standard_E2ds_v5` instances and for large production workloads `Standard_D4_v5` instances. + +Here is how to create an AKS cluster with the minimum requirements: + +```bash +az aks create \ + --resource-group \ + --name \ + --node-count 3 \ + --node-vm-size Standard_E2ds_v5 \ + --generate-ssh-keys \ + --enable-workload-identity \ + --enable-oidc-issuer +``` + +Note in the above command we have enabled workload identity and OIDC issuer. This is required for the Loki service account to authenticate with Azure AD. If you have already created an AKS cluster, you can enable these features using the following command: + +```bash +az aks update \ + --resource-group \ + --name \ + --enable-workload-identity \ + --enable-oidc-issuer +``` + +The Azure CLI also lets you bind the AKS cluster to kubectl. You can do this by running the following command: + +```bash +az aks get-credentials --resource-group --name +``` + +## Configuring Azure Blob Storage + +{{< admonition type="tip" >}} + Consider using unique bucket names rather than: `chunk`, `ruler`, and `admin`. Although Azure Blog Storage is not directly affected by this [security update](https://grafana.com/blog/2024/06/27/grafana-security-update-grafana-loki-and-unintended-data-write-attempts-to-amazon-s3-buckets/) it is a best practice to use unique container names for buckets. +{{< /admonition >}} + +Before deploying Loki, you need to create two Azure storage containers; one to store logs (chunks), the second to store alert rules. You can create the containers using the Azure CLI. Containers must exist inside a storage account. + +{{< admonition type="note" >}} +GEL customers will require a third container to store the admin data. This container is not required for OSS users. +{{< /admonition >}} + +1. Create a storage account: + + ```bash + az storage account create \ + --name \ + --location \ + --sku Standard_ZRS \ + --encryption-services blob \ + --resource-group + ``` + Replace the placeholders with your desired values. + +1. 
Create the containers for chunks and ruler: + + ```bash + az storage container create --account-name --name --auth-mode login && \ + az storage container create --account-name --name --auth-mode login + ``` + Make sure `--account-name` matches the account you just created + + +With the storage account and containers created, you can now proceed to creating the Azure AD role and federated credentials. + +## Creating the Azure AD role and federated credentials + +The recommended way to authenticate Loki with Azure Blob Storage is to use federated credentials. This method is more secure than hard coding a connection string directly into the Loki configuration. In this next section, we will create an Azure AD role and federated credentials for Loki to allow it to read and write from Azure Blob Storage: + +1. Locate the OpenID Connect (OIDC) issuer URL: + + ```bash + az aks show \ + --resource-group \ + --name \ + --query "oidcIssuerProfile.issuerUrl" \ + -o tsv + ``` + This command will return the OIDC issuer URL. You will need this URL to create the federated credentials. + +1. Generate a `credentials.json` file with the following content: + ```json + { + "name": "LokiFederatedIdentity", + "issuer": "", + "subject": "system:serviceaccount:loki:loki", + "description": "Federated identity for Loki accessing Azure resources", + "audiences": [ + "api://AzureADTokenExchange" + ] + } + ``` + Replace `` with the OIDC issuer URL you found in the previous step. + +1. Make sure you to save the `credentials.json` file before continuing. + +1. Next generate an Azure directory `app`. We will use this to assign our federated credentials to: + ```bash + az ad app create \ + --display-name loki \ + --query appId \ + -o tsv + ``` + This will return the app ID. Save this for later use. If you need to find the app ID later you can run the following command: + ```bash + az ad app list --display-name loki --query "[].appId" -o tsv + ``` + +1. The app requires a service principal to authenticate with Azure AD. Create a service principal for the app: + + ```bash + az ad sp create --id + ``` + Replace `` with the app ID you generated in the previous step. + +1. Next assign the federated credentials to the app: + + ```bash + az ad app federated-credential create \ + --id \ + --parameters credentials.json + ``` + Replace `` with the app ID you generated in the previous step. + +1. Lastly add a role assignment to the app: + + ```bash + az role assignment create \ + --role "Storage Blob Data Contributor" \ + --assignee \ + --scope /subscriptions//resourceGroups//providers/Microsoft.Storage/storageAccounts/ + ``` + Replace the placeholders with your actual values. + +Now that you have created the Azure AD role and federated credentials, you can proceed to deploying Loki using the Helm chart. + + +## Deploying the Helm chart + +The following steps require the use of `helm` and `kubectl`. Make sure you have run the `az` command to bind your AKS cluster to `kubectl`: + +```bash +az aks get-credentials --resource-group --name +``` + +Before we can deploy the Loki Helm chart, we need to add the Grafana chart repository to Helm. This repository contains the Loki Helm chart. + +1. Add the Grafana chart repository to Helm: + + ```bash + helm repo add grafana https://grafana.github.io/helm-charts + ``` +1. Update the chart repository: + + ```bash + helm repo update + ``` +1. 
Create a new namespace for Loki: + + ```bash + kubectl create namespace loki + ``` +### Loki basic authentication + +Loki by default does not come with any authentication. Since we will be deploying Loki to Azure and exposing the gateway to the internet, we recommend adding at least basic authentication. In this guide we will give Loki a username and password: + +1. To start we will need create a `.htpasswd` file with the username and password. You can use the `htpasswd` command to create the file: + + {{< admonition type="tip" >}} + If you don't have the `htpasswd` command installed, you can install it using `brew` or `apt-get` or `yum` depending on your OS. + {{< /admonition >}} + + ```bash + htpasswd -c .htpasswd + ``` + This will create a file called `auth` with the username `loki`. You will be prompted to enter a password. + + 1. Create a Kubernetes secret with the `.htpasswd` file: + + ```bash + kubectl create secret generic loki-basic-auth --from-file=.htpasswd -n loki + ``` + + This will create a secret called `loki-basic-auth` in the `loki` namespace. We will reference this secret in the Loki Helm chart configuration. + +1. Create a `canary-basic-auth` secret for the canary: + + ```bash + kubectl create secret generic canary-basic-auth \ + --from-literal=username= \ + --from-literal=password= \ + -n loki + ``` + We create a literal secret with the username and password for Loki canary to authenticate with the Loki gateway. Make sure to replace the placeholders with your desired username and password. + +### Loki Helm chart configuration + +Create a `values.yaml` file choosing the configuration options that best suit your requirements. Below there is an example of `values.yaml` files for the Loki Helm chart in [microservices](https://grafana.com/docs/loki//get-started/deployment-modes/#microservices-mode) mode. + +```yaml +loki: + podLabels: + "azure.workload.identity/use": "true" # Add this label to the Loki pods to enable workload identity + schemaConfig: + configs: + - from: "2024-04-01" + store: tsdb + object_store: azure + schema: v13 + index: + prefix: loki_index_ + period: 24h + storage_config: + azure: + account_name: "" + container_name: "" # Your actual Azure Blob Storage container name (loki-azure-dev-chunks) + use_federated_token: true # Use federated token for authentication + ingester: + chunk_encoding: snappy + pattern_ingester: + enabled: true + limits_config: + allow_structured_metadata: true + volume_enabled: true + retention_period: 672h # 28 days retention + compactor: + retention_enabled: true + delete_request_store: azure + ruler: + enable_api: true + storage: + type: azure + azure: + account_name: + container_name: # Your actual Azure Blob Storage container name (loki-azure-dev-ruler) + use_federated_token: true # Use federated token for authentication + alertmanager_url: http://prom:9093 # The URL of the Alertmanager to send alerts (Prometheus, Mimir, etc.) 
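+    # Note: http://prom:9093 above is only an example in-cluster address;
+    # point alertmanager_url at the Alertmanager endpoint your ruler should
+    # deliver alerts to.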
+ + querier: + max_concurrent: 4 + + storage: + type: azure + bucketNames: + chunks: "" # Your actual Azure Blob Storage container name (loki-azure-dev-chunks) + ruler: "" # Your actual Azure Blob Storage container name (loki-azure-dev-ruler) + # admin: "admin-loki-devrel" # Your actual Azure Blob Storage container name (loki-azure-dev-admin) + azure: + accountName: + useFederatedToken: true # Use federated token for authentication + +# Define the Azure workload identity +serviceAccount: + name: loki + annotations: + "azure.workload.identity/client-id": "" # The app ID of the Azure AD app + labels: + "azure.workload.identity/use": "true" + +deploymentMode: Distributed + +ingester: + replicas: 3 + zoneAwareReplication: + enabled: false + +querier: + replicas: 3 + maxUnavailable: 2 + +queryFrontend: + replicas: 2 + maxUnavailable: 1 + +queryScheduler: + replicas: 2 + +distributor: + replicas: 3 + maxUnavailable: 2 +compactor: + replicas: 1 + +indexGateway: + replicas: 2 + maxUnavailable: 1 + +ruler: + replicas: 1 + maxUnavailable: 1 + + +# This exposes the Loki gateway so it can be written to and queried externaly +gateway: + service: + type: LoadBalancer + basicAuth: + enabled: true + existingSecret: loki-basic-auth + +# Since we are using basic auth, we need to pass the username and password to the canary +lokiCanary: + extraArgs: + - -pass=$(LOKI_PASS) + - -user=$(LOKI_USER) + extraEnv: + - name: LOKI_PASS + valueFrom: + secretKeyRef: + name: canary-basic-auth + key: password + - name: LOKI_USER + valueFrom: + secretKeyRef: + name: canary-basic-auth + key: username + +# Enable minio for storage +minio: + enabled: false + +backend: + replicas: 0 +read: + replicas: 0 +write: + replicas: 0 + +singleBinary: + replicas: 0 +``` + +{{< admonition type="caution" >}} +Make sure to replace the placeholders with your actual values. +{{< /admonition >}} + +It is critical to define a valid `values.yaml` file for the Loki deployment. To remove the risk of misconfiguration, let's break down the configuration options to keep in mind when deploying to Azure: + +- **Loki Config vs. Values Config:** + - The `values.yaml` file contains a section called `loki`, which contains a direct representation of the Loki configuration file. + - This section defines the Loki configuration, including the schema, storage, and querier configuration. + - The key configuration to focus on for chunks is the `storage_config` section, where you define the Azure container name and storage account. This tells Loki where to store the chunks. + - The `ruler` section defines the configuration for the ruler, including the Azure container name and storage account. This tells Loki where to store the alert and recording rules. + - For the full Loki configuration, refer to the [Loki Configuration](https://grafana.com/docs/loki//configure/) documentation. + +- **Storage:** + - Defines where the Helm chart stores data. + - Set the type to `azure` since we are using Azure Blob Storage. + - Configure the container names for the chunks and ruler to match the containers created earlier. + - The `azure` section specifies the storage account name and also sets `useFederatedToken` to `true`. This tells Loki to use federated credentials for authentication. + +- **Service Account:** + - The `serviceAccount` section is used to define the federated workload identity Loki will use to authenticate with Azure AD. + - We set the `azure.workload.identity/client-id` annotation to the app ID of the Azure AD app. 
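+
+  As a quick check after deployment, you can verify the workload identity wiring by inspecting the service account and one of the Loki pods. The label selector and the injected `AZURE_*` environment variables below are assumptions based on common Loki Helm chart labels and the behaviour of the Azure workload identity webhook, so adjust them to match your cluster:
+
+  ```bash
+  # Confirm the client-id annotation and workload identity label on the service account
+  kubectl describe serviceaccount loki -n loki
+
+  # Confirm the webhook injected the federated token settings into a Loki pod
+  kubectl describe pod -n loki -l app.kubernetes.io/component=distributor | grep AZURE_
+  ```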
+ +- **Gateway:** + - Defines how the Loki gateway will be exposed. + - We are using a `LoadBalancer` service type in this configuration. + + +### Deploy Loki + +Now that you have created the `values.yaml` file, you can deploy Loki using the Helm chart. + +1. Deploy using the newly created `values.yaml` file: + + ```bash + helm install --values values.yaml loki grafana/loki -n loki --create-namespace + ``` + It is important to create a namespace called `loki` as our federated credentials were generated with the value `system:serviceaccount:loki:loki`. This translates to the `loki` service account in the `loki` namespace. This is configurable but make sure to update the federated credentials file first. + +1. Verify the deployment: + + ```bash + kubectl get pods -n loki + ``` + You should see the Loki pods running. + + ```console + NAME READY STATUS RESTARTS AGE + loki-canary-crqpg 1/1 Running 0 10m + loki-canary-hm26p 1/1 Running 0 10m + loki-canary-v9wv9 1/1 Running 0 10m + loki-chunks-cache-0 2/2 Running 0 10m + loki-compactor-0 1/1 Running 0 10m + loki-distributor-78ccdcc9b4-9wlhl 1/1 Running 0 10m + loki-distributor-78ccdcc9b4-km6j2 1/1 Running 0 10m + loki-distributor-78ccdcc9b4-ptwrb 1/1 Running 0 10m + loki-gateway-5f97f78755-hm6mx 1/1 Running 0 10m + loki-index-gateway-0 1/1 Running 0 10m + loki-index-gateway-1 1/1 Running 0 10m + loki-ingester-zone-a-0 1/1 Running 0 10m + loki-ingester-zone-b-0 1/1 Running 0 10m + loki-ingester-zone-c-0 1/1 Running 0 10m + loki-querier-89d4ff448-4vr9b 1/1 Running 0 10m + loki-querier-89d4ff448-7nvrf 1/1 Running 0 10m + loki-querier-89d4ff448-q89kh 1/1 Running 0 10m + loki-query-frontend-678899db5-n5wc4 1/1 Running 0 10m + loki-query-frontend-678899db5-tf69b 1/1 Running 0 10m + loki-query-scheduler-7d666bf759-9xqb5 1/1 Running 0 10m + loki-query-scheduler-7d666bf759-kpb5q 1/1 Running 0 10m + loki-results-cache-0 2/2 Running 0 10m + loki-ruler-0 1/1 Running 0 10m + ``` + +### Find the Loki gateway service + +The Loki gateway service is a load balancer service that exposes the Loki gateway to the internet. This is where you will write logs to and query logs from. By default NGINX is used as the gateway. + +{{< admonition type="caution" >}} +The Loki gateway service is exposed to the internet. We provide basic authentication using a username and password in this tutorial. Refer to the [Authentication](https://grafana.com/docs/loki//operations/authentication/) documentation for more information. +{{< /admonition >}} + +To find the Loki gateway service, run the following command: + +```bash +kubectl get svc -n loki +``` +You should see the Loki gateway service with an external IP address. This is the address you will use to write to and query Loki. + +```console + NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +loki-gateway LoadBalancer 10.100.201.74 134.236.21.145 80:30707/TCP 46m +``` + +Congratulations! You have successfully deployed Loki on Azure using the Helm chart. Before we finish, let's test the deployment. + +## Testing Your Loki Deployment + +k6 is one of the fastest ways to test your Loki deployment. This will allow you to both write and query logs to Loki. To get started with k6, follow the steps below: + +1. Install k6 with the Loki extension on your local machine. Refer to [Installing k6 and the xk6-loki extension](https://grafana.com/docs/loki//send-data/k6/). + +2. 
Create a `azure-test.js` file with the following content: + + ```javascript + import {sleep, check} from 'k6'; + import loki from 'k6/x/loki'; + + /** + * URL used for push and query requests + * Path is automatically appended by the client + * @constant {string} + */ + + const username = ''; + const password = ''; + const external_ip = ''; + + const credentials = `${username}:${password}`; + + const BASE_URL = `http://${credentials}@${external_ip}`; + + /** + * Helper constant for byte values + * @constant {number} + */ + const KB = 1024; + + /** + * Helper constant for byte values + * @constant {number} + */ + const MB = KB * KB; + + /** + * Instantiate config and Loki client + */ + + const conf = new loki.Config(BASE_URL); + const client = new loki.Client(conf); + + /** + * Define test scenario + */ + export const options = { + vus: 10, + iterations: 10, + }; + + export default () => { + // Push request with 10 streams and uncompressed logs between 800KB and 2MB + var res = client.pushParameterized(10, 800 * KB, 2 * MB); + // Check for successful write + check(res, { 'successful write': (res) => res.status == 204 }); + + // Pick a random log format from label pool + let format = randomChoice(conf.labels["format"]); + + // Execute instant query with limit 1 + res = client.instantQuery(`count_over_time({format="${format}"}[1m])`, 1) + // Check for successful read + check(res, { 'successful instant query': (res) => res.status == 200 }); + + // Execute range query over last 5m and limit 1000 + res = client.rangeQuery(`{format="${format}"}`, "5m", 1000) + // Check for successful read + check(res, { 'successful range query': (res) => res.status == 200 }); + + // Wait before next iteration + sleep(1); + } + + /** + * Helper function to get random item from array + */ + function randomChoice(items) { + return items[Math.floor(Math.random() * items.length)]; + } + ``` + + Replace `` with the external IP address of the Loki Gateway service. + + This script will write logs to Loki and query logs from Loki. It will write logs in a random format between 800KB and 2MB and query logs in a random format over the last 5 minutes. + +3. Run the test: + + ```bash + ./k6 run azure-test.js + ``` + + This will run the test and output the results. You should see the test writing logs to Loki and querying logs from Loki. + +Now that you have successfully deployed Loki in microservices mode on Microsoft Azure, you may wish to explore the following: + +- [Sending data to Loki](https://grafana.com/docs/loki//query/) +- [Manage Loki](https://grafana.com/docs/loki/}} We recommend running Loki at scale within a cloud environment like AWS, Azure, or GCP. The below guides will show you how to deploy a minimally viable production environment. -- [Deploy Loki on AWS]({{< relref "../deployment-guides/aws" >}}) +- [Deploy Loki on AWS](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/aws/) +- [Deploy Loki on Azure](https://grafana.com/docs/loki//setup/install/helm/deployment-guides/azure/) ## Next Steps * Configure an agent to [send log data to Loki](/docs/loki//send-data/). From f2fc0c26c2d5b8986eadc7ed07b45387f50cd2c3 Mon Sep 17 00:00:00 2001 From: Robert Fratto Date: Wed, 8 Jan 2025 09:27:56 -0500 Subject: [PATCH 2/9] chore(dataobj): column building (#15634) This commit adds the ability to accumulate sequences of dataset.Value into a column, which is split up across multiple pages. 
Each page is broken down into two parts: * A bitmap-encoded sequence of booleans, where 1 indicates a row has a value and 0 indicates the row is NULL, and * the encoded sequence of non-NULL values, whose encoding is determined by the column options. The sequence of non-NULL values is then optionally compressed. This commit also includes initial support for reading these columns, starting with internal-only helper utilities for unit tests. --- pkg/dataobj/internal/dataset/column.go | 25 + .../internal/dataset/column_builder.go | 173 ++++++ pkg/dataobj/internal/dataset/column_iter.go | 20 + pkg/dataobj/internal/dataset/column_test.go | 61 +++ pkg/dataobj/internal/dataset/page.go | 99 ++++ pkg/dataobj/internal/dataset/page_builder.go | 240 ++++++++ .../internal/dataset/page_compress_writer.go | 123 +++++ pkg/dataobj/internal/dataset/page_iter.go | 53 ++ pkg/dataobj/internal/dataset/page_test.go | 83 +++ .../metadata/datasetmd/datasetmd.pb.go | 518 +++++++++++++++++- .../metadata/datasetmd/datasetmd.proto | 26 + pkg/dataobj/internal/result/result.go | 101 ++++ pkg/dataobj/internal/streamio/varint.go | 18 + 13 files changed, 1519 insertions(+), 21 deletions(-) create mode 100644 pkg/dataobj/internal/dataset/column.go create mode 100644 pkg/dataobj/internal/dataset/column_builder.go create mode 100644 pkg/dataobj/internal/dataset/column_iter.go create mode 100644 pkg/dataobj/internal/dataset/column_test.go create mode 100644 pkg/dataobj/internal/dataset/page.go create mode 100644 pkg/dataobj/internal/dataset/page_builder.go create mode 100644 pkg/dataobj/internal/dataset/page_compress_writer.go create mode 100644 pkg/dataobj/internal/dataset/page_iter.go create mode 100644 pkg/dataobj/internal/dataset/page_test.go create mode 100644 pkg/dataobj/internal/result/result.go diff --git a/pkg/dataobj/internal/dataset/column.go b/pkg/dataobj/internal/dataset/column.go new file mode 100644 index 0000000000000..73c63c3512984 --- /dev/null +++ b/pkg/dataobj/internal/dataset/column.go @@ -0,0 +1,25 @@ +package dataset + +import "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + +// Helper types. +type ( + // ColumnInfo describes a column. + ColumnInfo struct { + Name string // Name of the column, if any. + Type datasetmd.ValueType // Type of values in the column. + Compression datasetmd.CompressionType // Compression used for the column. + + RowsCount int // Total number of rows in the column. + CompressedSize int // Total size of all pages in the column after compression. + UncompressedSize int // Total size of all pages in the column before compression. + + Statistics *datasetmd.Statistics // Optional statistics for the column. + } +) + +// MemColumn holds a set of pages of a common type. +type MemColumn struct { + Info ColumnInfo // Information about the column. + Pages []*MemPage // The set of pages in the column. +} diff --git a/pkg/dataobj/internal/dataset/column_builder.go b/pkg/dataobj/internal/dataset/column_builder.go new file mode 100644 index 0000000000000..83476f6f04c92 --- /dev/null +++ b/pkg/dataobj/internal/dataset/column_builder.go @@ -0,0 +1,173 @@ +package dataset + +import ( + "fmt" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" +) + +// BuilderOptions configures common settings for building pages. +type BuilderOptions struct { + // PageSizeHint is the soft limit for the size of the page. Builders try to + // fill pages as close to this size as possible, but the actual size may be + // slightly larger or smaller. 
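+	// A hint of 0 effectively marks any buffer that already holds data as
+	// full, which tests use to force a page per value.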
+ PageSizeHint int + + // Value is the value type of data to write. + Value datasetmd.ValueType + + // Encoding is the encoding algorithm to use for values. + Encoding datasetmd.EncodingType + + // Compression is the compression algorithm to use for values. + Compression datasetmd.CompressionType +} + +// A ColumnBuilder builds a sequence of [Value] entries of a common type into a +// column. Values are accumulated into a buffer and then flushed into +// [MemPage]s once the size of data exceeds a configurable limit. +type ColumnBuilder struct { + name string + opts BuilderOptions + + rows int // Total number of rows in the column. + + pages []*MemPage + builder *pageBuilder +} + +// NewColumnBuilder creates a new ColumnBuilder from the optional name and +// provided options. NewColumnBuilder returns an error if the options are +// invalid. +func NewColumnBuilder(name string, opts BuilderOptions) (*ColumnBuilder, error) { + builder, err := newPageBuilder(opts) + if err != nil { + return nil, fmt.Errorf("creating page builder: %w", err) + } + + return &ColumnBuilder{ + name: name, + opts: opts, + + builder: builder, + }, nil +} + +// Append adds a new value into cb with the given zero-indexed row number. If +// the row number is higher than the current number of rows in cb, null values +// are added up to the new row. +// +// Append returns an error if the row number is out-of-order. +func (cb *ColumnBuilder) Append(row int, value Value) error { + if row < cb.rows { + return fmt.Errorf("row %d is older than current row %d", row, cb.rows) + } + + // We give two attempts to append the data to the buffer; if the buffer is + // full, we cut a page and then append to the newly reset buffer. + // + // The second iteration should never fail, as the buffer will always be empty + // then. + for range 2 { + if cb.append(row, value) { + cb.rows = row + 1 + return nil + } + + cb.flushPage() + } + + panic("ColumnBuilder.Append: failed to append value to fresh buffer") +} + +// Backfill adds NULLs into cb up to (but not including) the provided row +// number. If values exist up to the provided row number, Backfill does +// nothing. +func (cb *ColumnBuilder) Backfill(row int) { + // We give two attempts to append the data to the buffer; if the buffer is + // full, we cut a page and then append again to the newly reset buffer. + // + // The second iteration should never fail, as the buffer will always be + // empty. + for range 2 { + if cb.backfill(row) { + return + } + cb.flushPage() + } + + panic("ColumnBuilder.Backfill: failed to backfill buffer") +} + +func (cb *ColumnBuilder) backfill(row int) bool { + for row > cb.rows { + if !cb.builder.AppendNull() { + return false + } + cb.rows++ + } + + return true +} + +func (cb *ColumnBuilder) append(row int, value Value) bool { + // Backfill up to row. + if !cb.backfill(row) { + return false + } + return cb.builder.Append(value) +} + +// Flush converts data in cb into a [MemColumn]. Afterwards, cb is reset to a +// fresh state and can be reused. +func (cb *ColumnBuilder) Flush() (*MemColumn, error) { + cb.flushPage() + + info := ColumnInfo{ + Name: cb.name, + Type: cb.opts.Value, + + Compression: cb.opts.Compression, + } + + // TODO(rfratto): Should we compute column-wide statistics if they're + // available in pages? + // + // That would potentially work for min/max values, but not for count + // distinct, unless we had a way to pass sketches around. 
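+	// Aggregate per-page row counts and sizes into the column-level info.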
+ + for _, page := range cb.pages { + info.RowsCount += page.Info.RowCount + info.CompressedSize += page.Info.CompressedSize + info.UncompressedSize += page.Info.UncompressedSize + } + + column := &MemColumn{ + Info: info, + Pages: cb.pages, + } + + cb.Reset() + return column, nil +} + +func (cb *ColumnBuilder) flushPage() { + if cb.builder.Rows() == 0 { + return + } + + page, err := cb.builder.Flush() + if err != nil { + // Flush should only return an error when it's empty, which we already + // ensure it's not in the lines above. + panic(fmt.Sprintf("failed to flush page: %s", err)) + } + cb.pages = append(cb.pages, page) +} + +// Reset clears all data in cb and resets it to a fresh state. +func (cb *ColumnBuilder) Reset() { + cb.rows = 0 + cb.pages = nil + cb.builder.Reset() +} diff --git a/pkg/dataobj/internal/dataset/column_iter.go b/pkg/dataobj/internal/dataset/column_iter.go new file mode 100644 index 0000000000000..53b83f159575a --- /dev/null +++ b/pkg/dataobj/internal/dataset/column_iter.go @@ -0,0 +1,20 @@ +package dataset + +import "github.com/grafana/loki/v3/pkg/dataobj/internal/result" + +func iterMemColumn(col *MemColumn) result.Seq[Value] { + return result.Iter(func(yield func(Value) bool) error { + for _, page := range col.Pages { + for result := range iterMemPage(page, col.Info.Type, col.Info.Compression) { + val, err := result.Value() + if err != nil { + return err + } else if !yield(val) { + return nil + } + } + } + + return nil + }) +} diff --git a/pkg/dataobj/internal/dataset/column_test.go b/pkg/dataobj/internal/dataset/column_test.go new file mode 100644 index 0000000000000..545c9a26e357c --- /dev/null +++ b/pkg/dataobj/internal/dataset/column_test.go @@ -0,0 +1,61 @@ +package dataset + +import ( + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" +) + +func TestColumnBuilder_ReadWrite(t *testing.T) { + in := []string{ + "hello, world!", + "", + "this is a test of the emergency broadcast system", + "this is only a test", + "if this were a real emergency, you would be instructed to panic", + "but it's not, so don't", + "", + "this concludes the test", + "thank you for your cooperation", + "goodbye", + } + + opts := BuilderOptions{ + // Set the size to 0 so each column has exactly one value. 
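+		// With a hint of 0, appended values are flushed into separate pages, so
+		// this test reads a column that spans multiple pages.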
+ PageSizeHint: 0, + Value: datasetmd.VALUE_TYPE_STRING, + Compression: datasetmd.COMPRESSION_TYPE_ZSTD, + Encoding: datasetmd.ENCODING_TYPE_PLAIN, + } + b, err := NewColumnBuilder("", opts) + require.NoError(t, err) + + for i, s := range in { + require.NoError(t, b.Append(i, StringValue(s))) + } + + col, err := b.Flush() + require.NoError(t, err) + require.Equal(t, datasetmd.VALUE_TYPE_STRING, col.Info.Type) + require.Greater(t, len(col.Pages), 1) + + t.Log("Uncompressed size: ", col.Info.UncompressedSize) + t.Log("Compressed size: ", col.Info.CompressedSize) + t.Log("Pages: ", len(col.Pages)) + + var actual []string + for result := range iterMemColumn(col) { + val, err := result.Value() + require.NoError(t, err) + + if val.IsNil() || val.IsZero() { + actual = append(actual, "") + } else { + require.Equal(t, datasetmd.VALUE_TYPE_STRING, val.Type()) + actual = append(actual, val.String()) + } + } + require.Equal(t, in, actual) +} diff --git a/pkg/dataobj/internal/dataset/page.go b/pkg/dataobj/internal/dataset/page.go new file mode 100644 index 0000000000000..31cae969a9c76 --- /dev/null +++ b/pkg/dataobj/internal/dataset/page.go @@ -0,0 +1,99 @@ +package dataset + +import ( + "bytes" + "encoding/binary" + "fmt" + "hash/crc32" + "io" + + "github.com/golang/snappy" + "github.com/klauspost/compress/zstd" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" +) + +// Helper types. +type ( + // PageData holds the raw data for a page. Data is formatted as: + // + // + // + // The presence-bitmap is a bitmap-encoded sequence of booleans, where values + // describe which rows are present (1) or nil (0). The presence bitmap is + // always stored uncompressed. + // + // values-data is then the encoded and optionally compressed sequence of + // non-NULL values. + PageData []byte + + // PageInfo describes a page. + PageInfo struct { + UncompressedSize int // UncompressedSize is the size of a page before compression. + CompressedSize int // CompressedSize is the size of a page after compression. + CRC32 uint32 // CRC32 checksum of the page after encoding and compression. + RowCount int // RowCount is the number of rows in the page, including NULLs. + + Encoding datasetmd.EncodingType // Encoding used for values in the page. + Stats *datasetmd.Statistics // Optional statistics for the page. + } +) + +// MemPage holds an encoded (and optionally compressed) sequence of [Value] +// entries of a common type. Use [ColumnBuilder] to construct sets of pages. +type MemPage struct { + Info PageInfo // Information about the page. + Data PageData // Data for the page. +} + +var checksumTable = crc32.MakeTable(crc32.Castagnoli) + +// reader returns a reader for decompressed page data. Reader returns an error +// if the CRC32 fails to validate. 
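+// The returned values reader must be closed by the caller once reading is done.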
+func (p *MemPage) reader(compression datasetmd.CompressionType) (presence io.Reader, values io.ReadCloser, err error) { + if actual := crc32.Checksum(p.Data, checksumTable); p.Info.CRC32 != actual { + return nil, nil, fmt.Errorf("invalid CRC32 checksum %x, expected %x", actual, p.Info.CRC32) + } + + bitmapSize, n := binary.Uvarint(p.Data) + if n <= 0 { + return nil, nil, fmt.Errorf("reading presence bitmap size: %w", err) + } + + var ( + bitmapReader = bytes.NewReader(p.Data[n : n+int(bitmapSize)]) + compressedDataReader = bytes.NewReader(p.Data[n+int(bitmapSize):]) + ) + + switch compression { + case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE: + return bitmapReader, io.NopCloser(compressedDataReader), nil + + case datasetmd.COMPRESSION_TYPE_SNAPPY: + sr := snappy.NewReader(compressedDataReader) + return bitmapReader, io.NopCloser(sr), nil + + case datasetmd.COMPRESSION_TYPE_ZSTD: + zr, err := zstd.NewReader(compressedDataReader) + if err != nil { + return nil, nil, fmt.Errorf("opening zstd reader: %w", err) + } + return bitmapReader, newZstdReader(zr), nil + } + + panic(fmt.Sprintf("dataset.MemPage.reader: unknown compression type %q", compression.String())) +} + +// zstdReader implements [io.ReadCloser] for a [zstd.Decoder]. +type zstdReader struct{ *zstd.Decoder } + +// newZstdReader returns a new [io.ReadCloser] for a [zstd.Decoder]. +func newZstdReader(dec *zstd.Decoder) io.ReadCloser { + return &zstdReader{Decoder: dec} +} + +// Close implements [io.Closer]. +func (r *zstdReader) Close() error { + r.Decoder.Close() + return nil +} diff --git a/pkg/dataobj/internal/dataset/page_builder.go b/pkg/dataobj/internal/dataset/page_builder.go new file mode 100644 index 0000000000000..34a63a2181fb5 --- /dev/null +++ b/pkg/dataobj/internal/dataset/page_builder.go @@ -0,0 +1,240 @@ +package dataset + +import ( + "bytes" + "encoding/binary" + "fmt" + "hash/crc32" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/streamio" +) + +// pageBuilder accumulates sequences of [Value] in memory until reaching a +// configurable size limit. A [MemPage] can then be created from a PageBuiler +// by calling [pageBuilder.Flush]. +type pageBuilder struct { + // Each pageBuilder writes two sets of data. + // + // The first set of data is a presence bitmap which tells readers which rows + // are present. Use use 1 to indicate presence and 0 to indicate absence + // (NULL). This bitmap always uses bitmap encoding regardless of the encoding + // type used for values. + // + // The second set of data is the encoded set of non-NULL values. As an + // optimization, the zero value is treated as NULL. + // + // The two sets of data are accmumulated into separate buffers, with the + // presence bitmap being written uncompresed and the values being written + // with the configured compression type, if any. + // + // To orchestrate building two sets of data, we have a few components: + // + // * The final buffers which hold encoded and potentially compressed data. + // * The writer performing compression for values. + // * The encoders that write values. + + opts BuilderOptions + + presenceBuffer *bytes.Buffer // presenceBuffer holds the encoded presence bitmap. + valuesBuffer *bytes.Buffer // valuesBuffer holds encoded and optionally compressed values. + + valuesWriter *compressWriter // Compresses data and writes to valuesBuffer. 
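+	// The encoders below write the presence bitmap into presenceBuffer and the
+	// encoded values through valuesWriter.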
+ + presenceEnc *bitmapEncoder + valuesEnc valueEncoder + + rows int // Number of rows appended to the builder. +} + +// newPageBuilder creates a new pageBuilder that stores a sequence of [Value]s. +// newPageBuilder returns an error if there is no encoder available for the +// combination of opts.Value and opts.Encoding. +func newPageBuilder(opts BuilderOptions) (*pageBuilder, error) { + var ( + presenceBuffer = bytes.NewBuffer(nil) + valuesBuffer = bytes.NewBuffer(make([]byte, 0, opts.PageSizeHint)) + + valuesWriter = newCompressWriter(valuesBuffer, opts.Compression) + ) + + presenceEnc := newBitmapEncoder(presenceBuffer) + valuesEnc, ok := newValueEncoder(opts.Value, opts.Encoding, valuesWriter) + if !ok { + return nil, fmt.Errorf("no encoder available for %s/%s", opts.Value, opts.Encoding) + } + + return &pageBuilder{ + opts: opts, + + presenceBuffer: presenceBuffer, + valuesBuffer: valuesBuffer, + + valuesWriter: valuesWriter, + + presenceEnc: presenceEnc, + valuesEnc: valuesEnc, + }, nil +} + +// Append appends value into the pageBuilder. Append returns true if the data +// was appended; false if the pageBuilder is full. +func (b *pageBuilder) Append(value Value) bool { + if value.IsNil() || value.IsZero() { + return b.AppendNull() + } + + // We can't accurately know whether adding value would tip us over the page + // size: we don't know the current state of the encoders and we don't know + // for sure how much space value will fill. + // + // We use a rough estimate which will tend to overshoot the page size, making + // sure we rarely go over. + if sz := b.estimatedSize(); sz > 0 && sz+valueSize(value) > b.opts.PageSizeHint { + return false + } + + // The following calls won't fail; they only return errors when the + // underlying writers fail, which ours cannot. + if err := b.presenceEnc.Encode(Uint64Value(1)); err != nil { + panic(fmt.Sprintf("pageBuilder.Append: encoding presence bitmap entry: %v", err)) + } + if err := b.valuesEnc.Encode(value); err != nil { + panic(fmt.Sprintf("pageBuilder.Append: encoding value: %v", err)) + } + + b.rows++ + return true +} + +// AppendNull appends a NULL value to the Builder. AppendNull returns true if +// the NULL was appended, or false if the Builder is full. +func (b *pageBuilder) AppendNull() bool { + // See comment in Append for why we can only estimate the cost of appending a + // value. + // + // Here we assume appending a NULL costs one byte, but in reality most NULLs + // have no cost depending on the state of our bitmap encoder. + if sz := b.estimatedSize(); sz > 0 && sz+1 > b.opts.PageSizeHint { + return false + } + + // The following call won't fail; it only returns an error when the + // underlying writer fails, which ours cannot. + if err := b.presenceEnc.Encode(Uint64Value(0)); err != nil { + panic(fmt.Sprintf("Builder.AppendNull: encoding presence bitmap entry: %v", err)) + } + + b.rows++ + return true +} + +func valueSize(v Value) int { + switch v.Type() { + case datasetmd.VALUE_TYPE_INT64: + // Assuming that int64s are written as varints. + return streamio.VarintSize(v.Int64()) + + case datasetmd.VALUE_TYPE_UINT64: + // Assuming that uint64s are written as uvarints. + return streamio.UvarintSize(v.Uint64()) + + case datasetmd.VALUE_TYPE_STRING: + // Assuming that strings are PLAIN encoded using their length and bytes. + str := v.String() + return binary.Size(len(str)) + len(str) + } + + return 0 +} + +// estimatedSize returns the estimated uncompressed size of the builder in +// bytes. 
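+// Append and AppendNull use this estimate to decide whether the page is full.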
+func (b *pageBuilder) estimatedSize() int { + // This estimate doesn't account for any values in encoders which haven't + // been flushed yet. However, encoder buffers are usually small enough that + // we wouldn't massively overshoot our estimate. + return b.presenceBuffer.Len() + b.valuesWriter.BytesWritten() +} + +// Rows returns the number of rows appended to the pageBuilder. +func (b *pageBuilder) Rows() int { return b.rows } + +// Flush converts data in pageBuilder into a [MemPage], and returns it. +// Afterwards, pageBuilder is reset to a fresh state and can be reused. Flush +// returns an error if the pageBuilder is empty. +// +// To avoid computing useless stats, the Stats field of the returned Page is +// unset. If stats are needed for a page, callers should compute them by +// iterating over the returned Page. +func (b *pageBuilder) Flush() (*MemPage, error) { + if b.rows == 0 { + return nil, fmt.Errorf("no data to flush") + } + + // Before we can build the page we need to finish flushing our encoders and writers. + if err := b.presenceEnc.Flush(); err != nil { + return nil, fmt.Errorf("flushing presence encoder: %w", err) + } else if err := b.valuesEnc.Flush(); err != nil { + return nil, fmt.Errorf("flushing values encoder: %w", err) + } else if err := b.valuesWriter.Flush(); err != nil { + return nil, fmt.Errorf("flushing values writer: %w", err) + } + + // The final data of our page is the combination of the presence bitmap and + // the values. To denote when one ends and the other begins, we prepend the + // data with the size of the presence bitmap as a uvarint. See the doc + // comment of [PageData] for more information. + var ( + headerSize = streamio.UvarintSize(uint64(b.presenceBuffer.Len())) + presenceSize = b.presenceBuffer.Len() + valuesSize = b.valuesBuffer.Len() + + finalData = bytes.NewBuffer(make([]byte, 0, headerSize+presenceSize+valuesSize)) + ) + + if err := streamio.WriteUvarint(finalData, uint64(b.presenceBuffer.Len())); err != nil { + return nil, fmt.Errorf("writing presence buffer size: %w", err) + } else if _, err := b.presenceBuffer.WriteTo(finalData); err != nil { + return nil, fmt.Errorf("writing presence buffer: %w", err) + } else if _, err := b.valuesBuffer.WriteTo(finalData); err != nil { + return nil, fmt.Errorf("writing values buffer: %w", err) + } + + checksum := crc32.Checksum(finalData.Bytes(), checksumTable) + + page := MemPage{ + Info: PageInfo{ + UncompressedSize: headerSize + presenceSize + b.valuesWriter.BytesWritten(), + CompressedSize: finalData.Len(), + CRC32: checksum, + RowCount: b.rows, + + Encoding: b.opts.Encoding, + + // TODO(rfratto): At the moment we don't compute stats because they're + // not going to be valuable in every scenario: the min/max values for log + // lines is less useful compared to the min/max values for timestamps. + // + // In the future, we may wish to add more options to pageBuilder to tell + // it to compute a subset of stats to avoid needing a second iteration + // over the page to compute them. + Stats: nil, + }, + + Data: finalData.Bytes(), + } + + b.Reset() // Reset state before returning. + return &page, nil +} + +// Reset resets the pageBuilder to a fresh state, allowing it to be reused. 
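+// The underlying buffers keep their allocated storage, so reuse avoids
+// reallocating them for the next page.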
+func (b *pageBuilder) Reset() { + b.presenceBuffer.Reset() + b.valuesBuffer.Reset() + b.valuesWriter.Reset(b.valuesBuffer) + b.presenceBuffer.Reset() + b.valuesEnc.Reset(b.valuesWriter) + b.rows = 0 +} diff --git a/pkg/dataobj/internal/dataset/page_compress_writer.go b/pkg/dataobj/internal/dataset/page_compress_writer.go new file mode 100644 index 0000000000000..3fad4a0edfe0f --- /dev/null +++ b/pkg/dataobj/internal/dataset/page_compress_writer.go @@ -0,0 +1,123 @@ +package dataset + +import ( + "bufio" + "fmt" + "io" + + "github.com/golang/snappy" + "github.com/klauspost/compress/zstd" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/streamio" +) + +// A compressWriter is a [streamio.Writer] that compresses data passed to it. +type compressWriter struct { + // To be able to implmeent [io.ByteWriter], we always write directly to buf, + // which then flushes to w once it's full. + + w io.WriteCloser // Compressing writer. + buf *bufio.Writer // Buffered writer in front of w to be able to call WriteByte. + + compression datasetmd.CompressionType // Compression type being used. + rawBytes int // Number of uncompressed bytes written. +} + +var _ streamio.Writer = (*compressWriter)(nil) + +func newCompressWriter(w io.Writer, ty datasetmd.CompressionType) *compressWriter { + c := compressWriter{compression: ty} + c.Reset(w) + return &c +} + +// Write writes p to c. +func (c *compressWriter) Write(p []byte) (n int, err error) { + n, err = c.buf.Write(p) + c.rawBytes += n + return +} + +// WriteByte writes a single byte to c. +func (c *compressWriter) WriteByte(b byte) error { + if err := c.buf.WriteByte(b); err != nil { + return err + } + c.rawBytes++ + return nil +} + +// Flush compresses any pending uncompressed data in the buffer. +func (c *compressWriter) Flush() error { + // Flush our buffer first so c.w is up to date. + if err := c.buf.Flush(); err != nil { + return fmt.Errorf("flushing buffer: %w", err) + } + + // c.w may not support Flush (such as when using no compression), so we check + // first. + if f, ok := c.w.(interface{ Flush() error }); ok { + if err := f.Flush(); err != nil { + return fmt.Errorf("flushing compressing writer: %w", err) + } + } + + return nil +} + +// Reset discards the writer's state and switches the compressor to write to w. +// This permits reusing a compressWriter rather than allocating a new one. +func (c *compressWriter) Reset(w io.Writer) { + resetter, ok := c.w.(interface{ Reset(io.Writer) }) + switch ok { + case true: + resetter.Reset(w) + default: + // c.w is unset or doesn't support Reset; build a new writer. + var compressedWriter io.WriteCloser + + switch c.compression { + case datasetmd.COMPRESSION_TYPE_UNSPECIFIED, datasetmd.COMPRESSION_TYPE_NONE: + compressedWriter = nopCloseWriter{w} + + case datasetmd.COMPRESSION_TYPE_SNAPPY: + compressedWriter = snappy.NewBufferedWriter(w) + + case datasetmd.COMPRESSION_TYPE_ZSTD: + zw, err := zstd.NewWriter(w, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) + if err != nil { + panic(fmt.Sprintf("compressWriter.Reset: creating zstd writer: %v", err)) + } + compressedWriter = zw + + default: + panic(fmt.Sprintf("compressWriter.Reset: unknown compression type %v", c.compression)) + } + + c.w = compressedWriter + } + + if c.buf != nil { + c.buf.Reset(c.w) + } else { + c.buf = bufio.NewWriter(c.w) + } + c.rawBytes = 0 +} + +// BytesWritten returns the number of uncompressed bytes written to c. 
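+// The count includes bytes still buffered internally and not yet flushed to
+// the compressor.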
+func (c *compressWriter) BytesWritten() int { return c.rawBytes } + +// Close flushes and then closes c. +func (c *compressWriter) Close() error { + if err := c.Flush(); err != nil { + return err + } + return c.w.Close() +} + +type nopCloseWriter struct{ w io.Writer } + +func (w nopCloseWriter) Write(p []byte) (n int, err error) { return w.w.Write(p) } +func (w nopCloseWriter) Close() error { return nil } diff --git a/pkg/dataobj/internal/dataset/page_iter.go b/pkg/dataobj/internal/dataset/page_iter.go new file mode 100644 index 0000000000000..442e104135344 --- /dev/null +++ b/pkg/dataobj/internal/dataset/page_iter.go @@ -0,0 +1,53 @@ +package dataset + +import ( + "bufio" + "errors" + "fmt" + "io" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" + "github.com/grafana/loki/v3/pkg/dataobj/internal/result" +) + +func iterMemPage(p *MemPage, valueType datasetmd.ValueType, compressionType datasetmd.CompressionType) result.Seq[Value] { + return result.Iter(func(yield func(Value) bool) error { + presenceReader, valuesReader, err := p.reader(compressionType) + if err != nil { + return fmt.Errorf("opening page for reading: %w", err) + } + defer valuesReader.Close() + + presenceDec := newBitmapDecoder(bufio.NewReader(presenceReader)) + valuesDec, ok := newValueDecoder(valueType, p.Info.Encoding, bufio.NewReader(valuesReader)) + if !ok { + return fmt.Errorf("no decoder available for %s/%s", valueType, p.Info.Encoding) + } + + for { + var value Value + + present, err := presenceDec.Decode() + if errors.Is(err, io.EOF) { + return nil + } else if err != nil { + return fmt.Errorf("decoding presence bitmap: %w", err) + } else if present.Type() != datasetmd.VALUE_TYPE_UINT64 { + return fmt.Errorf("unexpected presence type %s", present.Type()) + } + + // value is currently nil. If the presence bitmap says our row has a + // value, we decode it into value. 
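+			// A NULL row therefore yields the zero Value, which callers detect
+			// with IsNil or IsZero.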
+ if present.Uint64() == 1 { + value, err = valuesDec.Decode() + if err != nil { + return fmt.Errorf("decoding value: %w", err) + } + } + + if !yield(value) { + return nil + } + } + }) +} diff --git a/pkg/dataobj/internal/dataset/page_test.go b/pkg/dataobj/internal/dataset/page_test.go new file mode 100644 index 0000000000000..1cbd025576a4d --- /dev/null +++ b/pkg/dataobj/internal/dataset/page_test.go @@ -0,0 +1,83 @@ +package dataset + +import ( + "math/rand" + "testing" + "time" + + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/dataobj/internal/metadata/datasetmd" +) + +func Test_pageBuilder_WriteRead(t *testing.T) { + in := []string{ + "hello, world!", + "", + "this is a test of the emergency broadcast system", + "this is only a test", + "if this were a real emergency, you would be instructed to panic", + "but it's not, so don't", + "", + "this concludes the test", + "thank you for your cooperation", + "goodbye", + } + + opts := BuilderOptions{ + PageSizeHint: 1024, + Value: datasetmd.VALUE_TYPE_STRING, + Compression: datasetmd.COMPRESSION_TYPE_ZSTD, + Encoding: datasetmd.ENCODING_TYPE_PLAIN, + } + b, err := newPageBuilder(opts) + require.NoError(t, err) + + for _, s := range in { + require.True(t, b.Append(StringValue(s))) + } + + page, err := b.Flush() + require.NoError(t, err) + + t.Log("Uncompressed size: ", page.Info.UncompressedSize) + t.Log("Compressed size: ", page.Info.CompressedSize) + + var actual []string + for result := range iterMemPage(page, opts.Value, opts.Compression) { + val, err := result.Value() + require.NoError(t, err) + + if val.IsNil() || val.IsZero() { + actual = append(actual, "") + } else { + require.Equal(t, datasetmd.VALUE_TYPE_STRING, val.Type()) + actual = append(actual, val.String()) + } + } + require.Equal(t, in, actual) +} + +func Test_pageBuilder_Fill(t *testing.T) { + opts := BuilderOptions{ + PageSizeHint: 1_500_000, + Value: datasetmd.VALUE_TYPE_INT64, + Compression: datasetmd.COMPRESSION_TYPE_NONE, + Encoding: datasetmd.ENCODING_TYPE_DELTA, + } + buf, err := newPageBuilder(opts) + require.NoError(t, err) + + ts := time.Now().UTC() + for buf.Append(Int64Value(ts.UnixNano())) { + ts = ts.Add(time.Duration(rand.Intn(5000)) * time.Millisecond) + } + + page, err := buf.Flush() + require.NoError(t, err) + require.Equal(t, page.Info.UncompressedSize, page.Info.CompressedSize) + + t.Log("Uncompressed size: ", page.Info.UncompressedSize) + t.Log("Compressed size: ", page.Info.CompressedSize) + t.Log("Row count: ", page.Info.RowCount) +} diff --git a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go index 52f2a35c53e9d..4121ca928c598 100644 --- a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go +++ b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.pb.go @@ -4,10 +4,15 @@ package datasetmd import ( + bytes "bytes" fmt "fmt" proto "github.com/gogo/protobuf/proto" + io "io" math "math" + math_bits "math/bits" + reflect "reflect" strconv "strconv" + strings "strings" ) // Reference imports to suppress errors if they are not otherwise used. @@ -88,9 +93,99 @@ func (EncodingType) EnumDescriptor() ([]byte, []int) { return fileDescriptor_7ab9d5b21b743868, []int{1} } +// CompressionType represents the valid compression types that can be used for +// compressing values in a page. +type CompressionType int32 + +const ( + // Invalid compression type. + COMPRESSION_TYPE_UNSPECIFIED CompressionType = 0 + // No compression. 
+ COMPRESSION_TYPE_NONE CompressionType = 1 + // Snappy compression. + COMPRESSION_TYPE_SNAPPY CompressionType = 2 + // Zstd compression. + COMPRESSION_TYPE_ZSTD CompressionType = 3 +) + +var CompressionType_name = map[int32]string{ + 0: "COMPRESSION_TYPE_UNSPECIFIED", + 1: "COMPRESSION_TYPE_NONE", + 2: "COMPRESSION_TYPE_SNAPPY", + 3: "COMPRESSION_TYPE_ZSTD", +} + +var CompressionType_value = map[string]int32{ + "COMPRESSION_TYPE_UNSPECIFIED": 0, + "COMPRESSION_TYPE_NONE": 1, + "COMPRESSION_TYPE_SNAPPY": 2, + "COMPRESSION_TYPE_ZSTD": 3, +} + +func (CompressionType) EnumDescriptor() ([]byte, []int) { + return fileDescriptor_7ab9d5b21b743868, []int{2} +} + +// Statistics about a column or a page. All statistics are optional and are +// conditionally set depending on the column type. +type Statistics struct { + // Minimum value. + MinValue []byte `protobuf:"bytes,1,opt,name=min_value,json=minValue,proto3" json:"min_value,omitempty"` + // Maximum value. + MaxValue []byte `protobuf:"bytes,2,opt,name=max_value,json=maxValue,proto3" json:"max_value,omitempty"` +} + +func (m *Statistics) Reset() { *m = Statistics{} } +func (*Statistics) ProtoMessage() {} +func (*Statistics) Descriptor() ([]byte, []int) { + return fileDescriptor_7ab9d5b21b743868, []int{0} +} +func (m *Statistics) XXX_Unmarshal(b []byte) error { + return m.Unmarshal(b) +} +func (m *Statistics) XXX_Marshal(b []byte, deterministic bool) ([]byte, error) { + if deterministic { + return xxx_messageInfo_Statistics.Marshal(b, m, deterministic) + } else { + b = b[:cap(b)] + n, err := m.MarshalToSizedBuffer(b) + if err != nil { + return nil, err + } + return b[:n], nil + } +} +func (m *Statistics) XXX_Merge(src proto.Message) { + xxx_messageInfo_Statistics.Merge(m, src) +} +func (m *Statistics) XXX_Size() int { + return m.Size() +} +func (m *Statistics) XXX_DiscardUnknown() { + xxx_messageInfo_Statistics.DiscardUnknown(m) +} + +var xxx_messageInfo_Statistics proto.InternalMessageInfo + +func (m *Statistics) GetMinValue() []byte { + if m != nil { + return m.MinValue + } + return nil +} + +func (m *Statistics) GetMaxValue() []byte { + if m != nil { + return m.MaxValue + } + return nil +} + func init() { proto.RegisterEnum("dataobj.metadata.dataset.v1.ValueType", ValueType_name, ValueType_value) proto.RegisterEnum("dataobj.metadata.dataset.v1.EncodingType", EncodingType_name, EncodingType_value) + proto.RegisterEnum("dataobj.metadata.dataset.v1.CompressionType", CompressionType_name, CompressionType_value) + proto.RegisterType((*Statistics)(nil), "dataobj.metadata.dataset.v1.Statistics") } func init() { @@ -98,27 +193,34 @@ func init() { } var fileDescriptor_7ab9d5b21b743868 = []byte{ - // 310 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xe2, 0x32, 0x2f, 0xc8, 0x4e, 0xd7, - 0x4f, 0x49, 0x2c, 0x49, 0xcc, 0x4f, 0xca, 0xd2, 0xcf, 0xcc, 0x2b, 0x49, 0x2d, 0xca, 0x4b, 0xcc, - 0xd1, 0xcf, 0x4d, 0x2d, 0x49, 0x04, 0x09, 0x82, 0x65, 0x8a, 0x53, 0x4b, 0x72, 0x53, 0x10, 0x2c, - 0xbd, 0x82, 0xa2, 0xfc, 0x92, 0x7c, 0x21, 0x69, 0xa8, 0x26, 0x3d, 0x98, 0x5a, 0x3d, 0xa8, 0x0a, - 0xbd, 0x32, 0x43, 0xad, 0x6c, 0x2e, 0xce, 0xb0, 0xc4, 0x9c, 0xd2, 0xd4, 0x90, 0xca, 0x82, 0x54, - 0x21, 0x29, 0x2e, 0xb1, 0x30, 0x47, 0x9f, 0x50, 0xd7, 0xf8, 0x90, 0xc8, 0x00, 0xd7, 0xf8, 0x50, - 0xbf, 0xe0, 0x00, 0x57, 0x67, 0x4f, 0x37, 0x4f, 0x57, 0x17, 0x01, 0x06, 0x21, 0x11, 0x2e, 0x01, - 0x24, 0x39, 0x4f, 0xbf, 0x10, 0x33, 0x13, 0x01, 0x46, 0x21, 0x51, 0x2e, 0x41, 0x64, 0x1d, 0x10, - 0x61, 0x26, 0x34, 0xe1, 0xe0, 0x90, 0x20, 
0x4f, 0x3f, 0x77, 0x01, 0x66, 0xad, 0x4a, 0x2e, 0x1e, - 0xd7, 0xbc, 0xe4, 0xfc, 0x94, 0xcc, 0xbc, 0x74, 0xb0, 0x7d, 0xb2, 0x5c, 0x92, 0xae, 0x7e, 0xce, - 0xfe, 0x2e, 0x9e, 0x7e, 0xee, 0xd8, 0xac, 0x14, 0xe7, 0x12, 0x46, 0x95, 0x0e, 0xf0, 0x71, 0xf4, - 0xf4, 0x13, 0x60, 0xc4, 0x94, 0x70, 0x71, 0xf5, 0x09, 0x71, 0x14, 0x60, 0x12, 0x92, 0xe0, 0x12, - 0x41, 0x95, 0x70, 0xf2, 0x0c, 0xf1, 0x75, 0x0c, 0x10, 0x60, 0x76, 0xaa, 0xb8, 0xf0, 0x50, 0x8e, - 0xe1, 0xc6, 0x43, 0x39, 0x86, 0x0f, 0x0f, 0xe5, 0x18, 0x1b, 0x1e, 0xc9, 0x31, 0xae, 0x78, 0x24, - 0xc7, 0x78, 0xe2, 0x91, 0x1c, 0xe3, 0x85, 0x47, 0x72, 0x8c, 0x0f, 0x1e, 0xc9, 0x31, 0xbe, 0x78, - 0x24, 0xc7, 0xf0, 0xe1, 0x91, 0x1c, 0xe3, 0x84, 0xc7, 0x72, 0x0c, 0x17, 0x1e, 0xcb, 0x31, 0xdc, - 0x78, 0x2c, 0xc7, 0x10, 0xe5, 0x94, 0x9e, 0x59, 0x92, 0x51, 0x9a, 0xa4, 0x97, 0x9c, 0x9f, 0xab, - 0x9f, 0x5e, 0x94, 0x98, 0x96, 0x98, 0x97, 0xa8, 0x9f, 0x93, 0x9f, 0x9d, 0xa9, 0x5f, 0x66, 0xac, - 0x4f, 0x64, 0x74, 0x24, 0xb1, 0x81, 0x63, 0xc1, 0x18, 0x10, 0x00, 0x00, 0xff, 0xff, 0xee, 0xfd, - 0x13, 0xfe, 0xc0, 0x01, 0x00, 0x00, + // 420 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0x8c, 0x92, 0x31, 0x6f, 0xd3, 0x40, + 0x1c, 0xc5, 0x7d, 0x89, 0x84, 0xe8, 0x5f, 0x95, 0x38, 0x4c, 0x4b, 0x5b, 0x02, 0xa7, 0x8a, 0x09, + 0x65, 0xb0, 0x85, 0x8a, 0x60, 0x76, 0x92, 0x6b, 0x75, 0x52, 0x7a, 0xb1, 0x62, 0xb7, 0x52, 0xbb, + 0x44, 0x97, 0xc4, 0x84, 0x23, 0xf1, 0x9d, 0x65, 0x5f, 0xa3, 0x74, 0x63, 0x62, 0xe6, 0x63, 0xf0, + 0x51, 0x18, 0x33, 0x76, 0x24, 0xce, 0xc2, 0xd8, 0x8f, 0x80, 0x6a, 0x8c, 0x68, 0x48, 0x86, 0x6e, + 0xff, 0x7b, 0xbf, 0xf7, 0xf4, 0x6e, 0x78, 0xf0, 0x21, 0x19, 0x8f, 0xdc, 0xa1, 0x30, 0x42, 0xf7, + 0x3f, 0xbb, 0x52, 0x99, 0x28, 0x55, 0x62, 0xe2, 0xc6, 0x91, 0x11, 0x77, 0x62, 0x41, 0xb2, 0xc8, + 0xc4, 0xc3, 0x7f, 0x97, 0x93, 0xa4, 0xda, 0x68, 0xbb, 0x56, 0x86, 0x9c, 0xbf, 0x5e, 0xa7, 0x74, + 0x38, 0xd3, 0xb7, 0xaf, 0x8f, 0x01, 0x02, 0x23, 0x8c, 0xcc, 0x8c, 0x1c, 0x64, 0x76, 0x0d, 0xb6, + 0x62, 0xa9, 0x7a, 0x53, 0x31, 0xb9, 0x8a, 0xf6, 0xd1, 0x21, 0x7a, 0xb3, 0xdd, 0x7d, 0x1c, 0x4b, + 0x75, 0x7e, 0xf7, 0x2e, 0xa0, 0x98, 0x95, 0xb0, 0x52, 0x42, 0x31, 0x2b, 0x60, 0x7d, 0x0c, 0x5b, + 0xc5, 0x11, 0x5e, 0x27, 0x91, 0xfd, 0x02, 0x9e, 0x9f, 0x7b, 0xed, 0x33, 0xda, 0x0b, 0x2f, 0x7c, + 0xda, 0x3b, 0xe3, 0x81, 0x4f, 0x9b, 0xec, 0x98, 0xd1, 0x16, 0xb6, 0xec, 0x1d, 0xc0, 0xf7, 0x18, + 0xe3, 0xe1, 0xfb, 0x77, 0x18, 0xd9, 0xbb, 0xf0, 0xf4, 0x7e, 0xe2, 0x8f, 0x5c, 0xf9, 0x4f, 0x0e, + 0xc2, 0x2e, 0xe3, 0x27, 0xb8, 0x5a, 0xbf, 0x86, 0x6d, 0xaa, 0x06, 0x7a, 0x28, 0xd5, 0xa8, 0xe8, + 0x7b, 0x05, 0x07, 0x94, 0x37, 0x3b, 0x2d, 0xc6, 0x4f, 0x36, 0x55, 0xee, 0xc1, 0xb3, 0x55, 0xec, + 0xb7, 0x3d, 0xc6, 0x31, 0x5a, 0x07, 0x2d, 0xda, 0x0e, 0x3d, 0x5c, 0xb1, 0xf7, 0x61, 0x67, 0x15, + 0x34, 0x58, 0x78, 0xea, 0xf9, 0xb8, 0x5a, 0xff, 0x8a, 0xe0, 0x49, 0x53, 0xc7, 0x49, 0x1a, 0x65, + 0x99, 0xd4, 0xaa, 0xa8, 0x3f, 0x84, 0x97, 0xcd, 0xce, 0xa9, 0xdf, 0xa5, 0x41, 0xc0, 0x3a, 0x7c, + 0xd3, 0x0f, 0x0e, 0x60, 0x77, 0xcd, 0xc1, 0x3b, 0x9c, 0x62, 0x64, 0xd7, 0x60, 0x6f, 0x0d, 0x05, + 0xdc, 0xf3, 0xfd, 0x0b, 0x5c, 0xd9, 0x98, 0xbb, 0x0c, 0xc2, 0x16, 0xae, 0x36, 0x66, 0xf3, 0x05, + 0xb1, 0x6e, 0x16, 0xc4, 0xba, 0x5d, 0x10, 0xf4, 0x25, 0x27, 0xe8, 0x7b, 0x4e, 0xd0, 0x8f, 0x9c, + 0xa0, 0x79, 0x4e, 0xd0, 0xcf, 0x9c, 0xa0, 0x5f, 0x39, 0xb1, 0x6e, 0x73, 0x82, 0xbe, 0x2d, 0x89, + 0x35, 0x5f, 0x12, 0xeb, 0x66, 0x49, 0xac, 0xcb, 0xc6, 0x48, 0x9a, 0x4f, 0x57, 0x7d, 0x67, 0xa0, + 0x63, 0x77, 0x94, 0x8a, 0x8f, 0x42, 0x09, 0x77, 0xa2, 0xc7, 0xd2, 0x9d, 0x1e, 
0xb9, 0x0f, 0xdc, + 0x57, 0xff, 0x51, 0x31, 0xab, 0xa3, 0xdf, 0x01, 0x00, 0x00, 0xff, 0xff, 0x79, 0x22, 0x11, 0xce, + 0x91, 0x02, 0x00, 0x00, } func (x ValueType) String() string { @@ -135,3 +237,377 @@ func (x EncodingType) String() string { } return strconv.Itoa(int(x)) } +func (x CompressionType) String() string { + s, ok := CompressionType_name[int32(x)] + if ok { + return s + } + return strconv.Itoa(int(x)) +} +func (this *Statistics) Equal(that interface{}) bool { + if that == nil { + return this == nil + } + + that1, ok := that.(*Statistics) + if !ok { + that2, ok := that.(Statistics) + if ok { + that1 = &that2 + } else { + return false + } + } + if that1 == nil { + return this == nil + } else if this == nil { + return false + } + if !bytes.Equal(this.MinValue, that1.MinValue) { + return false + } + if !bytes.Equal(this.MaxValue, that1.MaxValue) { + return false + } + return true +} +func (this *Statistics) GoString() string { + if this == nil { + return "nil" + } + s := make([]string, 0, 6) + s = append(s, "&datasetmd.Statistics{") + s = append(s, "MinValue: "+fmt.Sprintf("%#v", this.MinValue)+",\n") + s = append(s, "MaxValue: "+fmt.Sprintf("%#v", this.MaxValue)+",\n") + s = append(s, "}") + return strings.Join(s, "") +} +func valueToGoStringDatasetmd(v interface{}, typ string) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("func(v %v) *%v { return &v } ( %#v )", typ, typ, pv) +} +func (m *Statistics) Marshal() (dAtA []byte, err error) { + size := m.Size() + dAtA = make([]byte, size) + n, err := m.MarshalToSizedBuffer(dAtA[:size]) + if err != nil { + return nil, err + } + return dAtA[:n], nil +} + +func (m *Statistics) MarshalTo(dAtA []byte) (int, error) { + size := m.Size() + return m.MarshalToSizedBuffer(dAtA[:size]) +} + +func (m *Statistics) MarshalToSizedBuffer(dAtA []byte) (int, error) { + i := len(dAtA) + _ = i + var l int + _ = l + if len(m.MaxValue) > 0 { + i -= len(m.MaxValue) + copy(dAtA[i:], m.MaxValue) + i = encodeVarintDatasetmd(dAtA, i, uint64(len(m.MaxValue))) + i-- + dAtA[i] = 0x12 + } + if len(m.MinValue) > 0 { + i -= len(m.MinValue) + copy(dAtA[i:], m.MinValue) + i = encodeVarintDatasetmd(dAtA, i, uint64(len(m.MinValue))) + i-- + dAtA[i] = 0xa + } + return len(dAtA) - i, nil +} + +func encodeVarintDatasetmd(dAtA []byte, offset int, v uint64) int { + offset -= sovDatasetmd(v) + base := offset + for v >= 1<<7 { + dAtA[offset] = uint8(v&0x7f | 0x80) + v >>= 7 + offset++ + } + dAtA[offset] = uint8(v) + return base +} +func (m *Statistics) Size() (n int) { + if m == nil { + return 0 + } + var l int + _ = l + l = len(m.MinValue) + if l > 0 { + n += 1 + l + sovDatasetmd(uint64(l)) + } + l = len(m.MaxValue) + if l > 0 { + n += 1 + l + sovDatasetmd(uint64(l)) + } + return n +} + +func sovDatasetmd(x uint64) (n int) { + return (math_bits.Len64(x|1) + 6) / 7 +} +func sozDatasetmd(x uint64) (n int) { + return sovDatasetmd(uint64((x << 1) ^ uint64((int64(x) >> 63)))) +} +func (this *Statistics) String() string { + if this == nil { + return "nil" + } + s := strings.Join([]string{`&Statistics{`, + `MinValue:` + fmt.Sprintf("%v", this.MinValue) + `,`, + `MaxValue:` + fmt.Sprintf("%v", this.MaxValue) + `,`, + `}`, + }, "") + return s +} +func valueToStringDatasetmd(v interface{}) string { + rv := reflect.ValueOf(v) + if rv.IsNil() { + return "nil" + } + pv := reflect.Indirect(rv).Interface() + return fmt.Sprintf("*%v", pv) +} +func (m *Statistics) Unmarshal(dAtA []byte) error { + l := 
len(dAtA) + iNdEx := 0 + for iNdEx < l { + preIndex := iNdEx + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= uint64(b&0x7F) << shift + if b < 0x80 { + break + } + } + fieldNum := int32(wire >> 3) + wireType := int(wire & 0x7) + if wireType == 4 { + return fmt.Errorf("proto: Statistics: wiretype end group for non-group") + } + if fieldNum <= 0 { + return fmt.Errorf("proto: Statistics: illegal tag %d (wire type %d)", fieldNum, wire) + } + switch fieldNum { + case 1: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MinValue", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthDatasetmd + } + postIndex := iNdEx + byteLen + if postIndex < 0 { + return ErrInvalidLengthDatasetmd + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.MinValue = append(m.MinValue[:0], dAtA[iNdEx:postIndex]...) + if m.MinValue == nil { + m.MinValue = []byte{} + } + iNdEx = postIndex + case 2: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field MaxValue", wireType) + } + var byteLen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + byteLen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if byteLen < 0 { + return ErrInvalidLengthDatasetmd + } + postIndex := iNdEx + byteLen + if postIndex < 0 { + return ErrInvalidLengthDatasetmd + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + m.MaxValue = append(m.MaxValue[:0], dAtA[iNdEx:postIndex]...) 
+ if m.MaxValue == nil { + m.MaxValue = []byte{} + } + iNdEx = postIndex + default: + iNdEx = preIndex + skippy, err := skipDatasetmd(dAtA[iNdEx:]) + if err != nil { + return err + } + if skippy < 0 { + return ErrInvalidLengthDatasetmd + } + if (iNdEx + skippy) < 0 { + return ErrInvalidLengthDatasetmd + } + if (iNdEx + skippy) > l { + return io.ErrUnexpectedEOF + } + iNdEx += skippy + } + } + + if iNdEx > l { + return io.ErrUnexpectedEOF + } + return nil +} +func skipDatasetmd(dAtA []byte) (n int, err error) { + l := len(dAtA) + iNdEx := 0 + for iNdEx < l { + var wire uint64 + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + wire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + wireType := int(wire & 0x7) + switch wireType { + case 0: + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + iNdEx++ + if dAtA[iNdEx-1] < 0x80 { + break + } + } + return iNdEx, nil + case 1: + iNdEx += 8 + return iNdEx, nil + case 2: + var length int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + length |= (int(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + if length < 0 { + return 0, ErrInvalidLengthDatasetmd + } + iNdEx += length + if iNdEx < 0 { + return 0, ErrInvalidLengthDatasetmd + } + return iNdEx, nil + case 3: + for { + var innerWire uint64 + var start int = iNdEx + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return 0, ErrIntOverflowDatasetmd + } + if iNdEx >= l { + return 0, io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + innerWire |= (uint64(b) & 0x7F) << shift + if b < 0x80 { + break + } + } + innerWireType := int(innerWire & 0x7) + if innerWireType == 4 { + break + } + next, err := skipDatasetmd(dAtA[start:]) + if err != nil { + return 0, err + } + iNdEx = start + next + if iNdEx < 0 { + return 0, ErrInvalidLengthDatasetmd + } + } + return iNdEx, nil + case 4: + return iNdEx, nil + case 5: + iNdEx += 4 + return iNdEx, nil + default: + return 0, fmt.Errorf("proto: illegal wireType %d", wireType) + } + } + panic("unreachable") +} + +var ( + ErrInvalidLengthDatasetmd = fmt.Errorf("proto: negative length found during unmarshaling") + ErrIntOverflowDatasetmd = fmt.Errorf("proto: integer overflow") +) diff --git a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto index 478f74037871f..e396f9a627f49 100644 --- a/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto +++ b/pkg/dataobj/internal/metadata/datasetmd/datasetmd.proto @@ -37,3 +37,29 @@ enum EncodingType { // integers using a combination of run-length encoding and bitpacking. ENCODING_TYPE_BITMAP = 3; } + +// CompressionType represents the valid compression types that can be used for +// compressing values in a page. +enum CompressionType { + // Invalid compression type. + COMPRESSION_TYPE_UNSPECIFIED = 0; + + // No compression. + COMPRESSION_TYPE_NONE = 1; + + // Snappy compression. + COMPRESSION_TYPE_SNAPPY = 2; + + // Zstd compression. + COMPRESSION_TYPE_ZSTD = 3; +} + +// Statistics about a column or a page. All statistics are optional and are +// conditionally set depending on the column type. +message Statistics { + // Minimum value. 
+ bytes min_value = 1; + + // Maximum value. + bytes max_value = 2; +} diff --git a/pkg/dataobj/internal/result/result.go b/pkg/dataobj/internal/result/result.go new file mode 100644 index 0000000000000..7712e7a46ff7c --- /dev/null +++ b/pkg/dataobj/internal/result/result.go @@ -0,0 +1,101 @@ +// Package result provides utilities for dealing with iterators that can fail +// during iteration. +// +// Result is useful to make it harder for callers to ignore errors. Using +// iter.Seq2[V, error] can make it easy to accidentally ignore errors: +// +// func myIter() iter.Seq2[V, error] { ... } +// +// func main() { +// for v := range myIter() { /* errors are ignored! */ } +// } +package result + +import ( + "errors" + "iter" +) + +// Result is a type used for representing a result from an operation that can +// fail. +type Result[V any] struct { + value V // Valid only if err is nil. + err error +} + +// Value returns a successful result with the given value. +func Value[V any](v V) Result[V] { + return Result[V]{value: v} +} + +// Error returns a failed result with the given error. +func Error[V any](err error) Result[V] { + return Result[V]{err: err} +} + +// Value returns r's value and error. +func (r Result[V]) Value() (V, error) { + return r.value, r.err +} + +// MustValue returns r's value. If r is an error, MustValue panics. +func (r Result[V]) MustValue() V { + if r.err != nil { + panic(r.err) + } + return r.value +} + +// Err returns r's error, if any. +func (r Result[V]) Err() error { + return r.err +} + +// Seq is an iterator over sequences of result values. When called as +// seq(yield), seq calls yield(r) for each value r in the sequence, stopping +// early if yield returns false. +// +// See the [iter] package for more information on iterators. +type Seq[V any] func(yield func(Result[V]) bool) + +// Iter produces a new Seq[V] from a given function that can fail. Values +// passed to yield are wrapped in a call to [Value], while a non-nil error is +// wrapped in a call to [Error]. +// +// Iter makes it easier to write failable iterators and removes the need to +// manually wrap values and errors into a [Result]. +func Iter[V any](seq func(yield func(V) bool) error) Seq[V] { + return func(yield func(Result[V]) bool) { + err := seq(func(v V) bool { return yield(Value(v)) }) + if err != nil { + yield(Error[V](err)) + } + } +} + +// Pull converts the "push-style" Result iterator sequence seq into a +// "pull-style" iterator accessed by the two functions next and stop. +// +// Pull is a wrapper around [iter.Pull]. +func Pull[V any](seq Seq[V]) (next func() (Result[V], bool), stop func()) { + iseq := iter.Seq[Result[V]](seq) + return iter.Pull(iseq) +} + +// Collect collects values from seq into a new slice and returns it. Any errors +// from seq are joined and returned as the second value. +func Collect[V any](seq Seq[V]) ([]V, error) { + var ( + vals []V + errs []error + ) + for res := range seq { + val, err := res.Value() + if err != nil { + errs = append(errs, err) + } else { + vals = append(vals, val) + } + } + return vals, errors.Join(errs...) 
+} diff --git a/pkg/dataobj/internal/streamio/varint.go b/pkg/dataobj/internal/streamio/varint.go index e8420605e8c45..832a0cf4eb1e4 100644 --- a/pkg/dataobj/internal/streamio/varint.go +++ b/pkg/dataobj/internal/streamio/varint.go @@ -3,6 +3,7 @@ package streamio import ( "encoding/binary" "io" + "math/bits" ) // [binary] does not have an implementation to write varints directly @@ -10,6 +11,23 @@ import ( // encoders to stream values, we provide equivalent implementations of // [binary.AppendUvarint] and [binary.AppendVarint] which accept a ByteWriter. +// VarintSize returns the number of bytes needed to encode x. +func VarintSize(x int64) int { + ux := uint64(x) << 1 + if x < 0 { + ux = ^ux + } + return UvarintSize(ux) +} + +// UvarintSize returns the number of bytes needed to encode x. +func UvarintSize(x uint64) int { + if x == 0 { + return 1 + } + return 1 + (63-bits.LeadingZeros64(x))/7 +} + // WriteVarint writes an encoded signed integer to w. func WriteVarint(w io.ByteWriter, x int64) error { // Like [binary.AppendVarint], we use zig-zag encoding so small negative From 9e13abd454536646b8334a31af4cd600222a569e Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 8 Jan 2025 19:06:50 -0500 Subject: [PATCH 3/9] fix(deps): update module github.com/ibm/sarama to v1.45.0 (#15636) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 +- vendor/github.com/IBM/sarama/Dockerfile.kafka | 38 ++- vendor/github.com/IBM/sarama/Makefile | 2 +- vendor/github.com/IBM/sarama/admin.go | 217 ++++++++++++------ .../github.com/IBM/sarama/balance_strategy.go | 5 + .../IBM/sarama/create_topics_request.go | 15 ++ .../IBM/sarama/delete_topics_request.go | 15 ++ .../github.com/IBM/sarama/docker-compose.yml | 23 +- vendor/github.com/IBM/sarama/errors.go | 2 +- .../github.com/IBM/sarama/server.properties | 138 +++++++++++ vendor/github.com/IBM/sarama/utils.go | 4 +- vendor/modules.txt | 4 +- 13 files changed, 373 insertions(+), 96 deletions(-) create mode 100644 vendor/github.com/IBM/sarama/server.properties diff --git a/go.mod b/go.mod index 071ee3704f32d..f13f5b9f6be1d 100644 --- a/go.mod +++ b/go.mod @@ -13,7 +13,7 @@ require ( github.com/Azure/azure-storage-blob-go v0.15.0 github.com/Azure/go-autorest/autorest/adal v0.9.24 github.com/Azure/go-autorest/autorest/azure/auth v0.5.13 - github.com/IBM/sarama v1.44.0 + github.com/IBM/sarama v1.45.0 github.com/Masterminds/sprig/v3 v3.3.0 github.com/NYTimes/gziphandler v1.1.1 github.com/Workiva/go-datastructures v1.1.5 diff --git a/go.sum b/go.sum index 695b88caaca38..15cb39121db77 100644 --- a/go.sum +++ b/go.sum @@ -134,8 +134,8 @@ github.com/IBM/go-sdk-core/v5 v5.18.3 h1:q6IDU3N2bHGwijK9pMnzKC5gqdaRII56NzB4ZNd github.com/IBM/go-sdk-core/v5 v5.18.3/go.mod h1:5kILxqEWOrwMhoD2b7J6Xv9Z2M6YIdT/6Oy+XRSsCGQ= github.com/IBM/ibm-cos-sdk-go v1.12.0 h1:Wrk3ve4JS3euhl7XjNFd3RlvPT56199G2/rKaPWpRKU= github.com/IBM/ibm-cos-sdk-go v1.12.0/go.mod h1:v/VBvFuysZMIX9HcaIrz6a+FLVw9px8fq6XabFwD+E4= -github.com/IBM/sarama v1.44.0 h1:puNKqcScjSAgVLramjsuovZrS0nJZFVsrvuUymkWqhE= -github.com/IBM/sarama v1.44.0/go.mod h1:MxQ9SvGfvKIorbk077Ff6DUnBlGpidiQOtU2vuBaxVw= +github.com/IBM/sarama v1.45.0 h1:IzeBevTn809IJ/dhNKhP5mpxEXTmELuezO2tgHD9G5E= +github.com/IBM/sarama v1.45.0/go.mod h1:EEay63m8EZkeumco9TDXf2JT3uDnZsZqFgV46n4yZdY= github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0= 
github.com/MasslessParticle/azure-storage-blob-go v0.14.1-0.20240322194317-344980fda573 h1:DCPjdUAi+jcGnL7iN+A7uNY8xG584oMRuisYh/VE21E= github.com/MasslessParticle/azure-storage-blob-go v0.14.1-0.20240322194317-344980fda573/go.mod h1:SMqIBi+SuiQH32bvyjngEewEeXoPfKMgWlBDaYf6fck= diff --git a/vendor/github.com/IBM/sarama/Dockerfile.kafka b/vendor/github.com/IBM/sarama/Dockerfile.kafka index b4d5c6acbbcef..d2234e3918f2d 100644 --- a/vendor/github.com/IBM/sarama/Dockerfile.kafka +++ b/vendor/github.com/IBM/sarama/Dockerfile.kafka @@ -1,17 +1,17 @@ -FROM registry.access.redhat.com/ubi8/ubi-minimal:8.10@sha256:cf095e5668919ba1b4ace3888107684ad9d587b1830d3eb56973e6a54f456e67 +FROM registry.access.redhat.com/ubi9/ubi-minimal:9.5@sha256:daa61d6103e98bccf40d7a69a0d4f8786ec390e2204fd94f7cc49053e9949360 USER root RUN microdnf update -y \ - && microdnf install -y curl gzip java-11-openjdk-headless tar tzdata-java \ + && microdnf install -y git gzip java-17-openjdk-headless tar tzdata-java \ && microdnf reinstall -y tzdata \ && microdnf clean all -ENV JAVA_HOME=/usr/lib/jvm/jre-11 +ENV JAVA_HOME=/usr/lib/jvm/jre-17 # https://docs.oracle.com/javase/7/docs/technotes/guides/net/properties.html # Ensure Java doesn't cache any dns results -RUN cd /etc/java/java-11-openjdk/*/conf/security \ +RUN cd /etc/java/java-17-openjdk/*/conf/security \ && sed -e '/networkaddress.cache.ttl/d' -e '/networkaddress.cache.negative.ttl/d' -i java.security \ && echo 'networkaddress.cache.ttl=0' >> java.security \ && echo 'networkaddress.cache.negative.ttl=0' >> java.security @@ -19,24 +19,46 @@ RUN cd /etc/java/java-11-openjdk/*/conf/security \ ARG SCALA_VERSION="2.13" ARG KAFKA_VERSION="3.6.2" +WORKDIR /tmp + # https://github.com/apache/kafka/blob/2e2b0a58eda3e677763af974a44a6aaa3c280214/tests/docker/Dockerfile#L77-L105 ARG KAFKA_MIRROR="https://s3-us-west-2.amazonaws.com/kafka-packages" SHELL ["/bin/bash", "-o", "pipefail", "-c"] -RUN mkdir -p "/opt/kafka-${KAFKA_VERSION}" \ +RUN --mount=type=bind,target=.,rw=true \ + mkdir -p "/opt/kafka-${KAFKA_VERSION}" \ && chmod a+rw "/opt/kafka-${KAFKA_VERSION}" \ - && curl -s "$KAFKA_MIRROR/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" | tar xz --strip-components=1 -C "/opt/kafka-${KAFKA_VERSION}" + && if [ "$KAFKA_VERSION" = "4.0.0" ]; then \ + microdnf install -y java-17-openjdk-devel \ + && git clone --depth=50 --single-branch -b 4.0 https://github.com/apache/kafka /usr/src/kafka \ + && cd /usr/src/kafka \ + && : PIN TO COMMIT BEFORE KAFKA-17616 ZOOKEEPER REMOVAL STARTED \ + && git reset --hard d1504649fb \ + && export JAVA_TOOL_OPTIONS=-XX:MaxRAMPercentage=80 \ + && sed -e '/version=/s/-SNAPSHOT//' -e '/org.gradle.jvmargs/d' -e '/org.gradle.parallel/s/true/false/' -i gradle.properties && ./gradlew -PmaxParallelForks=1 -PmaxScalacThreads=1 --no-daemon releaseTarGz -x siteDocsTar -x javadoc \ + && tar xzf core/build/distributions/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz --strip-components=1 -C "/opt/kafka-${KAFKA_VERSION}" \ + && cp /tmp/server.properties "/opt/kafka-${KAFKA_VERSION}/config/" \ + && microdnf remove -y java-17-openjdk-devel \ + && rm -rf /usr/src/kafka ; \ + else \ + curl -s "$KAFKA_MIRROR/kafka_${SCALA_VERSION}-${KAFKA_VERSION}.tgz" | tar xz --strip-components=1 -C "/opt/kafka-${KAFKA_VERSION}" ; \ + fi # older kafka versions depend upon jaxb-api being bundled with the JDK, but it # was removed from Java 11 so work around that by including it in the kafka # libs dir regardless -WORKDIR /tmp RUN curl -sLO 
"https://repo1.maven.org/maven2/javax/xml/bind/jaxb-api/2.3.0/jaxb-api-2.3.0.jar" \ && for DIR in /opt/kafka-*; do cp -v jaxb-api-2.3.0.jar $DIR/libs/ ; done \ && rm -f jaxb-api-2.3.0.jar +# older kafka versions with the zookeeper 3.4.13 client aren't compatible with Java 17 so quietly bump them to 3.5.9 +RUN [ -f "/opt/kafka-${KAFKA_VERSION}/libs/zookeeper-3.4.13.jar" ] || exit 0 ; \ + rm -f "/opt/kafka-${KAFKA_VERSION}/libs/zookeeper-3.4.13.jar" \ + && curl --fail -sSL -o "/opt/kafka-${KAFKA_VERSION}/libs/zookeeper-3.5.9.jar" "https://repo1.maven.org/maven2/org/apache/zookeeper/zookeeper/3.5.9/zookeeper-3.5.9.jar" \ + && curl --fail -sSL -o "/opt/kafka-${KAFKA_VERSION}/libs/zookeeper-jute-3.5.9.jar" "https://repo1.maven.org/maven2/org/apache/zookeeper/zookeeper-jute/3.5.9/zookeeper-jute-3.5.9.jar" + WORKDIR /opt/kafka-${KAFKA_VERSION} -ENV JAVA_MAJOR_VERSION=11 +ENV JAVA_MAJOR_VERSION=17 RUN sed -e "s/JAVA_MAJOR_VERSION=.*/JAVA_MAJOR_VERSION=${JAVA_MAJOR_VERSION}/" -i"" ./bin/kafka-run-class.sh diff --git a/vendor/github.com/IBM/sarama/Makefile b/vendor/github.com/IBM/sarama/Makefile index 7cefc2a2c36bf..ba6f46e41a5ba 100644 --- a/vendor/github.com/IBM/sarama/Makefile +++ b/vendor/github.com/IBM/sarama/Makefile @@ -9,7 +9,7 @@ FILES := $(shell find . -name '*.go' -type f -not -name '*.pb.go' -not -name TESTS := $(shell find . -name '*.go' -type f -not -name '*.pb.go' -not -name '*_generated.go' -name '*_test.go') $(GOBIN)/tparse: - GOBIN=$(GOBIN) go install github.com/mfridman/tparse@v0.11.1 + GOBIN=$(GOBIN) go install github.com/mfridman/tparse@v0.16.0 get: $(GO) get ./... $(GO) mod verify diff --git a/vendor/github.com/IBM/sarama/admin.go b/vendor/github.com/IBM/sarama/admin.go index 6549c7e6fb07f..8aa1f374e4d63 100644 --- a/vendor/github.com/IBM/sarama/admin.go +++ b/vendor/github.com/IBM/sarama/admin.go @@ -3,6 +3,7 @@ package sarama import ( "errors" "fmt" + "io" "math/rand" "strconv" "sync" @@ -144,6 +145,10 @@ type ClusterAdmin interface { // locally cached value if it's available. Controller() (*Broker, error) + // Coordinator returns the coordinating broker for a consumer group. It will + // return a locally cached value if it's available. + Coordinator(group string) (*Broker, error) + // Remove members from the consumer group by given member identities. // This operation is supported by brokers with version 2.3 or higher // This is for static membership feature. 
KIP-345 @@ -195,14 +200,25 @@ func (ca *clusterAdmin) Controller() (*Broker, error) { return ca.client.Controller() } +func (ca *clusterAdmin) Coordinator(group string) (*Broker, error) { + return ca.client.Coordinator(group) +} + func (ca *clusterAdmin) refreshController() (*Broker, error) { return ca.client.RefreshController() } -// isErrNotController returns `true` if the given error type unwraps to an -// `ErrNotController` response from Kafka -func isErrNotController(err error) bool { - return errors.Is(err, ErrNotController) +// isRetriableControllerError returns `true` if the given error type unwraps to +// an `ErrNotController` or `EOF` response from Kafka +func isRetriableControllerError(err error) bool { + return errors.Is(err, ErrNotController) || errors.Is(err, io.EOF) +} + +// isRetriableGroupCoordinatorError returns `true` if the given error type +// unwraps to an `ErrNotCoordinatorForConsumer`, +// `ErrConsumerCoordinatorNotAvailable` or `EOF` response from Kafka +func isRetriableGroupCoordinatorError(err error) bool { + return errors.Is(err, ErrNotCoordinatorForConsumer) || errors.Is(err, ErrConsumerCoordinatorNotAvailable) || errors.Is(err, io.EOF) } // retryOnError will repeatedly call the given (error-returning) func in the @@ -252,7 +268,7 @@ func (ca *clusterAdmin) CreateTopic(topic string, detail *TopicDetail, validateO request.Version = 1 } - return ca.retryOnError(isErrNotController, func() error { + return ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -269,7 +285,7 @@ func (ca *clusterAdmin) CreateTopic(topic string, detail *TopicDetail, validateO } if !errors.Is(topicErr.Err, ErrNoError) { - if errors.Is(topicErr.Err, ErrNotController) { + if isRetriableControllerError(topicErr.Err) { _, _ = ca.refreshController() } return topicErr @@ -281,14 +297,14 @@ func (ca *clusterAdmin) CreateTopic(topic string, detail *TopicDetail, validateO func (ca *clusterAdmin) DescribeTopics(topics []string) (metadata []*TopicMetadata, err error) { var response *MetadataResponse - err = ca.retryOnError(isErrNotController, func() error { + err = ca.retryOnError(isRetriableControllerError, func() error { controller, err := ca.Controller() if err != nil { return err } request := NewMetadataRequest(ca.conf.Version, topics) response, err = controller.GetMetadata(request) - if isErrNotController(err) { + if isRetriableControllerError(err) { _, _ = ca.refreshController() } return err @@ -301,7 +317,7 @@ func (ca *clusterAdmin) DescribeTopics(topics []string) (metadata []*TopicMetada func (ca *clusterAdmin) DescribeCluster() (brokers []*Broker, controllerID int32, err error) { var response *MetadataResponse - err = ca.retryOnError(isErrNotController, func() error { + err = ca.retryOnError(isRetriableControllerError, func() error { controller, err := ca.Controller() if err != nil { return err @@ -309,7 +325,7 @@ func (ca *clusterAdmin) DescribeCluster() (brokers []*Broker, controllerID int32 request := NewMetadataRequest(ca.conf.Version, nil) response, err = controller.GetMetadata(request) - if isErrNotController(err) { + if isRetriableControllerError(err) { _, _ = ca.refreshController() } return err @@ -441,7 +457,7 @@ func (ca *clusterAdmin) DeleteTopic(topic string) error { request.Version = 1 } - return ca.retryOnError(isErrNotController, func() error { + return ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -485,7 +501,7 @@ func (ca *clusterAdmin) 
CreatePartitions(topic string, count int32, assignment [ request.Version = 1 } - return ca.retryOnError(isErrNotController, func() error { + return ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -526,7 +542,7 @@ func (ca *clusterAdmin) AlterPartitionReassignments(topic string, assignment [][ request.AddBlock(topic, int32(i), assignment[i]) } - return ca.retryOnError(isErrNotController, func() error { + return ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -573,7 +589,7 @@ func (ca *clusterAdmin) ListPartitionReassignments(topic string, partitions []in request.AddBlock(topic, partitions) var rsp *ListPartitionReassignmentsResponse - err = ca.retryOnError(isErrNotController, func() error { + err = ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -581,7 +597,7 @@ func (ca *clusterAdmin) ListPartitionReassignments(topic string, partitions []in _ = b.Open(ca.client.Config()) rsp, err = b.ListPartitionReassignments(request) - if isErrNotController(err) { + if isRetriableControllerError(err) { _, _ = ca.refreshController() } return err @@ -924,7 +940,7 @@ func (ca *clusterAdmin) ElectLeaders(electionType ElectionType, partitions map[s } var res *ElectLeadersResponse - err := ca.retryOnError(isErrNotController, func() error { + if err := ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -932,12 +948,17 @@ func (ca *clusterAdmin) ElectLeaders(electionType ElectionType, partitions map[s _ = b.Open(ca.client.Config()) res, err = b.ElectLeaders(request) - if isErrNotController(err) { - _, _ = ca.refreshController() + if err != nil { + return err } - return err - }) - if err != nil { + if !errors.Is(res.ErrorCode, ErrNoError) { + if isRetriableControllerError(res.ErrorCode) { + _, _ = ca.refreshController() + } + return res.ErrorCode + } + return nil + }); err != nil { return nil, err } return res.ReplicaElectionResults, nil @@ -947,11 +968,11 @@ func (ca *clusterAdmin) DescribeConsumerGroups(groups []string) (result []*Group groupsPerBroker := make(map[*Broker][]string) for _, group := range groups { - controller, err := ca.client.Coordinator(group) + coordinator, err := ca.client.Coordinator(group) if err != nil { return nil, err } - groupsPerBroker[controller] = append(groupsPerBroker[controller], group) + groupsPerBroker[coordinator] = append(groupsPerBroker[coordinator], group) } for broker, brokerGroups := range groupsPerBroker { @@ -1043,22 +1064,36 @@ func (ca *clusterAdmin) ListConsumerGroups() (allGroups map[string]string, err e } func (ca *clusterAdmin) ListConsumerGroupOffsets(group string, topicPartitions map[string][]int32) (*OffsetFetchResponse, error) { - coordinator, err := ca.client.Coordinator(group) - if err != nil { - return nil, err - } - + var response *OffsetFetchResponse request := NewOffsetFetchRequest(ca.conf.Version, group, topicPartitions) + err := ca.retryOnError(isRetriableGroupCoordinatorError, func() (err error) { + defer func() { + if err != nil && isRetriableGroupCoordinatorError(err) { + _ = ca.client.RefreshCoordinator(group) + } + }() + + coordinator, err := ca.client.Coordinator(group) + if err != nil { + return err + } - return coordinator.FetchOffset(request) + response, err = coordinator.FetchOffset(request) + if err != nil { + return err + } + if !errors.Is(response.Err, ErrNoError) { + return 
response.Err + } + + return nil + }) + + return response, err } func (ca *clusterAdmin) DeleteConsumerGroupOffset(group string, topic string, partition int32) error { - coordinator, err := ca.client.Coordinator(group) - if err != nil { - return err - } - + var response *DeleteOffsetsResponse request := &DeleteOffsetsRequest{ Group: group, partitions: map[string][]int32{ @@ -1066,27 +1101,35 @@ func (ca *clusterAdmin) DeleteConsumerGroupOffset(group string, topic string, pa }, } - resp, err := coordinator.DeleteOffsets(request) - if err != nil { - return err - } + return ca.retryOnError(isRetriableGroupCoordinatorError, func() (err error) { + defer func() { + if err != nil && isRetriableGroupCoordinatorError(err) { + _ = ca.client.RefreshCoordinator(group) + } + }() - if !errors.Is(resp.ErrorCode, ErrNoError) { - return resp.ErrorCode - } + coordinator, err := ca.client.Coordinator(group) + if err != nil { + return err + } - if !errors.Is(resp.Errors[topic][partition], ErrNoError) { - return resp.Errors[topic][partition] - } - return nil + response, err = coordinator.DeleteOffsets(request) + if err != nil { + return err + } + if !errors.Is(response.ErrorCode, ErrNoError) { + return response.ErrorCode + } + if !errors.Is(response.Errors[topic][partition], ErrNoError) { + return response.Errors[topic][partition] + } + + return nil + }) } func (ca *clusterAdmin) DeleteConsumerGroup(group string) error { - coordinator, err := ca.client.Coordinator(group) - if err != nil { - return err - } - + var response *DeleteGroupsResponse request := &DeleteGroupsRequest{ Groups: []string{group}, } @@ -1094,21 +1137,34 @@ func (ca *clusterAdmin) DeleteConsumerGroup(group string) error { request.Version = 1 } - resp, err := coordinator.DeleteGroups(request) - if err != nil { - return err - } + return ca.retryOnError(isRetriableGroupCoordinatorError, func() (err error) { + defer func() { + if err != nil && isRetriableGroupCoordinatorError(err) { + _ = ca.client.RefreshCoordinator(group) + } + }() - groupErr, ok := resp.GroupErrorCodes[group] - if !ok { - return ErrIncompleteResponse - } + coordinator, err := ca.client.Coordinator(group) + if err != nil { + return err + } - if !errors.Is(groupErr, ErrNoError) { - return groupErr - } + response, err = coordinator.DeleteGroups(request) + if err != nil { + return err + } - return nil + groupErr, ok := response.GroupErrorCodes[group] + if !ok { + return ErrIncompleteResponse + } + + if !errors.Is(groupErr, ErrNoError) { + return groupErr + } + + return nil + }) } func (ca *clusterAdmin) DescribeLogDirs(brokerIds []int32) (allLogDirs map[int32][]DescribeLogDirsResponseDirMetadata, err error) { @@ -1206,7 +1262,7 @@ func (ca *clusterAdmin) AlterUserScramCredentials(u []AlterUserScramCredentialsU } var rsp *AlterUserScramCredentialsResponse - err := ca.retryOnError(isErrNotController, func() error { + err := ca.retryOnError(isRetriableControllerError, func() error { b, err := ca.Controller() if err != nil { return err @@ -1284,18 +1340,14 @@ func (ca *clusterAdmin) AlterClientQuotas(entity []QuotaEntityComponent, op Clie return nil } -func (ca *clusterAdmin) RemoveMemberFromConsumerGroup(groupId string, groupInstanceIds []string) (*LeaveGroupResponse, error) { +func (ca *clusterAdmin) RemoveMemberFromConsumerGroup(group string, groupInstanceIds []string) (*LeaveGroupResponse, error) { if !ca.conf.Version.IsAtLeast(V2_4_0_0) { return nil, ConfigurationError("Removing members from a consumer group headers requires Kafka version of at least v2.4.0") } - - controller, 
err := ca.client.Coordinator(groupId) - if err != nil { - return nil, err - } + var response *LeaveGroupResponse request := &LeaveGroupRequest{ Version: 3, - GroupId: groupId, + GroupId: group, } for _, instanceId := range groupInstanceIds { groupInstanceId := instanceId @@ -1303,5 +1355,28 @@ func (ca *clusterAdmin) RemoveMemberFromConsumerGroup(groupId string, groupInsta GroupInstanceId: &groupInstanceId, }) } - return controller.LeaveGroup(request) + err := ca.retryOnError(isRetriableGroupCoordinatorError, func() (err error) { + defer func() { + if err != nil && isRetriableGroupCoordinatorError(err) { + _ = ca.client.RefreshCoordinator(group) + } + }() + + coordinator, err := ca.client.Coordinator(group) + if err != nil { + return err + } + + response, err = coordinator.LeaveGroup(request) + if err != nil { + return err + } + if !errors.Is(response.Err, ErrNoError) { + return response.Err + } + + return nil + }) + + return response, err } diff --git a/vendor/github.com/IBM/sarama/balance_strategy.go b/vendor/github.com/IBM/sarama/balance_strategy.go index 30d41779c1e9b..b5bc30a13bdfc 100644 --- a/vendor/github.com/IBM/sarama/balance_strategy.go +++ b/vendor/github.com/IBM/sarama/balance_strategy.go @@ -989,6 +989,7 @@ func (p *partitionMovements) getTheActualPartitionToBeMoved(partition topicParti return reversePairPartition } +//nolint:unused // this is used but only in unittests as a helper (which are excluded by the integration build tag) func (p *partitionMovements) isLinked(src, dst string, pairs []consumerPair, currentPath []string) ([]string, bool) { if src == dst { return currentPath, false @@ -1023,6 +1024,7 @@ func (p *partitionMovements) isLinked(src, dst string, pairs []consumerPair, cur return currentPath, false } +//nolint:unused // this is used but only in unittests as a helper (which are excluded by the integration build tag) func (p *partitionMovements) in(cycle []string, cycles [][]string) bool { superCycle := make([]string, len(cycle)-1) for i := 0; i < len(cycle)-1; i++ { @@ -1037,6 +1039,7 @@ func (p *partitionMovements) in(cycle []string, cycles [][]string) bool { return false } +//nolint:unused // this is used but only in unittests as a helper (which are excluded by the integration build tag) func (p *partitionMovements) hasCycles(pairs []consumerPair) bool { cycles := make([][]string, 0) for _, pair := range pairs { @@ -1068,6 +1071,7 @@ func (p *partitionMovements) hasCycles(pairs []consumerPair) bool { return false } +//nolint:unused // this is used but only in unittests as a helper (which are excluded by the integration build tag) func (p *partitionMovements) isSticky() bool { for topic, movements := range p.PartitionMovementsByTopic { movementPairs := make([]consumerPair, len(movements)) @@ -1085,6 +1089,7 @@ func (p *partitionMovements) isSticky() bool { return true } +//nolint:unused // this is used but only in unittests as a helper (which are excluded by the integration build tag) func indexOfSubList(source []string, target []string) int { targetSize := len(target) maxCandidate := len(source) - targetSize diff --git a/vendor/github.com/IBM/sarama/create_topics_request.go b/vendor/github.com/IBM/sarama/create_topics_request.go index 8382d17c20a74..e8c0f01472c35 100644 --- a/vendor/github.com/IBM/sarama/create_topics_request.go +++ b/vendor/github.com/IBM/sarama/create_topics_request.go @@ -16,6 +16,21 @@ type CreateTopicsRequest struct { ValidateOnly bool } +func NewCreateTopicsRequest(version KafkaVersion, topicDetails map[string]*TopicDetail, timeout 
time.Duration) *CreateTopicsRequest { + r := &CreateTopicsRequest{ + TopicDetails: topicDetails, + Timeout: timeout, + } + if version.IsAtLeast(V2_0_0_0) { + r.Version = 3 + } else if version.IsAtLeast(V0_11_0_0) { + r.Version = 2 + } else if version.IsAtLeast(V0_10_2_0) { + r.Version = 1 + } + return r +} + func (c *CreateTopicsRequest) encode(pe packetEncoder) error { if err := pe.putArrayLength(len(c.TopicDetails)); err != nil { return err diff --git a/vendor/github.com/IBM/sarama/delete_topics_request.go b/vendor/github.com/IBM/sarama/delete_topics_request.go index 252c0d0259461..f38f32770be7b 100644 --- a/vendor/github.com/IBM/sarama/delete_topics_request.go +++ b/vendor/github.com/IBM/sarama/delete_topics_request.go @@ -8,6 +8,21 @@ type DeleteTopicsRequest struct { Timeout time.Duration } +func NewDeleteTopicsRequest(version KafkaVersion, topics []string, timeout time.Duration) *DeleteTopicsRequest { + d := &DeleteTopicsRequest{ + Topics: topics, + Timeout: timeout, + } + if version.IsAtLeast(V2_1_0_0) { + d.Version = 3 + } else if version.IsAtLeast(V2_0_0_0) { + d.Version = 2 + } else if version.IsAtLeast(V0_11_0_0) { + d.Version = 1 + } + return d +} + func (d *DeleteTopicsRequest) encode(pe packetEncoder) error { if err := pe.putStringArray(d.Topics); err != nil { return err diff --git a/vendor/github.com/IBM/sarama/docker-compose.yml b/vendor/github.com/IBM/sarama/docker-compose.yml index a0e3d2e21e925..1e66cca0ce5d4 100644 --- a/vendor/github.com/IBM/sarama/docker-compose.yml +++ b/vendor/github.com/IBM/sarama/docker-compose.yml @@ -1,6 +1,6 @@ services: zookeeper-1: - hostname: 'zookeeper-1' + container_name: 'zookeeper-1' image: 'docker.io/library/zookeeper:3.7.2' init: true restart: always @@ -13,7 +13,7 @@ services: ZOO_MAX_CLIENT_CNXNS: '0' ZOO_4LW_COMMANDS_WHITELIST: 'mntr,conf,ruok' zookeeper-2: - hostname: 'zookeeper-2' + container_name: 'zookeeper-2' image: 'docker.io/library/zookeeper:3.7.2' init: true restart: always @@ -26,7 +26,7 @@ services: ZOO_MAX_CLIENT_CNXNS: '0' ZOO_4LW_COMMANDS_WHITELIST: 'mntr,conf,ruok' zookeeper-3: - hostname: 'zookeeper-3' + container_name: 'zookeeper-3' image: 'docker.io/library/zookeeper:3.7.2' init: true restart: always @@ -39,7 +39,7 @@ services: ZOO_MAX_CLIENT_CNXNS: '0' ZOO_4LW_COMMANDS_WHITELIST: 'mntr,conf,ruok' kafka-1: - hostname: 'kafka-1' + container_name: 'kafka-1' image: 'sarama/fv-kafka-${KAFKA_VERSION:-3.6.2}' init: true build: @@ -74,6 +74,7 @@ services: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' KAFKA_CFG_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: '2' KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: '2' KAFKA_CFG_BROKER_ID: '1' KAFKA_CFG_BROKER_RACK: '1' @@ -85,7 +86,7 @@ services: KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 KAFKA_JVM_PERFORMANCE_OPTS: "-XX:+IgnoreUnrecognizedVMOptions" kafka-2: - hostname: 'kafka-2' + container_name: 'kafka-2' image: 'sarama/fv-kafka-${KAFKA_VERSION:-3.6.2}' init: true build: @@ -120,6 +121,7 @@ services: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' KAFKA_CFG_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: '2' KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: '2' KAFKA_CFG_BROKER_ID: '2' KAFKA_CFG_BROKER_RACK: '2' @@ -131,7 +133,7 @@ services: KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 
KAFKA_JVM_PERFORMANCE_OPTS: "-XX:+IgnoreUnrecognizedVMOptions" kafka-3: - hostname: 'kafka-3' + container_name: 'kafka-3' image: 'sarama/fv-kafka-${KAFKA_VERSION:-3.6.2}' init: true build: @@ -166,6 +168,7 @@ services: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' KAFKA_CFG_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: '2' KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: '2' KAFKA_CFG_BROKER_ID: '3' KAFKA_CFG_BROKER_RACK: '3' @@ -177,7 +180,7 @@ services: KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 KAFKA_JVM_PERFORMANCE_OPTS: "-XX:+IgnoreUnrecognizedVMOptions" kafka-4: - hostname: 'kafka-4' + container_name: 'kafka-4' image: 'sarama/fv-kafka-${KAFKA_VERSION:-3.6.2}' init: true build: @@ -212,6 +215,7 @@ services: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' KAFKA_CFG_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: '2' KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: '2' KAFKA_CFG_BROKER_ID: '4' KAFKA_CFG_BROKER_RACK: '4' @@ -223,7 +227,7 @@ services: KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 KAFKA_JVM_PERFORMANCE_OPTS: "-XX:+IgnoreUnrecognizedVMOptions" kafka-5: - hostname: 'kafka-5' + container_name: 'kafka-5' image: 'sarama/fv-kafka-${KAFKA_VERSION:-3.6.2}' init: true build: @@ -258,6 +262,7 @@ services: KAFKA_CFG_INTER_BROKER_LISTENER_NAME: 'LISTENER_INTERNAL' KAFKA_CFG_LISTENER_SECURITY_PROTOCOL_MAP: 'LISTENER_INTERNAL:PLAINTEXT,LISTENER_LOCAL:PLAINTEXT' KAFKA_CFG_DEFAULT_REPLICATION_FACTOR: '2' + KAFKA_CFG_OFFSETS_TOPIC_REPLICATION_FACTOR: '2' KAFKA_CFG_TRANSACTION_STATE_LOG_REPLICATION_FACTOR: '2' KAFKA_CFG_BROKER_ID: '5' KAFKA_CFG_BROKER_RACK: '5' @@ -269,7 +274,7 @@ services: KAFKA_CFG_GROUP_INITIAL_REBALANCE_DELAY_MS: 0 KAFKA_JVM_PERFORMANCE_OPTS: "-XX:+IgnoreUnrecognizedVMOptions" toxiproxy: - hostname: 'toxiproxy' + container_name: 'toxiproxy' image: 'ghcr.io/shopify/toxiproxy:2.4.0' init: true healthcheck: diff --git a/vendor/github.com/IBM/sarama/errors.go b/vendor/github.com/IBM/sarama/errors.go index 2c431aecb05f0..842d302571ae7 100644 --- a/vendor/github.com/IBM/sarama/errors.go +++ b/vendor/github.com/IBM/sarama/errors.go @@ -304,7 +304,7 @@ func (err KError) Error() string { case ErrOffsetsLoadInProgress: return "kafka server: The coordinator is still loading offsets and cannot currently process requests" case ErrConsumerCoordinatorNotAvailable: - return "kafka server: Offset's topic has not yet been created" + return "kafka server: The coordinator is not available" case ErrNotCoordinatorForConsumer: return "kafka server: Request was for a consumer group that is not coordinated by this broker" case ErrInvalidTopic: diff --git a/vendor/github.com/IBM/sarama/server.properties b/vendor/github.com/IBM/sarama/server.properties new file mode 100644 index 0000000000000..21ba1c7d9c61b --- /dev/null +++ b/vendor/github.com/IBM/sarama/server.properties @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# +# This configuration file is intended for use in ZK-based mode, where Apache ZooKeeper is required. +# See kafka.server.KafkaConfig for additional details and defaults +# + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +broker.id=0 + +############################# Socket Server Settings ############################# + +# The address the socket server listens on. If not configured, the host name will be equal to the value of +# java.net.InetAddress.getCanonicalHostName(), with PLAINTEXT listener name, and port 9092. +# FORMAT: +# listeners = listener_name://host_name:port +# EXAMPLE: +# listeners = PLAINTEXT://your.host.name:9092 +#listeners=PLAINTEXT://:9092 + +# Listener name, hostname and port the broker will advertise to clients. +# If not set, it uses the value for "listeners". +#advertised.listeners=PLAINTEXT://your.host.name:9092 + +# Maps listener names to security protocols, the default is for them to be the same. See the config documentation for more details +#listener.security.protocol.map=PLAINTEXT:PLAINTEXT,SSL:SSL,SASL_PLAINTEXT:SASL_PLAINTEXT,SASL_SSL:SASL_SSL + +# The number of threads that the server uses for receiving requests from the network and sending responses to the network +num.network.threads=3 + +# The number of threads that the server uses for processing requests, which may include disk I/O +num.io.threads=8 + +# The send buffer (SO_SNDBUF) used by the socket server +socket.send.buffer.bytes=102400 + +# The receive buffer (SO_RCVBUF) used by the socket server +socket.receive.buffer.bytes=102400 + +# The maximum size of a request that the socket server will accept (protection against OOM) +socket.request.max.bytes=104857600 + + +############################# Log Basics ############################# + +# A comma separated list of directories under which to store log files +log.dirs=/tmp/kafka-logs + +# The default number of log partitions per topic. More partitions allow greater +# parallelism for consumption, but this will also result in more files across +# the brokers. +num.partitions=1 + +# The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. +# This value is recommended to be increased for installations with data dirs located in RAID array. +num.recovery.threads.per.data.dir=1 + +############################# Internal Topic Settings ############################# +# The replication factor for the group metadata internal topics "__consumer_offsets" and "__transaction_state" +# For anything other than development testing, a value greater than 1 is recommended to ensure availability such as 3. +offsets.topic.replication.factor=1 +transaction.state.log.replication.factor=1 +transaction.state.log.min.isr=1 + +############################# Log Flush Policy ############################# + +# Messages are immediately written to the filesystem but by default we only fsync() to sync +# the OS cache lazily. The following configurations control the flush of data to disk. 
+# There are a few important trade-offs here: +# 1. Durability: Unflushed data may be lost if you are not using replication. +# 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. +# 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to excessive seeks. +# The settings below allow one to configure the flush policy to flush data after a period of time or +# every N messages (or both). This can be done globally and overridden on a per-topic basis. + +# The number of messages to accept before forcing a flush of data to disk +#log.flush.interval.messages=10000 + +# The maximum amount of time a message can sit in a log before we force a flush +#log.flush.interval.ms=1000 + +############################# Log Retention Policy ############################# + +# The following configurations control the disposal of log segments. The policy can +# be set to delete segments after a period of time, or after a given size has accumulated. +# A segment will be deleted whenever *either* of these criteria are met. Deletion always happens +# from the end of the log. + +# The minimum age of a log file to be eligible for deletion due to age +log.retention.hours=168 + +# A size-based retention policy for logs. Segments are pruned from the log unless the remaining +# segments drop below log.retention.bytes. Functions independently of log.retention.hours. +#log.retention.bytes=1073741824 + +# The maximum size of a log segment file. When this size is reached a new log segment will be created. +#log.segment.bytes=1073741824 + +# The interval at which log segments are checked to see if they can be deleted according +# to the retention policies +log.retention.check.interval.ms=300000 + +############################# Zookeeper ############################# + +# Zookeeper connection string (see zookeeper docs for details). +# This is a comma separated host:port pairs, each corresponding to a zk +# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". +# You can also append an optional chroot string to the urls to specify the +# root directory for all kafka znodes. +zookeeper.connect=localhost:2181 + +# Timeout in ms for connecting to zookeeper +zookeeper.connection.timeout.ms=18000 + + +############################# Group Coordinator Settings ############################# + +# The following configuration specifies the time, in milliseconds, that the GroupCoordinator will delay the initial consumer rebalance. +# The rebalance will be further delayed by the value of group.initial.rebalance.delay.ms as new members join the group, up to a maximum of max.poll.interval.ms. +# The default value for this is 3 seconds. +# We override this to 0 here as it makes for a better out-of-the-box experience for development and testing. +# However, in production environments the default value of 3 seconds is more suitable as this will help to avoid unnecessary, and potentially expensive, rebalances during application startup. 
+group.initial.rebalance.delay.ms=0 diff --git a/vendor/github.com/IBM/sarama/utils.go b/vendor/github.com/IBM/sarama/utils.go index d5b77e0d92b36..b0e1aceff14d4 100644 --- a/vendor/github.com/IBM/sarama/utils.go +++ b/vendor/github.com/IBM/sarama/utils.go @@ -206,6 +206,7 @@ var ( V3_8_0_0 = newKafkaVersion(3, 8, 0, 0) V3_8_1_0 = newKafkaVersion(3, 8, 1, 0) V3_9_0_0 = newKafkaVersion(3, 9, 0, 0) + V4_0_0_0 = newKafkaVersion(4, 0, 0, 0) SupportedVersions = []KafkaVersion{ V0_8_2_0, @@ -277,9 +278,10 @@ var ( V3_8_0_0, V3_8_1_0, V3_9_0_0, + V4_0_0_0, } MinVersion = V0_8_2_0 - MaxVersion = V3_9_0_0 + MaxVersion = V4_0_0_0 DefaultVersion = V2_1_0_0 // reduced set of protocol versions to matrix test diff --git a/vendor/modules.txt b/vendor/modules.txt index 5ff5397c6ec11..5040e9ea62e0e 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -263,8 +263,8 @@ github.com/IBM/ibm-cos-sdk-go/private/protocol/restxml github.com/IBM/ibm-cos-sdk-go/private/protocol/xml/xmlutil github.com/IBM/ibm-cos-sdk-go/service/s3 github.com/IBM/ibm-cos-sdk-go/service/s3/s3iface -# github.com/IBM/sarama v1.44.0 -## explicit; go 1.20 +# github.com/IBM/sarama v1.45.0 +## explicit; go 1.21 github.com/IBM/sarama # github.com/Masterminds/goutils v1.1.1 ## explicit From b6df82e9654fb8ae529fdc442ae2427971fa7955 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 8 Jan 2025 19:07:21 -0500 Subject: [PATCH 4/9] fix(deps): update module github.com/axiomhq/hyperloglog to v0.2.3 (#15641) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 +- .../axiomhq/hyperloglog/compressed.go | 51 +++++++++---------- .../axiomhq/hyperloglog/hyperloglog.go | 30 ++++++----- .../github.com/axiomhq/hyperloglog/sparse.go | 25 ++++----- vendor/modules.txt | 2 +- 6 files changed, 58 insertions(+), 56 deletions(-) diff --git a/go.mod b/go.mod index f13f5b9f6be1d..dd11345babaea 100644 --- a/go.mod +++ b/go.mod @@ -116,7 +116,7 @@ require ( github.com/DmitriyVTitov/size v1.5.0 github.com/IBM/go-sdk-core/v5 v5.18.3 github.com/IBM/ibm-cos-sdk-go v1.12.0 - github.com/axiomhq/hyperloglog v0.2.2 + github.com/axiomhq/hyperloglog v0.2.3 github.com/buger/jsonparser v1.1.1 github.com/d4l3k/messagediff v1.2.1 github.com/dolthub/swiss v0.2.1 diff --git a/go.sum b/go.sum index 15cb39121db77..85319be119f5d 100644 --- a/go.sum +++ b/go.sum @@ -222,8 +222,8 @@ github.com/aws/aws-sdk-go-v2/service/sts v1.32.4 h1:yDxvkz3/uOKfxnv8YhzOi9m+2OGI github.com/aws/aws-sdk-go-v2/service/sts v1.32.4/go.mod h1:9XEUty5v5UAsMiFOBJrNibZgwCeOma73jgGwwhgffa8= github.com/aws/smithy-go v1.22.1 h1:/HPHZQ0g7f4eUeK6HKglFz8uwVfZKgoI25rb/J+dnro= github.com/aws/smithy-go v1.22.1/go.mod h1:irrKGvNn1InZwb2d7fkIRNucdfwR8R+Ts3wxYa/cJHg= -github.com/axiomhq/hyperloglog v0.2.2 h1:9X9rOdYx82zXKgd1aMsDZNUw3d7DKAHhd2J305HZPA8= -github.com/axiomhq/hyperloglog v0.2.2/go.mod h1:DLUK9yIzpU5B6YFLjxTIcbHu1g4Y1WQb1m5RH3radaM= +github.com/axiomhq/hyperloglog v0.2.3 h1:2ZGwz3FGcx77e9/aNjqJijsGhH6RZOlglzxnDpVBCQY= +github.com/axiomhq/hyperloglog v0.2.3/go.mod h1:DLUK9yIzpU5B6YFLjxTIcbHu1g4Y1WQb1m5RH3radaM= github.com/baidubce/bce-sdk-go v0.9.212 h1:B3PUoaFi4m13wP7gWObznjPLZ5umQ1BHjO/UoSsj3x4= github.com/baidubce/bce-sdk-go v0.9.212/go.mod h1:zbYJMQwE4IZuyrJiFO8tO8NbtYiKTFTbwh4eIsqjVdg= github.com/bboreham/go-loser v0.0.0-20230920113527-fcc2c21820a3 h1:6df1vn4bBlDDo4tARvBm7l6KA9iVMnE3NWizDeWSrps= diff --git a/vendor/github.com/axiomhq/hyperloglog/compressed.go 
b/vendor/github.com/axiomhq/hyperloglog/compressed.go index d140b88c16cdc..865901bbc690d 100644 --- a/vendor/github.com/axiomhq/hyperloglog/compressed.go +++ b/vendor/github.com/axiomhq/hyperloglog/compressed.go @@ -1,6 +1,9 @@ package hyperloglog -import "encoding/binary" +import ( + "encoding/binary" + "slices" +) // Original author of this file is github.com/clarkduvall/hyperloglog type iterable interface { @@ -52,32 +55,26 @@ func (v *compressedList) Clone() *compressedList { return newV } -func (v *compressedList) MarshalBinary() (data []byte, err error) { - // Marshal the variableLengthList - bdata, err := v.b.MarshalBinary() - if err != nil { - return nil, err - } - - // At least 4 bytes for the two fixed sized values plus the size of bdata. - data = make([]byte, 0, 4+4+len(bdata)) +func (v *compressedList) AppendBinary(data []byte) ([]byte, error) { + // At least 4 bytes for the two fixed sized values + data = slices.Grow(data, 4+4) // Marshal the count and last values. - data = append(data, []byte{ + data = append(data, // Number of items in the list. - byte(v.count >> 24), - byte(v.count >> 16), - byte(v.count >> 8), + byte(v.count>>24), + byte(v.count>>16), + byte(v.count>>8), byte(v.count), // The last item in the list. - byte(v.last >> 24), - byte(v.last >> 16), - byte(v.last >> 8), + byte(v.last>>24), + byte(v.last>>16), + byte(v.last>>8), byte(v.last), - }...) + ) - // Append the list - return append(data, bdata...), nil + // Append the variableLengthList + return v.b.AppendBinary(data) } func (v *compressedList) UnmarshalBinary(data []byte) error { @@ -130,20 +127,20 @@ func (v *compressedList) Iter() *iterator { type variableLengthList []uint8 -func (v variableLengthList) MarshalBinary() (data []byte, err error) { +func (v variableLengthList) AppendBinary(data []byte) ([]byte, error) { // 4 bytes for the size of the list, and a byte for each element in the // list. - data = make([]byte, 0, 4+v.Len()) + data = slices.Grow(data, 4+v.Len()) // Length of the list. We only need 32 bits because the size of the set // couldn't exceed that on 32 bit architectures. sz := v.Len() - data = append(data, []byte{ - byte(sz >> 24), - byte(sz >> 16), - byte(sz >> 8), + data = append(data, + byte(sz>>24), + byte(sz>>16), + byte(sz>>8), byte(sz), - }...) + ) // Marshal each element in the list. for i := 0; i < sz; i++ { diff --git a/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go b/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go index 24b39e43562aa..f4fbb01e96592 100644 --- a/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go +++ b/vendor/github.com/axiomhq/hyperloglog/hyperloglog.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "math" + "slices" "sort" ) @@ -203,8 +204,16 @@ func (sk *Sketch) mergeSparse() { } // MarshalBinary implements the encoding.BinaryMarshaler interface. +// +// When the result will be appended to another buffer, consider using +// AppendBinary to avoid additional allocations and copying. func (sk *Sketch) MarshalBinary() (data []byte, err error) { - data = make([]byte, 0, 8+len(sk.regs)) + return sk.AppendBinary(nil) +} + +// AppendBinary implements the encoding.BinaryAppender interface. +func (sk *Sketch) AppendBinary(data []byte) ([]byte, error) { + data = slices.Grow(data, 8+len(sk.regs)) // Marshal a version marker. data = append(data, version) // Marshal p. 
@@ -217,18 +226,13 @@ func (sk *Sketch) MarshalBinary() (data []byte, err error) { data = append(data, byte(1)) // Add the tmp_set - tsdata, err := sk.tmpSet.MarshalBinary() + data, err := sk.tmpSet.AppendBinary(data) if err != nil { return nil, err } - data = append(data, tsdata...) // Add the sparse Sketch - sdata, err := sk.sparseList.MarshalBinary() - if err != nil { - return nil, err - } - return append(data, sdata...), nil + return sk.sparseList.AppendBinary(data) } // It's using the dense Sketch. @@ -236,12 +240,12 @@ func (sk *Sketch) MarshalBinary() (data []byte, err error) { // Add the dense sketch Sketch. sz := len(sk.regs) - data = append(data, []byte{ - byte(sz >> 24), - byte(sz >> 16), - byte(sz >> 8), + data = append(data, + byte(sz>>24), + byte(sz>>16), + byte(sz>>8), byte(sz), - }...) + ) // Marshal each element in the list. for _, v := range sk.regs { diff --git a/vendor/github.com/axiomhq/hyperloglog/sparse.go b/vendor/github.com/axiomhq/hyperloglog/sparse.go index 0151740df9859..ca2b939516664 100644 --- a/vendor/github.com/axiomhq/hyperloglog/sparse.go +++ b/vendor/github.com/axiomhq/hyperloglog/sparse.go @@ -2,6 +2,7 @@ package hyperloglog import ( "math/bits" + "slices" "github.com/kamstrup/intmap" ) @@ -83,29 +84,29 @@ func (s *set) Clone() *set { return &set{m: newS} } -func (s *set) MarshalBinary() (data []byte, err error) { +func (s *set) AppendBinary(data []byte) ([]byte, error) { // 4 bytes for the size of the set, and 4 bytes for each key. // list. - data = make([]byte, 0, 4+(4*s.m.Len())) + data = slices.Grow(data, 4+(4*s.m.Len())) // Length of the set. We only need 32 bits because the size of the set // couldn't exceed that on 32 bit architectures. sl := s.m.Len() - data = append(data, []byte{ - byte(sl >> 24), - byte(sl >> 16), - byte(sl >> 8), + data = append(data, + byte(sl>>24), + byte(sl>>16), + byte(sl>>8), byte(sl), - }...) + ) // Marshal each element in the set. s.m.ForEach(func(k uint32) bool { - data = append(data, []byte{ - byte(k >> 24), - byte(k >> 16), - byte(k >> 8), + data = append(data, + byte(k>>24), + byte(k>>16), + byte(k>>8), byte(k), - }...) 
+ ) return true }) diff --git a/vendor/modules.txt b/vendor/modules.txt index 5040e9ea62e0e..a22ba57b6d10c 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -487,7 +487,7 @@ github.com/aws/smithy-go/time github.com/aws/smithy-go/tracing github.com/aws/smithy-go/transport/http github.com/aws/smithy-go/transport/http/internal/io -# github.com/axiomhq/hyperloglog v0.2.2 +# github.com/axiomhq/hyperloglog v0.2.3 ## explicit; go 1.23 github.com/axiomhq/hyperloglog # github.com/baidubce/bce-sdk-go v0.9.212 From 74885a20c735e8cd02bb590336bb7ae81c54bb33 Mon Sep 17 00:00:00 2001 From: "renovate[bot]" <29139614+renovate[bot]@users.noreply.github.com> Date: Wed, 8 Jan 2025 19:08:04 -0500 Subject: [PATCH 5/9] fix(deps): update module google.golang.org/protobuf to v1.36.2 (#15635) Co-authored-by: renovate[bot] <29139614+renovate[bot]@users.noreply.github.com> --- go.mod | 2 +- go.sum | 4 ++-- .../protobuf/internal/impl/message_opaque.go | 24 ++++++++++++++++--- .../protobuf/internal/version/version.go | 2 +- vendor/modules.txt | 2 +- 5 files changed, 26 insertions(+), 8 deletions(-) diff --git a/go.mod b/go.mod index dd11345babaea..ef312d46309fa 100644 --- a/go.mod +++ b/go.mod @@ -147,7 +147,7 @@ require ( go4.org/netipx v0.0.0-20230125063823-8449b0a6169f golang.org/x/oauth2 v0.25.0 golang.org/x/text v0.21.0 - google.golang.org/protobuf v1.36.1 + google.golang.org/protobuf v1.36.2 gotest.tools v2.2.0+incompatible k8s.io/apimachinery v0.32.0 k8s.io/utils v0.0.0-20241104163129-6fe5fd82f078 diff --git a/go.sum b/go.sum index 85319be119f5d..0452a1696821f 100644 --- a/go.sum +++ b/go.sum @@ -1653,8 +1653,8 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.27.1/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= -google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= -google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.2 h1:R8FeyR1/eLmkutZOM5CWghmo5itiG9z0ktFlTVLuTmU= +google.golang.org/protobuf v1.36.2/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/alecthomas/kingpin.v2 v2.2.6/go.mod h1:FMv+mEhP44yOT+4EoQTLFTRgOQ1FBLkstjWtayDeSgw= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/vendor/google.golang.org/protobuf/internal/impl/message_opaque.go b/vendor/google.golang.org/protobuf/internal/impl/message_opaque.go index d407dd791e89f..d7ec53f074ac4 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message_opaque.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message_opaque.go @@ -88,9 +88,7 @@ func opaqueInitHook(mi *MessageInfo) bool { mi.oneofs = map[protoreflect.Name]*oneofInfo{} for i := 0; i < mi.Desc.Oneofs().Len(); i++ { od := mi.Desc.Oneofs().Get(i) - if !od.IsSynthetic() { - mi.oneofs[od.Name()] = makeOneofInfo(od, si.structInfo, mi.Exporter) - } + mi.oneofs[od.Name()] = makeOneofInfoOpaque(mi, od, si.structInfo, mi.Exporter) } mi.denseFields = make([]*fieldInfo, fds.Len()*2) @@ -119,6 +117,26 @@ func opaqueInitHook(mi *MessageInfo) bool { return true } +func makeOneofInfoOpaque(mi *MessageInfo, od protoreflect.OneofDescriptor, si 
structInfo, x exporter) *oneofInfo { + oi := &oneofInfo{oneofDesc: od} + if od.IsSynthetic() { + fd := od.Fields().Get(0) + index, _ := presenceIndex(mi.Desc, fd) + oi.which = func(p pointer) protoreflect.FieldNumber { + if p.IsNil() { + return 0 + } + if !mi.present(p, index) { + return 0 + } + return od.Fields().Get(0).Number() + } + return oi + } + // Dispatch to non-opaque oneof implementation for non-synthetic oneofs. + return makeOneofInfo(od, si, x) +} + func (mi *MessageInfo) fieldInfoForMapOpaque(si opaqueStructInfo, fd protoreflect.FieldDescriptor, fs reflect.StructField) fieldInfo { ft := fs.Type if ft.Kind() != reflect.Map { diff --git a/vendor/google.golang.org/protobuf/internal/version/version.go b/vendor/google.golang.org/protobuf/internal/version/version.go index 3018450df7998..386c823aa641c 100644 --- a/vendor/google.golang.org/protobuf/internal/version/version.go +++ b/vendor/google.golang.org/protobuf/internal/version/version.go @@ -52,7 +52,7 @@ import ( const ( Major = 1 Minor = 36 - Patch = 1 + Patch = 2 PreRelease = "" ) diff --git a/vendor/modules.txt b/vendor/modules.txt index a22ba57b6d10c..bf14fd4052559 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -2184,7 +2184,7 @@ google.golang.org/grpc/xds/internal/xdsclient/xdsresource/version ## explicit; go 1.21 google.golang.org/grpc/stats/opentelemetry google.golang.org/grpc/stats/opentelemetry/internal -# google.golang.org/protobuf v1.36.1 +# google.golang.org/protobuf v1.36.2 ## explicit; go 1.21 google.golang.org/protobuf/encoding/protodelim google.golang.org/protobuf/encoding/protojson From e347eb7c803d573e56b4730bbaeb83bbd4a6596d Mon Sep 17 00:00:00 2001 From: matthewhudsonedb <94822000+matthewhudsonedb@users.noreply.github.com> Date: Thu, 9 Jan 2025 18:30:54 +1100 Subject: [PATCH 6/9] fix: Fix loki ruler generator url left parameter url encoding (#15601) The generate URL `left` parameter is not URL encoded. The double quotes in str variable around "queries" break hyperlinking. --- pkg/ruler/base/ruler.go | 6 +++--- pkg/ruler/base/ruler_test.go | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pkg/ruler/base/ruler.go b/pkg/ruler/base/ruler.go index adb1a7c136def..4e466810570b1 100644 --- a/pkg/ruler/base/ruler.go +++ b/pkg/ruler/base/ruler.go @@ -407,9 +407,9 @@ func grafanaLinkForExpression(expr, datasourceUID string) string { } marshaledExpression, _ := json.Marshal(exprStruct) - escapedExpression := url.QueryEscape(string(marshaledExpression)) - str := `/explore?left={"queries":[%s]}` - return fmt.Sprintf(str, escapedExpression) + params := url.Values{} + params.Set("left", fmt.Sprintf(`{"queries":[%s]}`, marshaledExpression)) + return `/explore?` + params.Encode() } // SendAlerts implements a rules.NotifyFunc for a Notifier. 
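For context on the encoding problem the `grafanaLinkForExpression` hunk above fixes, here is a small self-contained sketch — not code from this patch; the query string and variable names are made up for illustration — contrasting the old manually built `left` parameter with the `url.Values`-based approach the patch switches to:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// Stand-in for the JSON-marshaled query; in the ruler this comes from
	// json.Marshal(exprStruct) inside grafanaLinkForExpression.
	expr := `{"expr":"sum(rate({job=\"app\"}[5m]))"}`

	// Old shape: only the inner expression was query-escaped, so the literal
	// braces and double quotes around "queries" reached the URL unencoded.
	broken := fmt.Sprintf(`/explore?left={"queries":[%s]}`, url.QueryEscape(expr))

	// New shape from the patch: build the complete value first, then let
	// url.Values percent-encode the whole thing.
	params := url.Values{}
	params.Set("left", fmt.Sprintf(`{"queries":[%s]}`, expr))
	fixed := "/explore?" + params.Encode()

	fmt.Println(broken) // outer {"queries":[...]} is left unescaped
	fmt.Println(fixed)  // left=%7B%22queries%22%3A%5B...%5D%7D — fully percent-encoded
}
```

The fully percent-encoded form is exactly what the updated expectations in `ruler_test.go` below assert for the alert `GeneratorURL`.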
diff --git a/pkg/ruler/base/ruler_test.go b/pkg/ruler/base/ruler_test.go index bdb437ed9279d..6636f742456a2 100644 --- a/pkg/ruler/base/ruler_test.go +++ b/pkg/ruler/base/ruler_test.go @@ -1733,7 +1733,7 @@ func TestSendAlerts(t *testing.T) { Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, StartsAt: time.Unix(2, 0), EndsAt: time.Unix(3, 0), - GeneratorURL: fmt.Sprintf("http://localhost:8080/explore?left={\"queries\":[%s]}", escapedExpression), + GeneratorURL: fmt.Sprintf("http://localhost:8080/explore?left=%%7B%%22queries%%22%%3A%%5B%s%%5D%%7D", escapedExpression), }, }, }, @@ -1753,7 +1753,7 @@ func TestSendAlerts(t *testing.T) { Annotations: []labels.Label{{Name: "a2", Value: "v2"}}, StartsAt: time.Unix(2, 0), EndsAt: time.Unix(4, 0), - GeneratorURL: fmt.Sprintf("http://localhost:8080/explore?left={\"queries\":[%s]}", escapedExpression), + GeneratorURL: fmt.Sprintf("http://localhost:8080/explore?left=%%7B%%22queries%%22%%3A%%5B%s%%5D%%7D", escapedExpression), }, }, }, From 5f476a39e130a22dc5b641af84a0c902f83a4ad3 Mon Sep 17 00:00:00 2001 From: jackyin Date: Thu, 9 Jan 2025 18:25:03 +0800 Subject: [PATCH 7/9] fix: Fix goroutine leak in queryrange downstreamer (#15665) --- pkg/querier/queryrange/downstreamer.go | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/pkg/querier/queryrange/downstreamer.go b/pkg/querier/queryrange/downstreamer.go index c6fba0fbf49a1..b2aa50c292154 100644 --- a/pkg/querier/queryrange/downstreamer.go +++ b/pkg/querier/queryrange/downstreamer.go @@ -170,10 +170,12 @@ func (in instance) For( go func() { err := concurrency.ForEachJob(ctx, len(queries), in.parallelism, func(ctx context.Context, i int) error { res, err := fn(queries[i]) + if err != nil { + return err + } response := logql.Resp{ I: i, Res: res, - Err: err, } // Feed the result into the channel unless the work has completed. @@ -181,7 +183,7 @@ func (in instance) For( case <-ctx.Done(): case ch <- response: } - return err + return nil }) if err != nil { ch <- logql.Resp{ @@ -192,15 +194,19 @@ func (in instance) For( close(ch) }() + var err error for resp := range ch { - if resp.Err != nil { - return nil, resp.Err + if err != nil { + continue } - if err := acc.Accumulate(ctx, resp.Res, resp.I); err != nil { - return nil, err + if resp.Err != nil { + err = resp.Err + continue } + err = acc.Accumulate(ctx, resp.Res, resp.I) } - return acc.Result(), nil + + return acc.Result(), err } // convert to matrix From bef20431cbbf302e584c4eea2eb423537bcf86e7 Mon Sep 17 00:00:00 2001 From: Karsten Jeschkies Date: Thu, 9 Jan 2025 11:27:23 +0100 Subject: [PATCH 8/9] perf(approx_topk): Reduce memory usage of HyperLogLog in approx_topk. (#15559) The count min sketch data structure backing the new approx_topk aggregation uses HyperLogLog (HLL) to track the actual cardinality of the aggregated vector. We were using the sparse version of the HLL. However, that resulted in memory and allocation overhead. 
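To make the sparse-versus-dense trade-off described above concrete, here is a minimal sketch — illustrative only, not code from this patch — using the two constructors involved, and assuming the library's usual `Insert`/`Estimate` methods:

```go
package main

import (
	"fmt"

	"github.com/axiomhq/hyperloglog"
)

func main() {
	sparse := hyperloglog.New16()        // starts sparse, converts to dense as cardinality grows
	dense := hyperloglog.New16NoSparse() // allocates the dense registers up front

	for i := 0; i < 100_000; i++ {
		v := []byte(fmt.Sprintf("series-%d", i))
		sparse.Insert(v)
		dense.Insert(v)
	}

	// Both sketches converge on roughly the same estimate; the difference is
	// where the allocation cost is paid, not the accuracy of the count.
	fmt.Println(sparse.Estimate(), dense.Estimate())
}
```

The `cms.go` hunk further down makes exactly this swap, replacing `hyperloglog.New16()` with `hyperloglog.New16NoSparse()`, which matches the commit message's goal of avoiding the sparse representation's allocation overhead on the approx_topk hot path.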
--- pkg/logql/count_min_sketch.go | 7 +++++-- pkg/logql/log/labels.go | 5 ++++- pkg/logql/log/parser_test.go | 14 +++++++++++++- pkg/logql/sketch/cms.go | 2 +- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/pkg/logql/count_min_sketch.go b/pkg/logql/count_min_sketch.go index e24e089ad307b..927ff6ff772e7 100644 --- a/pkg/logql/count_min_sketch.go +++ b/pkg/logql/count_min_sketch.go @@ -3,6 +3,8 @@ package logql import ( "container/heap" "fmt" + "slices" + "strings" "github.com/axiomhq/hyperloglog" "github.com/cespare/xxhash/v2" @@ -187,16 +189,17 @@ func NewHeapCountMinSketchVector(ts int64, metricsLength, maxLabels int) HeapCou } func (v *HeapCountMinSketchVector) Add(metric labels.Labels, value float64) { + slices.SortFunc(metric, func(a, b labels.Label) int { return strings.Compare(a.Name, b.Name) }) v.buffer = metric.Bytes(v.buffer) v.F.Add(v.buffer, value) - // Add our metric if we haven't seen it - // TODO(karsten): There is a chance that the ids match but not the labels due to hash collision. Ideally there's // an else block the compares the series labels. However, that's not trivial. Besides, instance.Series has the // same issue in its deduping logic. id := xxhash.Sum64(v.buffer) + + // Add our metric if we haven't seen it if _, ok := v.observed[id]; !ok { heap.Push(v, metric) v.observed[id] = struct{}{} diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index c5ef408cc21ed..9e494f99237d4 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -2,7 +2,9 @@ package log import ( "fmt" + "slices" "sort" + "strings" "sync" "github.com/prometheus/prometheus/model/labels" @@ -585,7 +587,8 @@ func (b *LabelsBuilder) LabelsResult() LabelsResult { // Get all labels at once and sort them b.buf = b.UnsortedLabels(b.buf) - sort.Sort(b.buf) + // sort.Sort(b.buf) + slices.SortFunc(b.buf, func(a, b labels.Label) int { return strings.Compare(a.Name, b.Name) }) hash := b.hasher.Hash(b.buf) if cached, ok := b.resultCache[hash]; ok { diff --git a/pkg/logql/log/parser_test.go b/pkg/logql/log/parser_test.go index 5ac3a87503634..af332c8cb54c7 100644 --- a/pkg/logql/log/parser_test.go +++ b/pkg/logql/log/parser_test.go @@ -241,25 +241,32 @@ func (p *fakeParseHints) ShouldExtract(key string) bool { p.checkCount++ return key == p.label || p.extractAll } + func (p *fakeParseHints) ShouldExtractPrefix(prefix string) bool { return prefix == p.label || p.extractAll } + func (p *fakeParseHints) NoLabels() bool { return false } + func (p *fakeParseHints) RecordExtracted(_ string) { p.count++ } + func (p *fakeParseHints) AllRequiredExtracted() bool { return !p.extractAll && p.count == 1 } + func (p *fakeParseHints) Reset() { p.checkCount = 0 p.count = 0 } + func (p *fakeParseHints) PreserveError() bool { return false } + func (p *fakeParseHints) ShouldContinueParsingLine(_ string, _ *LabelsBuilder) bool { return p.keepGoing } @@ -656,30 +663,36 @@ func Benchmark_Parser(b *testing.B) { b.Run(tt.name, func(b *testing.B) { line := []byte(tt.line) b.Run("no labels hints", func(b *testing.B) { + b.ReportAllocs() builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) for n := 0; n < b.N; n++ { builder.Reset() _, _ = tt.s.Process(0, line, builder) + builder.LabelsResult() } }) b.Run("labels hints", func(b *testing.B) { + b.ReportAllocs() builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) builder.parserKeyHints = NewParserHint(tt.LabelParseHints, tt.LabelParseHints, false, false, "", nil) for n := 0; n < b.N; n++ { builder.Reset() _, _ = tt.s.Process(0, line, 
builder) + builder.LabelsResult() } }) b.Run("inline stages", func(b *testing.B) { + b.ReportAllocs() stages := []Stage{NewStringLabelFilter(tt.LabelFilterParseHint)} builder := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) builder.parserKeyHints = NewParserHint(nil, nil, false, false, ", nil", stages) for n := 0; n < b.N; n++ { builder.Reset() _, _ = tt.s.Process(0, line, builder) + builder.LabelsResult() } }) }) @@ -1251,7 +1264,6 @@ func TestXExpressionParserFailures(t *testing.T) { }, } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { _, err := NewLogfmtExpressionParser([]LabelExtractionExpr{tt.expression}, false) diff --git a/pkg/logql/sketch/cms.go b/pkg/logql/sketch/cms.go index b510ee8504ea0..67f72be976c19 100644 --- a/pkg/logql/sketch/cms.go +++ b/pkg/logql/sketch/cms.go @@ -19,7 +19,7 @@ func NewCountMinSketch(w, d uint32) (*CountMinSketch, error) { Depth: d, Width: w, Counters: make2dslice(w, d), - HyperLogLog: hyperloglog.New16(), + HyperLogLog: hyperloglog.New16NoSparse(), }, nil } From b5c627b81360d7042fd04e4d1827b9ef79ac70e0 Mon Sep 17 00:00:00 2001 From: Ashwanth Date: Thu, 9 Jan 2025 19:32:14 +0530 Subject: [PATCH 9/9] chore(blockbuilder): mark a job as complete only after stopping sync updates (#15670) --- docs/sources/shared/configuration.md | 4 ---- pkg/blockbuilder/builder/builder.go | 14 +++++++++----- pkg/blockbuilder/scheduler/queue.go | 1 + pkg/blockbuilder/scheduler/scheduler.go | 2 -- 4 files changed, 10 insertions(+), 11 deletions(-) diff --git a/docs/sources/shared/configuration.md b/docs/sources/shared/configuration.md index 91b560e106c87..82b5294daa191 100644 --- a/docs/sources/shared/configuration.md +++ b/docs/sources/shared/configuration.md @@ -210,10 +210,6 @@ block_builder: [scheduler_grpc_client_config: ] block_scheduler: - # Consumer group used by block scheduler to track the last consumed offset. - # CLI flag: -block-scheduler.consumer-group - [consumer_group: | default = "block-scheduler"] - # How often the scheduler should plan jobs. 
# CLI flag: -block-scheduler.interval [interval: | default = 15m] diff --git a/pkg/blockbuilder/builder/builder.go b/pkg/blockbuilder/builder/builder.go index 61ff6ffcbc782..018ccc1365441 100644 --- a/pkg/blockbuilder/builder/builder.go +++ b/pkg/blockbuilder/builder/builder.go @@ -279,6 +279,12 @@ func (i *BlockBuilder) runOne(ctx context.Context, c *kgo.Client, workerID strin completion.Success = false } + // remove from inflight jobs to stop sending sync requests + i.jobsMtx.Lock() + delete(i.inflightJobs, job.ID()) + i.metrics.inflightJobs.Set(float64(len(i.inflightJobs))) + i.jobsMtx.Unlock() + if _, err := withBackoff( ctx, i.cfg.Backoff, @@ -292,16 +298,12 @@ func (i *BlockBuilder) runOne(ctx context.Context, c *kgo.Client, workerID strin return true, err } - i.jobsMtx.Lock() - delete(i.inflightJobs, job.ID()) - i.metrics.inflightJobs.Set(float64(len(i.inflightJobs))) - i.jobsMtx.Unlock() - return true, err } func (i *BlockBuilder) processJob(ctx context.Context, c *kgo.Client, job *types.Job, logger log.Logger) (lastOffsetConsumed int64, err error) { level.Debug(logger).Log("msg", "beginning job") + start := time.Now() indexer := newTsdbCreator() appender := newAppender(i.id, @@ -505,6 +507,8 @@ func (i *BlockBuilder) processJob(ctx context.Context, c *kgo.Client, job *types level.Info(logger).Log( "msg", "successfully processed job", "last_offset", lastOffset, + "duration", time.Since(start), + "records", lastOffset-job.Offsets().Min, ) return lastOffset, nil diff --git a/pkg/blockbuilder/scheduler/queue.go b/pkg/blockbuilder/scheduler/queue.go index 019ca61a0c487..4758561fd6c2b 100644 --- a/pkg/blockbuilder/scheduler/queue.go +++ b/pkg/blockbuilder/scheduler/queue.go @@ -332,6 +332,7 @@ func (q *JobQueue) SyncJob(jobID string, job *types.Job) { case types.JobStatusInProgress: case types.JobStatusComplete, types.JobStatusFailed, types.JobStatusExpired: // Job already completed, re-enqueue a new one + level.Warn(q.logger).Log("msg", "job already completed, re-enqueuing", "job", jobID, "status", jobMeta.Status) registerInProgress() return default: diff --git a/pkg/blockbuilder/scheduler/scheduler.go b/pkg/blockbuilder/scheduler/scheduler.go index d4e8c0935ce68..55ecc066dc582 100644 --- a/pkg/blockbuilder/scheduler/scheduler.go +++ b/pkg/blockbuilder/scheduler/scheduler.go @@ -26,7 +26,6 @@ var ( ) type Config struct { - ConsumerGroup string `yaml:"consumer_group"` Interval time.Duration `yaml:"interval"` LookbackPeriod time.Duration `yaml:"lookback_period"` Strategy string `yaml:"strategy"` @@ -36,7 +35,6 @@ type Config struct { func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { f.DurationVar(&cfg.Interval, prefix+"interval", 15*time.Minute, "How often the scheduler should plan jobs.") - f.StringVar(&cfg.ConsumerGroup, prefix+"consumer-group", "block-scheduler", "Consumer group used by block scheduler to track the last consumed offset.") f.DurationVar(&cfg.LookbackPeriod, prefix+"lookback-period", 0, "Lookback period used by the scheduler to plan jobs when the consumer group has no commits. 0 consumes from the start of the partition.") f.StringVar( &cfg.Strategy,