Skip to content

Commit

Permalink
chore: add official document around auto discovery (#269)
Browse files Browse the repository at this point in the history
  • Loading branch information
ZhiHanZ authored Sep 6, 2024
1 parent 19245ea commit bab24f4
Show file tree
Hide file tree
Showing 7 changed files with 84 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ public final class ConnectionProperties {
public static final ConnectionProperty<Integer> MAX_FAILOVER_RETRY = new MaxFailoverRetry();
public static final ConnectionProperty<String> LOAD_BALANCING_POLICY = new LoadBalancingPolicy();
public static final ConnectionProperty<Boolean> AUTO_DISCOVERY = new AutoDiscovery();
public static final ConnectionProperty<Integer> NODE_DISCOVERY_INTERVAL = new NodeDiscoveryInterval();
public static final ConnectionProperty<Boolean> ENABLE_MOCK = new EnableMock();
public static final ConnectionProperty<String> DATABASE = new Database();
public static final ConnectionProperty<String> ACCESS_TOKEN = new AccessToken();
Expand Down Expand Up @@ -162,6 +163,12 @@ public AutoDiscovery() {
}
}

/**
 * Connection property {@code node_discovery_interval}: the minimum interval, in milliseconds,
 * between two automatic node discovery actions.
 *
 * <p>The property is not required; when absent, the default string {@code "300000"} is used and
 * converted by {@code INTEGER_CONVERTER}.
 */
private static class NodeDiscoveryInterval extends AbstractConnectionProperty<Integer> {
public NodeDiscoveryInterval() {
// "300000" ms is five minutes, the same value as the 5 * 60 * 1000 fallback in DatabendDriverUri.
super("node_discovery_interval", Optional.of("300000"), NOT_REQUIRED, ALLOWED, INTEGER_CONVERTER);
}
}

private static class EnableMock extends AbstractConnectionProperty<Boolean> {
public EnableMock() {
super("enable_mock", Optional.of("false"), NOT_REQUIRED, ALLOWED, BOOLEAN_CONVERTER);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public final class DatabendDriverUri {
private final Integer waitTimeSecs;
private final Integer maxRowsInBuffer;
private final Integer maxRowsPerPage;
private final int nodeDiscoveryInterval;

// private final boolean useSecureConnection;

Expand All @@ -81,12 +82,14 @@ private DatabendDriverUri(String url, Properties driverProperties)
this.tenant = TENANT.getValue(properties).orElse("");
this.maxFailoverRetry = MAX_FAILOVER_RETRY.getValue(properties).orElse(0);
this.autoDiscovery = AUTO_DISCOVERY.getValue(properties).orElse(false);
this.nodeDiscoveryInterval = NODE_DISCOVERY_INTERVAL.getValue(properties).orElse(5 * 60 * 1000);
List<URI> finalUris = canonicalizeUris(uris, this.useSecureConnection, this.sslmode);
DatabendClientLoadBalancingPolicy policy = DatabendClientLoadBalancingPolicy.create(LOAD_BALANCING_POLICY.getValue(properties).orElse(DatabendClientLoadBalancingPolicy.DISABLED));
DatabendNodes nodes = uriAndProperties.getKey();
nodes.updateNodes(finalUris);
nodes.updatePolicy(policy);
nodes.setSSL(this.useSecureConnection, this.sslmode);
nodes.setDiscoveryInterval(this.nodeDiscoveryInterval);
this.nodes = nodes;
this.database = DATABASE.getValue(properties).orElse("default");
this.presignedUrlDisabled = PRESIGNED_URL_DISABLED.getRequiredValue(properties);
Expand Down Expand Up @@ -278,7 +281,7 @@ private static Map.Entry<DatabendNodes, Map<String, String>> parse(String url)
uris.addAll(uriSet);
// Create DatabendNodes object
DatabendClientLoadBalancingPolicy policy = DatabendClientLoadBalancingPolicy.create(DatabendClientLoadBalancingPolicy.DISABLED); // You might want to make this configurable
DatabendNodes databendNodes = new DatabendNodes(uris, policy, uriPath, uriQuery, uriFragment);
DatabendNodes databendNodes = new DatabendNodes(uris, policy, uriPath, uriQuery, uriFragment, 5 * 60 * 1000);
return new AbstractMap.SimpleImmutableEntry<>(databendNodes, uriProperties);
} catch (URISyntaxException e) {
throw new SQLException("Invalid URI: " + raw, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public class DatabendNodes implements DatabendNodeRouter {
@Setter
private boolean debug = false;
// minimum time between discovery
protected long discoveryInterval = 1000 * 60 * 5;
protected long discoveryInterval;
protected DatabendClientLoadBalancingPolicy policy;

private final String uriPath;
Expand All @@ -36,13 +36,14 @@ public class DatabendNodes implements DatabendNodeRouter {
private boolean useSecureConnection = false;
private String sslmode = "disable";

public DatabendNodes(List<URI> queryNodesUris, DatabendClientLoadBalancingPolicy policy, String UriPath, String UriQuery, String UriFragment) {
/**
 * Creates a {@code DatabendNodes} instance over the given query node URIs.
 *
 * @param queryNodesUris initial list of query node URIs, held in an {@code AtomicReference}
 * @param policy load balancing policy stored on this instance
 * @param UriPath URI path, stored as-is
 * @param UriQuery URI query string, stored as-is
 * @param UriFragment URI fragment, stored as-is
 * @param discoveryInterval minimum time between discovery runs; the caller in
 *     {@code DatabendDriverUri} passes {@code 5 * 60 * 1000}, i.e. milliseconds
 */
public DatabendNodes(List<URI> queryNodesUris, DatabendClientLoadBalancingPolicy policy, String UriPath, String UriQuery, String UriFragment, long discoveryInterval) {
this.query_nodes_uris = new AtomicReference<>(queryNodesUris);
this.policy = policy;
// The index counter always starts at 0 for a new instance.
this.index = new AtomicInteger(0);
this.uriPath = UriPath;
this.uriQuery = UriQuery;
this.uriFragment = UriFragment;
this.discoveryInterval = discoveryInterval;
}

@Override
Expand All @@ -55,6 +56,10 @@ public void setSSL(boolean useSecureConnection, String sslmode) {
this.sslmode = sslmode;
}

/**
 * Overrides the minimum time between two node discovery runs.
 *
 * @param discoveryInterval new minimum interval; {@code DatabendDriverUri} passes the value of
 *     the {@code node_discovery_interval} connection property (milliseconds)
 */
public void setDiscoveryInterval(long discoveryInterval) {
this.discoveryInterval = discoveryInterval;
}

/**
 * Replaces the current list of query node URIs with the given list.
 *
 * @param query_nodes_uris new list of query node URIs; stored by reference via
 *     {@code AtomicReference.set}, no copy is made
 */
public void updateNodes(List<URI> query_nodes_uris) {
this.query_nodes_uris.set(query_nodes_uris);
}
Expand Down
83 changes: 61 additions & 22 deletions docs/Connection.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,11 +82,48 @@ There are three load balancing options available:

**NOTICE:**

When configuring SSL, it's recommended to use the approach shown in the last example, which allows for more detailed SSL configuration including certificate verification.
1. When configuring SSL, it's recommended to use the approach shown in the last example, which allows for more detailed SSL configuration including certificate verification.

Remember to replace the hostnames, ports, and file paths with your actual Databend cluster configuration and SSL certificate locations.
2. Remember to replace the hostnames, ports, and file paths with your actual Databend cluster configuration and SSL certificate locations.

3. Failover retries occur only for connection issues (java.net.ConnectException); other exceptions will NOT trigger a retry.
4. Databend-jdbc supports transactions. During a transaction, the connection is pinned to the same node and the load balancing policy is disabled. Once the transaction is committed or aborted, the connection is released and the load balancing policy is enabled again.




#### Automatic Node Discovery

| Parameter | Description | Default | example |
|------------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|-----------------------------------------------------------------------------------------|
| auto_discovery | Automatically discover possible cluster nodes in a databend query cluster | false | jdbc:databend://0.0.0.0:8000/default?auto_discovery=true |
| node_discovery_interval | Minimum interval between two automatic node discovery actions in milliseconds | 5 * 60 * 1000 | jdbc:databend://0.0.0.0:8000/default?auto_discovery=true&node_discovery_interval=600000 |

Automatic Node Discovery tries to discover the nodes of an existing Databend query cluster using the `/v1/discovery_nodes` API. It is turned off if that API is not supported by your Databend version (minimum version: v1.2.629-nightly). Discovery is passive: when a new query is issued after the given `node_discovery_interval` has elapsed, the driver probes the possible node list and updates the node list used for load balancing and failover. It does not use a thread pool or executor service to start a background thread for this task.

**NOTICE:**
As the cluster IP/DNS may vary depending on your network environment, it is recommended to give all possible nodes in the same warehouse and tenant a fixed IP or DNS name for reliable node discovery.
Sample Configuration:

```toml
[query]
discovery_address = "localhost:8000"

# Databend Query HTTP Handler.
http_handler_host = "0.0.0.0"
http_handler_port = 8000

tenant_id = "test_tenant"
cluster_id = "test_cluster"
```


In the above node configuration file, `discovery_address` is the address that JDBC uses to connect to this node when the node is discovered through the node discovery API of other nodes in the same warehouse (i.e. with the same tenant_id and cluster_id).
The address returned for a node is determined according to three scenarios:
1. If the user has explicitly set `discovery_address` in the configuration, this value is returned.
2. If the user has configured an HTTP address that is not 0.0.0.0 or 127.0.0.1, this HTTP address is returned.
3. If the user has configured an HTTP address as 0.0.0.0 or 127.0.0.1, the system will probe to detect a suitable IP address. The IP address that is successfully routed through the network and can communicate with the meta service will be returned.

Failover retries occur only for connection issues (java.net.ConnectException); other exceptions will NOT trigger a retry.

## Connection parameters

Expand All @@ -109,22 +146,24 @@ String url="jdbc:databend://databend:[email protected]:8000/hello_databend";

### Parameter References

| Parameter | Description | Default | example |
|------------------------|---------------------------------------------------------------------------------------------------------------------------|----------|-------------------------------------------------------------------------|
| user | Databend user name | none | jdbc:databend://0.0.0.0:8000/hello_databend?user=test |
| password | Databend user password | none | jdbc:databend://0.0.0.0:8000/hello_databend?password=secret |
| SSL | Enable SSL | false | jdbc:databend://0.0.0.0:8000/hello_databend?SSL=true |
| sslmode | SSL mode | disable | jdbc:databend://0.0.0.0:8000/hello_databend?sslmode=enable |
| copy_purge | If True, the command will purge the files in the stage after they are loaded successfully into the table | false | jdbc:databend://0.0.0.0:8000/hello_databend?copy_purge=true |
| presigned_url_disabled | whether use presigned url to upload data, generally if you use local disk as your storage layer, it should be set as true | false | jdbc:databend://0.0.0.0:8000/hello_databend?presigned_url_disabled=true |
| wait_time_secs | Restful query api blocking time, if the query is not finished, the api will block for wait_time_secs seconds | 10 | jdbc:databend://0.0.0.0:8000/hello_databend?wait_time_secs=10 |
| max_rows_in_buffer | the maximum rows in server session buffer | 5000000 | jdbc:databend://0.0.0.0:8000/hello_databend?max_rows_in_buffer=5000000 |
| max_rows_per_page | the maximum rows per page in response data body | 100000 | jdbc:databend://0.0.0.0:8000/default?max_rows_per_page=100000 |
| connection_timeout | okhttp connection_timeout param | 0 | jdbc:databend://0.0.0.0:8000/default?connection_timeout=100000 |
| query_timeout | time that you wait a SQL execution | 90 | jdbc:databend://0.0.0.0:8000/default?query_timeout=120 |
| null_display | null value display | \N | jdbc:databend://0.0.0.0:8000/hello_databend?null_display=null |
| binary_format | binary format, support hex and base64 | hex | jdbc:databend://0.0.0.0:8000/default?binary_format=hex |
| use_verify | whether verify the server before establishing the connection | true | jdbc:databend://0.0.0.0:8000/default?use_verify=true |
| debug | whether enable debug mode | false | jdbc:databend://0.0.0.0:8000/default?debug=true |
| load_balancing_policy | Specifies the load balancing policy for multi-host connections. Options are "disabled", "random", and "round_robin". | disabled | jdbc:databend://localhost:8000,localhost:8002,localhost:8003/default?load_balancing_policy=random |
| max_failover_retry | Specifies the maximum number of retry attempts for failover connections. | 0 | jdbc:databend://localhost:7222,localhost:7223,localhost:7224,localhost:8000/default?max_failover_retry=4 |
| Parameter | Description | Default | example |
|------------------------|---------------------------------------------------------------------------------------------------------------------------|---------------|-------------------------------------------------------------------------|
| user | Databend user name | none | jdbc:databend://0.0.0.0:8000/hello_databend?user=test |
| password | Databend user password | none | jdbc:databend://0.0.0.0:8000/hello_databend?password=secret |
| SSL | Enable SSL | false | jdbc:databend://0.0.0.0:8000/hello_databend?SSL=true |
| sslmode | SSL mode | disable | jdbc:databend://0.0.0.0:8000/hello_databend?sslmode=enable |
| copy_purge | If True, the command will purge the files in the stage after they are loaded successfully into the table | false | jdbc:databend://0.0.0.0:8000/hello_databend?copy_purge=true |
| presigned_url_disabled | whether use presigned url to upload data, generally if you use local disk as your storage layer, it should be set as true | false | jdbc:databend://0.0.0.0:8000/hello_databend?presigned_url_disabled=true |
| wait_time_secs | Restful query api blocking time, if the query is not finished, the api will block for wait_time_secs seconds | 10 | jdbc:databend://0.0.0.0:8000/hello_databend?wait_time_secs=10 |
| max_rows_in_buffer | the maximum rows in server session buffer | 5000000 | jdbc:databend://0.0.0.0:8000/hello_databend?max_rows_in_buffer=5000000 |
| max_rows_per_page | the maximum rows per page in response data body | 100000 | jdbc:databend://0.0.0.0:8000/default?max_rows_per_page=100000 |
| connection_timeout | okhttp connection_timeout param | 0 | jdbc:databend://0.0.0.0:8000/default?connection_timeout=100000 |
| query_timeout | time that you wait a SQL execution | 90 | jdbc:databend://0.0.0.0:8000/default?query_timeout=120 |
| null_display | null value display | \N | jdbc:databend://0.0.0.0:8000/hello_databend?null_display=null |
| binary_format | binary format, support hex and base64 | hex | jdbc:databend://0.0.0.0:8000/default?binary_format=hex |
| use_verify | whether verify the server before establishing the connection | true | jdbc:databend://0.0.0.0:8000/default?use_verify=true |
| debug | whether enable debug mode | false | jdbc:databend://0.0.0.0:8000/default?debug=true |
| load_balancing_policy | Specifies the load balancing policy for multi-host connections. Options are "disabled", "random", and "round_robin". | disabled | jdbc:databend://localhost:8000,localhost:8002,localhost:8003/default?load_balancing_policy=random |
| max_failover_retry | Specifies the maximum number of retry attempts for failover connections. | 0 | jdbc:databend://localhost:7222,localhost:7223,localhost:7224,localhost:8000/default?max_failover_retry=4 |
| auto_discovery | Automatically discover possible cluster nodes in a databend query cluster | false | jdbc:databend://0.0.0.0:8000/default?auto_discovery=true |
| node_discovery_interval | Minimum interval between two automatic node discovery actions in milliseconds | 5 * 60 * 1000 | jdbc:databend://0.0.0.0:8000/default?node_discovery_interval=600000 |
2 changes: 1 addition & 1 deletion scripts/deploy/config/databend-query-node-1.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ admin_api_address = "0.0.0.0:8080"

# Databend Query metrics RESET API.
metric_api_address = "0.0.0.0:7070"

discovery_address = "localhost:8000"
# Databend Query MySQL Handler.
mysql_handler_host = "0.0.0.0"
mysql_handler_port = 3307
Expand Down
2 changes: 2 additions & 0 deletions scripts/deploy/config/databend-query-node-2.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ admin_api_address = "0.0.0.0:8082"
# Databend Query metrics RESET API.
metric_api_address = "0.0.0.0:7072"

discovery_address = "localhost:8002"

# Databend Query MySQL Handler.
mysql_handler_host = "0.0.0.0"
mysql_handler_port = 3308
Expand Down
2 changes: 2 additions & 0 deletions scripts/deploy/config/databend-query-node-3.toml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@ admin_api_address = "0.0.0.0:8083"
# Databend Query metrics RESET API.
metric_api_address = "0.0.0.0:7073"

discovery_address = "localhost:8003"

# Databend Query MySQL Handler.
mysql_handler_host = "0.0.0.0"
mysql_handler_port = 3309
Expand Down

0 comments on commit bab24f4

Please sign in to comment.