Skip to content

Commit

Permalink
improvement(gemini): run gemini from scylladb/gemini
Browse files Browse the repository at this point in the history
The Dockerfile has been moved to the gemini project, and SCT
should now use the image from scylladb/gemini.

Changes:
- Use image from scylladb/gemini
- Remove the Dockerfile for gemini and point the README to the location of
  the images
- Add CQL Statement Logging to gemini output
- Forward outputs from docker to $HOME/*.log
- Run default gemini flags from gemini_thread.py

Signed-off-by: Dusan Malusev <[email protected]>
  • Loading branch information
CodeLieutenant committed Nov 6, 2024
1 parent 269e83d commit f20b322
Show file tree
Hide file tree
Showing 18 changed files with 170 additions and 182 deletions.
41 changes: 20 additions & 21 deletions defaults/test_default.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
db_type: "scylla"
db_type: 'scylla'

test_duration: 60
prepare_stress_duration: 300 # 5 hours
Expand Down Expand Up @@ -42,16 +42,16 @@ nemesis_add_node_cnt: 1

nemesis_filter_seeds: false

seeds_selector: "all"
seeds_selector: 'all'
seeds_num: 1

instance_provision: "spot"
instance_provision: 'spot'

execute_post_behavior: false
post_behavior_db_nodes: "keep-on-failure"
post_behavior_loader_nodes: "destroy"
post_behavior_monitor_nodes: "keep-on-failure"
post_behavior_k8s_cluster: "keep-on-failure"
post_behavior_db_nodes: 'keep-on-failure'
post_behavior_loader_nodes: 'destroy'
post_behavior_monitor_nodes: 'keep-on-failure'
post_behavior_k8s_cluster: 'keep-on-failure'

cloud_credentials_path: ''
use_cloud_manager: false
Expand All @@ -62,7 +62,7 @@ print_kernel_callstack: true

update_db_packages: ''

logs_transport: "syslog-ng"
logs_transport: 'syslog-ng'

store_perf_results: false
email_recipients: ['[email protected]']
Expand All @@ -71,7 +71,7 @@ email_subject_postfix: ''
collect_logs: false

hinted_handoff: 'disabled'
parallel_node_operations: false # supported from Scylla 6.0
parallel_node_operations: false # supported from Scylla 6.0

server_encrypt: false
client_encrypt: false
Expand All @@ -89,7 +89,6 @@ alternator_access_key_id: ''
alternator_secret_access_key: ''
dynamodb_primarykey_type: 'HASH'


store_cdclog_reader_stats_in_es: false
region_aware_loader: false

Expand Down Expand Up @@ -141,22 +140,22 @@ target_upgrade_version: ''
disable_raft: true
enable_tablets_on_upgrade: false

stress_cdclog_reader_cmd: "cdc-stressor -stream-query-round-duration 30s"
stress_cdclog_reader_cmd: 'cdc-stressor -stream-query-round-duration 30s'

jepsen_scylla_repo: 'https://github.com/jepsen-io/scylla.git'
jepsen_test_cmd:
- 'test-all -w cas-register --concurrency 10n'
- 'test-all -w counter --concurrency 10n'
- 'test-all -w cmap --concurrency 10n'
- 'test-all -w cset --concurrency 10n'
# - 'test-all -w mv --concurrency 10n'
# - 'test-all -w mv --concurrency 10n'
- 'test-all -w write-isolation --concurrency 10n'
- 'test-all -w list-append --concurrency 10n'
- 'test-all -w wr-register --concurrency 10n'
jepsen_test_count: 1
jepsen_test_run_policy: all

max_events_severities: ""
max_events_severities: ''
scylla_mgmt_agent_version: '3.3.0'
mgmt_docker_image: 'scylladb/scylla-manager:3.3.0'
k8s_log_api_calls: false
Expand All @@ -173,7 +172,7 @@ k8s_scylla_utils_docker_image: ''

scylla_rsyslog_setup: false

backup_bucket_region: '' # use the same region as a cluster
backup_bucket_region: '' # use the same region as a cluster

events_limit_in_email: 10

Expand All @@ -183,17 +182,17 @@ data_volume_disk_size: 0
data_volume_disk_iops: 0 # depend on type iops could be 100-16000 for io2|io3 and 3000-16000 for gp3
gce_pd_standard_disk_size_db: 0

install_mode: 'repo' # install from scylla_repo
install_mode: 'repo' # install from scylla_repo
run_db_node_benchmarks: false

nemesis_multiply_factor: 6

scylla_apt_keys:
- '17723034C56D4B19' # ScyllaDB Package Signing Key 2018 <[email protected]>
- '5E08FBD8B5D6EC9C' # ScyllaDB Package Signing Key 2020 <[email protected]>
- 'D0A112E067426AB2' # ScyllaDB Package Signing Key 2022 <[email protected]>
- '491C93B9DE7496A7' # ScyllaDB Package Signing Key 2024 <[email protected]>
- 'A43E06657BAC99E3' # ScyllaDB Package Signing Key 2024 (RSA) <[email protected]>
- '17723034C56D4B19' # ScyllaDB Package Signing Key 2018 <[email protected]>
- '5E08FBD8B5D6EC9C' # ScyllaDB Package Signing Key 2020 <[email protected]>
- 'D0A112E067426AB2' # ScyllaDB Package Signing Key 2022 <[email protected]>
- '491C93B9DE7496A7' # ScyllaDB Package Signing Key 2024 <[email protected]>
- 'A43E06657BAC99E3' # ScyllaDB Package Signing Key 2024 (RSA) <[email protected]>

raid_level: 0

Expand All @@ -211,7 +210,7 @@ stress_image:
nosqlbench: 'scylladb/hydra-loaders:nosqlbench-5.21.2'
cassandra-stress: 'scylladb/cassandra-stress:3.13.0'
scylla-bench: 'scylladb/hydra-loaders:scylla-bench-v0.1.24'
gemini: 'scylladb/hydra-loaders:gemini-v1.8.6'
gemini: 'scylladb/gemini:1.8.8'
alternator-dns: 'scylladb/hydra-loaders:alternator-dns-0.1'
cdc-stresser: 'scylladb/hydra-loaders:cdc-stresser-20210630'
kcl: 'scylladb/hydra-loaders:kcl-jdk8-20210526-ShardSyncStrategyType-PERIODIC'
Expand Down
19 changes: 0 additions & 19 deletions docker/gemini/Dockerfile

This file was deleted.

17 changes: 6 additions & 11 deletions docker/gemini/README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,6 @@
Currently, when releasing a new version of Gemini, there's no need to push the image to Docker Hub.
The image is built and pushed automatically by `goreleaser` when a new version is released.
Docs from Gemini repo: https://github.com/scylladb/gemini/blob/master/docs/release-process.md
Steps to release gemini :
```
0. Make sure you have proper go installed. See the version in https://github.com/scylladb/gemini/blob/master/go.mod
1. update changelog and tag the commit
2. create github token with write:packages permissions here: https://github.com/settings/tokens/new
3. export GITHUB_TOKEN="YOUR_GH_TOKEN"
4. Run `goreleaser` from the `cmd/gemini` directory
```
# Gemini Image

## Locations

- [DockerHub](https://hub.docker.com/r/scylladb/gemini)
- [Gemini Github](https://github.com/scylladb/gemini)
1 change: 0 additions & 1 deletion docker/gemini/image

This file was deleted.

105 changes: 74 additions & 31 deletions sdcm/gemini_thread.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,50 +66,86 @@ def __init__(self, test_cluster, oracle_cluster, loaders, stress_cmd, timeout=No
super().__init__(loader_set=loaders, stress_cmd=stress_cmd, timeout=timeout, params=params)
self.test_cluster = test_cluster
self.oracle_cluster = oracle_cluster
self._gemini_result_file = None
self.gemini_commands = []
self.gemini_request_timeout = 180
self.gemini_connect_timeout = 120

@property
def gemini_result_file(self):
if not self._gemini_result_file:
self._gemini_result_file = os.path.join("/", "gemini_result_{}.log".format(uuid.uuid4()))
return self._gemini_result_file
self.unique_id = uuid.uuid4()
self.gemini_default_flags = {
'consistency': 'LOCAL_QUORUM',
'use-server-timestamps': 'false',
'async-objects-stabilization-backoff': '500ms',
'async-objects-stabilization-attempts': 5,
'max-mutation-retries-backoff': '500ms',
'max-mutation-retries': 10,
'dataset-size': 'large',
'materialized-views': 'false',
'oracle-host-selection-policy': 'token-aware',
'test-host-selection-policy': 'token-aware',
'drop-schema': 'true',
'cql-features': 'normal'
}

self.gemini_oracle_statements_file = f"gemini_oracle_statements_{self.unique_id}.log"
self.gemini_test_statements_file = f"gemini_test_statements_{self.unique_id}.log"
self.gemini_result_file = f"gemini_result_{self.unique_id}.log"

def _generate_gemini_command(self):
seed = self.params.get('gemini_seed')
seed = self.params.get('gemini_seed') or random.randint(1, 100)
table_options = self.params.get('gemini_table_options')
if not seed:
seed = random.randint(1, 100)

test_nodes = ",".join(self.test_cluster.get_node_cql_ips())
oracle_nodes = ",".join(self.oracle_cluster.get_node_cql_ips()) if self.oracle_cluster else None

cmd = "./{} --test-cluster={} --outfile {} --seed {} --request-timeout {}s --connect-timeout {}s ".format(
self.stress_cmd.strip(),
test_nodes,
self.gemini_result_file,
seed,
self.gemini_request_timeout,
self.gemini_connect_timeout)
if oracle_nodes:
cmd += "--oracle-cluster={} ".format(oracle_nodes)
oracle_nodes = ",".join(self.oracle_cluster.get_node_cql_ips())

cmd = f"gemini \
--oracle-cluster={oracle_nodes} \
--test-cluster={test_nodes} \
--seed {seed} \
--schema-seed {seed} \
--request-timeout {self.gemini_request_timeout}s \
--connect-timeout {self.gemini_connect_timeout}s \
--outfile /{self.gemini_result_file} \
--test-statement-log-file /{self.gemini_test_statements_file} \
--oracle-statement-log-file /{self.gemini_oracle_statements_file} \
--fail-fast \
--non-interactive \
--replication-strategy \"{{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}}\" \
--oracle-replication-strategy \"{{'class': 'NetworkTopologyStrategy', 'replication_factor': '1'}}\" "

credentials = self.loader_set.get_db_auth()

if credentials and '--test-username' not in cmd:
cmd += f"--test-username {credentials[0]} --test-password {credentials[1]} "
cmd += f"--oracle-username {credentials[0]} --oracle-password {credentials[1]} "

if table_options:
cmd += " ".join([f"--table-options \"{table_opt}\"" for table_opt in table_options])

stress_cmd = self.stress_cmd.replace('\n', ' ').strip()

for key, value in self.gemini_default_flags.items():
if not key in stress_cmd:
cmd += f"--{key} {value} "

cmd += stress_cmd
self.gemini_commands.append(cmd)
return cmd

def _run_stress(self, loader, loader_idx, cpu_idx):

cpu_options = ""
if self.stress_num > 1:
cpu_options = f'--cpuset-cpus="{cpu_idx}"'

docker = cleanup_context = RemoteDocker(loader, self.docker_image_name,
extra_docker_opts=f'{cpu_options} --label shell_marker={self.shell_marker} '
'--network=host '
'--security-opt seccomp=unconfined '
'--entrypoint=""')
for file_name in [self.gemini_result_file, self.gemini_test_statements_file, self.gemini_oracle_statements_file]:
loader.remoter.run(f"touch $HOME/{file_name}", ignore_status=True, verbose=False)

# Start the gemini container with the result/statement log files bind-mounted
# from $HOME on the loader to the container root.
docker = cleanup_context = RemoteDocker(
    loader,
    self.docker_image_name,
    # The conditional must be parenthesised: adjacent string literals are
    # concatenated before the conditional expression is evaluated, so without
    # the parentheses every option below belonged to the `else ""` branch and
    # was dropped whenever stress_num > 1.
    extra_docker_opts=(f'--cpuset-cpus="{cpu_idx}" ' if self.stress_num > 1 else "") +
    f'--label shell_marker={self.shell_marker} '
    '--network=host '
    '--security-opt seccomp=unconfined '
    '--entrypoint="" '
    f'-v $HOME/{self.gemini_result_file}:/{self.gemini_result_file} '
    f'-v $HOME/{self.gemini_test_statements_file}:/{self.gemini_test_statements_file} '
    f'-v $HOME/{self.gemini_oracle_statements_file}:/{self.gemini_oracle_statements_file} '
)

if not os.path.exists(loader.logdir):
os.makedirs(loader.logdir, exist_ok=True)
Expand Down Expand Up @@ -148,6 +184,13 @@ def _run_stress(self, loader, loader_idx, cpu_idx):
results_copied = docker.receive_files(src=self.gemini_result_file, dst=local_gemini_result_file)
assert results_copied, "gemini results aren't available, did gemini even run ?"

local_gemini_test_statements_file = os.path.join(
docker.node.logdir, os.path.basename(self.gemini_test_statements_file))
local_gemini_oracle_statements_file = os.path.join(
docker.node.logdir, os.path.basename(self.gemini_oracle_statements_file))
docker.receive_files(src=self.gemini_test_statements_file, dst=local_gemini_test_statements_file)
docker.receive_files(src=self.gemini_oracle_statements_file, dst=local_gemini_oracle_statements_file)

return docker, result, local_gemini_result_file

def get_gemini_results(self):
Expand Down
2 changes: 2 additions & 0 deletions sdcm/logcollector.py
Original file line number Diff line number Diff line change
Expand Up @@ -821,6 +821,8 @@ class LoaderLogCollector(LogCollector):
search_locally=True),
FileLog(name='*gemini-l*.log',
search_locally=True),
FileLog(name='*gemini_*_statements_*.log',
search_locally=True),
FileLog(name='gemini_result*.log',
search_locally=True),
FileLog(name='cdclogreader*.log',
Expand Down
12 changes: 5 additions & 7 deletions test-cases/gemini/gemini-1tb-10h.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,11 @@ nemesis_selector: ['run_with_gemini']
nemesis_interval: 5
nemesis_seed: '041'

gemini_cmd: "gemini -d --duration 8h --warmup 2h -c 50 \
-m mixed -f --non-interactive --cql-features normal \
--max-mutation-retries 10 --max-mutation-retries-backoff 500ms \
--async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \
--replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\"
--oracle-replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '1'}\" "

# Extra gemini flags for this test case; the body of a literal block scalar
# must be indented deeper than its key.
gemini_cmd: |
  --duration 8h
  --warmup 2h
  --concurrency 50
  --mode mixed
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json' # currently is not used

Expand Down
12 changes: 5 additions & 7 deletions test-cases/gemini/gemini-3h-cdc-postimage-write.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ n_loaders: 1
n_monitor_nodes: 1
instance_type_db: 'i4i.4xlarge'

user_prefix: "gemini-cdc-postimage-write"

gemini_cmd: "gemini -d --duration 3h \
-c 30 -m write -f --non-interactive --cql-features normal \
--max-mutation-retries 5 --max-mutation-retries-backoff 500ms \
--async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \
--replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" "
user_prefix: 'gemini-cdc-postimage-write'

# Extra gemini flags for this test case; the body of a literal block scalar
# must be indented deeper than its key.
gemini_cmd: |
  --duration 3h
  --concurrency 30
  --mode write
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json' # currently is not used
gemini_table_options:
Expand Down
12 changes: 5 additions & 7 deletions test-cases/gemini/gemini-3h-cdc-preimage-write.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ n_loaders: 1
n_monitor_nodes: 1
instance_type_db: 'i4i.4xlarge'

user_prefix: "gemini-cdc-preimage-write"

gemini_cmd: "gemini -d --duration 3h \
-c 30 -m write -f --non-interactive --cql-features normal \
--max-mutation-retries 5 --max-mutation-retries-backoff 500ms \
--async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \
--replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" "
user_prefix: 'gemini-cdc-preimage-write'

# Extra gemini flags for this test case; the body of a literal block scalar
# must be indented deeper than its key.
gemini_cmd: |
  --duration 3h
  --concurrency 30
  --mode write
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json' # currently is not used
gemini_table_options:
Expand Down
14 changes: 6 additions & 8 deletions test-cases/gemini/gemini-3h-cdc-write.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,16 @@ n_loaders: 1
n_monitor_nodes: 1
instance_type_db: 'i4i.4xlarge'

user_prefix: "gemini-cdc-write"
user_prefix: 'gemini-cdc-write'

gemini_cmd: "gemini -d --duration 3h \
-c 30 -m write -f --non-interactive --cql-features normal \
--max-mutation-retries 5 --max-mutation-retries-backoff 500ms \
--async-objects-stabilization-attempts 5 --async-objects-stabilization-backoff 500ms \
--replication-strategy \"{'class': 'NetworkTopologyStrategy', 'replication_factor': '3'}\" "
# Extra gemini flags for this test case; the body of a literal block scalar
# must be indented deeper than its key.
gemini_cmd: |
  --duration 3h
  --concurrency 30
  --mode write
# Defined once only: a second `gemini_schema_url` entry further down was a
# duplicate mapping key, which most parsers silently resolve as last-wins.
gemini_schema_url: 'https://s3.amazonaws.com/scylla-gemini/Binaries/schema.json'  # currently is not used
gemini_table_options:
- "cdc={'enabled': true}"

db_type: scylla
Loading

0 comments on commit f20b322

Please sign in to comment.