diff --git a/bastion.tf b/bastion.tf index c8df39cb..a8ace61a 100644 --- a/bastion.tf +++ b/bastion.tf @@ -295,7 +295,11 @@ resource "null_resource" "cluster" { region = var.region, tenancy_ocid = var.tenancy_ocid, api_fingerprint = var.api_fingerprint, - api_user_ocid = var.api_user_ocid + api_user_ocid = var.api_user_ocid, + billing = var.billing, + billing_mysql_db_admin_username = var.billing_mysql_db_admin_username, + billing_mysql_db_admin_password = var.billing_mysql_db_admin_password, + billing_mysql_ip = var.billing ? oci_mysql_mysql_db_system.billing_mysql_db_system[0].ip_address : "" }) destination = "/opt/oci-hpc/playbooks/inventory" @@ -433,7 +437,11 @@ resource "null_resource" "cluster" { compute_username = var.compute_username, pam = var.pam, sacct_limits = var.sacct_limits, - use_compute_agent = var.use_compute_agent + use_compute_agent = var.use_compute_agent, + billing = var.billing, + billing_mysql_db_admin_username = var.billing_mysql_db_admin_username, + billing_mysql_db_admin_password = var.billing_mysql_db_admin_password, + billing_mysql_ip = var.billing ? oci_mysql_mysql_db_system.billing_mysql_db_system[0].ip_address : "" }) destination = "/opt/oci-hpc/conf/variables.tf" diff --git a/conf/variables.tpl b/conf/variables.tpl index 96dd18d3..752ef624 100755 --- a/conf/variables.tpl +++ b/conf/variables.tpl @@ -123,7 +123,6 @@ variable "ldap" { default = ${ldap} } variable "monitoring" { default = ${monitoring} } variable "autoscaling_monitoring" { default = ${autoscaling_monitoring} } - variable "tags" { default = "##TAGS##" } variable "private_deployment" { default = ${private_deployment} } variable "use_multiple_ads" { default = ${use_multiple_ads} } @@ -135,3 +134,8 @@ variable "log_vol" { default = "${log_vol}" } variable "redundancy" { default = "${redundancy}" } variable "instance_pool_ocpus_denseIO_flex" { default = "##OCPU##"} + +variable "billing" { default = "${billing}" } +variable "billing_mysql_db_admin_username" { default = "${billing_mysql_db_admin_username}" } +variable "billing_mysql_db_admin_password" { default = "${billing_mysql_db_admin_password}" } +variable "billing_mysql_ip" { default = "${billing_mysql_ip}" } \ No newline at end of file diff --git a/inventory.tpl b/inventory.tpl index 1d1586c2..73469156 100755 --- a/inventory.tpl +++ b/inventory.tpl @@ -76,4 +76,8 @@ inst_prin = ${inst_prin} api_fingerprint = ${api_fingerprint} api_user_ocid = ${api_user_ocid} sacct_limits=${sacct_limits} -use_compute_agent=${use_compute_agent} \ No newline at end of file +use_compute_agent=${use_compute_agent} +billing=${billing} +billing_mysql_db_admin_username=${billing_mysql_db_admin_username} +billing_mysql_db_admin_password=${billing_mysql_db_admin_password} +billing_mysql_ip=${billing_mysql_ip} \ No newline at end of file diff --git a/mysql.tf b/mysql.tf index 78c33ca2..84eb2d20 100644 --- a/mysql.tf +++ b/mysql.tf @@ -13,4 +13,45 @@ resource "oci_mysql_mysql_db_system" "monitoring_mysql_db_system" { backup_policy { is_enabled = false } +} + +resource "oci_mysql_mysql_db_system" "billing_mysql_db_system" { + count = var.billing ?
1 : 0 + + # Required + availability_domain = var.bastion_ad + compartment_id = var.targetCompartment + shape_name = var.billing_shape_name + subnet_id = local.subnet_id + + # Optional + admin_password = var.billing_mysql_db_admin_password + admin_username = var.billing_mysql_db_admin_username + backup_policy { + is_enabled = true + # Point-In-Time Recovery + pitr_policy { + is_enabled = true + } + retention_in_days = "7" + } + description = "MySQL DB System for billing" + display_name = "billing" + mysql_version = "8.0.35" + port = "3306" + port_x = "33060" + + is_highly_available = "true" + crash_recovery = "ENABLED" + data_storage_size_in_gb = "50" + deletion_policy { + automatic_backup_retention = "RETAIN" + final_backup = "REQUIRE_FINAL_BACKUP" + is_delete_protected = "true" + } + + freeform_tags = { + "Template" = "Production", + "CreatedTime" = timestamp() + } } \ No newline at end of file diff --git a/playbooks/README.md b/playbooks/README.md index 26fb820f..cdd0e9ea 100644 --- a/playbooks/README.md +++ b/playbooks/README.md @@ -18,6 +18,10 @@ For the webhooks go to [slack bot app](https://app.slack.com/app-settings/T04PVJ To uninstall notifications run notifications_uninstall.yml. Note that this does not remove the MailProg entry from the slurm.conf. This won't cause problems but will be silently erroring in the slurmctld.log. So if you want to be clean you can remove that from the config. +# Billing + +If the billing system has been deployed, one final step is required before usage data is collected: the cluster resource usage collection scripts in `/opt/oci-hpc/billing` must be scheduled in crontab. Each collection script has requirements that must be met before it can be used; review the scripts or run them with the `-h` or `--help` option for details. Once the requirements are satisfied, uncomment the corresponding entries in `crontab -e` to begin collecting usage data, as sketched in the example below. + ## Passwordless SSH for Root User We provide playbooks to enable and disable passwordless SSH for the root user. This feature is typically required for automated administrative tasks such as software upgrades, for example with Weka. Caution: Enabling passwordless SSH for the root user poses significant security risks. Be sure to disable it as soon as it is no longer necessary.
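For reference, a minimal sketch of the crontab entries once uncommented, assuming the install path and schedules used by the billing role (hourly filesystem and network collection, daily GPU collection); adjust the timing to your environment:

```
# Hourly Weka filesystem usage collection
0 * * * * /opt/oci-hpc/billing/filesystem.sh
# Hourly network egress usage collection
0 * * * * /opt/oci-hpc/billing/network.sh
# Daily GPU usage collection at midnight
0 0 * * * /opt/oci-hpc/billing/gpu.sh
```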
diff --git a/playbooks/roles/billing/files/billing.sql b/playbooks/roles/billing/files/billing.sql new file mode 100644 index 00000000..f1f2af6e --- /dev/null +++ b/playbooks/roles/billing/files/billing.sql @@ -0,0 +1,90 @@ +CREATE DATABASE billing; + +CREATE TABLE IF NOT EXISTS billing.accounts ( + account_id INT AUTO_INCREMENT PRIMARY KEY, + account_name VARCHAR(255) NOT NULL, + billing_details TEXT, + email VARCHAR(255), + billing_address TEXT, + created_date DATE NOT NULL DEFAULT (CURRENT_DATE), + archived BOOLEAN NOT NULL DEFAULT FALSE +); + +CREATE TABLE IF NOT EXISTS billing.users ( + user_id INT AUTO_INCREMENT PRIMARY KEY, + account_id INT NOT NULL, + user_name VARCHAR(255) NOT NULL, + email VARCHAR(255), + FOREIGN KEY (account_id) REFERENCES billing.accounts(account_id), + created_date DATE NOT NULL DEFAULT (CURRENT_DATE), + archived BOOLEAN NOT NULL DEFAULT FALSE +); + +CREATE TABLE IF NOT EXISTS billing.measurement_units ( + measurement_unit_id INT AUTO_INCREMENT PRIMARY KEY, + measurement_unit_name VARCHAR(50) NOT NULL, + measurement_unit_description TEXT +); + +CREATE TABLE IF NOT EXISTS billing.resource_types ( + resource_type_id INT AUTO_INCREMENT PRIMARY KEY, + resource_name VARCHAR(255) NOT NULL, + resource_description TEXT, + measurement_unit_id INT, + FOREIGN KEY (measurement_unit_id) REFERENCES billing.measurement_units(measurement_unit_id) +); + +CREATE TABLE IF NOT EXISTS billing.resource_specifications ( + resource_spec_id INT AUTO_INCREMENT PRIMARY KEY, + resource_type_id INT NOT NULL, + specification_name VARCHAR(255), + FOREIGN KEY (resource_type_id) REFERENCES billing.resource_types(resource_type_id) +); + +CREATE TABLE IF NOT EXISTS billing.usage_records ( + usage_id INT AUTO_INCREMENT PRIMARY KEY, + user_id INT NOT NULL, + resource_spec_id INT NOT NULL, + usage_start_time DATETIME NOT NULL, + usage_end_time DATETIME NOT NULL, + usage_amount BIGINT NOT NULL, + FOREIGN KEY (user_id) REFERENCES billing.users(user_id), + FOREIGN KEY (resource_spec_id) REFERENCES billing.resource_specifications(resource_spec_id) +); + +CREATE TABLE IF NOT EXISTS billing.pricing ( + pricing_id INT AUTO_INCREMENT PRIMARY KEY, + account_id INT NOT NULL, + resource_spec_id INT NOT NULL, + price_per_unit DECIMAL(10, 2) NOT NULL, + price_effective_date DATE NOT NULL, + price_end_date DATE, + FOREIGN KEY (account_id) REFERENCES billing.accounts(account_id), + FOREIGN KEY (resource_spec_id) REFERENCES billing.resource_specifications(resource_spec_id) +); + +-- Insert units of measure +INSERT INTO billing.measurement_units (measurement_unit_name, measurement_unit_description) +VALUES + ('bytes', 'Used for Filesystem, Network usage.'), + ('bytes per second', 'Used for RAM usage.'), + ('seconds', 'Used for GPU, CPU usage.'); + + +-- Insert resource types +INSERT INTO billing.resource_types (resource_name, measurement_unit_id) +VALUES + ('GPU', 3), + ('CPU', 3), + ('RAM', 2), + ('Filesystem', 1), + ('Network', 1); + +-- Insert resource specifications +INSERT INTO billing.resource_specifications (resource_type_id, specification_name) +VALUES + (1, 'A100'), + (5, 'Compute Node Egress'), + (4, 'Weka'), + (1, 'H100'), + (5, 'Login Node Egress'); diff --git a/playbooks/roles/billing/files/filesystem.sh b/playbooks/roles/billing/files/filesystem.sh new file mode 100755 index 00000000..c0e65555 --- /dev/null +++ b/playbooks/roles/billing/files/filesystem.sh @@ -0,0 +1,308 @@ +#!/bin/bash + +################################################### +# Description: +# This script collects weka filesystem 
usage for paid users from the Weka filesystem. +# After extracting the relevant usage data, the script aggregates this information and inserts it into a MySQL database. +# +# Features: +# - Efficient Data Collection: Gathers filesystem usage data from the Weka filesystem for paid users. +# - Data Filtering: Isolates the usage data of interest by filtering out non-paid users. +# - Database Integration: Inserts the processed data into a MySQL database using a single, efficient batch INSERT query. +# - Dynamic User List: Dynamically fetchs this list from an external source. +# +# Usage: +# 1. This script should be scheduled to run hourly with a cron job. +# - Example cron job: 0 * * * * /opt/oci-hpc/billing/filesystem.sh +# +# Requirements: +# - Bash shell environment. +# - Access to the Weka filesystem with the necessary permissions to execute the 'weka fs quota list' command. +# - MySQL client installed and network access to the MySQL database server. +# - Properly configured MySQL credentials using mysql_config_editor +# - Necessary permissions to execute and schedule the script in the operating environment. +# +# Company: Center for AI Safety +# Author: Andriy Novykov andriy@safe.ai novykov.andriy@gmail.com +################################################## + +set -u + +# Global Variables +TABLE='usage_records' +START_TIME=$(date -d "-1 hour" +"%Y-%m-%d %H:00:00") +END_TIME=$(date -d "-1 hour" +"%Y-%m-%d %H:59:59") +LOG_FILE="/opt/oci-hpc/logs/billing/filesystem.log" +VERBOSE=false + +# Associative arrays +declare -A TOTAL_FILESYSTEM_USAGE_PER_USER +declare -A PAID_USERS + +# Log error function +log_error() { + local message="$1" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" +} + +# Log message function +log_message() { + if [ "$VERBOSE" = true ]; then + local message="$1" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" + fi +} + +# Help function +show_help() { +cat << EOF +Usage: ${0##*/} [options] + +This script collects weka filesystem usage for paid users from the Weka filesystem. +After extracting the relevant usage data, the script aggregates this information and inserts it into a MySQL database. + +Options: + -h, --help Display this help and exit + -v, --verbose Enable verbose mode (log steps and errors otherwise just errors by default) + +Prerequisites: + - Bash shell environment. + - Access to the Weka filesystem with the necessary permissions to execute the 'weka fs quota list' command. + - MySQL client installed and network access to the MySQL database server. + - Properly configured MySQL credentials using mysql_config_editor. + - Necessary permisions to execute and schedule the script in the operating environment. + +EOF +} + +# Parse options +while [ "$#" -gt 0 ]; do + case $1 in + -h|--help) + show_help + exit + ;; + -v|--verbose) + VERBOSE=true + ;; + --) # End of all options + shift + break + ;; + -*) + echo "Error: Unknown option: $1" >&2 + show_help + exit 1 + ;; + *) # No more options + break + ;; + esac + shift +done + +####################################### +# Retrieve a list of paid users from the database. +# This function executes an SQL query to fetch user names and IDs of +# users who are not archived from the database and populates the +# PAID_USERS associative array with this data. +# +# Globals: +# PAID_USERS - Associative array to store user names and IDs. +# LOG_FILE - Path to the log file. 
+# +# Arguments: +# None +# +# Outputs: +# Writes user_name and user_id to the PAID_USERS associative array. +# +# Returns: +# Returns 0 on successful data retrieval, non-zero on SQL query failure. +# +# Usage: +# get_paid_users_from_database +# if [ $? -ne 0 ]; then +# echo "Failed to retrieve paid users from the database." +# fi +####################################### +get_paid_users_from_database() { + log_message "Starting to retrieve paid users from the database." + + local sql="SELECT user_name, user_id FROM billing.users WHERE archived = false" + local result + + # Execute the SQL query + if ! result=$(mysql --login-path=billing -sN -e "$sql"); then + log_error "Error: Failed to execute SQL query: $sql" + return 1 + fi + + # Check if result is empty + if [ -z "$result" ]; then + log_error "Error: No paid users found in the database." + return 1 + fi + + # Read the result and populate the PAID_USERS associative array + local user_name user_id + while read -r user_name user_id; do + PAID_USERS["$user_name"]="$user_id" + done <<< "$result" + + log_message "Successfully retrieved paid users from the database." + return 0 +} + +####################################### +# Retrieve filesystem usage per user from the Weka filesystem. +# This function executes the Weka filesystem command to fetch usage data +# for all users and populates the TOTAL_FILESYSTEM_USAGE_PER_USER associative +# array with data for paid users only. +# +# Globals: +# PAID_USERS - Associative array to check if a user is paid. +# TOTAL_FILESYSTEM_USAGE_PER_USER - Associative array to store user IDs and their usage in bytes. +# LOG_FILE - Path to the log file. +# +# Arguments: +# None +# +# Outputs: +# Writes user_id and usage in bytes to the TOTAL_FILESYSTEM_USAGE_PER_USER associative array. +# Logs messages and errors to the LOG_FILE. +# +# Returns: +# Returns 0 on successful data retrieval and parsing, non-zero on failure. +# +# Usage: +# get_filesystem_usage_per_user +# if [ $? -ne 0 ]; then +# echo "Failed to retrieve filesystem usage per user." +# fi +####################################### +get_filesystem_usage_per_user() { + local usage_per_user + + if ! usage_per_user=$(weka fs quota list --all --raw-units --output path,used | sed 's/default:\///'); then + log_error "Error: Failed to retrieve filesystem usage data. Command output: $usage_per_user" + return 1 + fi + + # Check if empty + if [ -z "$usage_per_user" ]; then + log_error "Error: No filesystem usage data retrieved." + return 1 + fi + + log_message "Parsing filesystem usage data." + while read -r line; do + local USER_NAME BYTES + read -r USER_NAME BYTES <<< $(awk '{print $1, $2}' <<< "$line") + + # Filter for paid users only + if [[ ${PAID_USERS[$USER_NAME]+_} ]]; then + USER_ID=${PAID_USERS[$USER_NAME]} + TOTAL_FILESYSTEM_USAGE_PER_USER[$USER_ID]=$BYTES + fi + done <<< "$usage_per_user" + + return 0 +} + +####################################### +# Insert filesystem usage data into the database. +# This function constructs and executes an SQL INSERT query to store the +# collected filesystem usage data for paid users in the MySQL database. +# +# Globals: +# TOTAL_FILESYSTEM_USAGE_PER_USER - Associative array containing user IDs and their usage in bytes. +# START_TIME - The start time for the usage data. +# END_TIME - The end time for the usage data. +# TABLE - The name of the database table to insert data into. +# LOG_FILE - Path to the log file. +# +# Arguments: +# None +# +# Outputs: +# Inserts usage data into the database. 
+# Logs messages and errors to the LOG_FILE. +# +# Returns: +# Returns 0 on successful data insertion, non-zero on failure. +# +# Usage: +# insert_filesystem_usage_into_db +# if [ $? -ne 0 ]; then +# echo "Failed to insert filesystem usage data into the database." +# fi +####################################### +insert_filesystem_usage_into_db() { + local sql_values=() + + for user_id in "${!TOTAL_FILESYSTEM_USAGE_PER_USER[@]}"; do + local bytes=${TOTAL_FILESYSTEM_USAGE_PER_USER[$user_id]} + sql_values+=("($user_id, 3, '$START_TIME', '$END_TIME', $bytes)") + done + + # Check if we have data to insert + if [ ${#sql_values[@]} -eq 0 ]; then + log_error "Error: No data to insert into the database." + return 1 + fi + + # Create SQL query + local sql="INSERT INTO billing.$TABLE (user_id, resource_spec_id, usage_start_time, usage_end_time, usage_amount) VALUES " + sql+=$(IFS=','; echo "${sql_values[*]}") + sql+=";" + + log_message "Executing SQL query to insert data into $TABLE." + + if ! mysql --login-path=billing -e "$sql"; then + log_error "Error: Failed to insert data into $TABLE" + return 1 + fi + + return 0 +} + +# Main function +main() { + log_message "Starting filesystem usage collection." + + log_message "Step 1: Attempting to retrieve paid users from database." + if ! get_paid_users_from_database; then + log_error "Error: Failed to retrieve paid users from the database" + log_error "Exiting." + exit 1 + fi + log_message "Successfully retrieved paid users from database." + + log_message "Step 2: Attempting to retrieve filesystem usage per user." + if ! get_filesystem_usage_per_user; then + log_error "Error: Failed to retrieve filesystem usage per user." + log_error "Exiting." + exit 1 + fi + log_message "Successfully retrived filesystem usage per user." + + log_message "Step 3: Attempting to save filesystem usage to database." + if ! insert_filesystem_usage_into_db; then + log_error "Error: Failed to save filesystem usage to database" + log_error "Exiting." + exit 1 + fi + log_message "Successfully saved filesystem usage to database." + + log_message "Finished filesystem usage collection and storage." +} + +# Start the script +main diff --git a/playbooks/roles/billing/files/gpu.sh b/playbooks/roles/billing/files/gpu.sh new file mode 100755 index 00000000..672d6df6 --- /dev/null +++ b/playbooks/roles/billing/files/gpu.sh @@ -0,0 +1,476 @@ +#!/bin/bash + +################################# +# Description: +# This script calculates and records the daily usage of GPU resources by paid users. +# It retrieves usage data, distinguishes between different GPU types (A100 and H100), +# and inserts summarized data into a database for billing purposes. +# +# Usage: +# - This script should be scheduled to run daily at 00:00:00 with a cron job. 
+# - Example cron job: 0 0 * * * /opt/oci-hpc/billing/collect_gpu_usage.sh +# +# Company: Center for AI Safety +# Author: Andriy Novykov andriy@safe.ai novykov.andriy@gmail.com +################################# + +set -u + +# Global Variables +readonly PARTITION="compute" +readonly TABLE="usage_records" +readonly DATE=$(date -d "-1 day" +"%Y-%m-%d") +readonly LOG_FILE="/opt/oci-hpc/logs/billing/gpu.log" +VERBOSE=false + +# Associative arrays for each gpu type +declare -A TOTAL_A100_USAGE_PER_USER +declare -A TOTAL_H100_USAGE_PER_USER + +# Associative array of paid users +declare -A PAID_USERS + +# Log error function +log_error() { + local message="$1" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" +} + +# Log message function +log_message() { + if [ "$VERBOSE" = true ]; then + local message="$1" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" + fi +} + +# Help function +show_help() { +cat << EOF +Usage: ${0##*/} [options] + +This script calculates and records the daily usage of GPU resources by paid users. +It retrieves usage data, distinguishes between different GPU types (A100 and H100), +and inserts summarized data into a database for billing purposes. + +Options: + -h, --help Display this help and exit + -v, --verbose Enable verbose mode (log steps and errors otherwise just errors by default) + +Prerequisites: + - MySQL client installed and accessible in the PATH + - Properly configured MySQL credentials using mysql_config_editor + +EOF +} + +# Parse options +while [ "$#" -gt 0 ]; do + case $1 in + -h|--help) + show_help + exit + ;; + -v|--verbose) + VERBOSE=true + ;; + --) # End of all options + shift + break + ;; + -*) + echo "Error: Unknown option: $1" >&2 + show_help + exit 1 + ;; + *) # No more options + break + ;; + esac + shift +done + +####################################### +# Retrieve a list of paid users from the database. +# This function executes an SQL query to fetch user names and IDs of +# users who are not archived from the database and populates the +# PAID_USERS associative array with this data. +# +# Globals: +# PAID_USERS - Associative array to store user names and IDs. +# LOG_FILE - Path to the log file. +# +# Arguments: +# None +# +# Outputs: +# Writes user_name and user_id to the PAID_USERS associative array. +# +# Returns: +# Returns 0 on successful data retrieval, non-zero on SQL query failure. +# +# Usage: +# get_paid_users_from_database +# if [ $? -ne 0 ]; then +# echo "Failed to retrieve paid users from the database." +# fi +####################################### +get_paid_users_from_database() { + log_message "Starting to retrieve paid users from the database." + + local sql="SELECT user_name, user_id FROM billing.users WHERE archived = false" + local result + + # Execute the SQL query + if ! result=$(mysql --login-path=billing -sN -e "$sql"); then + log_error "Error: Failed to execute SQL query: $sql" + return 1 + fi + + # Check if result is empty + if [ -z "$result" ]; then + log_error "Error: No paid users found in the database." + return 1 + fi + + # Read the result and populate the PAID_USERS associative array + local user_name user_id + while read -r user_name user_id; do + PAID_USERS["$user_name"]="$user_id" + done <<< "$result" + + log_message "Successfully retrieved paid users from the database." 
+ return 0 +} + +####################################### +# Convert time duration from HH:MM:SS format to seconds. +# This function takes a time duration string in HH:MM:SS format and +# converts it to the total number of seconds. +# +# Globals: +# LOG_FILE - Path to the log file. +# +# Arguments: +# $1 - Time duration string in HH:MM:SS format. +# +# Outputs: +# Prints the total number of seconds. +# +# Returns: +# Returns 0 on successful conversion, non-zero on invalid input format. +# +# Usage: +# seconds=$(convert_to_seconds "01:23:45") +# if [ $? -ne 0 ]; then +# echo "Invalid time format." +# fi +####################################### +convert_to_seconds() { + log_message "Starting conversion of time string to seconds: $1" + + local elapsed_time=$1 + local days=0 + local hours=0 + local minutes=0 + local seconds=0 + + # Check if days are present in the time format + if [[ $elapsed_time == *-* ]]; then + days=${elapsed_time%%-*} + elapsed_time=${elapsed_time#*-} + fi + + # Split the time into hours, minutes, and seconds + IFS=':' read -ra time_parts <<< "$elapsed_time" + hours=$((10#${time_parts[0]})) + minutes=$((10#${time_parts[1]})) + seconds=$((10#${time_parts[2]})) + + # Calculate total seconds + total_seconds=$((days * 86400 + hours * 3600 + minutes * 60 + seconds)) + echo "$total_seconds" + log_message "Converted $1 to $total_seconds seconds" + return 0 +} + +####################################### +# Extract the GPU type from a resource allocation string. +# This function takes a resource allocation string, which includes +# GPU details, and extracts the GPU type (e.g., A100, H100). +# +# Globals: +# LOG_FILE - Path to the log file. +# +# Arguments: +# $1 - Resource allocation string. +# +# Outputs: +# Prints the GPU type (a100, h100). +# +# Returns: +# Returns 0 on successful extraction, non-zero on failure. +# +# Usage: +# gpu_type=$(extract_gpu_type "gpu:A100:4") +# if [ $? -ne 0 ]; then +# echo "Failed to extract GPU type." +# fi +####################################### +extract_gpu_type() { + log_message "Starting extraction of GPU type from allocation string: $alloc_tres" + local gpu_type + + if ! gpu_type=$(echo "$1" | grep -o 'gres/gpu:[^,]*' | cut -d ':' -f2 | cut -d '=' -f1); then + log_error "Error: Failed to extract GPU type from allocation string: $alloc_tres" + return 1 + fi + + log_message "Extracted GPU type: $gpu_type" + echo "$gpu_type" + return 0 +} + +####################################### +# Extract the GPU quantity from a resource allocation string. +# This function takes a resource allocation string that includes +# details about the GPU allocation and extracts the number of GPUs +# allocated. +# +# Globals: +# LOG_FILE - Path to the log file. +# +# Arguments: +# $1 - Resource allocation string (e.g., "gpu:A100:4"). +# +# Outputs: +# Prints the number of GPUs allocated. +# +# Returns: +# Returns 0 on successful extraction, non-zero on failure. +# +# Usage: +# gpu_quantity=$(extract_gpu_quantity "gpu:A100:4") +# if [ $? -ne 0 ]; then +# echo "Failed to extract GPU quantity." +# fi +####################################### +extract_gpu_quantity() { + log_message "Starting extraction of GPU quantity from allocation string: $alloc_tres" + gpu_quantity="" + + # Extract the string that contains 'gres/gpu:' + gpu_info=$(echo "$1" | grep -o 'gres/gpu:[^,]*') + + if [ -n "$gpu_info" ]; then + # Extract the quantity + if ! 
gpu_quantity=$(echo "$gpu_info" | cut -d '=' -f2); then + log_error "Error: Failed to extract GPU quantity from allocation string: $alloc_tres" + return 1 + fi + fi + + log_message "Extraced GPU quantity: $gpu_quantity" + echo "$gpu_quantity" + return 0 +} + +####################################### +# Retrieve and process GPU usage per user. +# This function fetches GPU usage data from a source, processes the data +# to calculate total usage per user for different GPU types (A100, H100), +# and updates global associative arrays with this information. +# +# Globals: +# PAID_USERS - Associative array of paid users (user_name to user_id). +# TOTAL_A100_USAGE_PER_USER - Associative array of total A100 GPU usage per user. +# TOTAL_H100_USAGE_PER_USER - Associative array of total H100 GPU usage per user. +# LOG_FILE - Path to the log file. +# +# Arguments: +# None +# +# Outputs: +# Updates TOTAL_A100_USAGE_PER_USER and TOTAL_H100_USAGE_PER_USER with GPU usage data. +# +# Returns: +# Returns 0 on successful processing, non-zero on failure. +# +# Usage: +# get_gpu_usage_per_user +# if [ $? -ne 0 ]; then +# echo "Failed to retrieve and process GPU usage data." +# fi +####################################### +get_gpu_usage_per_user() { + local usage_per_user + if ! usage_per_user=$(sacct -a -X --partition $PARTITION --format=user,elapsed,AllocTRES --starttime ${DATE}T00:00:00 --endtime ${DATE}T23:59:59 --state=bf,ca,cd,dl,f,nf,oom,pr,to --parsable2); then + log_error "Error: Failed to fetch GPU usage data." + return 1 + fi + + # Process each line of the fetched data + while read -r line; do + local user_name elapsed_time alloc_tres + read -r user_name elapsed_time alloc_tres <<< $(awk -F '|' '{print $1, $2, $3}' <<< "$line") + log_message "Processing data for user: $user_name, elapsed time: $elapsed_time, allocation: $alloc_tres" + + # Filter for paid users only + if [[ ${PAID_USERS[$user_name]+_} ]]; then + local user_id + local seconds + local gpu_type + local gpu_quantity + + user_id=${PAID_USERS[$user_name]} + + if ! seconds=$(convert_to_seconds "$elapsed_time"); then + log_error "Error: Failed to convert elapsed time to seconds for user: $user_name, elapsed time: $elapsed_time" + return 1 + fi + + if ! gpu_type=$(extract_gpu_type "$alloc_tres"); then + log_error "Error: Failed to extract GPU type from allocation string: $alloc_tres" + return 1 + fi + + if ! gpu_quantity=$(extract_gpu_quantity "$alloc_tres"); then + log_error "Error: Failed to extract GPU quantity from allocation string: $alloc_tres" + return 1 + fi + + # Filter by gpu type + if [[ "$gpu_type" == "a100" ]]; then + TOTAL_A100_USAGE_PER_USER[$user_id]=$((TOTAL_A100_USAGE_PER_USER[$user_id] + $(($seconds * $gpu_quantity)))) + log_message "Updated A100 usage for user_id: $user_id, total usage: ${TOTAL_A100_USAGE_PER_USER[$user_id]}" + elif [[ "$gpu_type" == "h100" ]]; then + TOTAL_H100_USAGE_PER_USER[$user_id]=$((TOTAL_H100_USAGE_PER_USER[$user_id] + $(($seconds * $gpu_quantity)))) + log_message "Updated H100 usage for user_id: $user_id, total usage: ${TOTAL_H100_USAGE_PER_USER[$user_id]}" + fi + fi + done <<< "$usage_per_user" + + log_message "Successfully processed GPU usage per user." + return 0 +} + +####################################### +# Insert GPU usage data into the database. +# This function constructs and executes an SQL INSERT statement to record +# the GPU usage data into the database for billing purposes. +# +# Globals: +# TOTAL_A100_USAGE_PER_USER - Associative array of total A100 GPU usage per user. 
+# TOTAL_H100_USAGE_PER_USER - Associative array of total H100 GPU usage per user. +# DATE - The date for which usage is being recorded. +# TABLE - The name of the database table where usage records are inserted. +# LOG_FILE - Path to the log file. +# +# Arguments: +# None +# +# Outputs: +# Inserts data into the database and logs the operation. +# +# Returns: +# Returns 0 on successful insertion, non-zero on failure. +# +# Usage: +# insert_gpu_usage_into_db +# if [ $? -ne 0 ]; then +# echo "Failed to insert GPU usage data into the database." +# fi +####################################### +save_gpu_usage_to_database() { + log_message "Starting insertion of GPU usage data into the database" + + local sql_values=() + local user_id total_seconds + + # Append INSERT statements for A100 usage records + for user_id in "${!TOTAL_A100_USAGE_PER_USER[@]}"; do + total_seconds="${TOTAL_A100_USAGE_PER_USER[$user_id]}" + sql_values+=("($user_id, 1, '${DATE} 00:00:00', '${DATE} 23:59:59', $total_seconds)") + done + + # Append INSERT statements for H100 usage records + for user_id in "${!TOTAL_H100_USAGE_PER_USER[@]}"; do + total_seconds="${TOTAL_H100_USAGE_PER_USER[$user_id]}" + sql_values+=("($user_id, 4, '${DATE} 00:00:00', '${DATE} 23:59:59', $total_seconds)") + done + + # Check if there are values to insert + if [ ${#sql_values[@]} -eq 0 ]; then + log_error "Error: No GPU usage data to insert into the database" + return 1 + fi + + local sql="INSERT INTO billing.$TABLE (user_id, resource_spec_id, usage_start_time, usage_end_time, usage_amount) VALUES " + sql+=$(IFS=','; echo "${sql_values[*]}") + sql+=";" + + # Execute the SQL query and capture result + if ! mysql --login-path=billing -e "$sql"; then + log_error "Error: Failed to insert data into $TABLE" + return 1 + fi + + log_message "Successfully inserted GPU usage data into the database" + return 0 +} + +####################################### +# Main function to coordinate the retrieval and insertion of GPU usage data. +# This function orchestrates the process of fetching paid users, retrieving GPU usage data, +# and inserting the usage data into the database. +# +# Globals: +# LOG_FILE - Path to the log file. +# +# Arguments: +# None +# +# Outputs: +# Logs the progress and results of each step to the log file. +# +# Returns: +# Returns 0 on success, exit on failure. +####################################### +main() { + log_message "Starting GPU usage billing process" + + log_message "Step 1: Retrieving paid users from the database" + if ! get_paid_users_from_database; then + log_error "Error: Failed to retrieve paid users from the database" + log_error "Exiting." + exit 1 + fi + log_message "Successfully retrieved paid users" + + log_message "Step 2: Retrieving GPU usage per user" + if ! get_gpu_usage_per_user; then + log_error "Error: Failed to retrieve GPU usage data per user." + log_error "Exiting." + exit 1 + fi + log_message "Successfully retrieved GPU usage data per user" + + log_message "Step 3: Inserting GPU usage data into the database" + if ! save_gpu_usage_to_database; then + log_error "Error: Failed to insert GPU usage data into the database" + log_error "Exiting." 
+ exit 1 + fi + log_message "Successfully inserted GPU usage data into the database" + + log_message "Completed GPU usage billing process" + return 0 +} + +main diff --git a/playbooks/roles/billing/files/invoice.sh b/playbooks/roles/billing/files/invoice.sh new file mode 100755 index 00000000..da14a9fb --- /dev/null +++ b/playbooks/roles/billing/files/invoice.sh @@ -0,0 +1,201 @@ +#!/bin/bash +# +# Generate and execute SQL queries to retrieve usage and cost information from a billing database. +# +# Author: Andriy Novykov +# Description: This script retrieves usage and cost data for various resources and outputs the results in CSV format. + +set -u + +# Global variables +ACCOUNT_NAME="%" +readonly LOG_FILE="/opt/oci-hpc/logs/invoice.log" +VERBOSE=false + +# Log error function +log_error() { + local message="$1" + local timestamp + timestamp=$(date '+%Y-%m-%d %H:%M:%S') + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" +} + +# Log message function +log_message() { + if [ "$VERBOSE" = true ]; then + local message="$1" + local timestamp + timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" + fi +} + +# Help function +show_help() { +cat << EOF +Usage: ${0##*/} [options] + +This script generates and executes SQL queries to retrieve usage and cost information from a billing database. + +Options: + -h, --help Display this help and exit + -v, --verbose Enable verbose mode (log steps and errors otherwise just errors by default) + -a, --account Specify the account name to filter on (default: '%') + +This script generates and executes SQL queries to extract usage and cost data for various resources from a billing database. +It supports multiple resource types including A100 Usage, Network Egress, and Filesystem Usage. +The script outputs the results in CSV format. + +Prerequisites: + - MySQL client installed and accessible in the PATH + - Properly configured MySQL credentials using mysql_config_editor + +EOF +} + +# Parse options +while [ "$#" -gt 0 ]; do + case $1 in + -h|--help) + show_help + exit + ;; + -a|--account) + if [ -n "$2" ]; then + ACCOUNT_NAME="$2" + shift + else + echo 'ERROR: "--account" requires a non-empty option argument.' + exit 1 + fi + ;; + -v|--verbose) + VERBOSE=true + ;; + --) # End of all options + shift + break + ;; + -*) + echo "Error: Unknown option: $1" >&2 + show_help + exit 1 + ;; + *) # No more options + break + ;; + esac + shift +done + +# Function to generate invoice sql query +generate_sql_query() { + local resource_spec_id="$1" + local usage_divisor="$2" + local cost_divisor="$3" + local account_name="$4" + + # Generate the SQL query using the provided arguments + cat <<EOF +SELECT + a.account_id, + a.account_name, + DATE_FORMAT(ur.usage_start_time, '%Y-%m') AS invoice_month, + rs.specification_name AS spec_name, + rt.resource_name, + p.price_per_unit, + SUM(ur.usage_amount) / ${usage_divisor} AS total_usage, + SUM(ur.usage_amount * p.price_per_unit) / ${cost_divisor} AS total_cost +FROM + billing.usage_records ur + JOIN billing.users u ON ur.user_id = u.user_id + JOIN billing.accounts a ON u.account_id = a.account_id + JOIN billing.resource_specifications rs ON ur.resource_spec_id = rs.resource_spec_id + JOIN billing.resource_types rt ON rs.resource_type_id = rt.resource_type_id + JOIN billing.pricing p ON p.account_id = a.account_id AND p.resource_spec_id = rs.resource_spec_id +WHERE + (p.price_effective_date <= ur.usage_start_time) + AND (p.price_end_date IS NULL OR p.price_end_date >= ur.usage_end_time) + AND rs.resource_spec_id = ${resource_spec_id} + AND a.account_name LIKE '${account_name}' + AND a.archived = FALSE +GROUP BY + a.account_id, a.account_name, DATE_FORMAT(ur.usage_start_time, '%Y-%m'), rs.specification_name, rt.resource_name, p.price_per_unit +ORDER BY + a.account_id, DATE_FORMAT(ur.usage_start_time, '%Y-%m'), rt.resource_name, rs.specification_name; +EOF +} + +# Main function +main() { + log_message "Processing invoices." + + echo "account_id, account_name, invoice_month, spec_name, resource_name, price_per_unit, total_usage, total_cost" + + # A100 Usage + # Parameters + log_message "Processing A100 usage."
+ resource_spec_id=1 + usage_divisor=3600 + cost_divisor=$usage_divisor + + query=$(generate_sql_query "$resource_spec_id" "$usage_divisor" "$cost_divisor" "$ACCOUNT_NAME") + result=$(mysql --silent -B -e "${query}") + if [ $? -ne 0 ]; then + log_error "Error: Failed to process A100 usage." + log_error "Exiting." + exit 1 + fi + echo "${result// /,}" + log_message "Successfully processed A100 usage." + + # Network Egress Usage + # Parameters + log_message "Processing network egress usage." + resource_spec_id=2 + usage_divisor=1000000000 + cost_divisor=$usage_divisor + + query=$(generate_sql_query "$resource_spec_id" "$usage_divisor" "$cost_divisor" "$ACCOUNT_NAME") + result=$(mysql --silent -B -e "${query}") + if [ $? -ne 0 ]; then + log_error "Error: Failed to process network egress usage." + log_error "Exiting." + exit 1 + fi + echo "${result// /,}" + log_message "Successfully processed network egress usage." + + # Filesystem Usage + # Parameters + log_message "Processing filesystem usage." + resource_spec_id=3 + usage_divisor=1000000000 + cost_divisor=$usage_divisor + + query=$(generate_sql_query "$resource_spec_id" "$usage_divisor" "$cost_divisor" "$ACCOUNT_NAME") + result=$(mysql --silent -B -e "${query}") + if [ $? -ne 0 ]; then + log_error "Error: Failed to process filesystem usage." + log_error "Exiting." + exit 1 + fi + echo "${result// /,}" + log_message "Successfully processed filesystem usage." + + log_message "Successfully processed invoices." +} + +main diff --git a/playbooks/roles/billing/files/network.sh b/playbooks/roles/billing/files/network.sh new file mode 100755 index 00000000..522d103b --- /dev/null +++ b/playbooks/roles/billing/files/network.sh @@ -0,0 +1,362 @@ +#!/bin/bash + +####################################################### +# Description: +# This script records network egress traffic usage by users across multiple hosts. +# It gathers network usage data from each host, aggregates the data by user, and then stores this +# information in a database for billing and tracking purposes. The script is tailored for an environment +# with Slurm Workload Manager, iptables, SSH, and MySQL. +# +# Features: +# - Gathers network usage data from hosts listed by Slurm Workload Manager. +# - Processes and aggregates network usage data by user ID. +# +# Usage: +# - This script should be scheduled to run hourly with a cron job. +# - Example cron job: 0 * * * * /opt/oci-hpc/billing/network.sh +# +# Requirements: +# - Slurm Workload Manager: for fetching the list of hosts. +# - iptables: for gathering network usage statistics. +# - SSH: for remote execution of commands on listed hosts. +# - MySQL client installed and accessible in the PATH. 
+# - Properly configured MySQL credentials using mysql_config_editor +# +####################################################### +set -u + +# Global variables +# Gather list of hosts from Slurm +readonly HOSTS=$(sudo sinfo -S "%n" -o "%n" | tail -n +2) +readonly TABLE='usage_records' +readonly START_TIME=$(date -d "-1 hour" +"%Y-%m-%d %H:00:00") +readonly END_TIME=$(date -d "-1 hour" +"%Y-%m-%d %H:59:59") +readonly LOG_FILE="/opt/oci-hpc/logs/billing/network.log" +VERBOSE=false + +# Associative array +declare -A TOTAL_NETWORK_USAGE_PER_USER + +# Log error function +log_error() { + local message="$1" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" +} + +# Log message function +log_message() { + if [ "$VERBOSE" = true ]; then + local message="$1" + local timestamp=$(date '+%Y-%m-%d %H:%M:%S') + + # Append to log file + echo "${timestamp}: ${message}" >> "${LOG_FILE}" + fi +} + +# Help function +show_help() { +cat << EOF +Usage: ${0##*/} [options] + +This script records network egress traffic usage by users across multiple hosts. +It gathers network usage data from each host, aggregates the data by user, and then stores this +information in a database for billing and tracking purposes. The script is tailored for an environment +with Slurm Workload Manager, iptables, SSH, and MySQL. + +Options: + -h, --help Display this help and exit + -v, --verbose Enable verbose mode (log steps and errors otherwise just errors by default) + +Prerequisites: + - Slurm Workload Manager: for fetching the list of hosts. + - iptables: for gathering network usage statistics. + - SSH: for remote execution for commands on listed hosts. + - MySQL client installed and accessible in the PATH. + - Properly configured MySQL credentials using mysql_config_editor + +EOF +} + +# Parse options +while [ "$#" -gt 0 ]; do + case $1 in + -h|--help) + show_help + exit + ;; + -v|--verbose) + VERBOSE=true + ;; + --) # End of all options + shift + break + ;; + -*) + echo "Error: Unknown option: $1" >&2 + show_help + exit 1 + ;; + *) # No more options + break + ;; + esac + shift +done + +####################################### +# Retrieve network usage per user from a specified host. +# This function connects to a given host via SSH, executes commands to fetch +# network traffic data filtered by user, and then resets the usage counters. +# It uses the global array to accumulate data and logs the operations. +# +# Globals: +# TOTAL_NETWORK_USAGE_PER_USER - Associative array to store accumulated network +# usage data per user. Updated with data fetched. +# LOG_FILE - Path to the log file for logging operation status. +# +# Arguments: +# host - The hostname of the server from which to fetch network usage data. +# +# Outputs: +# Updates the TOTAL_NETWORK_USAGE_PER_USER associative array with the network +# usage data fetched from the specified host. +# Logs messages to LOG_FILE detailing the outcome of the operations. +# +# Returns: +# Returns 0 if the network usage data is successfully retrieved and processed, +# 1 if any errors occur during SSH connection or command execution. +# +# Usage: +# get_network_usage_per_user "server01" +# if [ $? -ne 0 ]; then +# echo "Failed to retrieve network usage data from the host." +# fi +####################################### +get_network_usage_per_user() { + log_message "Starting to retrieve network usage per user." 
+ + local host=$1 + # Validate host + if [[ -z "$host" ]]; then + log_return "No host specified for getting network usage." + return 1 + fi + + local iptable_cmd='sudo iptables -L USER_TRAFFIC -v -x -n | awk '\''NR>3 {print $2, $13}'\'' && sudo iptables -Z USER_TRAFFIC' + local result + result=$(ssh "$host" "$iptable_cmd") + + # Error handling + if [ $? -ne 0 ]; then + log_error "Error: Failed to execute SSH command on host $host with status $?" + return 1 + fi + + echo "$result" + log_message "Successfully retrieved network usage for host $host." + return 0 +} + +####################################### +# Process network usage data. +# This function reads through the network usage data provided as a parameter, +# extracts relevant details (user ID and bytes used), and updates the +# TOTAL_NETWORK_USAGE_PER_USER associative array. It also performs data validation +# and logs any errors encountered during processing. +# +# Globals: +# TOTAL_NETWORK_USAGE_PER_USER - Associative array to store accumulated network +# usage data per user. +# LOG_FILE - Path to the log file for logging operation status. +# +# Arguments: +# usage_data - A string containing raw network usage data, typically multiple +# lines with each line containing a user ID and the amount of data +# used by that user. +# +# Outputs: +# Updates the TOTAL_NETWORK_USAGE_PER_USER associative array with the sum of +# bytes used by each user. +# Logs messages to LOG_FILE detailing any errors encountered during processing. +# +# Returns: +# Returns 0 if all data is processed successfully, 1 if any errors occur. +# +# Usage: +# process_network_usage "$raw_data" +# if [ $? -ne 0 ]; then +# echo "Error processing network usage data." +# fi +####################################### +process_network_usage() { + log_message "Starting to process network usage." + + local usage_data=$1 + local line bytes user_id + + if [[ -z "$usage_data" ]]; then + log_error "Error: Received empty usage data for processing." + return 1 + fi + + while read -r line; do + if ! read -r bytes user_id <<< $(awk '{print $1, $2}' <<< "$line"); then + log_error "Error: Failed to parse line: $line" + return 1 + fi + + # Validate data + if [[ ! "$bytes" =~ ^[0-9]+$ ]] || [[ -z "$user_id" ]]; then + log_error "Error: Invalid data: bytes='$bytes', user_id='$user_id'" + return 1 + fi + + # Update total usage + TOTAL_NETWORK_USAGE_PER_USER[$user_id]=$((TOTAL_NETWORK_USAGE_PER_USER[$user_id] + bytes)) + log_message "Updated user $user_id with $bytes bytes." + done <<< "$usage_data" + + log_message "Finished processing network usage." +} + +####################################### +# Save network usage data to the database. +# This function constructs an SQL query from the TOTAL_NETWORK_USAGE_PER_USER +# associative array to insert data into the specified database table. It executes +# this SQL query using the 'mysql' command-line tool, logs the operation, and +# handles any errors that occur. +# +# Globals: +# TOTAL_NETWORK_USAGE_PER_USER - Associative array containing user_ids as keys +# and network usage data as values. +# TABLE - Name of the database table where data is to be inserted. +# LOG_FILE - Path to the log file for logging operation status. +# +# Arguments: +# None +# +# Outputs: +# Writes network usage data to the database. +# Logs messages to LOG_FILE detailing the outcome of the operation. +# +# Returns: +# Returns 0 on successful data insertion, 1 on failure. +# +# Usage: +# save_network_usage_to_database +# if [ $? 
-ne 0 ]; then +# echo "Failed to save data to the database." +# fi +####################################### +save_network_usage_to_database() { + log_message "Attempting to save network usage to database." + + local sql_values=() + local user_id bytes + + # Check if there's data to process + if [ ${#TOTAL_NETWORK_USAGE_PER_USER[@]} -eq 0 ]; then + log_error "Error: No data to insert into the database." + return 1 + fi + + # Prepare SQL values + for user_id in "${!TOTAL_NETWORK_USAGE_PER_USER[@]}"; do + bytes=${TOTAL_NETWORK_USAGE_PER_USER[$user_id]} + sql_values+=("(${user_id}, 2, '${START_TIME}', '${END_TIME}', ${bytes})") + done + + # Construct SQL query + local sql="INSERT INTO billing.$TABLE (user_id, resource_spec_id, usage_start_time, usage_end_time, usage_amount) VALUES " + sql+=$(IFS=','; echo "${sql_values[*]}") + sql+=";" + + # Execute query and handle errors + if ! mysql --login-path=billing -e "$sql"; then + log_error "Error: Failed to insert data into $TABLE: $result" + return 1 + fi + + log_message "Succesfully saved network usage into $TABLE." +} + +####################################### +# Main function to orchestrate the network usage data collection process. +# This function initiates by logging the start of the data collection, retrieves +# a list of hosts from the Slurm Workload Manager, and iterates over each host to +# collect and process network usage data. It handles errors at each critical step, +# logs appropriate messages for actions and errors, and finally, saves the processed +# data to a database. The function concludes by logging the completion of the process. +# +# Globals: +# TOTAL_NETWORK_USAGE_PER_USER - Associative array updated with network usage data per user. +# LOG_FILE - Path to the log file where operation logs are stored. +# +# Arguments: +# None +# +# Outputs: +# Logs various operational messages and errors to LOG_FILE. +# Updates the TOTAL_NETWORK_USAGE_PER_USER associative array. +# +# Returns: +# Returns 0 on successful completion of all operations, 1 if any critical operation fails. +# +# Usage: +# main +# if [ $? -ne 0 ]; then +# echo "Network data collection process encountered an error." +# fi +####################################### +main() { + log_message "Starting network usage data collection." + + local -r hosts=$(sudo sinfo -S "%n" -o "%n" | tail -n +2) + + # Check if the hosts string is empty + if [ -z "$hosts" ]; then + log_error "Error: No hosts provided for processing. Exiting." + exit 1 + fi + + # Iterate over each host to gather network usage data. If we have data then process it. + for host in ${hosts[@]}; do + log_message "Collecting data from host: $host" + usage_per_user=$(get_network_usage_per_user "$host") + + # Error handling + if [ $? -ne 0 ]; then + log_error "Error: Failed to retrieve network usage data from host $host." + continue + fi + + # Check if the result is empty + if [ -z "$usage_per_user" ]; then + log_error "Error: No data received from host: $host" + continue + fi + + log_message "Processing data for host: $host" + if ! process_network_usage "$usage_per_user"; then + log_error "Error: Failed to process data for host: $host" + log_error "Exiting." + continue + fi + done + + log_message "Saving accumulated network usage data to the database." + if ! save_network_usage_to_database; then + log_error "Error: Failed to save accumulated network usage data into the database" + log_error "Exiting." + exit 1 + fi + + log_message "Data collection and storage complete." 
+} + +# Start the script +main diff --git a/playbooks/roles/billing/tasks/main.yml b/playbooks/roles/billing/tasks/main.yml new file mode 100644 index 00000000..38656e2e --- /dev/null +++ b/playbooks/roles/billing/tasks/main.yml @@ -0,0 +1,99 @@ +--- +- name: Ensure billing directory exists + file: + path: "/opt/oci-hpc/billing" + state: directory + owner: '{{ ansible_user }}' + group: '{{ ansible_user }}' + +- name: Copy files + become: true + copy: + src: '{{ item }}' + dest: '/opt/oci-hpc/billing/{{ item }}' + force: no + owner: '{{ ansible_user }}' + group: '{{ ansible_user }}' + with_items: + - billing.sql + +- name: Copy scripts + become: true + copy: + src: '{{ item }}' + dest: '/opt/oci-hpc/billing/{{ item }}' + force: no + owner: '{{ ansible_user }}' + group: '{{ ansible_user }}' + mode: 0755 + with_items: + - filesystem.sh + - gpu.sh + - network.sh + - invoice.sh + +- name: Install pexpect + become: true + shell: "/usr/bin/pip install --trusted-host pypi.org --trusted-host files.pythonhosted.org --trusted-host pypi.python.org pexpect" + +- name: Check if MySQL authentication credentials have been set + shell: "mysql_config_editor print --all | grep billing" + register: billing_credentials_exist_result + ignore_errors: true + +- name: Set a fact based on whether MySQL credentials have been set + set_fact: + billing_credentials_are_set: "{{ 'billing' in billing_credentials_exist_result.stdout }}" + +- name: Add MySQL authentication credentials for billing database + expect: + command: /bin/bash -c "mysql_config_editor set --login-path=billing --host={{ billing_mysql_ip }} --user={{ billing_mysql_db_admin_username }} --password" + responses: + "Enter password:": "{{ billing_mysql_db_admin_password }}\n" + when: not billing_credentials_are_set + +- name: Check if database exists + shell: "mysql --login-path=billing -e 'SHOW DATABASES LIKE \"billing\";'" + register: db_exists_result + ignore_errors: true + +- name: Set a fact based on the database existence + set_fact: + billing_database_exists: "{{ 'billing' in db_exists_result.stdout }}" + +- name: Create billing database + shell: "mysql --login-path=billing < /opt/oci-hpc/billing/billing.sql" + when: not billing_database_exists + +- name: Create a billing filesystem usage cron file under /etc/cron.d + cron: + name: Collect filesystem usage for billing + minute: "0" + user: '{{ ansible_user }}' + job: "/opt/oci-hpc/billing/filesystem.sh" + disabled: true + +- name: Create a billing gpu usage cron file under /etc/cron.d + cron: + name: Collect gpu usage for billing + minute: "0" + hour: "0" + user: '{{ ansible_user }}' + job: "/opt/oci-hpc/billing/gpu.sh" + disabled: true + +- name: Create a network egress usage cron file under /etc/cron.d + cron: + name: network egress usage for billing + minute: "0" + user: '{{ ansible_user }}' + job: "/opt/oci-hpc/billing/network.sh" + disabled: true + +- name: Create example invoice cron file under /etc/cron.d + cron: + name: "Example: Generate invoice for {account_name}" + user: '{{ ansible_user }}' + job: "/opt/oci-hpc/billing/invoice.sh --verbose --account \"{account_name}\" | sudo tee /data/{account_name}/invoices/{account_name}_invoices.csv >/dev/null" + + diff --git a/playbooks/roles/iptables/tasks/main.yml b/playbooks/roles/iptables/tasks/main.yml new file mode 100644 index 00000000..10e5a65f --- /dev/null +++ b/playbooks/roles/iptables/tasks/main.yml @@ -0,0 +1,43 @@ +--- +- name: Install iptables services for persistence + vars: + package_name: + - iptables-services + include_role: + 
name: safe_yum + +- name: Start iptables services + ansible.builtin.service: + name: "iptables" + enabled: true + state: started + +- name: Setup iptables chains and rules for billing + become: true + block: + # Ansible 2.9 doesn't allow for chain management so we need to create the chain ourselves + - name: Create user-defined chain + shell: iptables -N USER_TRAFFIC + ignore_errors: true + + - name: Insert USER_TRAFFIC chain to OUTPUT + ansible.builtin.iptables: + chain: OUTPUT + jump: USER_TRAFFIC + action: insert + + - name: Append rule to USER_TRAFFIC chain + ansible.builtin.iptables: + table: filter + chain: USER_TRAFFIC + destination: 172.16.0.0/21 + jump: RETURN + action: append + + # Save the rules + - name: Save iptables rules + shell: iptables-save | tee /etc/sysconfig/iptables + + - name: Save ip6tables rules + shell: ip6tables-save | tee /etc/sysconfig/ip6tables + when: billing | default(false) | bool \ No newline at end of file diff --git a/playbooks/site.yml b/playbooks/site.yml index a0911d88..38e685ef 100644 --- a/playbooks/site.yml +++ b/playbooks/site.yml @@ -38,6 +38,7 @@ - etc-hosts - boot-volume - mpivars + - iptables - hosts: all become: true @@ -284,6 +285,9 @@ when: autoscaling_monitoring|default(false)|bool - include_role: name: cron + - include_role: + name: billing + when: billing|default(false)|bool - hosts: compute diff --git a/schema.yaml b/schema.yaml index 037e685b..e761ee77 100755 --- a/schema.yaml +++ b/schema.yaml @@ -171,7 +171,12 @@ variableGroups: - ${pyxis} - ${pam} - ${sacct_limits} - + - title: "Billing" + variables: + - ${billing} + - ${billing_shape_name} + - ${billing_mysql_db_admin_username} + - ${billing_mysql_db_admin_password} - title: "Hidden" variables: - ${region} @@ -1579,3 +1584,52 @@ variables: and: - ${use_marketplace_image_login} - ${login_node} + + old_marketplace_listing_login: + type: enum + title: "Image version" + description: "Marketplace listing to use" + required: true + enum: + - "1. Oracle Linux 7.9 OFED 5.3-1.0.0.1 RHCK 20210607" + - "2. Oracle Linux 7.8 OFED 5.0-1.0.0.0 UEK 20200826" + - "3. Oracle Linux 7.7 OFED 4.4-2.0.7.0 UEK 20200229" + - "4. Oracle Linux 7.9 OFED 5.0-2.1.8.0 RHCK 20210709" + default: "4. Oracle Linux 7.9 OFED 5.0-2.1.8.0 RHCK 20210709" + visible: + and: + - ${use_marketplace_image_login} + - ${use_old_marketplace_image_login} + - not: + - ${use_standard_image_login} + + billing: + type: boolean + title: "Enable billing" + default: false + description: "Enable billing for the cluster." + visible: true + + billing_shape_name: + type: string + title: "Billing Shape Name" + default: "MySQL.VM.Standard.E4.1.8GB" + required: true + description: "Billing Shape Name" + visible: ${billing} + + billing_mysql_db_admin_username: + type: string + title: "MySQL Billing Admin DB username" + default: "opc" + required: true + description: "" + visible: ${billing} + + billing_mysql_db_admin_password: + type: string + title: "MySQL Billing Admin DB password" + default: "Billing1234!" 
+ description: "" + required: true + visible: ${billing} \ No newline at end of file diff --git a/slurm_ha.tf b/slurm_ha.tf index bc3d04cd..d11fc7b3 100644 --- a/slurm_ha.tf +++ b/slurm_ha.tf @@ -249,7 +249,11 @@ resource "null_resource" "cluster_backup" { region = var.region, tenancy_ocid = var.tenancy_ocid, api_fingerprint = var.api_fingerprint, - api_user_ocid = var.api_user_ocid + api_user_ocid = var.api_user_ocid, + billing = var.billing, + billing_mysql_db_admin_username = var.billing_mysql_db_admin_username, + billing_mysql_db_admin_password = var.billing_mysql_db_admin_password, + billing_mysql_ip = var.billing ? oci_mysql_mysql_db_system.billing_mysql_db_system[0].ip_address : "" }) destination = "/opt/oci-hpc/playbooks/inventory" @@ -387,7 +391,11 @@ resource "null_resource" "cluster_backup" { bastion_username = var.bastion_username, compute_username = var.compute_username, use_multiple_ads = var.use_multiple_ads, - use_compute_agent = var.use_compute_agent + use_compute_agent = var.use_compute_agent, + billing = var.billing, + billing_mysql_db_admin_username = var.billing_mysql_db_admin_username, + billing_mysql_db_admin_password = var.billing_mysql_db_admin_password, + billing_mysql_ip = var.billing ? oci_mysql_mysql_db_system.billing_mysql_db_system[0].ip_address : "" }) destination = "/opt/oci-hpc/conf/variables.tf" diff --git a/variables.tf b/variables.tf index 20c4d9ca..8c5c4518 100755 --- a/variables.tf +++ b/variables.tf @@ -224,6 +224,26 @@ variable "monitoring_shape_name" { default = "MySQL.VM.Standard.E3.1.16GB" } +variable "billing" { + type= bool + default = false +} + +variable "billing_mysql_db_admin_username" { + type = string + default = "opc" +} + +variable "billing_mysql_db_admin_password" { + type = string + default = "Billing1234!" +} + +variable "billing_shape_name" { + type = string + default = "MySQL.VM.Standard.E4.1.8GB" +} + variable "admin_username" { type = string default = "admin"
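The collection and invoice scripts authenticate through the `billing` MySQL login path, which the billing role sets up automatically from the variables above. As a sketch, the equivalent manual setup and a quick connectivity check look roughly like the following, where `<billing_mysql_ip>` is a placeholder for the deployed DB system's IP address:

```
# Store credentials for the billing DB system (prompts for the admin password)
mysql_config_editor set --login-path=billing --host=<billing_mysql_ip> --user=opc --password

# Verify the login path works and the billing schema exists
mysql --login-path=billing -e 'SHOW DATABASES LIKE "billing";'
```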