-
Notifications
You must be signed in to change notification settings - Fork 11
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
scripts to convert tests from old format to new format and vice-versa #97
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,3 +6,6 @@ pytest | |
pyvelox | ||
pyyaml | ||
snowflake | ||
ruamel.yaml | ||
deepdiff | ||
pytz |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
import os | ||
import re | ||
import shutil | ||
|
||
from ruamel.yaml import YAML | ||
from deepdiff import DeepDiff | ||
|
||
from convert_tests_to_new_format import convert_directory, load_test_file | ||
from convert_tests_to_old_format import convert_directory as convert_directory_roundtrip | ||
|
||
# Initialize the YAML handler with ruamel to ensure consistency in parsing and dumping | ||
yaml = YAML() | ||
yaml.default_flow_style = None | ||
|
||
|
||
def normalize_data(data): | ||
"""Normalize the data by removing spaces around values and quotes around strings.""" | ||
if isinstance(data, dict): | ||
return {k: normalize_data(v) for k, v in data.items()} | ||
elif isinstance(data, list): | ||
return [normalize_data(item) for item in data] | ||
elif isinstance(data, str): | ||
# Remove extra spaces in specific cases, like decimal<38,0> vs decimal<38, 0> | ||
data = re.sub(r"\s*,\s*", ",", data) # Remove spaces around commas | ||
if data.lower() == "null": | ||
return "null" | ||
return data | ||
else: | ||
return data # Return non-string values as the | ||
|
||
|
||
def compare_yaml_files(file1, file2): | ||
data1 = normalize_data(load_test_file(file1)) | ||
data2 = normalize_data(load_test_file(file2)) | ||
|
||
diff = DeepDiff( | ||
data1, data2, ignore_order=True, ignore_numeric_type_changes=True, view="text" | ||
) | ||
if diff: | ||
print(f"\nDifferences found in '{file1}' vs '{file2}':") | ||
print(diff) | ||
return not diff | ||
|
||
|
||
def compare_directories(original_dir, roundtrip_dir): | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Document that this compares either format to a Substrait test format directory. |
||
count = 0 | ||
for root, _, files in os.walk(original_dir): | ||
for file_name in files: | ||
if file_name.endswith(".yaml"): | ||
original_file = os.path.join(root, file_name) | ||
relative_path = os.path.relpath(original_file, original_dir) | ||
roundtrip_file = os.path.join(roundtrip_dir, relative_path).replace( | ||
".test", ".yaml" | ||
) | ||
|
||
if not os.path.exists(roundtrip_file): | ||
print(f"File missing in roundtrip directory: {roundtrip_file}") | ||
count += 1 | ||
continue | ||
|
||
if not compare_yaml_files(original_file, roundtrip_file): | ||
count += 1 | ||
else: | ||
print(f"YAML content matches: {original_file} and {roundtrip_file}") | ||
return count | ||
|
||
|
||
def main(): | ||
# Directories | ||
initial_cases_dir = "../../cases" | ||
temp_dir = "./temp" | ||
intermediate_dir = f"{temp_dir}/substrait_cases" | ||
roundtrip_dir = f"{temp_dir}/cases" | ||
uri_prefix = ( | ||
"https://github.com/substrait-io/substrait/blob/main/extensions/substrait" | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. By referring to the GitHub directory this test will fail when the test cases and functions change breaking the CI here. Probably better to refer to the tests in the local submodule (when the tests are available). |
||
) | ||
|
||
# Step 1: Convert from `../../cases` to `./temp/substrait_cases/` | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're converting from initial cases, the variable may change especially if you end up pulling those locations from the command line (so CI can control where they are located). |
||
convert_directory(initial_cases_dir, intermediate_dir, uri_prefix) | ||
|
||
# Step 2: Convert from `./temp/substrait_cases/` to `./temp/roundtrip_cases/` | ||
convert_directory_roundtrip(intermediate_dir, roundtrip_dir) | ||
|
||
# Step 3: Compare YAML content in `./cases` and `./temp/roundtrip_cases/` | ||
count = compare_directories(initial_cases_dir, roundtrip_dir) | ||
if count == 0: | ||
print("All YAML files match between original and roundtrip directories.") | ||
else: | ||
print( | ||
f"Differences found in {count} YAML files between original and roundtrip directories." | ||
) | ||
|
||
shutil.rmtree(temp_dir) | ||
|
||
|
||
if __name__ == "__main__": | ||
main() |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should this run on every PR?