Skip to content

Commit

Permalink
add setup script for dat file download
Browse files Browse the repository at this point in the history
  • Loading branch information
microcassidy committed Jul 21, 2023
1 parent 8f54469 commit 25e9418
Show file tree
Hide file tree
Showing 262 changed files with 76 additions and 2 deletions.
77 changes: 75 additions & 2 deletions rust/tests/dat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,76 @@ use serde::Deserialize;
use std::path::Path;
use std::sync::Arc;
pub type TestResult = Result<(), Box<dyn std::error::Error + 'static>>;
use std::sync::Once;

static INIT: Once = Once::new();

/// Prepares the DAT test data before a test touches it.
///
/// The work is delegated to `setup::run` and guarded by the `INIT`
/// [`Once`], so calling this from every test is cheap: only the first call
/// runs the setup routine, and concurrent callers wait until it has finished.
fn initialize() {
    let prepare_data: fn() = setup::run;
    INIT.call_once(prepare_data);
}

pub mod setup {
    //! Setup helper for the DAT test data.
    //!
    //! [`run`] downloads the release tarball of the `delta-incubator/dat`
    //! project, unpacks it into [`OUTPUT_FOLDER`] and finally drops a `.done`
    //! marker file there. The marker is what later runs look for to decide
    //! that the download can be skipped.
    use std::fs::{self, File};
    use std::io::Write;
    use std::path::{Path, PathBuf};

    use flate2::read::GzDecoder;
    use tar::Archive;

    /// Release of the DAT data set to download.
    ///
    /// NOTE: the same version is spelled out in [`OUTPUT_FOLDER`]; keep the
    /// two in sync when bumping it.
    const VERSION: &str = "0.0.2";

    /// Directory the tarball is unpacked into (a relative path, resolved
    /// against the current working directory of the test process).
    pub const OUTPUT_FOLDER: &str = "tests/data/dat/v0.0.2";

    /// Name of the marker file written into [`OUTPUT_FOLDER`] once the data
    /// has been downloaded and unpacked.
    const DONE_MARKER_NAME: &str = ".done";

    /// Downloads and unpacks the DAT data unless the marker file already exists.
    ///
    /// # Panics
    ///
    /// Panics if the download, the extraction or writing the marker file fails.
    pub fn run() {
        if dat_exists() {
            return;
        }

        let tarball_data = download_dat_files();
        extract_tarball(&tarball_data);
        // Written last so that a failed download/extraction is retried next time.
        write_done_file();
    }

    /// Path of the marker file, derived from [`OUTPUT_FOLDER`] so the two
    /// cannot drift apart.
    fn done_marker_path() -> PathBuf {
        Path::new(OUTPUT_FOLDER).join(DONE_MARKER_NAME)
    }

    /// Returns `true` when the marker file of a previous successful run exists.
    fn dat_exists() -> bool {
        done_marker_path().exists()
    }

    /// Fetches the gzip-compressed release tarball for [`VERSION`] and returns
    /// its raw bytes.
    ///
    /// # Panics
    ///
    /// Panics if the HTTP request fails or the response body cannot be read.
    fn download_dat_files() -> Vec<u8> {
        let tarball_url = format!(
            "https://github.com/delta-incubator/dat/releases/download/v{version}/deltalake-dat-v{version}.tar.gz",
            version = VERSION
        );

        let response = ureq::get(&tarball_url)
            .call()
            .expect("Failed to download DAT tarball");
        let mut tarball_data: Vec<u8> = Vec::new();
        response
            .into_reader()
            .read_to_end(&mut tarball_data)
            .expect("Failed to read DAT tarball response body");

        tarball_data
    }

    /// Decompresses `tarball_data` (gzip) and unpacks the contained tar
    /// archive into [`OUTPUT_FOLDER`], creating the folder if necessary.
    ///
    /// # Panics
    ///
    /// Panics if the folder cannot be created or the archive cannot be unpacked.
    fn extract_tarball(tarball_data: &[u8]) {
        // A byte slice is already an in-memory reader; no extra buffering needed.
        let tarball = GzDecoder::new(tarball_data);
        let mut archive = Archive::new(tarball);
        fs::create_dir_all(OUTPUT_FOLDER).expect("Failed to create output directory");
        archive
            .unpack(OUTPUT_FOLDER)
            .expect("Failed to unpack tarball");
    }

    /// Creates the marker file that makes subsequent [`run`] calls a no-op.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be created or written.
    fn write_done_file() {
        let mut done_file =
            File::create(done_marker_path()).expect("Failed to create .done file");
        // Write straight to the file: a buffered writer would defer the write
        // to its destructor, where I/O errors are silently dropped.
        done_file
            .write_all(b"done")
            .expect("Failed to write .done file");
    }
}

/// Utility for comparing a delta table
/// with a dataframe.
async fn deltaeq(ctx: &SessionContext, delta_ctx_name: &str, expected: DataFrame) -> bool {
let delta_df = ctx.table(delta_ctx_name).await.unwrap();
let delta_df_count = delta_df.clone().count().await.unwrap();
Expand All @@ -31,8 +100,10 @@ macro_rules! dat_test {
$(
#[tokio::test]
async fn $test_name() -> TestResult {
initialize();
let test_case = Path::new($test);
let root = Path::new("tests/data/dat-data/v0.0.2/reader_tests/generated").join(test_case);
let root = &format!("{output_folder}/out/reader_tests/generated", output_folder=setup::OUTPUT_FOLDER);
let root = Path::new(root).join(test_case);
let actual_path = root.join(Path::new("delta"));
let expected_path_root = root.join(Path::new("expected"));
let actual = open_table(&actual_path.to_str().unwrap()).await?;
Expand All @@ -56,7 +127,9 @@ async fn $test_name() -> TestResult {
let expected_metadata: TableVersionMetadata = serde_json::from_reader(expected_metadata_rdr)?;
let expected = ctx.read_parquet(expected_path.to_str().unwrap(), ParquetReadOptions::default()).await?;
let mut actual = open_table(&actual_path.to_str().unwrap()).await?;
actual.load_version(version).await?;
if actual.version() != version{
actual.load_version(version).await?;
}
assert!(expected_metadata.version == actual.version());
ctx.register_table("actual", Arc::new(actual))?;
assert!(deltaeq(&ctx, "actual", expected).await);
Expand Down
1 change: 1 addition & 0 deletions rust/tests/data/dat/v0.0.2/.done
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
done
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

0 comments on commit 25e9418

Please sign in to comment.