diff --git a/Cargo.toml b/Cargo.toml index 007482bf6..4e88716dc 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -23,7 +23,6 @@ resolver = "2" [workspace.dependencies] arrow = { version = "53", features = ["ipc_compression"] } arrow-flight = { version = "53", features = ["flight-sql-experimental"] } -arrow-schema = { version = "53", default-features = false } clap = { version = "3", features = ["derive", "cargo"] } configure_me = { version = "0.4.0" } configure_me_codegen = { version = "0.4.4" } @@ -35,7 +34,6 @@ datafusion-proto-common = "42.0.0" object_store = "0.11" prost = "0.13" prost-types = "0.13" -sqlparser = "0.50" tonic = { version = "0.12" } tonic-build = { version = "0.12", default-features = false, features = [ "transport", @@ -53,12 +51,12 @@ futures = { version = "0.3" } log = { version = "0.4" } parking_lot = { version = "0.12" } tempfile = { version = "3" } -dashmap = { version = "5.4.0" } +dashmap = { version = "6.1" } async-trait = { version = "0.1.4" } -serde = { version = "1.0.136" } -num_cpus = { version = "1.13.0" } +serde = { version = "1.0" } tokio-stream = { version = "0.1" } parse_arg = { version = "0.1" } +url = { version = "2.5" } # cargo build --profile release-lto [profile.release-lto] diff --git a/ballista-cli/Cargo.toml b/ballista-cli/Cargo.toml index 891f5a7ce..ec1cb5c5e 100644 --- a/ballista-cli/Cargo.toml +++ b/ballista-cli/Cargo.toml @@ -37,7 +37,6 @@ datafusion-cli = { workspace = true } dirs = "5.0.1" env_logger = { workspace = true } mimalloc = { version = "0.1", default-features = false } -num_cpus = { workspace = true } rustyline = "14.0.0" tokio = { workspace = true, features = ["macros", "rt", "rt-multi-thread", "sync", "parking_lot"] } diff --git a/ballista/client/Cargo.toml b/ballista/client/Cargo.toml index 038c62c82..7a63dcefc 100644 --- a/ballista/client/Cargo.toml +++ b/ballista/client/Cargo.toml @@ -37,10 +37,9 @@ datafusion-proto = { workspace = true } futures = { workspace = true } log = { workspace = true } parking_lot = { workspace = true } -sqlparser = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true } -url = { version = "2.5" } +url = { workspace = true } [dev-dependencies] ballista-executor = { path = "../executor", version = "0.12.0" } diff --git a/ballista/client/src/context.rs b/ballista/client/src/context.rs index 453296c2b..109524aa2 100644 --- a/ballista/client/src/context.rs +++ b/ballista/client/src/context.rs @@ -20,10 +20,10 @@ use datafusion::arrow::datatypes::SchemaRef; use datafusion::execution::context::DataFilePaths; +use datafusion::sql::sqlparser::ast::Statement; use datafusion::sql::TableReference; use log::info; use parking_lot::Mutex; -use sqlparser::ast::Statement; use std::collections::HashMap; use std::sync::Arc; diff --git a/ballista/core/Cargo.toml b/ballista/core/Cargo.toml index 835682934..8e5cb608e 100644 --- a/ballista/core/Cargo.toml +++ b/ballista/core/Cargo.toml @@ -60,12 +60,10 @@ prost = { workspace = true } prost-types = { workspace = true } rand = { workspace = true } serde = { workspace = true, features = ["derive"] } -sqlparser = { workspace = true } tokio = { workspace = true } tokio-stream = { workspace = true, features = ["net"] } tonic = { workspace = true } -url = "2.2" - +url = { workspace = true } [dev-dependencies] tempfile = { workspace = true } diff --git a/ballista/core/src/error.rs b/ballista/core/src/error.rs index 95bee2bf1..cbdd90a71 100644 --- a/ballista/core/src/error.rs +++ b/ballista/core/src/error.rs @@ -25,10 +25,9 @@ use std::{ use crate::serde::protobuf::failed_task::FailedReason; use crate::serde::protobuf::{ExecutionError, FailedTask, FetchPartitionError, IoError}; -use datafusion::arrow::error::ArrowError; use datafusion::error::DataFusionError; +use datafusion::{arrow::error::ArrowError, sql::sqlparser::parser}; use futures::future::Aborted; -use sqlparser::parser; pub type Result = result::Result; diff --git a/ballista/executor/Cargo.toml b/ballista/executor/Cargo.toml index b04abd9d5..e1822e9c1 100644 --- a/ballista/executor/Cargo.toml +++ b/ballista/executor/Cargo.toml @@ -49,7 +49,6 @@ datafusion-proto = { workspace = true } futures = { workspace = true } log = { workspace = true } mimalloc = { version = "0.1", default-features = false, optional = true } -num_cpus ={ workspace = true } parking_lot = { workspace = true } tempfile = { workspace = true } tokio = { workspace = true, features = [ diff --git a/ballista/executor/src/executor_process.rs b/ballista/executor/src/executor_process.rs index a15bfadbd..d3e78bac2 100644 --- a/ballista/executor/src/executor_process.rs +++ b/ballista/executor/src/executor_process.rs @@ -171,7 +171,7 @@ pub async fn start_executor_process(opt: Arc) -> Result<( let concurrent_tasks = if opt.concurrent_tasks == 0 { // use all available cores if no concurrency level is specified - num_cpus::get() + std::thread::available_parallelism().unwrap().get() } else { opt.concurrent_tasks }; diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 8fddfb4ef..84820d48a 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -33,14 +33,12 @@ default = ["mimalloc"] snmalloc = ["snmalloc-rs"] [dependencies] -arrow-schema = { workspace = true } ballista = { path = "../ballista/client", version = "0.12.0" } datafusion = { workspace = true } datafusion-proto = { workspace = true } env_logger = { workspace = true } futures = { workspace = true } mimalloc = { version = "0.1", optional = true, default-features = false } -num_cpus = { workspace = true } rand = { workspace = true } serde = { workspace = true } serde_json = "1.0.78" diff --git a/benchmarks/src/bin/tpch.rs b/benchmarks/src/bin/tpch.rs index 068b61c2d..ac35b3f14 100644 --- a/benchmarks/src/bin/tpch.rs +++ b/benchmarks/src/bin/tpch.rs @@ -17,13 +17,13 @@ //! Benchmark derived from TPC-H. This is not an official TPC-H benchmark. -use arrow_schema::SchemaBuilder; use ballista::extension::SessionConfigExt; use ballista::prelude::{ SessionContextExt, BALLISTA_COLLECT_STATISTICS, BALLISTA_DEFAULT_BATCH_SIZE, BALLISTA_DEFAULT_SHUFFLE_PARTITIONS, BALLISTA_JOB_NAME, }; use datafusion::arrow::array::*; +use datafusion::arrow::datatypes::SchemaBuilder; use datafusion::arrow::util::display::array_value_to_string; use datafusion::common::{DEFAULT_CSV_EXTENSION, DEFAULT_PARQUET_EXTENSION}; use datafusion::datasource::listing::ListingTableUrl; @@ -987,7 +987,7 @@ impl BenchmarkRun { Self { benchmark_version: env!("CARGO_PKG_VERSION").to_owned(), datafusion_version: DATAFUSION_VERSION.to_owned(), - num_cpus: num_cpus::get(), + num_cpus: std::thread::available_parallelism().unwrap().get(), start_time: SystemTime::now() .duration_since(SystemTime::UNIX_EPOCH) .expect("current time is later than UNIX_EPOCH")