Support loading local timezone timestamps (#59)
* Comment out the bar/layered mock due to "Ambiguous reference" error
jonmmease authored Jan 24, 2022
1 parent 24e60d2 commit 4f261d0
Showing 34 changed files with 993 additions and 1,863 deletions.
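The key serialization change (shown in the json_writer.rs diff below) is that Date32, Date64, and Timestamp columns are now written to JSON as integer UTC milliseconds rather than formatted strings. As a rough illustration only, here is a small Python sketch of the unit conversions involved; the helper names and the sample day count are illustrative and not part of the commit.

```python
from datetime import datetime, timezone

MS_PER_DAY = 24 * 60 * 60 * 1000

def date32_to_millis(days: int) -> int:
    # Arrow Date32 stores days since the Unix epoch (1970-01-01 UTC).
    return days * MS_PER_DAY

def timestamp_to_millis(value: int, unit: str) -> int:
    # Arrow timestamps store an epoch offset in seconds, milliseconds,
    # microseconds, or nanoseconds; all are reduced to milliseconds here.
    divisor = {"s": 1, "ms": 1_000, "us": 1_000_000, "ns": 1_000_000_000}[unit]
    return value * 1_000 // divisor

# 2018-11-13 is 17848 days after the epoch -> 1542067200000 ms,
# which matches the expected Date32 value in the updated tests below.
assert date32_to_millis(17848) == int(
    datetime(2018, 11, 13, tzinfo=timezone.utc).timestamp() * 1000
)
assert timestamp_to_millis(1_542_129_070_011_375_885, "ns") == 1_542_129_070_011
```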
6 changes: 3 additions & 3 deletions BUILD.md
@@ -16,17 +16,17 @@ Install `wasm-pack`, following instructions at https://rustwasm.github.io/wasm-p

For Linux:
```bash
conda env create --name vegafusion_dev --file python/vegafusion-jupyter/conda-linux-64-3.10.lock
conda create --name vegafusion_dev --file python/vegafusion-jupyter/conda-linux-64-310.lock
```

For MacOS:
```bash
conda env create --name vegafusion_dev --file python/vegafusion-jupyter/conda-osx-64-3.10.lock
conda create --name vegafusion_dev --file python/vegafusion-jupyter/conda-osx-64-310.lock
```

For Windows:
```bash
conda env create --name vegafusion_dev --file python/vegafusion-jupyter/conda-win-64-3.10.lock
conda create --name vegafusion_dev --file python/vegafusion-jupyter/conda-win-64-310.lock
```

### Activate conda development environment
9 changes: 6 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default.

6 changes: 3 additions & 3 deletions python/vegafusion-jupyter/package-lock.json

Some generated files are not rendered by default.

2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/package.json
@@ -1,6 +1,6 @@
{
"name": "vegafusion-jupyter",
"version": "0.0.1",
"version": "0.0.2",
"description": "Altair Jupyter Widget library that relies on VegaFusion for serverside calculations",
"keywords": [
"jupyter",
16 changes: 12 additions & 4 deletions python/vegafusion-jupyter/tests/test_altair_mocks.py
@@ -118,8 +118,11 @@ def setup_module(module):
("bar/stacked_with_text_overlay", 0.999, 0.5),
("bar/trellis_stacked", 1.0, 0.5),
("bar/trellis_stacked", 1.0, 0.5),
("bar/with_negative_values", 1.0, 0.5),
("bar/layered", 1.0, 0.5),
# Ambiguous reference to field named 'month'
# ("bar/with_negative_values", 1.0, 0.5),
# ("bar/layered", 1.0, 0.5),
("bar/with_error_bars", 0.998, 0.5),
("casestudy/co2_concentration", 1.0, 0.5),
("casestudy/gapminder_bubble_plot", 1.0, 0.5),
@@ -130,8 +133,13 @@ def setup_module(module):
("casestudy/window_rank", 0.999, 0.5),
("casestudy/airports", 1.0, 0.5),
("casestudy/us_state_capitals", 1.0, 0.5),
("casestudy/falkensee", 1.0, 0.5),
("casestudy/us_employment", 1.0, 0.5),
# Ambiguous reference to field named 'start'
# ("casestudy/falkensee", 1.0, 0.5),
# Ambiguous reference to field named 'month'
# ("casestudy/us_employment", 1.0, 0.5),
("casestudy/top_k_items", 1.0, 0.5),
# Different order of ticks for equal bar lengths
2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/vegafusion_jupyter/_frontend.py
@@ -19,4 +19,4 @@
"""

module_name = "vegafusion-jupyter"
module_version = "^0.0.1"
module_version = "^0.0.2"
2 changes: 1 addition & 1 deletion python/vegafusion-jupyter/vegafusion_jupyter/_version.py
@@ -14,4 +14,4 @@
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

__version__ = "0.0.1"
__version__ = "0.0.2"
27 changes: 0 additions & 27 deletions python/vegafusion-jupyter/vegafusion_jupyter/transformer.py
@@ -41,33 +41,6 @@ def to_feather(data, file):
if data.index.name is not None:
data = data.reset_index()

# Localize naive datetimes to the local GMT offset
dt_cols = []
for col, dtype in data.dtypes.items():
if dtype.kind == 'M' and not isinstance(dtype, pd.DatetimeTZDtype):
dt_cols.append(col)

if dt_cols:
# Apply a timezone following the convention of JavaScript's Date.parse. Here a date without time info
# is interpreted as UTC midnight, but a date with time info is treated as local time when it doesn't
# have an explicit timezone.
offset_seconds = abs(time.timezone)
offset_hours = offset_seconds // 3600
offset_minutes = (offset_seconds - offset_hours * 3600) // 60
sign = "-" if time.timezone > 0 else "+"
local_timezone = f"{sign}{offset_hours:02}:{offset_minutes:02}"

mapping = dict()
for col in dt_cols:
if (data[col].dt.time == datetime.time(0, 0)).all():
# Assume no time info was provided, interpret as UTC
mapping[col] = data[col].dt.tz_localize("+00:00")
else:
# Assume time info was provided, interpret as local
mapping[col] = data[col].dt.tz_localize(local_timezone).dt.tz_convert(None)

data = data.assign(**mapping)

# Expand categoricals (not yet supported in VegaFusion)
for col, dtype in data.dtypes.items():
if isinstance(dtype, pd.CategoricalDtype):
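For reference, the transformer.py block removed above localized naive datetimes using the sign convention of Python's time.timezone (seconds west of UTC, so positive values correspond to a negative ISO offset). Below is a minimal sketch of that convention, assuming only the standard library; local_iso_offset is a hypothetical helper name, not part of the original code.

```python
import time

def local_iso_offset(timezone_seconds: int) -> str:
    # time.timezone reports the local offset in seconds *west* of UTC,
    # so US Eastern (UTC-05:00) yields 18000 and the sign must be flipped
    # to build an ISO-8601 style offset string.
    offset = abs(timezone_seconds)
    hours, minutes = offset // 3600, (offset % 3600) // 60
    sign = "-" if timezone_seconds > 0 else "+"
    return f"{sign}{hours:02}:{minutes:02}"

print(local_iso_offset(18000))          # "-05:00" (US Eastern)
print(local_iso_offset(-3600))          # "+01:00" (Central European Time)
print(local_iso_offset(time.timezone))  # offset of the machine running this
```

The commit removes this client-side step, with temporal handling moving into the serialization changes in json_writer.rs below.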
4 changes: 1 addition & 3 deletions vegafusion-core/Cargo.toml
@@ -1,6 +1,6 @@
[package]
name = "vegafusion-core"
version = "0.0.1"
version = "0.0.2"
edition = "2018"
license = "AGPL-3.0-or-later"

@@ -22,8 +22,6 @@ serde_json = "1.0.68"
ordered-float = "^2.8.0"
petgraph = "0.6.0"
deterministic-hash = "1.0.1"

[dev-dependencies]
chrono = "0.4.19"

[build-dependencies]
100 changes: 70 additions & 30 deletions vegafusion-core/src/data/json_writer.rs
@@ -15,10 +15,13 @@
// specific language governing permissions and limitations
// under the License.

// ## VegaFusion note
// This file is copied from Arrow (with license above) `json/writer.rs` with the following
// modification. Rather than skip writing null values, this version is updated to write the JSON
// NULL value instead. This is needed for interoperability with Vega.
// ## VegaFusion notes
// -------------------
// This file was originally copied from Arrow (with license above) `json/writer.rs` with the
// following modifications.
// 1. Rather than skip writing null values, this version is updated to write the JSON
// NULL value instead. This is needed for interoperability with Vega.
// 2. Date32, Date64, and Timestamp types are serialized as UTC milliseconds.

//! # JSON Writer
//!
@@ -117,6 +120,7 @@ use arrow::array::*;
use arrow::datatypes::*;
use arrow::error::Result;
use arrow::record_batch::RecordBatch;
use chrono::TimeZone;

fn primitive_array_to_json<T: ArrowPrimitiveType>(array: &ArrayRef) -> Vec<Value> {
as_primitive_array::<T>(array)
@@ -222,6 +226,28 @@ macro_rules! set_column_by_array_type {
};
}

macro_rules! set_temporal_column_as_millis_by_array_type {
($array_type:ident, $col_name:ident, $rows:ident, $array:ident, $row_count:ident, $cast_fn:ident) => {
let arr = $array.as_any().downcast_ref::<$array_type>().unwrap();

$rows
.iter_mut()
.enumerate()
.take($row_count)
.for_each(|(i, row)| {
if !arr.is_null(i) {
if let Some(v) = arr.$cast_fn(i) {
row.insert($col_name.to_string(), v.timestamp_millis().into());
} else {
row.insert($col_name.to_string(), Value::Null);
}
} else {
row.insert($col_name.to_string(), Value::Null);
}
});
};
}

macro_rules! set_temporal_column_by_array_type {
($array_type:ident, $col_name:ident, $rows:ident, $array:ident, $row_count:ident, $cast_fn:ident) => {
let arr = $array.as_any().downcast_ref::<$array_type>().unwrap();
@@ -237,6 +263,8 @@ macro_rules! set_temporal_column_by_array_type {
} else {
row.insert($col_name.to_string(), Value::Null);
}
} else {
row.insert($col_name.to_string(), Value::Null);
}
});
};
@@ -313,27 +341,39 @@ fn set_column_for_json_rows(
set_column_by_array_type!(as_string_array, col_name, rows, array, row_count);
}
DataType::Date32 => {
set_temporal_column_by_array_type!(
Date32Array,
col_name,
rows,
array,
row_count,
value_as_date
);
// Write as integer UTC milliseconds
let arr = array.as_any().downcast_ref::<Date32Array>().unwrap();
rows.iter_mut()
.enumerate()
.take(row_count)
.for_each(|(i, row)| {
if arr.is_valid(i) {
let days = arr.value(i) as i64;
let ms_per_day = 1000 * 60 * 60 * 24_i64;
let millis = days * ms_per_day;
row.insert(col_name.to_string(), millis.into());
} else {
row.insert(col_name.to_string(), Value::Null);
}
});
}
DataType::Date64 => {
set_temporal_column_by_array_type!(
Date64Array,
col_name,
rows,
array,
row_count,
value_as_date
);
// Write as integer UTC milliseconds
let arr = array.as_any().downcast_ref::<Date64Array>().unwrap();
rows.iter_mut()
.enumerate()
.take(row_count)
.for_each(|(i, row)| {
if arr.is_valid(i) {
let millis = arr.value(i);
row.insert(col_name.to_string(), millis.into());
} else {
row.insert(col_name.to_string(), Value::Null);
}
});
}
DataType::Timestamp(TimeUnit::Second, _) => {
set_temporal_column_by_array_type!(
set_temporal_column_as_millis_by_array_type!(
TimestampSecondArray,
col_name,
rows,
@@ -343,7 +383,7 @@
);
}
DataType::Timestamp(TimeUnit::Millisecond, _) => {
set_temporal_column_by_array_type!(
set_temporal_column_as_millis_by_array_type!(
TimestampMillisecondArray,
col_name,
rows,
@@ -353,7 +393,7 @@
);
}
DataType::Timestamp(TimeUnit::Microsecond, _) => {
set_temporal_column_by_array_type!(
set_temporal_column_as_millis_by_array_type!(
TimestampMicrosecondArray,
col_name,
rows,
@@ -363,7 +403,7 @@
);
}
DataType::Timestamp(TimeUnit::Nanosecond, _) => {
set_temporal_column_by_array_type!(
set_temporal_column_as_millis_by_array_type!(
TimestampNanosecondArray,
col_name,
rows,
@@ -808,8 +848,8 @@ mod tests {

assert_eq!(
String::from_utf8(buf).unwrap(),
r#"{"nanos":"2018-11-13 17:11:10.011375885","micros":"2018-11-13 17:11:10.011375","millis":"2018-11-13 17:11:10.011","secs":"2018-11-13 17:11:10","name":"a"}
{"name":"b"}
r#"{"nanos":1542129070011,"micros":1542129070011,"millis":1542129070011,"secs":1542129070000,"name":"a"}
{"nanos":null,"micros":null,"millis":null,"secs":null,"name":"b"}
"#
);
}
@@ -854,8 +894,8 @@ mod tests {

assert_eq!(
String::from_utf8(buf).unwrap(),
r#"{"date32":"2018-11-13","date64":"2018-11-13","name":"a"}
{"name":"b"}
r#"{"date32":1542067200000,"date64":1542129070011,"name":"a"}
{"date32":null,"date64":null,"name":"b"}
"#
);
}
@@ -898,7 +938,7 @@ mod tests {
assert_eq!(
String::from_utf8(buf).unwrap(),
r#"{"time32sec":"00:02:00","time32msec":"00:00:00.120","time64usec":"00:00:00.000120","time64nsec":"00:00:00.000000120","name":"a"}
{"name":"b"}
{"time32sec":null,"time32msec":null,"time64usec":null,"time64nsec":null,"name":"b"}
"#
);
}
@@ -941,7 +981,7 @@ mod tests {
assert_eq!(
String::from_utf8(buf).unwrap(),
r#"{"duration_sec":"PT120S","duration_msec":"PT0.120S","duration_usec":"PT0.000120S","duration_nsec":"PT0.000000120S","name":"a"}
{"name":"b"}
{"duration_sec":null,"duration_msec":null,"duration_usec":null,"duration_nsec":null,"name":"b"}
"#
);
}