diff --git a/docs/src/split.md b/docs/src/split.md
index f7858910..bb919264 100644
--- a/docs/src/split.md
+++ b/docs/src/split.md
@@ -124,3 +124,14 @@ using MarketData
 tail(cl)
 tail(cl, 3)
 ```
+
+## Splitting by period
+
+Split data into periods using a given function, e.g. `Dates.day`.
+
+```@repl
+using TimeSeries
+using MarketData
+
+split(cl, Dates.day)
+```
diff --git a/src/TimeSeries.jl b/src/TimeSeries.jl
index 727e3be2..8addb07b 100644
--- a/src/TimeSeries.jl
+++ b/src/TimeSeries.jl
@@ -12,12 +12,12 @@ using Tables
 using PrettyTables: pretty_table
 
 export TimeArray, AbstractTimeSeries,
-       when, from, to, findwhen, timestamp, values, colnames, meta, head, tail,
-       lag, lead, diff, percentchange, moving, upto,
-       uniformspaced, uniformspace, dropnan,
-       basecall,
-       merge, collapse,
-       readtimearray, writetimearray
+    when, from, to, findwhen, timestamp, values, colnames, meta, head, tail, split,
+    lag, lead, diff, percentchange, moving, upto,
+    uniformspaced, uniformspace, dropnan,
+    basecall,
+    merge, collapse,
+    readtimearray, writetimearray
 
 # modify.jl
 export rename, rename!
diff --git a/src/split.jl b/src/split.jl
index 13688d1e..ca150b56 100644
--- a/src/split.jl
+++ b/src/split.jl
@@ -7,17 +7,17 @@ when(ta::TimeArray, period::Function, t::String) =
 
 # from, to ######################
 
-from(ta::TimeArray{T, N, D}, d::D) where {T, N, D} =
+from(ta::TimeArray{T,N,D}, d::D) where {T,N,D} =
     length(ta) == 0 ? ta :
-        d < timestamp(ta)[1] ? ta :
-        d > timestamp(ta)[end] ? ta[1:0] :
-        ta[searchsortedfirst(timestamp(ta), d):end]
+    d < timestamp(ta)[1] ? ta :
+    d > timestamp(ta)[end] ? ta[1:0] :
+    ta[searchsortedfirst(timestamp(ta), d):end]
 
-to(ta::TimeArray{T, N, D}, d::D) where {T, N, D} =
+to(ta::TimeArray{T,N,D}, d::D) where {T,N,D} =
     length(ta) == 0 ? ta :
-        d < timestamp(ta)[1] ? ta[1:0] :
-        d > timestamp(ta)[end] ? ta :
-        ta[1:searchsortedlast(timestamp(ta), d)]
+    d < timestamp(ta)[1] ? ta[1:0] :
+    d > timestamp(ta)[end] ? ta :
+    ta[1:searchsortedlast(timestamp(ta), d)]
 
 ###### findall ##################
 
@@ -43,7 +43,7 @@ findwhen(ta::TimeArray{Bool,1}) = timestamp(ta)[findall(values(ta))]
     end
 end
 
- @generated function tail(ta::TimeArray{T,N}, n::Int=6) where {T,N}
+@generated function tail(ta::TimeArray{T,N}, n::Int=6) where {T,N}
     new_values = (N == 1) ? :(values(ta)[start:end]) : :(values(ta)[start:end, :])
 
     quote
@@ -58,3 +58,41 @@ end
 
 Base.first(ta::TimeArray) = head(ta, 1)
 Base.last(ta::TimeArray) = tail(ta, 1)
+
+
+"""
+    split(data::TimeArray, period::Function)
+
+Split `data` into consecutive chunks whose timestamps share the same `period` value.
+Return a vector of `TimeArray`s; an empty `data` yields an empty vector.
+
+# Arguments
+
+- `data::TimeArray`: Data to split.
+- `period::Function`: Function, e.g. `Dates.day`, that is used to split the `data`.
+"""
+Base.split(data::TimeArray, period::Function) =
+    map(i -> data[i], _split(data, period))
+
+# Index ranges of the consecutive runs of timestamps that share one `period` value.
+# Always returns a `Vector{UnitRange{Int}}` (empty for empty `data`) so that `split`
+# maps over index ranges and never over the `TimeArray` itself.
+function _split(data::TimeArray, period::Function)
+    idx = UnitRange{Int}[]
+    m = length(data)
+    m == 0 && return idx
+
+    ts = timestamp(data)
+    t0 = period(ts[1])
+    j = 1  # first row of the run currently being scanned
+    for i in 1:(m - 1)
+        t1 = period(ts[i + 1])
+        t0 == t1 && continue
+        push!(idx, j:i)  # period changes after row `i`: close the run
+        j = i + 1
+        t0 = t1
+    end
+    push!(idx, j:m)  # the last run always ends at the final row
+
+    return idx
+end