From 128e8fc2f0aa7bc6a159ce6eb160d36a8f2fefbe Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Tue, 15 Oct 2024 09:09:40 +0200 Subject: [PATCH] Serialise Texts without going through ByteString --- src/Data/Text.hs | 17 +++++++++++------ src/Data/Text/Lazy.hs | 7 +++++-- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 790e9d1a..05f9eb71 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -235,6 +235,7 @@ import qualified Data.Text.Array as A import qualified Data.List as L hiding (head, tail) import qualified Data.List.NonEmpty as NonEmptyList import Data.Binary (Binary(get, put)) +import Data.Binary.Put (putBuilder) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) @@ -245,7 +246,7 @@ import Data.Text.Internal.Measure (measure_off) import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, ord2, ord3, ord4) import qualified Data.Text.Internal.Fusion as S import qualified Data.Text.Internal.Fusion.Common as S -import Data.Text.Encoding (decodeUtf8', encodeUtf8) +import Data.Text.Encoding (decodeUtf8', encodeUtf8Builder) import Data.Text.Internal.Fusion (stream, reverseStream, unstream) import Data.Text.Internal.Private (span_) import Data.Text.Internal (Text(..), StrictText, empty, firstf, mul, safe, text, append, pack) @@ -394,7 +395,11 @@ instance NFData Text where rnf !_ = () -- | @since 1.2.1.0 instance Binary Text where - put t = put (encodeUtf8 t) + put t = do + -- This needs to be in sync with the Binary instance for ByteString + -- in the binary package. + put (lengthWord8 t) + putBuilder (encodeUtf8Builder t) get = do bs <- get case decodeUtf8' bs of @@ -556,7 +561,7 @@ null (Text _arr _off len) = len <= 0 {-# INLINE [1] null #-} -{-# RULES +{-# RULES "TEXT null/empty -> True" null empty = True #-} @@ -1275,7 +1280,7 @@ take :: Int -> Text -> Text take n t@(Text arr off len) | n <= 0 = empty | n >= len || m >= len || m < 0 = t - | otherwise = Text arr off m + | otherwise = Text arr off m where m = measureOff n t {-# INLINE [1] take #-} @@ -1325,7 +1330,7 @@ drop :: Int -> Text -> Text drop n t@(Text arr off len) | n <= 0 = t | n >= len || m >= len || m < 0 = empty - | otherwise = Text arr (off+m) (len-m) + | otherwise = Text arr (off+m) (len-m) where m = measureOff n t {-# INLINE [1] drop #-} @@ -1434,7 +1439,7 @@ splitAt :: Int -> Text -> (Text, Text) splitAt n t@(Text arr off len) | n <= 0 = (empty, t) | n >= len || m >= len || m < 0 = (t, empty) - | otherwise = (Text arr off m, Text arr (off+m) (len-m)) + | otherwise = (Text arr off m, Text arr (off+m) (len-m)) where m = measureOff n t diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 9c0f0dd6..0dde589c 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -223,6 +223,7 @@ import Data.Char (isSpace) import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex, Constr, mkConstr, DataType, mkDataType, Fixity(Prefix)) import Data.Binary (Binary(get, put)) +import Data.Binary.Put (putBuilder) import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.List.NonEmpty as NE import Data.Monoid (Monoid(..)) @@ -241,7 +242,7 @@ import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldlChunks, import Data.Text.Internal (firstf, safe, text) import Data.Text.Internal.Reverse (reverseNonEmpty) import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_) -import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8) +import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8Builder) import Data.Text.Internal.Lazy.Search (indices) import qualified GHC.CString as GHC import qualified GHC.Exts as Exts @@ -352,7 +353,9 @@ instance NFData Text where -- | @since 1.2.1.0 instance Binary Text where - put t = put (encodeUtf8 t) + put t = do + put (foldlChunks (\n c -> n + T.lengthWord8 c ) 0 t) + putBuilder (encodeUtf8Builder t) get = do bs <- get case decodeUtf8' bs of