From ae791f0b8494ae227eb7de3e7ab3da135ee0abbe Mon Sep 17 00:00:00 2001 From: Alex Biehl Date: Tue, 15 Oct 2024 09:09:40 +0200 Subject: [PATCH] Serialise Texts without going through ByteString --- src/Data/Text.hs | 17 +++++++++++------ src/Data/Text/Lazy.hs | 9 +++++++-- tests/Tests/Properties/Instances.hs | 15 ++++++++++++++- text.cabal | 1 + 4 files changed, 33 insertions(+), 9 deletions(-) diff --git a/src/Data/Text.hs b/src/Data/Text.hs index 790e9d1a..05f9eb71 100644 --- a/src/Data/Text.hs +++ b/src/Data/Text.hs @@ -235,6 +235,7 @@ import qualified Data.Text.Array as A import qualified Data.List as L hiding (head, tail) import qualified Data.List.NonEmpty as NonEmptyList import Data.Binary (Binary(get, put)) +import Data.Binary.Put (putBuilder) import Data.Monoid (Monoid(..)) import Data.Semigroup (Semigroup(..)) import Data.String (IsString(..)) @@ -245,7 +246,7 @@ import Data.Text.Internal.Measure (measure_off) import Data.Text.Internal.Encoding.Utf8 (utf8Length, utf8LengthByLeader, chr3, ord2, ord3, ord4) import qualified Data.Text.Internal.Fusion as S import qualified Data.Text.Internal.Fusion.Common as S -import Data.Text.Encoding (decodeUtf8', encodeUtf8) +import Data.Text.Encoding (decodeUtf8', encodeUtf8Builder) import Data.Text.Internal.Fusion (stream, reverseStream, unstream) import Data.Text.Internal.Private (span_) import Data.Text.Internal (Text(..), StrictText, empty, firstf, mul, safe, text, append, pack) @@ -394,7 +395,11 @@ instance NFData Text where rnf !_ = () -- | @since 1.2.1.0 instance Binary Text where - put t = put (encodeUtf8 t) + put t = do + -- This needs to be in sync with the Binary instance for ByteString + -- in the binary package. + put (lengthWord8 t) + putBuilder (encodeUtf8Builder t) get = do bs <- get case decodeUtf8' bs of @@ -556,7 +561,7 @@ null (Text _arr _off len) = len <= 0 {-# INLINE [1] null #-} -{-# RULES +{-# RULES "TEXT null/empty -> True" null empty = True #-} @@ -1275,7 +1280,7 @@ take :: Int -> Text -> Text take n t@(Text arr off len) | n <= 0 = empty | n >= len || m >= len || m < 0 = t - | otherwise = Text arr off m + | otherwise = Text arr off m where m = measureOff n t {-# INLINE [1] take #-} @@ -1325,7 +1330,7 @@ drop :: Int -> Text -> Text drop n t@(Text arr off len) | n <= 0 = t | n >= len || m >= len || m < 0 = empty - | otherwise = Text arr (off+m) (len-m) + | otherwise = Text arr (off+m) (len-m) where m = measureOff n t {-# INLINE [1] drop #-} @@ -1434,7 +1439,7 @@ splitAt :: Int -> Text -> (Text, Text) splitAt n t@(Text arr off len) | n <= 0 = (empty, t) | n >= len || m >= len || m < 0 = (t, empty) - | otherwise = (Text arr off m, Text arr (off+m) (len-m)) + | otherwise = (Text arr off m, Text arr (off+m) (len-m)) where m = measureOff n t diff --git a/src/Data/Text/Lazy.hs b/src/Data/Text/Lazy.hs index 9c0f0dd6..938bd436 100644 --- a/src/Data/Text/Lazy.hs +++ b/src/Data/Text/Lazy.hs @@ -223,6 +223,7 @@ import Data.Char (isSpace) import Data.Data (Data(gfoldl, toConstr, gunfold, dataTypeOf), constrIndex, Constr, mkConstr, DataType, mkDataType, Fixity(Prefix)) import Data.Binary (Binary(get, put)) +import Data.Binary.Put (putBuilder) import Data.List.NonEmpty (NonEmpty(..)) import qualified Data.List.NonEmpty as NE import Data.Monoid (Monoid(..)) @@ -241,7 +242,7 @@ import Data.Text.Internal.Lazy (Text(..), chunk, empty, foldlChunks, import Data.Text.Internal (firstf, safe, text) import Data.Text.Internal.Reverse (reverseNonEmpty) import Data.Text.Internal.Transformation (mapNonEmpty, toCaseFoldNonEmpty, toLowerNonEmpty, toUpperNonEmpty, filter_) -import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8) +import Data.Text.Lazy.Encoding (decodeUtf8', encodeUtf8Builder) import Data.Text.Internal.Lazy.Search (indices) import qualified GHC.CString as GHC import qualified GHC.Exts as Exts @@ -352,7 +353,11 @@ instance NFData Text where -- | @since 1.2.1.0 instance Binary Text where - put t = put (encodeUtf8 t) + put t = do + -- This needs to be in sync with the Binary instance for ByteString + -- in the binary package. + put (foldlChunks (\n c -> n + T.lengthWord8 c) 0 t) + putBuilder (encodeUtf8Builder t) get = do bs <- get case decodeUtf8' bs of diff --git a/tests/Tests/Properties/Instances.hs b/tests/Tests/Properties/Instances.hs index 05eee39e..8f120653 100644 --- a/tests/Tests/Properties/Instances.hs +++ b/tests/Tests/Properties/Instances.hs @@ -6,6 +6,7 @@ module Tests.Properties.Instances ( testInstances ) where +import Data.Binary (encode, decodeOrFail) import Data.String (IsString(fromString)) import Test.QuickCheck import Test.Tasty (TestTree, testGroup) @@ -43,6 +44,16 @@ tl_mempty = mempty === (unpackS (mempty :: TL.Text)) t_IsString = fromString `eqP` (T.unpack . fromString) tl_IsString = fromString `eqP` (TL.unpack . fromString) +t_Binary s = + case decodeOrFail . encode $ (s :: T.Text) of + Left _ -> counterexample (show (T.unpack s)) (property False) + Right (_, _, s') -> s === s' + +tl_Binary s = + case decodeOrFail . encode $ (s :: TL.Text) of + Left _ -> counterexample (show (TL.unpack s)) (property False) + Right (_, _, s') -> s === s' + testInstances :: TestTree testInstances = testGroup "instances" [ @@ -65,5 +76,7 @@ testInstances = testProperty "t_mempty" t_mempty, testProperty "tl_mempty" tl_mempty, testProperty "t_IsString" t_IsString, - testProperty "tl_IsString" tl_IsString + testProperty "tl_IsString" tl_IsString, + testProperty "t_Binary" t_Binary, + testProperty "tl_Binary" tl_Binary ] diff --git a/text.cabal b/text.cabal index 5ecbe5c4..4e1fac80 100644 --- a/text.cabal +++ b/text.cabal @@ -294,6 +294,7 @@ test-suite tests build-depends: QuickCheck >= 2.12.6 && < 2.16, base <5, + binary, bytestring, deepseq, directory,