diff --git a/Data/ByteString/Builder.hs b/Data/ByteString/Builder.hs index 09c85b210..981586d49 100644 --- a/Data/ByteString/Builder.hs +++ b/Data/ByteString/Builder.hs @@ -257,12 +257,14 @@ import qualified Data.ByteString.Builder.Prim as P import qualified Data.ByteString.Lazy.Internal as L import Data.ByteString.Builder.ASCII import Data.ByteString.Builder.RealFloat +import Data.ByteString.Internal (byteCountLiteral) import Data.String (IsString(..)) import System.IO (Handle, IOMode(..), withBinaryFile) import Foreign import GHC.Base (unpackCString#, unpackCStringUtf8#, unpackFoldrCString#, build) +import GHC.Ptr (Ptr(..)) -- | Execute a 'Builder' and return the generated chunks as a 'L.LazyByteString'. -- The work is performed lazy, i.e., only when a chunk of the 'L.LazyByteString' @@ -434,7 +436,7 @@ char8 :: Char -> Builder char8 = P.primFixed P.char8 -- | Char8 encode a 'String'. -{-# INLINE [1] string8 #-} -- phased to allow P.cstring rewrite +{-# NOINLINE string8 #-} string8 :: String -> Builder string8 = P.primMapListFixed P.char8 @@ -442,10 +444,12 @@ string8 = P.primMapListFixed P.char8 -- to promptly turn into build (unpackFoldrCString# s), so we match on both. {-# RULES "string8/unpackCString#" forall s. - string8 (unpackCString# s) = P.cstring s + string8 (unpackCString# s) = + asciiLiteralCopy (Ptr s) (byteCountLiteral s) "string8/unpackFoldrCString#" forall s. - string8 (build (unpackFoldrCString# s)) = P.cstring s + string8 (build (unpackFoldrCString# s)) = + asciiLiteralCopy (Ptr s) (byteCountLiteral s) #-} ------------------------------------------------------------------------------ @@ -461,19 +465,22 @@ charUtf8 = P.primBounded P.charUtf8 -- -- Note that 'stringUtf8' performs no codepoint validation and consequently may -- emit invalid UTF-8 if asked (e.g. single surrogates). -{-# INLINE [1] stringUtf8 #-} -- phased to allow P.cstring rewrite +{-# NOINLINE stringUtf8 #-} stringUtf8 :: String -> Builder stringUtf8 = P.primMapListBounded P.charUtf8 {-# RULES "stringUtf8/unpackCStringUtf8#" forall s. - stringUtf8 (unpackCStringUtf8# s) = P.cstringUtf8 s + stringUtf8 (unpackCStringUtf8# s) = + modUtf8LitCopy (Ptr s) (byteCountLiteral s) "stringUtf8/unpackCString#" forall s. - stringUtf8 (unpackCString# s) = P.cstring s + stringUtf8 (unpackCString# s) = + asciiLiteralCopy (Ptr s) (byteCountLiteral s) "stringUtf8/unpackFoldrCString#" forall s. - stringUtf8 (build (unpackFoldrCString# s)) = P.cstring s + stringUtf8 (build (unpackFoldrCString# s)) = + asciiLiteralCopy (Ptr s) (byteCountLiteral s) #-} instance IsString Builder where diff --git a/Data/ByteString/Builder/Internal.hs b/Data/ByteString/Builder/Internal.hs index 82bdf560c..886411550 100644 --- a/Data/ByteString/Builder/Internal.hs +++ b/Data/ByteString/Builder/Internal.hs @@ -1,5 +1,5 @@ {-# LANGUAGE ScopedTypeVariables, CPP, BangPatterns, RankNTypes, TupleSections #-} -{-# LANGUAGE Unsafe #-} +{-# LANGUAGE MagicHash, ViewPatterns, Unsafe #-} {-# OPTIONS_HADDOCK not-home #-} -- | Copyright : (c) 2010 - 2011 Simon Meier -- License : BSD3-style (see LICENSE) @@ -84,6 +84,8 @@ module Data.ByteString.Builder.Internal ( -- , sizedChunksInsert , byteStringCopy + , asciiLiteralCopy + , modUtf8LitCopy , byteStringInsert , byteStringThreshold @@ -127,6 +129,8 @@ module Data.ByteString.Builder.Internal ( ) where import Control.Arrow (second) +import Control.Monad (when) +import Control.DeepSeq (NFData(..)) import Data.Semigroup (Semigroup(..)) import Data.List.NonEmpty (NonEmpty(..)) @@ -146,6 +150,11 @@ import Foreign import Foreign.ForeignPtr.Unsafe (unsafeForeignPtrToPtr) import System.IO.Unsafe (unsafeDupablePerformIO) +#if !(PURE_HASKELL || defined(USE_MEMCHR)) +import Foreign.C.String (CString) +import GHC.Ptr (Ptr(..)) +#endif + ------------------------------------------------------------------------------ -- Buffers ------------------------------------------------------------------------------ @@ -154,11 +163,22 @@ import System.IO.Unsafe (unsafeDupablePerformIO) data BufferRange = BufferRange {-# UNPACK #-} !(Ptr Word8) -- First byte of range {-# UNPACK #-} !(Ptr Word8) -- First byte /after/ range +-- | @since 0.12.1.0 +instance NFData BufferRange where + rnf !_ = () + -- | A 'Buffer' together with the 'BufferRange' of free bytes. The filled -- space starts at offset 0 and ends at the first free byte. data Buffer = Buffer {-# UNPACK #-} !(ForeignPtr Word8) {-# UNPACK #-} !BufferRange +-- | Like the @NFData@ instance for @StrictByteString@, +-- this does not force the @ForeignPtrContents@ field +-- of the underlying @ForeignPtr@. +-- +-- @since 0.12.1.0 +instance NFData Buffer where + rnf !_ = () -- | Combined size of the filled and free space in the buffer. {-# INLINE bufferSize #-} @@ -876,6 +896,93 @@ byteStringInsert :: S.StrictByteString -> Builder byteStringInsert = \bs -> builder $ \k (BufferRange op _) -> return $ insertChunk op bs k + +------------------------------------------------------------------------------ +-- Raw CString encoding +------------------------------------------------------------------------------ + +-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII +-- strings that are free of embedded (overlong-encoded as the two-byte sequence +-- @0xC0 0x80@) null characters. +-- +-- @since 0.12.1.0 +{-# INLINABLE asciiLiteralCopy #-} +asciiLiteralCopy :: Ptr Word8 -> Int -> Builder +asciiLiteralCopy = \ !ip !len -> builder $ \k br -> do + let !ipe = ip `plusPtr` len + wrappedBytesCopyStep (BufferRange ip ipe) k br + +getNextEmbeddedNull :: Ptr Word8 -> Int -> IO (Ptr Word8) +#if PURE_HASKELL || defined(USE_MEMCHR) +getNextEmbeddedNull p len = do + c0loc <- S.memchr p 0xC0 (S.checkedCast len) + if c0loc == nullPtr + then pure c0loc + else do + let nextLoc = c0loc `plusPtr` 1 :: Ptr Word8 + nextByte <- peek nextLoc + if nextByte == 0x80 + then pure c0loc + else getNextEmbeddedNull nextLoc (p `minusPtr` nextLoc + len) + +#else +getNextEmbeddedNull p _len = c_strstr (castPtr p) modifiedUtf8NUL + +-- | GHC represents @NUL@ in string literals via an overlong 2-byte encoding, +-- which is part of "modified UTF-8" (GHC does not also implement CESU-8). +modifiedUtf8NUL :: CString +modifiedUtf8NUL = Ptr "\xc0\x80"# + +foreign import ccall unsafe "string.h strstr" c_strstr + :: CString -> CString -> IO (Ptr Word8) +#endif + + +-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8 +-- encoded strings that may contain embedded overlong-encodings (as the +-- two-byte sequence @0xC0 0x80@) of null characters. +-- +-- @since 0.12.1.0 +{-# INLINABLE modUtf8LitCopy #-} +modUtf8LitCopy :: Ptr Word8 -> Int -> Builder +modUtf8LitCopy = \ !ip !len -> builder $ \k br -> do + nullAt <- getNextEmbeddedNull ip len + modUtf8_step ip len nullAt k br + +modUtf8_step :: Ptr Word8 -> Int -> Ptr Word8 -> BuildStep r -> BuildStep r +modUtf8_step !ip !len ((== nullPtr) -> True) k br = + -- Contains no encoded nulls, use simple copy codepath + wrappedBytesCopyStep (BufferRange ip ipe) k br + where + !ipe = ip `plusPtr` len +modUtf8_step !ip !len !nullAt k (BufferRange op0 ope) + -- Copy as much of the null-free portion of the string as fits into the + -- available buffer space. If the string is long enough, we may have asked + -- for less than its full length, filling the buffer with the rest will go + -- into the next builder step. + | avail > nullFree = do + when (nullFree > 0) (copyBytes op0 ip nullFree) + pokeElemOff op0 nullFree 0 + let used = nullFree + 2 + len' = len - used + !ip' = ip `plusPtr` used + !op' = op0 `plusPtr` (nullFree + 1) + nullAt' <- getNextEmbeddedNull ip' len' + modUtf8_step ip' len' nullAt' k (BufferRange op' ope) + | avail > 0 = do + -- avail <= nullFree + copyBytes op0 ip avail + let len' = len - avail + !ip' = ip `plusPtr` avail + !op' = op0 `plusPtr` avail + return $ bufferFull 1 op' (modUtf8_step ip' len' nullAt k) + | otherwise = + return $ bufferFull 1 op0 (modUtf8_step ip len nullAt k) + where + !avail = ope `minusPtr` op0 + !nullFree = nullAt `minusPtr` ip + + -- Short bytestrings ------------------------------------------------------------------------------ diff --git a/Data/ByteString/Builder/Prim.hs b/Data/ByteString/Builder/Prim.hs index a4bd22c77..52be31236 100644 --- a/Data/ByteString/Builder/Prim.hs +++ b/Data/ByteString/Builder/Prim.hs @@ -579,7 +579,10 @@ primBounded w x = -- because it moves several variables out of the inner loop. {-# INLINE primMapListBounded #-} primMapListBounded :: BoundedPrim a -> [a] -> Builder -primMapListBounded w xs0 = +primMapListBounded w = \xs0 -> + -- We want this to inline when there is one arg, so that we can + -- specialise on the BoundedPrim "w". So we move the \xs0 after the + -- "=" sign so that the INLINE pragma doesn't interfere with this. builder $ step xs0 where step xs1 k (BufferRange op0 ope0) = @@ -663,59 +666,29 @@ primMapLazyByteStringBounded w = L.foldrChunks (\x b -> primMapByteStringBounded w x `mappend` b) mempty ------------------------------------------------------------------------------- --- Raw CString encoding ------------------------------------------------------------------------------- - --- | A null-terminated ASCII encoded 'Foreign.C.String.CString'. --- Null characters are not representable. +-- | Builder for raw 'Addr#' pointers to null-terminated primitive ASCII +-- strings that are free of embedded (overlong-encoded as the two-byte sequence +-- @0xC0 0x80@) null characters. +-- +-- Deprecated since @bytestring-0.12.1.0@. -- -- @since 0.11.0.0 +{-# DEPRECATED cstring "Use asciiLiteralCopy instead" #-} cstring :: Addr# -> Builder -cstring = - \addr0 -> builder $ step addr0 - where - step :: Addr# -> BuildStep r -> BuildStep r - step !addr !k br@(BufferRange op0@(Ptr op0#) ope) - | W8# ch == 0 = k br - | op0 == ope = - return $ bufferFull 1 op0 (step addr k) - | otherwise = do - IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of - s' -> (# s', () #) - let br' = BufferRange (op0 `plusPtr` 1) ope - step (addr `plusAddr#` 1#) k br' - where - !ch = indexWord8OffAddr# addr 0# +cstring s = asciiLiteralCopy (Ptr s) (S.byteCountLiteral s) +{-# INLINE cstring #-} --- | A null-terminated UTF-8 encoded 'Foreign.C.String.CString'. --- Null characters can be encoded as @0xc0 0x80@. +-- | Builder for raw 'Addr#' pointers to null-terminated primitive UTF-8 +-- encoded strings that may contain embedded overlong-encodings (as the +-- two-byte sequence @0xC0 0x80@) of null characters. +-- +-- Deprecated since @bytestring-0.12.1.0@. -- -- @since 0.11.0.0 +{-# DEPRECATED cstringUtf8 "Use modUtf8LitCopy instead" #-} cstringUtf8 :: Addr# -> Builder -cstringUtf8 = - \addr0 -> builder $ step addr0 - where - step :: Addr# -> BuildStep r -> BuildStep r - step !addr !k br@(BufferRange op0@(Ptr op0#) ope) - | W8# ch == 0 = k br - | op0 == ope = - return $ bufferFull 1 op0 (step addr k) - -- NULL is encoded as 0xc0 0x80 - | W8# ch == 0xc0 - , W8# (indexWord8OffAddr# addr 1#) == 0x80 = do - let !(W8# nullByte#) = 0 - IO $ \s -> case writeWord8OffAddr# op0# 0# nullByte# s of - s' -> (# s', () #) - let br' = BufferRange (op0 `plusPtr` 1) ope - step (addr `plusAddr#` 2#) k br' - | otherwise = do - IO $ \s -> case writeWord8OffAddr# op0# 0# ch s of - s' -> (# s', () #) - let br' = BufferRange (op0 `plusPtr` 1) ope - step (addr `plusAddr#` 1#) k br' - where - !ch = indexWord8OffAddr# addr 0# +cstringUtf8 s = modUtf8LitCopy (Ptr s) (S.byteCountLiteral s) +{-# INLINE cstringUtf8 #-} ------------------------------------------------------------------------------ -- Char8 encoding diff --git a/Data/ByteString/Internal.hs b/Data/ByteString/Internal.hs index c4c831bc0..58639a4b9 100644 --- a/Data/ByteString/Internal.hs +++ b/Data/ByteString/Internal.hs @@ -36,7 +36,7 @@ module Data.ByteString.Internal ( unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict, unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, unsafePackAddress, unsafePackLenAddress, - unsafePackLiteral, unsafePackLenLiteral, + unsafePackLiteral, unsafePackLenLiteral, byteCountLiteral, -- * Low level imperative construction empty, diff --git a/Data/ByteString/Internal/Type.hs b/Data/ByteString/Internal/Type.hs index 52dede330..6c90377b3 100644 --- a/Data/ByteString/Internal/Type.hs +++ b/Data/ByteString/Internal/Type.hs @@ -39,7 +39,7 @@ module Data.ByteString.Internal.Type ( unpackBytes, unpackAppendBytesLazy, unpackAppendBytesStrict, unpackChars, unpackAppendCharsLazy, unpackAppendCharsStrict, unsafePackAddress, unsafePackLenAddress, - unsafePackLiteral, unsafePackLenLiteral, + unsafePackLiteral, unsafePackLenLiteral, byteCountLiteral, -- * Low level imperative construction empty, @@ -82,6 +82,7 @@ module Data.ByteString.Internal.Type ( overflowError, checkedAdd, checkedMultiply, + checkedCast, -- * Standard C Functions c_strlen, @@ -137,8 +138,6 @@ import Foreign.Marshal.Utils #if PURE_HASKELL import qualified Data.ByteString.Internal.Pure as Pure -import Data.Bits (toIntegralSized, Bits) -import Data.Maybe (fromMaybe) import Control.Monad ((<$!>)) #endif @@ -154,8 +153,9 @@ import Data.String (IsString(..)) import Control.Exception (assert, throw, Exception) -import Data.Bits ((.&.)) +import Data.Bits ((.&.), toIntegralSized, Bits) import Data.Char (ord) +import Data.Maybe (fromMaybe) import Data.Word import Data.Data (Data(..), mkConstr ,mkDataType, Constr, DataType, Fixity(Prefix), constrIndex) @@ -164,6 +164,7 @@ import GHC.Base (nullAddr#,realWorld#,unsafeChr) import GHC.Exts (IsList(..), Addr#, minusAddr#, ByteArray#) import GHC.CString (unpackCString#) import GHC.Magic (runRW#, lazy) +import GHC.Stack.Types (HasCallStack) #define TIMES_INT_2_AVAILABLE MIN_VERSION_ghc_prim(0,7,0) #if TIMES_INT_2_AVAILABLE @@ -510,13 +511,22 @@ unsafePackLenAddress len addr# = do -- @since 0.11.1.0 unsafePackLiteral :: Addr# -> ByteString unsafePackLiteral addr# = + unsafePackLenLiteral (byteCountLiteral addr#) addr# +{-# INLINE unsafePackLiteral #-} + +-- | Byte count of null-terminated primitive literal string excluding the +-- terminating null byte. +byteCountLiteral :: Addr# -> Int +byteCountLiteral addr# = #if __GLASGOW_HASKELL__ >= 811 - unsafePackLenLiteral (I# (cstringLength# addr#)) addr# + I# (cstringLength# addr#) #else - let len = accursedUnutterablePerformIO (c_strlen (Ptr addr#)) - in unsafePackLenLiteral (fromIntegral len) addr# + fromIntegral (pure_strlen (Ptr addr#)) + +foreign import ccall unsafe "string.h strlen" pure_strlen + :: CString -> CSize #endif -{-# INLINE unsafePackLiteral #-} +{-# INLINE byteCountLiteral #-} -- | See 'unsafePackLiteral'. This function is similar, @@ -1064,6 +1074,12 @@ checkedIntegerToInt x | otherwise = Nothing where res = fromInteger x :: Int +checkedCast :: (HasCallStack, Bits a, Bits b, Integral a, Integral b) => a -> b +checkedCast x = + fromMaybe (error "checkedCast: overflow") + (toIntegralSized x) + + ------------------------------------------------------------------------ @@ -1272,11 +1288,6 @@ bool_to_cint :: Bool -> CInt bool_to_cint True = 1 bool_to_cint False = 0 -checkedCast :: (Bits a, Bits b, Integral a, Integral b) => a -> b -checkedCast x = - fromMaybe (errorWithoutStackTrace "checkedCast: overflow") - (toIntegralSized x) - ---------------------------------------------------------------- -- Haskell version of functions in itoa.c ---------------------------------------------------------------- diff --git a/bench/BenchAll.hs b/bench/BenchAll.hs index 85f348748..55ea35512 100644 --- a/bench/BenchAll.hs +++ b/bench/BenchAll.hs @@ -19,26 +19,29 @@ import Data.Monoid import Data.Semigroup import Data.String import Test.Tasty.Bench + import Prelude hiding (words) import qualified Data.List as List import Control.DeepSeq +import Control.Exception import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy.Char8 as L8 +import Data.ByteString.Internal (byteCountLiteral) import Data.ByteString.Builder -import Data.ByteString.Builder.Extra (byteStringCopy, - byteStringInsert, - intHost) -import Data.ByteString.Builder.Internal (ensureFree) +import qualified Data.ByteString.Builder.Extra as Extra +import qualified Data.ByteString.Builder.Internal as BI import Data.ByteString.Builder.Prim (BoundedPrim, FixedPrim, (>$<)) import qualified Data.ByteString.Builder.Prim as P import qualified Data.ByteString.Builder.Prim.Internal as PI import Foreign +import qualified GHC.Exts as Exts +import GHC.Ptr (Ptr(..)) import System.Random @@ -126,15 +129,45 @@ loremIpsum = S8.unlines $ map S8.pack -- benchmark wrappers --------------------- -{-# INLINE benchB #-} benchB :: String -> a -> (a -> Builder) -> Benchmark -benchB name x b = - bench (name ++" (" ++ show nRepl ++ ")") $ - whnf (L.length . toLazyByteString . b) x +{-# INLINE benchB #-} +benchB name x b = benchB' (name ++" (" ++ show nRepl ++ ")") x b -{-# INLINE benchB' #-} benchB' :: String -> a -> (a -> Builder) -> Benchmark -benchB' name x b = bench name $ whnf (L.length . toLazyByteString . b) x +{-# INLINE benchB' #-} +benchB' name x mkB = + env (BI.newBuffer BI.defaultChunkSize) $ \buf -> + bench name $ whnfAppIO (runBuildStepOn buf . BI.runBuilder . mkB) x + +benchB'_ :: String -> Builder -> Benchmark +{-# INLINE benchB'_ #-} +benchB'_ name b = + env (BI.newBuffer BI.defaultChunkSize) $ \buf -> + bench name $ whnfIO (runBuildStepOn buf (BI.runBuilder b)) + +-- | @runBuilderOn@ runs a @BuildStep@'s actions all on the same @Buffer@. +-- It is used to avoid measuring driver allocation overhead. +runBuildStepOn :: BI.Buffer -> BI.BuildStep () -> IO () +{-# NOINLINE runBuildStepOn #-} +runBuildStepOn (BI.Buffer fp br@(BI.BufferRange op ope)) b = go b + where + !len = ope `minusPtr` op + + go :: BI.BuildStep () -> IO () + go bs = BI.fillWithBuildStep bs doneH fullH insertChunkH br + + doneH :: Ptr Word8 -> () -> IO () + doneH _ _ = touchForeignPtr fp + -- 'touchForeignPtr' is adequate because the given BuildStep + -- will always terminate. (We won't measure an infinite loop!) + + fullH :: Ptr Word8 -> Int -> BI.BuildStep () -> IO () + fullH _ minLen nextStep + | len < minLen = throwIO (ErrorCall "runBuilderOn: action expects too long of a BufferRange") + | otherwise = go nextStep + + insertChunkH :: Ptr Word8 -> S.ByteString -> BI.BuildStep () -> IO () + insertChunkH _ _ nextStep = go nextStep {-# INLINE benchBInts #-} benchBInts :: String -> ([Int] -> Builder) -> Benchmark @@ -252,18 +285,41 @@ largeTraversalInput = S.concat (replicate 10 byteStringData) smallTraversalInput :: S.ByteString smallTraversalInput = S8.pack "The quick brown fox" +asciiBuf, utf8Buf, halfNullBuf, allNullBuf :: Ptr Word8 +asciiBuf = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"# +utf8Buf = Ptr "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\xc0\x80xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"# +halfNullBuf = Ptr "\xc0\x80xx\xc0\x80x\xc0\x80\xc0\x80x\xc0\x80\xc0\x80xx\xc0\x80\xc0\x80xxx\xc0\x80x\xc0\x80x\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80xxx\xc0\x80x\xc0\x80xx\xc0\x80\xc0\x80xxxxxxxxxx\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80x\xc0\x80\xc0\x80x\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80xxx"# +allNullBuf = Ptr "\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80\xc0\x80"# + +asciiLit, utf8Lit :: Ptr Word8 -> Builder +asciiLit str@(Ptr addr) = BI.asciiLiteralCopy str (byteCountLiteral addr) +utf8Lit str@(Ptr addr) = BI.modUtf8LitCopy str (byteCountLiteral addr) + +asciiStr, utf8Str :: String +asciiStr = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" +utf8Str = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\0xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx" + main :: IO () main = do defaultMain [ bgroup "Data.ByteString.Builder" [ bgroup "Small payload" - [ benchB' "mempty" () (const mempty) - , benchB' "ensureFree 8" () (const (ensureFree 8)) - , benchB' "intHost 1" 1 intHost - , benchB' "UTF-8 String (naive)" "hello world\0" fromString - , benchB' "UTF-8 String" () $ \() -> P.cstringUtf8 "hello world\0"# - , benchB' "String (naive)" "hello world!" fromString - , benchB' "String" () $ \() -> P.cstring "hello world!"# + [ benchB'_ "mempty" mempty + , bench "toLazyByteString mempty" $ nf toLazyByteString mempty + , benchB'_ "empty (10000 times)" $ + stimes (10000 :: Int) (Exts.lazy BI.empty) + , benchB'_ "ensureFree 8" (BI.ensureFree 8) + , benchB' "intHost 1" 1 Extra.intHost + , benchB' "UTF-8 String (12B, naive)" "hello world\0" fromString + , benchB'_ "UTF-8 String (12B)" $ utf8Lit (Ptr "hello world\xc0\x80"#) + , benchB' "UTF-8 String (64B, naive)" utf8Str fromString + , benchB'_ "UTF-8 String (64B)" $ utf8Lit utf8Buf + , benchB'_ "UTF-8 String (64B, half nulls)" $ utf8Lit halfNullBuf + , benchB'_ "UTF-8 String (64B, all nulls)" $ utf8Lit allNullBuf + , benchB' "String (12B, naive)" "hello world!" fromString + , benchB'_ "String (12B)" $ asciiLit (Ptr "hello world!"#) + , benchB' "String (64B, naive)" asciiStr fromString + , benchB'_ "String (64B)" $ asciiLit asciiBuf ] , bgroup "Encoding wrappers" @@ -280,11 +336,11 @@ main = do ] , bgroup "ByteString insertion" $ [ benchB "foldMap byteStringInsert" byteStringChunksData - (foldMap byteStringInsert) + (foldMap Extra.byteStringInsert) , benchB "foldMap byteString" byteStringChunksData (foldMap byteString) , benchB "foldMap byteStringCopy" byteStringChunksData - (foldMap byteStringCopy) + (foldMap Extra.byteStringCopy) ] , bgroup "Non-bounded encodings" diff --git a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs index 230882335..fa1ae5894 100644 --- a/tests/builder/Data/ByteString/Builder/Prim/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Prim/Tests.hs @@ -24,17 +24,7 @@ import Test.Tasty.QuickCheck tests :: [TestTree] tests = concat [ testsBinary, testsASCII, testsChar8, testsUtf8 - , testsCombinatorsB, [testCString, testCStringUtf8] ] - -testCString :: TestTree -testCString = testProperty "cstring" $ - toLazyByteString (BP.cstring "hello world!"#) == - LC.pack "hello" `L.append` L.singleton 0x20 `L.append` LC.pack "world!" - -testCStringUtf8 :: TestTree -testCStringUtf8 = testProperty "cstringUtf8" $ - toLazyByteString (BP.cstringUtf8 "hello\xc0\x80world!"#) == - LC.pack "hello" `L.append` L.singleton 0x00 `L.append` LC.pack "world!" + , testsCombinatorsB ] ------------------------------------------------------------------------------ -- Binary diff --git a/tests/builder/Data/ByteString/Builder/Tests.hs b/tests/builder/Data/ByteString/Builder/Tests.hs index 0d5afc6ba..bec94209a 100644 --- a/tests/builder/Data/ByteString/Builder/Tests.hs +++ b/tests/builder/Data/ByteString/Builder/Tests.hs @@ -34,6 +34,7 @@ import Data.Word import qualified Data.ByteString as S import qualified Data.ByteString.Internal as S import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Lazy.Char8 as LC import qualified Data.ByteString.Short as Sh import Data.ByteString.Builder @@ -47,6 +48,7 @@ import Control.Exception (evaluate) import System.IO (openTempFile, hPutStr, hClose, hSetBinaryMode, hSetEncoding, utf8, hSetNewlineMode, noNewlineTranslation) import Foreign (ForeignPtr, withForeignPtr, castPtr) import Foreign.C.String (withCString) +import GHC.Ptr (Ptr(..)) import Numeric (showFFloat) import System.Posix.Internals (c_unlink) @@ -75,7 +77,8 @@ tests = testsASCII ++ testsFloating ++ testsChar8 ++ - testsUtf8 + testsUtf8 ++ + testCString ------------------------------------------------------------------------------ @@ -986,3 +989,22 @@ testsUtf8 = [ testBuilderConstr "charUtf8" charUtf8_list charUtf8 , testBuilderConstr "stringUtf8" (foldMap charUtf8_list) stringUtf8 ] + +testCString :: [TestTree] +testCString = + [ testProperty "cstring" $ + toLazyByteString (asciiLit (Ptr "hello world!"#)) == + LC.pack "hello" `L.append` L.singleton 0x20 + `L.append` LC.pack "world!" + , testProperty "cstringUtf8" $ + toLazyByteString (utf8Lit (Ptr "hello\xc0\x80\xc0\x80world\xc0\x80!"#)) == + LC.pack "hello" `L.append` L.singleton 0x00 + `L.append` L.singleton 0x00 + `L.append` LC.pack "world" + `L.append` L.singleton 0x00 + `L.append` LC.singleton '!' + ] + +asciiLit, utf8Lit :: Ptr Word8 -> Builder +asciiLit str@(Ptr addr) = BI.asciiLiteralCopy str (S.byteCountLiteral addr) +utf8Lit str@(Ptr addr) = BI.modUtf8LitCopy str (S.byteCountLiteral addr)