From 7c95d636943f2e89eb9ca2056d620fe30fdf2dc2 Mon Sep 17 00:00:00 2001 From: Julian Ospald Date: Tue, 12 Dec 2023 15:20:35 +0800 Subject: [PATCH] Add unsafeEncodeUtf, fixes #5 --- System/OsString.hs | 2 ++ System/OsString/Common.hs | 20 +++++++++++++++++--- System/OsString/Internal.hs | 10 +++++++++- 3 files changed, 28 insertions(+), 4 deletions(-) diff --git a/System/OsString.hs b/System/OsString.hs index 14b294c..1cdb197 100644 --- a/System/OsString.hs +++ b/System/OsString.hs @@ -21,6 +21,7 @@ module System.OsString -- * OsString construction , encodeUtf + , unsafeEncodeUtf , encodeWith , encodeFS , osstr @@ -130,6 +131,7 @@ import System.OsString.Internal ( unsafeFromChar , toChar , encodeUtf + , unsafeEncodeUtf , encodeWith , encodeFS , osstr diff --git a/System/OsString/Common.hs b/System/OsString/Common.hs index 6d0b5a9..ba9ab19 100644 --- a/System/OsString/Common.hs +++ b/System/OsString/Common.hs @@ -30,6 +30,7 @@ module System.OsString.MODULE_NAME -- * String construction , encodeUtf + , unsafeEncodeUtf , encodeWith , encodeFS , fromBytes @@ -177,7 +178,7 @@ import GHC.IO.Encoding.UTF8 ( mkUTF8 ) import qualified System.OsString.Data.ByteString.Short as BSP #endif import GHC.Stack (HasCallStack) -import Prelude (Bool(..), Int, Maybe(..), IO, String, Either(..), fmap, ($), (.), mconcat, fromEnum, fromInteger, mempty, fromIntegral, fail, (<$>), show, either, pure, const, flip) +import Prelude (Bool(..), Int, Maybe(..), IO, String, Either(..), fmap, ($), (.), mconcat, fromEnum, fromInteger, mempty, fromIntegral, fail, (<$>), show, either, pure, const, flip, error, id) import Data.Bifunctor ( bimap ) import qualified System.OsString.Data.ByteString.Short.Word16 as BS16 import qualified System.OsString.Data.ByteString.Short as BS8 @@ -189,13 +190,15 @@ import qualified System.OsString.Data.ByteString.Short as BS8 -- -- This encodes as UTF16-LE (strictly), which is a pretty good guess. -- --- Throws an 'EncodingException' if encoding fails. 
+-- Throws an 'EncodingException' if encoding fails. If the input does not +-- contain surrogate chars, you can use 'unsafeEncodeUtf'. #else -- | Partial unicode friendly encoding. -- -- This encodes as UTF8 (strictly), which is a good guess. -- --- Throws an 'EncodingException' if encoding fails. +-- Throws an 'EncodingException' if encoding fails. If the input does not +-- contain surrogate chars, you can use 'unsafeEncodeUtf'. #endif encodeUtf :: MonadThrow m => String -> m PLATFORM_STRING #ifdef WINDOWS @@ -204,6 +207,17 @@ encodeUtf = either throwM pure . encodeWith utf16le encodeUtf = either throwM pure . encodeWith utf8 #endif +-- | Unsafe unicode friendly encoding. +-- +-- Like 'encodeUtf', except it crashes when the input contains +-- surrogate chars. For sanitized input, this can be useful. +unsafeEncodeUtf :: HasCallStack => String -> PLATFORM_STRING +#ifdef WINDOWS +unsafeEncodeUtf = either (error . displayException) id . encodeWith utf16le +#else +unsafeEncodeUtf = either (error . displayException) id . encodeWith utf8 +#endif + -- | Encode a 'String' with the specified encoding. encodeWith :: TextEncoding -> String diff --git a/System/OsString/Internal.hs b/System/OsString/Internal.hs index e0d5254..026557c 100644 --- a/System/OsString/Internal.hs +++ b/System/OsString/Internal.hs @@ -42,10 +42,18 @@ import Data.Coerce (coerce) -- On windows this encodes as UTF16-LE (strictly), which is a pretty good guess. -- On unix this encodes as UTF8 (strictly), which is a good guess. -- --- Throws a 'EncodingException' if encoding fails. +-- Throws an 'EncodingException' if encoding fails. If the input does not +-- contain surrogate chars, you can use 'unsafeEncodeUtf'. encodeUtf :: MonadThrow m => String -> m OsString encodeUtf = fmap OsString . PF.encodeUtf +-- | Unsafe unicode friendly encoding. +-- +-- Like 'encodeUtf', except it crashes when the input contains +-- surrogate chars. For sanitized input, this can be useful. 
+unsafeEncodeUtf :: HasCallStack => String -> OsString +unsafeEncodeUtf = OsString . PF.unsafeEncodeUtf + -- | Encode an 'OsString' given the platform specific encodings. encodeWith :: TextEncoding -- ^ unix text encoding -> TextEncoding -- ^ windows text encoding