{-# LANGUAGE Trustworthy #-}
{-# LANGUAGE NoImplicitPrelude #-}
module GHC.IO.Encoding.Failure (
CodingFailureMode(..), codingFailureModeSuffix,
isSurrogate,
recoverDecode, recoverEncode
) where
import GHC.IO
import GHC.IO.Buffer
import GHC.IO.Exception
import GHC.Base
import GHC.Char
import GHC.Word
import GHC.Show
import GHC.Num
import GHC.Real ( fromIntegral )
data CodingFailureMode
= ErrorOnCodingFailure
| IgnoreCodingFailure
| TransliterateCodingFailure
| RoundtripFailure
deriving ( Int -> CodingFailureMode -> ShowS
[CodingFailureMode] -> ShowS
CodingFailureMode -> String
(Int -> CodingFailureMode -> ShowS)
-> (CodingFailureMode -> String)
-> ([CodingFailureMode] -> ShowS)
-> Show CodingFailureMode
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CodingFailureMode] -> ShowS
$cshowList :: [CodingFailureMode] -> ShowS
show :: CodingFailureMode -> String
$cshow :: CodingFailureMode -> String
showsPrec :: Int -> CodingFailureMode -> ShowS
$cshowsPrec :: Int -> CodingFailureMode -> ShowS
Show
)
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix :: CodingFailureMode -> String
codingFailureModeSuffix CodingFailureMode
ErrorOnCodingFailure = String
""
codingFailureModeSuffix CodingFailureMode
IgnoreCodingFailure = String
"//IGNORE"
codingFailureModeSuffix CodingFailureMode
TransliterateCodingFailure = String
"//TRANSLIT"
codingFailureModeSuffix CodingFailureMode
RoundtripFailure = String
"//ROUNDTRIP"
unrepresentableChar :: Char
unrepresentableChar :: Char
unrepresentableChar = Char
'\xFFFD'
{-# INLINE isSurrogate #-}
isSurrogate :: Char -> Bool
isSurrogate :: Char -> Bool
isSurrogate Char
c = (Int
0xD800 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xDBFF)
Bool -> Bool -> Bool
|| (Int
0xDC00 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0xDFFF)
where x :: Int
x = Char -> Int
ord Char
c
{-# INLINE escapeToRoundtripCharacterSurrogate #-}
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate :: Word8 -> Char
escapeToRoundtripCharacterSurrogate Word8
b
| Word8
b Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
< Word8
128 = Int -> Char
chr (Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b)
| Bool
otherwise = Int -> Char
chr (Int
0xDC00 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word8
b)
{-# INLINE unescapeRoundtripCharacterSurrogate #-}
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate :: Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate Char
c
| Int
0xDC80 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
x Bool -> Bool -> Bool
&& Int
x Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0xDD00 = Word8 -> Maybe Word8
forall a. a -> Maybe a
Just (Int -> Word8
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x)
| Bool
otherwise = Maybe Word8
forall a. Maybe a
Nothing
where x :: Int
x = Char -> Int
ord Char
c
recoverDecode :: CodingFailureMode -> Buffer Word8 -> Buffer Char
-> IO (Buffer Word8, Buffer Char)
recoverDecode :: CodingFailureMode
-> Buffer Word8 -> Buffer Char -> IO (Buffer Word8, Buffer Char)
recoverDecode CodingFailureMode
cfm input :: Buffer Word8
input@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Word8
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
_ }
output :: Buffer Char
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Char
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_, bufR :: forall e. Buffer e -> Int
bufR=Int
ow } = do
case CodingFailureMode
cfm of
CodingFailureMode
ErrorOnCodingFailure -> IO (Buffer Word8, Buffer Char)
forall a. IO a
ioe_decodingError
CodingFailureMode
IgnoreCodingFailure -> (Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL :: Int
bufL=Int
irInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 }, Buffer Char
output)
CodingFailureMode
TransliterateCodingFailure -> do
Int
ow' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
oraw Int
ow Char
unrepresentableChar
(Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL :: Int
bufL=Int
irInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 }, Buffer Char
output { bufR :: Int
bufR=Int
ow' })
CodingFailureMode
RoundtripFailure -> do
Word8
b <- RawBuffer Word8 -> Int -> IO Word8
readWord8Buf RawBuffer Word8
iraw Int
ir
Int
ow' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
oraw Int
ow (Word8 -> Char
escapeToRoundtripCharacterSurrogate Word8
b)
(Buffer Word8, Buffer Char) -> IO (Buffer Word8, Buffer Char)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Word8
input { bufL :: Int
bufL=Int
irInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 }, Buffer Char
output { bufR :: Int
bufR=Int
ow' })
recoverEncode :: CodingFailureMode -> Buffer Char -> Buffer Word8
-> IO (Buffer Char, Buffer Word8)
recoverEncode :: CodingFailureMode
-> Buffer Char -> Buffer Word8 -> IO (Buffer Char, Buffer Word8)
recoverEncode CodingFailureMode
cfm input :: Buffer Char
input@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Char
iraw, bufL :: forall e. Buffer e -> Int
bufL=Int
ir, bufR :: forall e. Buffer e -> Int
bufR=Int
_ }
output :: Buffer Word8
output@Buffer{ bufRaw :: forall e. Buffer e -> RawBuffer e
bufRaw=RawBuffer Word8
oraw, bufL :: forall e. Buffer e -> Int
bufL=Int
_, bufR :: forall e. Buffer e -> Int
bufR=Int
ow } = do
(Char
c,Int
ir') <- RawBuffer Char -> Int -> IO (Char, Int)
readCharBuf RawBuffer Char
iraw Int
ir
case CodingFailureMode
cfm of
CodingFailureMode
IgnoreCodingFailure -> (Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL :: Int
bufL=Int
ir' }, Buffer Word8
output)
CodingFailureMode
TransliterateCodingFailure -> do
if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'?'
then (Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL :: Int
bufL=Int
ir' }, Buffer Word8
output)
else do
Int
_ir' <- RawBuffer Char -> Int -> Char -> IO Int
writeCharBuf RawBuffer Char
iraw Int
ir Char
'?'
(Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input, Buffer Word8
output)
CodingFailureMode
RoundtripFailure | Just Word8
x <- Char -> Maybe Word8
unescapeRoundtripCharacterSurrogate Char
c -> do
RawBuffer Word8 -> Int -> Word8 -> IO ()
writeWord8Buf RawBuffer Word8
oraw Int
ow Word8
x
(Buffer Char, Buffer Word8) -> IO (Buffer Char, Buffer Word8)
forall (m :: * -> *) a. Monad m => a -> m a
return (Buffer Char
input { bufL :: Int
bufL=Int
ir' }, Buffer Word8
output { bufR :: Int
bufR=Int
owInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 })
CodingFailureMode
_ -> IO (Buffer Char, Buffer Word8)
forall a. IO a
ioe_encodingError
ioe_decodingError :: IO a
ioe_decodingError :: forall a. IO a
ioe_decodingError = IOException -> IO a
forall a. IOException -> IO a
ioException
(Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
InvalidArgument String
"recoverDecode"
String
"invalid byte sequence" Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)
ioe_encodingError :: IO a
ioe_encodingError :: forall a. IO a
ioe_encodingError = IOException -> IO a
forall a. IOException -> IO a
ioException
(Maybe Handle
-> IOErrorType
-> String
-> String
-> Maybe CInt
-> Maybe String
-> IOException
IOError Maybe Handle
forall a. Maybe a
Nothing IOErrorType
InvalidArgument String
"recoverEncode"
String
"invalid character" Maybe CInt
forall a. Maybe a
Nothing Maybe String
forall a. Maybe a
Nothing)