Decoding text input with octal escape sequences
1 answer
First you need an unescape function that converts each escape sequence into a single Char.
Then use the utf8-string package
to decode the result into the actual UTF-8 string.
import Data.Char
import Codec.Binary.UTF8.String (decodeString)
-- | Sample input: the word "Divinités" with its "é" written as the two
-- octal escapes @\\303@ and @\\251@ (bytes 0xC3 0xA9, the UTF-8 encoding of
-- U+00E9). The backslashes are literal characters in the string.
input :: String
input = "Divinit\\303\\251s"
-- | Print the decoded sample input; print nothing when decoding fails.
main :: IO ()
main = mapM_ putStrLn (convertString input)
-- | Expand the octal escapes in the input and decode the resulting byte
-- sequence as UTF-8.
--
-- Returns 'Nothing' when an escape is malformed, or when the unescaped
-- string contains a character that does not fit in a single byte.
convertString :: [Char] -> Maybe [Char]
convertString escaped = do
  bytes <- unescape escaped
  -- decodeString converts every Char to a Word8 and throws on code points
  -- above 255 (e.g. from an escape such as \777 or a raw non-Latin-1
  -- character), so such input is rejected here instead of crashing later.
  if all (<= '\xFF') bytes
    then Just (decodeString bytes)
    else Nothing
-- | Replace every backslash escape of octal digits with the single 'Char'
-- whose code point is the escape's value; every other character is copied
-- through unchanged.
--
-- Up to three characters following a backslash are consumed as the escape.
-- Returns 'Nothing' if any of them is not an octal digit, or if the
-- backslash is the last character of the input.
unescape :: [Char] -> Maybe [Char]
unescape [] = Just []
unescape ('\\' : rest) =
  case splitAt 3 rest of
    -- A backslash with nothing after it is malformed. Without this case the
    -- empty digit list would evaluate to 0 and silently emit a NUL character.
    ([], _) -> Nothing
    (digits, remainder) -> do
      code <- octalDigitsToInt digits
      decoded <- unescape remainder
      return (toEnum code : decoded)
unescape (c : cs) = (c :) <$> unescape cs
-- | Interpret a string of octal digits, most significant digit first, as an
-- 'Int'. Returns 'Nothing' if any character is not an octal digit; an empty
-- string evaluates to 0.
octalDigitsToInt :: [Char] -> Maybe Int
octalDigitsToInt digits =
  sum <$> traverse weigh (zip placeValues (reverse digits))
  where
    -- 1, 8, 64, ... paired with the digits from least significant upwards.
    placeValues = iterate (* 8) 1
    weigh (place, c) = (place *) <$> octalDigitToInt c
-- | Numeric value of a single octal digit character, or 'Nothing' for any
-- character outside @'0'..'7'@.
octalDigitToInt :: Char -> Maybe Int
octalDigitToInt c =
  if isOctDigit c
    then Just (digitToInt c)
    else Nothing
+2
source to share