CurryInfo: rw-data-1.1.0 / RW.Base

classes:
ReadWrite
documentation:
------------------------------------------------------------------------------
--- This library defines the class `ReadWrite` which is the basis to define
--- compact data representations for various types.
--- Furthermore, instances of this class are defined for various standard types
--- defined in the prelude.
---
--- @author Lasse Züngel
--- @version July 2024
------------------------------------------------------------------------------
name:
RW.Base
operations:
defaultParams monoRWType readData readDataFile showData showDataP writeDataFile writeDataFileP
sourcecode:
{-# OPTIONS_FRONTEND -Wno-incomplete-patterns -Wno-unused-bindings #-}

module RW.Base
  ( ReadWrite(..), RWParameters (..), defaultParams, RWType(..), monoRWType
  , readDataFile, writeDataFile, writeDataFileP, readData, showData, showDataP)
 where

import Data.Maybe ( fromJust )
import Data.List  ( intercalate, sortBy )
import System.IO  ( Handle, IOMode(WriteMode), hClose, hPutChar, hPutStr, openFile )
import Text.Show  ( ShowS, showChar, showString, shows )

import Prelude hiding (ShowS, showString, showChar, shows)

import Data.Trie as T 

------------------------------------------------------------------------------

--- The class `ReadWrite` contains the interface to be implemented
--- by compact readers and writers of data.
class ReadWrite a where
  readRW :: Trie String -> String -> (a, String)

  showRW :: RWParameters -> Trie String -> a -> (Trie String, ShowS)

  writeRW :: RWParameters -> Handle -> a -> Trie String -> IO (Trie String)

  --- Returns the type of the value.
  typeOf :: a -> RWType

  readListRW :: Trie String -> String -> ([a], String)
  readListRW _    ('0' : cs) = ([], cs)
  readListRW strs ('1' : cs) = (x : xs, r2)
    where
      (x,r1) = readRW strs cs
      (xs,r2) = readListRW strs r1

  showListRW :: RWParameters  -> Trie String -> [a] -> (Trie String,ShowS)
  showListRW _      strs []       = (strs,   showString "0")
  showListRW params strs (x : xs) = (strs'', showString "1" . x' . xs')
    where
      (strs',x') = showRW params strs x
      (strs'',xs') = showListRW params strs' xs

  writeListRW :: RWParameters -> Handle -> [a] -> Trie String -> IO (Trie String)
  writeListRW _      h [] strs = hPutStr h "0" >> return strs
  writeListRW params h (x : xs) strs =
    hPutStr h "1" >> writeRW params h x strs >>= writeListRW params h xs

------------------------------------------------------------------------------
-- `ReadtWrite` instances of prelude types.

instance ReadWrite Int where
  readRW _ cs = (readInt n,r)
    where
      (n,_ : r) = span (flip (/=) ';') cs
      
  showRW _ strs n = (strs, shows n . showString ";")

  writeRW _ h n strs = hPutStr h (show n ++ ";") >> return strs

  typeOf _ = monoRWType "Int"

instance ReadWrite Float where
  readRW _ cs = (read n,r)
    where
      (n,_ : r) = span (flip (/=) ';') cs

  showRW _ strs n = (strs,shows n . showString ";")

  writeRW _ h n strs = hPutStr h (show n ++ ";") >> return strs

  typeOf _ = monoRWType "Float"

instance ReadWrite Char where
  readRW _ (c : cs)
    | c /= '\\' = (c,cs)
    | otherwise
    = case cs of
        '"' : cs1 -> ('"',cs1)
        'a' : cs1 -> ('\a',cs1)
        'b' : cs1 -> ('\b',cs1)
        't' : cs1 -> ('\t',cs1)
        'n' : cs1 -> ('\n',cs1)
        'v' : cs1 -> ('\v',cs1)
        'f' : cs1 -> ('\f',cs1)
        'r' : cs1 -> ('\r',cs1)
        '\\' : cs1 -> ('\\',cs1)
        _ -> error "Invalid escape sequence"

  showRW _ strs c = (strs, showString $ escapeChar c)

  writeRW _ h c strs = do hPutStr h (escapeChar c) >> return strs

  readListRW strs cs =
    case index of
      (_:_) -> (fromJust $ T.lookup index strs, r)
      []    -> readStubString strs r
    where
      (index,r) = readStringId cs :: (String,String)
      readStubString _ cs1 = (str,cs')
        where
          (str,_ : cs') = span (flip (/=) '"') cs1

  showListRW params strs str = (strs',index)
    where
      (strs',index) = writeString params strs str

  writeListRW params h str strs = hPutStr h (index "") >> return strs'
    where
      (strs',index) = writeString params strs str

  typeOf _ = monoRWType "Char"

instance ReadWrite Bool where
  readRW strs ('0' : r0) = (False,r0)
  readRW strs ('1' : r0) = (True,r0)

  showRW params strs0 False = (strs0,showChar '0')
  showRW params strs0 True  = (strs0,showChar '1')

  writeRW params h False strs = hPutChar h '0' >> return strs
  writeRW params h True  strs = hPutChar h '1' >> return strs

  typeOf _ = monoRWType "Bool"

-- `ReadWrite` instance for polymorphic lists.
instance ReadWrite a => ReadWrite [a] where
  readRW = readListRW

  showRW = showListRW

  writeRW = writeListRW

  typeOf n = RWType "[]" [typeOf $ get_a' n]
    where
      get_a' :: [a'] -> a'
      get_a' _ = failed 

--- `ReadWrite` instance for `Either` types.
instance (ReadWrite a,ReadWrite b) => ReadWrite (Either a b) where
  readRW strs ('0' : r0) = (Left a',r1)
    where
      (a',r1) = readRW strs r0
  readRW strs ('1' : r0) = (Right a',r1)
    where
      (a',r1) = readRW strs r0

  showRW params strs0 (Left a') = (strs1, showChar '0' . show1)
    where
      (strs1,show1) = showRW params strs0 a'
  showRW params strs0 (Right a') = (strs1, showChar '1' . show1)
    where
      (strs1,show1) = showRW params strs0 a'

  writeRW params h (Left a')  strs = hPutChar h '0' >> writeRW params h a' strs
  writeRW params h (Right a') strs = hPutChar h '1' >> writeRW params h a' strs

  typeOf n = RWType "Either" [typeOf (get_a n),typeOf (get_b n)]
    where
      get_a :: Either a' b' -> a'
      get_a _ = failed
      get_b :: Either a' b' -> b'
      get_b _ = failed

--- `ReadWrite` instance for `Maybe` types.
instance ReadWrite a => ReadWrite (Maybe a) where
  readRW strs ('0' : r0) = (Nothing, r0)
  readRW strs ('1' : r0) = (Just a', r1)
    where
      (a',r1) = readRW strs r0

  showRW params strs0 Nothing   = (strs0, showChar '0')
  showRW params strs0 (Just a') = (strs1, showChar '1' . show1)
    where
      (strs1,show1) = showRW params strs0 a'

  writeRW params h Nothing   strs = hPutChar h '0' >> return strs
  writeRW params h (Just a') strs = hPutChar h '1' >> writeRW params h a' strs

  typeOf n = RWType "Maybe" [typeOf (get_a n)]
    where
      get_a :: Maybe a' -> a'
      get_a _ = failed

--- `ReadWrite` instance for type `Ordering`.
instance ReadWrite Ordering where
  readRW strs ('0' : r0) = (LT, r0)
  readRW strs ('1' : r0) = (EQ, r0)
  readRW strs ('2' : r0) = (GT, r0)

  showRW params strs0 LT = (strs0, showChar '0')
  showRW params strs0 EQ = (strs0, showChar '1')
  showRW params strs0 GT = (strs0, showChar '2')

  writeRW params h LT strs = hPutChar h '0' >> return strs
  writeRW params h EQ strs = hPutChar h '1' >> return strs
  writeRW params h GT strs = hPutChar h '2' >> return strs

  typeOf _ = monoRWType "Ordering"

--- `ReadWrite` instance for unit type.
instance ReadWrite () where
  readRW _ cs = ((), cs)

  showRW _ strs () = (strs, showString "")

  writeRW _ _ () strs = return strs

  typeOf _ = monoRWType "()"

instance (ReadWrite a,ReadWrite b) => ReadWrite (a,b) where
  readRW strs cs = ((x, y), r2)
    where
      (x, r1) = readRW strs cs
      (y, r2) = readRW strs r1

  showRW params strs (x,y) = (strs'',x' . y')
    where
      (strs',  x') = showRW params strs  x
      (strs'', y') = showRW params strs' y

  writeRW params h (x, y) strs = writeRW params h x strs >>= writeRW params h y

  typeOf n = RWType "()" [typeOf $ get_a' n,typeOf $ get_b' n]
    where
      get_a' :: (a', b') -> a'
      get_a' _ = failed 
      get_b' :: (a', b') -> b'
      get_b' _ = failed

instance (ReadWrite a,ReadWrite b,ReadWrite c) => ReadWrite (a,b,c) where
  readRW strs cs = ((x,y,z),r3)
    where
      (x,r1) = readRW strs cs
      (y,r2) = readRW strs r1
      (z,r3) = readRW strs r2

  showRW params strs (x,y,z) = (strs''',x' . (y' . z'))
    where
      (strs',x') = showRW params strs x
      (strs'',y') = showRW params strs' y
      (strs''',z') = showRW params strs'' z

  writeRW params h (x,y,z) strs = 
    (writeRW params h x strs >>= writeRW params h y) >>= writeRW params h z

  typeOf n = RWType "()" [typeOf $ get_a' n,typeOf $ get_b' n,typeOf $ get_c' n]
    where
      get_a' :: (a',b',c') -> a'
      get_a' _ = failed 
      get_b' :: (a',b',c') -> b'
      get_b' _ = failed 
      get_c' :: (a',b',c') -> c'
      get_c' _ = failed

instance (ReadWrite a,ReadWrite b,ReadWrite c,ReadWrite d) => ReadWrite (a, b, c, d) where
  readRW strs cs = ((x,y,z,w),r4)
    where
      (x,r1) = readRW strs cs
      (y,r2) = readRW strs r1
      (z,r3) = readRW strs r2
      (w,r4) = readRW strs r3

  showRW params strs (x,y,z,w) = (strs'''',x' . (y' . (z' . w')))
    where
      (strs',x')    = showRW params strs x
      (strs'',y')   = showRW params strs' y
      (strs''',z')  = showRW params strs'' z
      (strs'''',w') = showRW params strs''' w

  writeRW params h (x,y,z,w) strs =
    writeRW params h x strs >>= writeRW params h y 
                            >>= writeRW params h z 
                            >>= writeRW params h w

  typeOf n = RWType "()" [typeOf $ get_a' n, typeOf $ get_b' n,
                          typeOf $ get_c' n, typeOf $ get_d' n]
    where
      get_a' :: (a', b', c', d') -> a'
      get_a' _ = failed 
      get_b' :: (a', b', c', d') -> b'
      get_b' _ = failed 
      get_c' :: (a', b', c', d') -> c'
      get_c' _ = failed 
      get_d' :: (a', b', c', d') -> d'
      get_d' _ = failed

--- Reads an integer.
readInt :: String -> Int
readInt [] = error "readInt: empty string"
readInt (c:cs) | c == '-'  = -readInt' cs
               | otherwise =  readInt' (c:cs)
 where 
  readInt' = foldl (\n c1 -> n * 10 + (ord c1 - ord '0')) 0

--- Writing RWParameters . 
data RWParameters  = RWParameters 
  { minStrLen   :: Int -- minimum string length for a string to be considered
                       -- a stub (-> not extracted)
  , alphabetLen :: Int -- length of the alphabet
  }

--- Default RWParameters  for writing compact data.
defaultParams :: RWParameters 
defaultParams = RWParameters  6 26

--- Represents a type in the compact data representation.
data RWType = RWType String [RWType]
 deriving Eq

--- Creates a representation of a monomorphic type.
monoRWType :: String -> RWType
monoRWType name = RWType name []

--- Pretty-prints a type.
ppType :: RWType -> String
ppType (RWType name args)
  | args == []                       = name
  | name == "[]" && length args == 1 = "[" ++ (ppType (head args) ++ "]")
  | name == "()"                     = "(" ++ intercalate ", " (map ppType' args) ++ ")"
  | otherwise                        = name ++ " " ++ unwords (map ppType' args)
  where
    ppType' x
      | isMonomorphic x = ppType x
      | bracketed x = ppType x
      | otherwise = "(" ++ (ppType x ++ ")")
    isMonomorphic x =
      case x of
        RWType _ [] -> True
        _ -> False
    bracketed t =
      case t of
        RWType "[]" [_] -> True
        RWType "()" _ -> True
        _ -> False

--- Returns a type at a given position of a type.
typeAt :: [Int] -> RWType -> RWType
typeAt [] x = x
typeAt (i : is) (RWType _ args) = typeAt is (args !! i)

--- The coding.
lookupCoding :: Int -> Char
lookupCoding x = "abcdefghijklmnopqrstuvwxyz0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ !\"#$%&'()*+,-./:<=>?@[\\]^_`{|}~" !! x
  
--- Converts an integer (index) to a string (coding).
intToASCII :: Int -> Int -> String
intToASCII lc n
  | n < lc = [lookupCoding n]
  | otherwise = intToASCII lc (div n lc) ++ [lookupCoding (mod n lc)]

--- Shows a string. 
--- 
--- If the string is long, it is extracted and represented only once.
--- Otherwise, it is inlined.
writeString :: RWParameters -> Trie String -> String
            -> (Trie String, String -> String)
writeString (RWParameters  sLen aLen) strs s
  | isStub s = (strs,showChar ';' . (showString s . showChar '"'))
  | otherwise
  = case T.lookup s strs of
      Just i -> (strs,showString i . showChar ';')
      Nothing ->
        let coding = intToASCII aLen (T.size strs)
        in (T.insert s coding strs, showString coding . showChar ';')
  where
    isStub str =
      length str < sLen && not (elem '"' str) && not (containsNewline str)

--- Char escaping 
escapeChar :: Char -> String
escapeChar c =
  case c of
    '"' -> "\\\""
    '\a' -> "\\a"
    '\b' -> "\\b"
    '\t' -> "\\t"
    '\n' -> "\\n"
    '\v' -> "\\v"
    '\f' -> "\\f"
    '\r' -> "\\r"
    '\\' -> "\\\\"
    _ -> [c]

--- Special case: Carriage return
escapeCarriageReturn :: String -> String
escapeCarriageReturn [] = []
escapeCarriageReturn (c : cs)
  | c == '\r' = '\\' : ('r' : escapeCarriageReturn cs)
  | otherwise = c : escapeCarriageReturn cs

--- Special case: Carriage return
unescapeCarriageReturn :: String -> String
unescapeCarriageReturn [] = []
unescapeCarriageReturn (c : cs) =
  case c of
    '\\' ->
      case cs of
        'r' : cs' -> '\r' : unescapeCarriageReturn cs'
        _ -> c : unescapeCarriageReturn cs
    _ -> c : unescapeCarriageReturn cs

--- String output. If the string contains a newline, the length is prepended.
--- Otherwise, a newline is appended as a delimiter.
outputStr :: String -> String
outputStr s
  | containsNewline s = let s' = escapeCarriageReturn s
                        in show (length s') ++ (";" ++ s')
  | otherwise = ";" ++ (s ++ "\n")

--- Checks if the string contains some kind of return character.
containsNewline :: String -> Bool
containsNewline s = elem '\n' s || elem '\r' s

keysOrdByVal :: Trie String -> [String]
keysOrdByVal m = map fst (sortBy ordHex $ T.toList m)
 where
  ordHex :: Ord b => (a,[b]) -> (c,[b]) -> Bool
  ordHex (_,a) (_,b)
    | length a < length b = True
    | length a > length b = False
    | otherwise           = a < b

--- Parses a string id. 
readStringId :: String -> (String,String)
readStringId [] = error "readStringId: empty string"
readStringId (c : cs)
  | c == ';'  = ([],cs)
  | otherwise = let (xs,r1) = readStringId cs in (c : xs,r1)

------------------------------------------------------------------------------
--- Data reading and writing

--- Parses a compact data representation and returns the value.
--- If the parse failes (e.g. due to a type mismatch), `Nothing` is returned.
---
--- This operation might fail if the input is not well-formed.
readData :: ReadWrite a => String -> Maybe a
readData ls =
  let n@(_,t,_,_) = parseInput ls
      result = calc n
  in if ppType (typeOf (fst result)) == t
      then Just $ fst result
      else Nothing
 where
  calc (sLen,_,encoding,strings) =
    readRW (T.fromList $ zip (map (intToASCII sLen) (enumFrom 0)) strings)
           encoding

--- Reads a file containing a compact data representation,
--- parses the contents and returns the value. 
---
--- If the parse failes (e.g. due to a type mismatch or a bad input format),
--- `Nothing` is returned.
readDataFile :: ReadWrite a => FilePath -> IO (Maybe a)
readDataFile file = do
  dt <- readFile file
  catch (return $ readData dt) (\_ -> return Nothing)

--- Writes some data to a file containing a compact data representation.
writeDataFile :: ReadWrite a => FilePath -> a -> IO ()
writeDataFile = writeDataFileP defaultParams

--- Writes some data to a file containing a compact data representation
--- and use specific RWParameters .
writeDataFileP :: ReadWrite a => RWParameters  -> FilePath -> a -> IO ()
writeDataFileP params file x =
  do h <- openFile file WriteMode
     hPutStr h (show (alphabetLen params) ++ "\n")
     hPutStr h (ppType (typeOf x) ++ "\n")
     written <- writeRW params h x T.empty
     hPutStr h "\n"
     let strs = keysOrdByVal written
     mapM_ (hPutStr h) (map outputStr strs)
     hClose h

--- Takes the input and chops it into a parametrized layout "version"
--- (alphabet length), type, an encoding and a list of strings.
parseInput :: String -> (Int, String,String,[String])
parseInput s = (readInt sLen, t, encoding, parseStrings strings)
 where
  (sLen, _ : sx) = break (flip (==) '\n') s
  (t,    _ : s') = break (flip (==) '\n') sx
  (encoding,_ : strings) = break (flip (==) '\n') s'
  parseStrings xs =
    case xs of
      [] -> []
      _  -> let (len,_ : xs') = break (flip (==) ';') xs
            in ifThenElse (Prelude.null len)
                (let (str,xs'') = span (flip (/=) '\n') xs'
                  in str : parseStrings (drop 1 xs''))
                (let (str,xs'') = splitAt (read len :: Int) xs'
               in unescapeCarriageReturn str : parseStrings xs'')

--- Converts a given data value into a compact string representation.
---
--- This is rarely what you want. Use 'writeDataFile' if you want to write
--- the data into a file.
showData :: ReadWrite a => a -> String
showData = showDataP defaultParams

--- Converts data to a compact string representation using specific RWParameters .
---
--- This is rarely what you want. Use writeDataFileP if you want to write
--- the data into a file.
showDataP :: ReadWrite a => RWParameters  -> a -> String
showDataP params x =
  (show (alphabetLen params) ++ "\n") ++
  (ppType (typeOf x) ++ "\n") ++ (l "" ++ "\n") ++
  concatMap outputStr (keysOrdByVal ls)
 where
  (ls,l) = showRW params T.empty x
types:
RWParameters RWType
unsafe:
safe