1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
------------------------------------------------------------------------------
--- This library contains operations to read/parse Prolog programs.
---
--- @author Michael Hanus
--- @version November 2023
------------------------------------------------------------------------------
{-# OPTIONS_FRONTEND -Wno-incomplete-patterns -Wno-overlapping #-}

module Language.Prolog.Read
  ( readPrologFile, parseProlog )
 where

import Data.Char ( isAlphaNum, isLower, isSpace, isUpper )
import Data.List ( union, intercalate )

import Control.Search.SetFunctions

import Language.Prolog.Show  ( showPlProg, showPlClause, showPlTerm )
import Language.Prolog.Types

{-
-- Some testing:

--main1 = parseTerm False [] (scanPl "f(x23,Y)")
main2 = scanPl "f(x+y)"
main3 = scanPl "isColor(blue).\nisColor(red)."
main4 = parseProlog "?-(f(X))."
main5 = ppProg "isColor(blue).\nisColor(red).\n?- isColor(42)."
main6 = ppProg "%CMT\n:- module(a).\np(X) :- p(f(X)).\np(true).\n?- p(Z).\n"
main7 = parseProlog ":- op(400, xfy, ++).\n1 ++ 2 ++ 3.\n"
main8 = parseProlog ":- op(400, yfx, [++, +-]).\n1 ++ 2 ++ 3.\n"
main9 = parseProlog "rev([1,2])."

-- Reads Prolog file and print it.
readProg :: String -> IO ()
readProg f = readPrologFile f >>= putStr . showPlProg

ppProg :: String -> IO ()
ppProg = putStr . showPlProg . parseProlog
-}




















----------------------------------------------------------------------------
-- The state used during parsing contains the currently defined operators
-- (which are changed by reading an `op` directive).

data ParseState = ParseState
       { opDecls :: [(String,Int,String)] -- operator name / priority / fixity
       }

initParseState :: ParseState
initParseState = ParseState stdOps

----------------------------------------------------------------------------

--- Reads a file containing a Prolog program and return the Prolog clauses.
readPrologFile :: String -> IO [PlClause]
readPrologFile f = readFile f >>= return . parseProlog

--- Parses a string containing a list of Prolog clauses which are returned.
parseProlog :: String -> [PlClause]
parseProlog = parseClauses initParseState . scanPl

-- Parses a sequence of tokens as Prolog clauses where each clause
-- is terminated by a dot.
parseClauses :: ParseState -> [PlToken] -> [PlClause]
parseClauses _   []       = []
parseClauses pst ts@(_:_) = case ts1 of
  DOT : ts2 -> let cl = term2clause term
               in cl : parseClauses (addOpDecl cl pst) ts2
  _         -> error $ "Missing terminating dot at: " ++ show ts1
 where
  (term, ts1) = parseTerm False pst [] ts

-- Adds operators to the state if the clause is an `op` directive.
addOpDecl :: PlClause -> ParseState -> ParseState
addOpDecl clause pst = case clause of
  PlDirective [PlLit "op" opargs] -> case opargs of
    [PlInt prio, PlAtom fix, ops] -> case ops of
      PlAtom op -> addOps [(op, prio, fix)]
      opsterm   -> addOps (map (\o -> (o, prio, fix)) (opList opsterm))
    _ -> error opError
  _ -> pst
 where
  opList ops = case ops of
    PlStruct "." [PlAtom op, os] -> op : opList os
    PlAtom "[]"                  -> []
    _                            -> error opError

  opError = "Illegal op directive: " ++ showPlClause clause

  addOps ops = pst { opDecls = opDecls pst ++ ops }

-- Translate a Prolog term into a clause by interpreting the main operators.
term2clause :: PlTerm -> PlClause
term2clause term = case term of
    PlStruct f [arg] | f == "?-" -> PlQuery     (term2goals arg)
    PlStruct f [arg] | f == ":-" -> PlDirective (term2goals arg)
    PlStruct f [l,r] | f == ":-" -> let PlLit p args = term2goal l
                                    in PlClause p args (term2goals r)
    PlStruct f args -> PlClause f args []
    PlAtom   f      -> PlClause f []   []
    _               -> error $ "Illegal clause: " ++ showPlTerm term
 where
  term2goals t = case t of
    PlStruct "," [a1,a2] -> term2goal a1 : term2goals a2
    _                    -> [term2goal t]

  term2goal t = case t of
    PlStruct "\\+" [arg] -> PlNeg (term2goals arg)
    PlStruct ";" [PlStruct "->" [cnd,thn],els] ->
      PlCond (term2goals cnd) (term2goals thn) (term2goals els)
    PlStruct f args -> PlLit f args
    PlAtom   f      -> PlLit f []
    _               -> error $ "Illegal goal: " ++ showPlTerm t

-- Parses a sequence of Prolog terms (possibly containing operators).
-- The second argument contains the previously parsed terms of this list.
-- If the first argument is `True`, the term is terminated by a comma,
-- otherwise, a comma is parsed as an infix operator.
parseTerm :: Bool -> ParseState -> [PlTerm] -> [PlToken] -> (PlTerm, [PlToken])
parseTerm stopcomma pst pts ts = case ts of
  VAR s     : ts1 -> parseTerm stopcomma pst (PlVar s : pts) ts1
  INT n     : ts1 -> parseTerm stopcomma pst (PlInt n : pts) ts1
  FUNCTOR s : ts1 ->
    let (args,ts2) = parseCommaTerms pst [] ts1
    in case ts2 of
         RPAREN : ts3 -> parseTerm stopcomma pst (PlStruct s args : pts) ts3
         _            -> rightBracketError ts2
  ATOM s : ts1 -> parseTerm stopcomma pst (PlAtom s : pts) ts1
  COMMA  : ts1 | not stopcomma -> parseTerm stopcomma pst (PlAtom "," : pts) ts1
  LPAREN : ts1 ->
    let (term, ts2) = parseTerm False pst [] ts1
    in case ts2 of RPAREN : ts3 -> parseTerm stopcomma pst (term : pts) ts3
                   _            -> rightBracketError ts2
  LSQUARE : ts1 -> let (listterm, ts2) = parseList pst ts1
                   in parseTerm stopcomma pst (listterm : pts) ts2
  _             -> (terms2term pst pts, ts)
 where
  rightBracketError toks = error $ "Missing right bracket at: " ++ show toks

-- Parses a list of comma-separated Prolog terms.
parseCommaTerms :: ParseState -> [PlTerm] -> [PlToken] -> ([PlTerm], [PlToken])
parseCommaTerms pst args ts =
  let (term, ts1) = parseTerm True pst [] ts
  in case ts1 of
       COMMA  : ts2 -> parseCommaTerms pst (term:args) ts2
       _            -> (reverse (term:args), ts1)

-- Parses a list in Prolog notation (left square bracket already consumed).
parseList :: ParseState -> [PlToken] -> (PlTerm, [PlToken])
parseList pst ts =
  let (terms, ts1) = parseCommaTerms pst [] ts
  in case ts1 of
       RSQUARE : ts2 -> (consList terms (PlAtom "[]"), ts2)
       BAR     : ts2 ->
         let (tail, ts3) = parseTerm True pst [] ts2
         in case ts3 of RSQUARE : ts4 -> (consList terms tail, ts4)
                        _             -> listError ts3
       _             -> listError ts1
 where
  listError toks = error $ "Non-terminated list at: " ++ show toks

  consList []     tail = tail
  consList (x:xs) tail = PlStruct "." [x, consList xs tail]


-- Combines a list of terms (provided as first argument in reverse order)
-- into a single term by considering Prolog operator precedences.
terms2term :: ParseState -> [PlTerm] -> PlTerm
terms2term pst ts =
  if notEmpty terms
    then snd (selectValue terms) -- also check for more than one solution...
    else error $ "Illegal term: " ++ unwords (map showPlTerm (reverse ts))
 where terms = set2 opterms2term pst (reverse ts)

opterms2term :: ParseState -> [PlTerm] -> (Int, PlTerm)
opterms2term _   [t] = (0,t)
opterms2term pst (PlAtom f : ts)
  | isPrefixOp fprio fa tp = (fprio, PlStruct f [t])
 where (fprio,fa) = lookupOpInfo f (opDecls pst)
       (tp,t) = opterms2term pst ts
opterms2term pst (ts ++ [PlAtom f])
  | isPostfixOp fprio fa tp = (fprio, PlStruct f [t])
 where (fprio,fa) = lookupOpInfo f (opDecls pst)
       (tp,t) = opterms2term pst ts
opterms2term pst (ts1 ++ [PlAtom f] ++ ts2)
  | isInfixOp fprio fa tp1 tp2 = (fprio, PlStruct f [t1,t2])
 where (fprio,fa) = lookupOpInfo f (opDecls pst)
       (tp1,t1) = opterms2term pst ts1
       (tp2,t2) = opterms2term pst ts2

isPrefixOp :: Int -> String -> Int -> Bool
isPrefixOp fprio fa tprio = fa == "fx" && tprio < fprio ||
                            fa == "fy" && tprio <= fprio

isPostfixOp :: Int -> String -> Int -> Bool
isPostfixOp fprio fa tprio = fa == "xf" && tprio < fprio ||
                             fa == "yf" && tprio <= fprio

isInfixOp :: Int -> String -> Int -> Int -> Bool
isInfixOp fprio fa tp1 tp2 = fa == "xfx" && tp1 <  fprio && tp2 < fprio ||
                             fa == "yfx" && tp1 <= fprio && tp2 < fprio ||
                             fa == "xfy" && tp1 <  fprio && tp2 <= fprio

lookupOpInfo :: String -> [(String,Int,String)] -> (Int,String)
lookupOpInfo op ((op,prio,a):_) = (prio,a)
lookupOpInfo op (_:ops)         = lookupOpInfo op ops

------------------------------------------------------------------------------
-- A scanner for Prolog.

data PlToken =
    DOT -- a single dot (end of term)
  | LPAREN  | RPAREN
  | LSQUARE | RSQUARE | BAR
  | COMMA
  | VAR     String
  | FUNCTOR String  -- a functor, i.e., directly followed by an open bracket
  | ATOM    String  -- an atom (not followed by an open bracket)
  | INT     Int
 deriving Show

scanPl :: String -> [PlToken]
scanPl s = case dropWhile isSpace s of
  []         -> []
  '%':cs     -> scanPl (dropWhile (/='\n') cs) -- skip comment line
  '(':cs     -> LPAREN : scanPl cs
  ')':cs     -> RPAREN : scanPl cs
  ',':cs     -> COMMA  : scanPl cs
  ';':cs     -> ATOM ";" : scanPl cs
  '!':cs     -> ATOM "!" : scanPl cs
  '\'':cs    -> scanTickAtom "" cs
  '.':cs     -> if null cs || isSpace (head cs)
                  then DOT : scanPl cs
                  else scanOp "." cs
  '[':']':cs -> ATOM "[]" : scanPl cs
  '[':cs     -> LSQUARE : scanPl cs
  ']':cs     -> RSQUARE : scanPl cs
  '|':cs     -> BAR     : scanPl cs
  c:cs | isLower c             -> scanAtom [c] cs
       | isUpper c || c == '_' -> scanVar  [c] cs
       | isPrologSpecial c     -> scanOp   [c] cs
       | isDigit c             -> scanNum  [c] cs
  cs -> error $ "Scan error: " ++ cs

scanAtom :: String -> String -> [PlToken]
scanAtom pre [] = [ATOM (reverse pre)]
scanAtom pre s@(c:cs)
  | isAlphaNum c || c == '_' = scanAtom (c:pre) cs
  | c == '('                 = FUNCTOR (reverse pre) : scanPl cs
  | otherwise                = ATOM (reverse pre) : scanPl s

scanTickAtom :: String -> String -> [PlToken]
scanTickAtom pre [] = error $ "Scan error: non-terminated atom: " ++ pre
scanTickAtom pre s@(c:cs)
  | s == "\\" = error $ "Scan error: non-terminated atom: " ++ pre
  | c == '\\' && head cs == '\'' = scanTickAtom (head cs : pre) cs
  | c == '\''                    = ATOM (reverse pre) : scanPl cs
  | otherwise                    = scanTickAtom (c : pre) cs

scanVar :: String -> String -> [PlToken]
scanVar pre [] = [VAR (reverse pre)]
scanVar pre s@(c:cs)
  | isAlphaNum c || c == '_' = scanVar (c:pre) cs
  | otherwise                = VAR (reverse pre) : scanPl s

scanOp :: String -> String -> [PlToken]
scanOp pre [] = [ATOM (reverse pre)]
scanOp pre s@(c:cs)
  | isPrologSpecial c = scanOp (c:pre) cs
  | c == '('          = FUNCTOR (reverse pre) : scanPl cs
  | otherwise         = ATOM (reverse pre) : scanPl s

scanNum :: String -> String -> [PlToken]
scanNum pre [] = [INT (read (reverse pre))]
scanNum pre s@(c:cs)
  | isDigit c = scanNum (c:pre) cs
  | otherwise = INT (read (reverse pre)) : scanPl s

isPrologSpecial :: Char -> Bool
isPrologSpecial = (`elem` "+-*/<=>`\\:.?@#$&^~")

----------------------------------------------------------------------------

-- The standard operators used in Prolog programs.
stdOps :: [(String,Int,String)]
stdOps =
  [ (":-",1200,"xfx")
  , ("-->",1200,"xfx")
  , (":-",1200,"fx")
  , ("?-",1200,"fx")
  , (";",1100,"xfy")
  , ("dynamic",1150,"fx")
  , ("function",1150,"fx")   -- to specify a function (with result positions)
  , ("type",1150,"fx")       -- to specify data types
  , ("->",1050,"xfy")
  , (",",1000,"xfy")
  , ("\\+",900,"fy")
  , ("is",700,"xfx")
  , ("=",700,"xfx"), ("\\=",700,"xfx"), ("=:=",700,"xfx"), ("=\\=",700,"xfx")
  , ("=..",700,"xfx")
  , ("=<",700,"xfx"), (">=",700,"xfx"), ("<",700,"xfx"), (">",700,"xfx")
  , (":",550,"xfy")
  , ("+",500,"yfx"), ("-",500,"yfx")
  , ("*",400,"yfx"), ("/",400,"yfx")
  , ("//",400,"yfx")
  , ("mod",400,"yfx")
  , ("rem",400,"yfx")
  , ("div",400,"yfx")
  , ("**",200,"xfx")
  , ("^",200,"xfy")
  , ("-",200,"fy")
  , ("+",200,"fy")
  ]

{-
Get all currently defined operators in Prolog with:

?- current_op(X,Y,Z), write('("'), write(Z), write('",'), write(X), write(',"'),write(Y), write('")'), nl, fail.

-}






----------------------------------------------------------------------------