1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
|
module HTML.Parser ( readHtmlFile, parseHtmlString )
where
import Data.Char
import HTML.Base
readHtmlFile :: HTML h => String -> IO [h]
readHtmlFile file = readFile file >>= return . parseHtmlString
parseHtmlString :: HTML h => String -> [h]
parseHtmlString = map fromStaticHtml . parseHtml
parseHtml :: String -> [StaticHtml]
parseHtml s = reverse (parseHtmlTokens [] (scanHtmlString s))
data HtmlToken = HTText String | HTElem String [(String,String)]
parseHtmlTokens :: [StaticHtml] -> [HtmlToken] -> [StaticHtml]
parseHtmlTokens helems [] = helems
parseHtmlTokens helems (HTText s : hs) =
parseHtmlTokens (HText s : helems) hs
parseHtmlTokens helems (HTElem (t:ts) args : hs) =
if t == '/'
then let (structargs,elems,rest) = splitHtmlElems ts helems
in parseHtmlTokens ([HStruct ts structargs elems] ++ rest) hs
else parseHtmlTokens (HStruct (t:ts) args [] : helems) hs
parseHtmlTokens _ (HTElem [] _ : _) =
error "Internal error in HTML.Parser.parseHtmlTokens: empty list in HTElem"
splitHtmlElems :: String -> [StaticHtml]
-> ([(String,String)],[StaticHtml],[StaticHtml])
splitHtmlElems _ [] = ([],[],[])
splitHtmlElems tag (HText s : hs) =
let (largs,elems,rest) = splitHtmlElems tag hs
in (largs, elems ++ [HText s], rest)
splitHtmlElems tag (HStruct s args cont@(_:_) : hs) =
let (largs,elems,rest) = splitHtmlElems tag hs
in (largs, elems ++ [HStruct s args cont], rest)
splitHtmlElems tag (HStruct s args []: hs) =
if tag==s
then (args,[],hs)
else let (largs,elems,rest) = splitHtmlElems tag hs
in (largs, elems ++ [HStruct s args []], rest)
scanHtmlString :: String -> [HtmlToken]
scanHtmlString s = scanHtml s
where
scanHtml [] = []
scanHtml (c:cs) =
if c=='<'
then if take 3 cs == "!--"
then scanHtmlComment cs
else if take 4 (map toLower cs) == "pre>"
then scanHtmlPre "" (skipFirstNewLine (drop 4 cs))
else scanHtmlElem [] cs
else let (initxt,remtag) = break (=='<') (c:cs)
in HTText initxt : scanHtml remtag
scanHtmlElem :: String -> String -> [HtmlToken]
scanHtmlElem ct [] = [HTText ("<"++ct)]
scanHtmlElem ct (c:cs)
| c=='>' = (if null ct
then HTText "<>"
else HTElem ct []) : scanHtmlString cs
| isSpace c =
if null ct
then HTText "< " : scanHtmlString cs
else let (args,rest) = splitAtElement (=='>') (dropWhile isSpace cs)
revargs = reverse args
in if null args || head revargs /= '/'
then HTElem ct (string2args args) : scanHtmlString rest
else HTElem ct (string2args (reverse (tail revargs)))
: HTElem ('/':ct) [] : scanHtmlString rest
| c=='/' && head cs == '>' = HTElem ct [] : HTElem ('/':ct) []
: scanHtmlString (tail cs)
| otherwise = scanHtmlElem (ct++[toLower c]) cs
scanHtmlComment :: String -> [HtmlToken]
[] = []
scanHtmlComment (c:cs) =
if c=='-' && take 2 cs == "->"
then scanHtmlString (drop 2 cs)
else scanHtmlComment cs
scanHtmlPre :: String -> String -> [HtmlToken]
scanHtmlPre _ [] = []
scanHtmlPre pre (c:cs) =
if c=='<' && take 5 (map toLower cs) == "/pre>"
then HTElem "pre" [] : HTText (reverse pre) : HTElem "/pre" []
: scanHtmlString (drop 5 cs)
else scanHtmlPre (c:pre) cs
string2args :: String -> [(String,String)]
string2args [] = []
string2args (c:cs) =
let (arg1,rest) = splitAtElement isSpace (c:cs)
in deleteApo (splitAtElement (=='=') arg1)
: string2args (dropWhile isSpace rest)
deleteApo :: (String,String) -> (String,String)
deleteApo (tag,[]) = (map toLower tag,[])
deleteApo (tag,c:cs) | c=='"' = (map toLower tag, deleteLastApo cs)
| c=='\'' = (map toLower tag, deleteLastApo cs)
| otherwise = (map toLower tag, c:cs)
deleteLastApo :: String -> String
deleteLastApo [] = []
deleteLastApo [c] = if c=='"' || c=='\'' then [] else [c]
deleteLastApo (c1:c2:cs) = c1 : deleteLastApo (c2:cs)
splitAtElement :: (a -> Bool) -> [a] -> ([a],[a])
splitAtElement _ [] = ([],[])
splitAtElement p (c:cs) =
if p c then ([],cs)
else let (first,rest) = splitAtElement p cs in (c:first,rest)
skipFirstNewLine :: String -> String
skipFirstNewLine [] = []
skipFirstNewLine (c:cs) =
if c=='\n' then cs
else if isSpace c then skipFirstNewLine cs else c:cs
|