1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
|
{-# OPTIONS_CYMAKE -Wno-incomplete-patterns #-}
module HTML.Parser ( readHtmlFile, parseHtmlString )
where
import Char
import HTML.Base
readHtmlFile :: String -> IO [BaseHtml]
readHtmlFile file = readFile file >>= return . parseHtmlString
parseHtmlString :: String -> [BaseHtml]
parseHtmlString s = reverse (parseHtmlTokens [] (scanHtmlString s))
data HtmlToken = HText String | HElem String [(String,String)]
parseHtmlTokens :: [BaseHtml] -> [HtmlToken] -> [BaseHtml]
parseHtmlTokens helems [] = helems
parseHtmlTokens helems (HText s : hs) =
parseHtmlTokens (BaseText s : helems) hs
parseHtmlTokens helems (HElem (t:ts) args : hs) =
if t == '/'
then let (structargs,elems,rest) = splitHtmlElems ts helems
in parseHtmlTokens ([BaseStruct ts structargs elems] ++ rest) hs
else parseHtmlTokens (BaseStruct (t:ts) args [] : helems) hs
splitHtmlElems :: String -> [BaseHtml]
-> ([(String,String)],[BaseHtml],[BaseHtml])
splitHtmlElems _ [] = ([],[],[])
splitHtmlElems tag (BaseText s : hs) =
let (largs,elems,rest) = splitHtmlElems tag hs
in (largs, elems ++ [BaseText s], rest)
splitHtmlElems tag (BaseStruct s args cont@(_:_) : hs) =
let (largs,elems,rest) = splitHtmlElems tag hs
in (largs, elems ++ [BaseStruct s args cont], rest)
splitHtmlElems tag (BaseStruct s args []: hs) =
if tag==s
then (args,[],hs)
else let (largs,elems,rest) = splitHtmlElems tag hs
in (largs, elems ++ [BaseStruct s args []], rest)
scanHtmlString :: String -> [HtmlToken]
scanHtmlString s = scanHtml s
where
scanHtml [] = []
scanHtml (c:cs) =
if c=='<'
then if take 3 cs == "!--"
then scanHtmlComment cs
else if take 4 (map toLower cs) == "pre>"
then scanHtmlPre "" (skipFirstNewLine (drop 4 cs))
else scanHtmlElem [] cs
else let (initxt,remtag) = break (=='<') (c:cs)
in HText initxt : scanHtml remtag
scanHtmlElem :: String -> String -> [HtmlToken]
scanHtmlElem ct [] = [HText ("<"++ct)]
scanHtmlElem ct (c:cs)
| c=='>' = (if null ct
then HText "<>"
else HElem ct []) : scanHtmlString cs
| isSpace c =
if null ct
then HText "< " : scanHtmlString cs
else let (args,rest) = splitAtElement (=='>') (dropWhile isSpace cs)
revargs = reverse args
in if null args || head revargs /= '/'
then HElem ct (string2args args) : scanHtmlString rest
else HElem ct (string2args (reverse (tail revargs)))
: HElem ('/':ct) [] : scanHtmlString rest
| c=='/' && head cs == '>' = HElem ct [] : HElem ('/':ct) []
: scanHtmlString (tail cs)
| otherwise = scanHtmlElem (ct++[toLower c]) cs
scanHtmlComment :: String -> [HtmlToken]
[] = []
scanHtmlComment (c:cs) =
if c=='-' && take 2 cs == "->"
then scanHtmlString (drop 2 cs)
else scanHtmlComment cs
scanHtmlPre :: String -> String -> [HtmlToken]
scanHtmlPre _ [] = []
scanHtmlPre pre (c:cs) =
if c=='<' && take 5 (map toLower cs) == "/pre>"
then HElem "pre" [] : HText (reverse pre) : HElem "/pre" []
: scanHtmlString (drop 5 cs)
else scanHtmlPre (c:pre) cs
string2args :: String -> [(String,String)]
string2args [] = []
string2args (c:cs) =
let (arg1,rest) = splitAtElement isSpace (c:cs)
in deleteApo (splitAtElement (=='=') arg1)
: string2args (dropWhile isSpace rest)
deleteApo :: (String,String) -> (String,String)
deleteApo (tag,[]) = (map toLower tag,[])
deleteApo (tag,c:cs) | c=='"' = (map toLower tag, deleteLastApo cs)
| c=='\'' = (map toLower tag, deleteLastApo cs)
| otherwise = (map toLower tag, c:cs)
deleteLastApo :: String -> String
deleteLastApo [] = []
deleteLastApo [c] = if c=='"' || c=='\'' then [] else [c]
deleteLastApo (c1:c2:cs) = c1 : deleteLastApo (c2:cs)
splitAtElement :: (a -> Bool) -> [a] -> ([a],[a])
splitAtElement _ [] = ([],[])
splitAtElement p (c:cs) =
if p c then ([],cs)
else let (first,rest) = splitAtElement p cs in (c:first,rest)
skipFirstNewLine :: String -> String
skipFirstNewLine [] = []
skipFirstNewLine (c:cs) =
if c=='\n' then cs
else if isSpace c then skipFirstNewLine cs else c:cs
|