1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
|
module Text.Markdown
( MarkdownDoc, MarkdownElem(..), fromMarkdownText
, removeEscapes, markdownEscapeChars
, markdownText2HTML, markdownText2CompleteHTML
, markdownText2LaTeX, markdownText2LaTeXWithFormat
, markdownText2CompleteLaTeX
, formatMarkdownFileAsPDF, formatMarkdownInputAsPDF
)
where
import Data.Char
import System.IO ( getContents )
import Data.List
import System.Process
import HTML.Base
import HTML.LaTeX
type MarkdownDoc = [MarkdownElem]
data MarkdownElem = Text String
| Emph String
| Strong String
| Code String
| HRef String String
| Par MarkdownDoc
| CodeBlock String
| UList [MarkdownDoc]
| OList [MarkdownDoc]
| Quote MarkdownDoc
| HRule
| Int String
data SourceMDElem = SMDText String
| SMDEmph String
| SMDStrong String
| SMDCode String
| SMDHRef String String
| SMDPar MarkdownDoc
| SMDCodeBlock String
| SMDUItem String
| SMDOItem String
| SMDQuote MarkdownDoc
| SMDHRule
| Int String
isSMDUItem :: SourceMDElem -> Bool
isSMDUItem md = case md of SMDUItem _ -> True
_ -> False
isSMDOItem :: SourceMDElem -> Bool
isSMDOItem md = case md of SMDOItem _ -> True
_ -> False
textOfItem :: SourceMDElem -> String
textOfItem md = case md of SMDUItem txt -> txt
SMDOItem txt -> txt
_ -> ""
fromMarkdownText :: String -> MarkdownDoc
fromMarkdownText = groupMarkDownElems . markdownText
groupMarkDownElems :: [SourceMDElem] -> MarkdownDoc
groupMarkDownElems [] = []
groupMarkDownElems (SMDUItem itxt :mds) = joinItems UList isSMDUItem [itxt] mds
groupMarkDownElems (SMDOItem itxt :mds) = joinItems OList isSMDOItem [itxt] mds
groupMarkDownElems (SMDText s : mds) = Text s : groupMarkDownElems mds
groupMarkDownElems (SMDEmph s : mds) = Emph s : groupMarkDownElems mds
groupMarkDownElems (SMDStrong s : mds) = Strong s : groupMarkDownElems mds
groupMarkDownElems (SMDCode s : mds) = Code s : groupMarkDownElems mds
groupMarkDownElems (SMDHRef s u : mds) = HRef s u : groupMarkDownElems mds
groupMarkDownElems (SMDPar md : mds) = Par md : groupMarkDownElems mds
groupMarkDownElems (SMDCodeBlock s : mds) = CodeBlock s : groupMarkDownElems mds
groupMarkDownElems (SMDQuote md : mds) = Quote md : groupMarkDownElems mds
groupMarkDownElems (SMDHRule : mds) = HRule : groupMarkDownElems mds
groupMarkDownElems (SMDHeader l s : mds) = Header l s : groupMarkDownElems mds
joinItems :: ([[MarkdownElem]] -> MarkdownElem) -> (SourceMDElem -> Bool)
-> [String] -> [SourceMDElem] -> [MarkdownElem]
joinItems mdlcons _ items [] = [mdlcons (reverse (map fromMarkdownText items))]
joinItems mdlcons isitem items (md:mds) =
if isitem md
then joinItems mdlcons isitem (textOfItem md : items) mds
else mdlcons (reverse (map fromMarkdownText items))
: groupMarkDownElems (md:mds)
markdownText :: String -> [SourceMDElem]
markdownText [] = []
markdownText txt@(_:_) = markdownLine fstline (dropFirst remtxt)
where (fstline,remtxt) = break (=='\n') txt
markdownLine :: String -> String -> [SourceMDElem]
markdownLine fstline remtxt
| all isSpace fstline
= markdownText remtxt
| blanklen > 0
= markdownCodeBlock blanklen (drop blanklen fstline) remtxt
| isLevel1Line
= SMDHeader 1 fstline : markdownText (dropFirst furtherlines)
| isLevel2Line
= SMDHeader 2 fstline : markdownText (dropFirst furtherlines)
| take 1 fstline == "#"
= tryMDHeader fstline remtxt
| isHRule fstline
= SMDHRule : markdownText remtxt
| take 2 fstline == "> "
= markdownQuote (drop 2 fstline) remtxt
| uitemlen > 0
= markdownItem SMDUItem uitemlen (drop uitemlen fstline) remtxt
| nitemlen > 0
= markdownItem SMDOItem nitemlen (drop nitemlen fstline) remtxt
| otherwise = markdownPar fstline remtxt
where
blanklen = isCodeLine fstline
(sndline,furtherlines) = break (=='\n') remtxt
isLevel1Line = not (null sndline) && all (=='=') sndline
isLevel2Line = not (null sndline) && all (=='-') sndline
nitemlen = isNumberedItemLine fstline
uitemlen = isUnorderedItemLine fstline
dropFirst :: [a] -> [a]
dropFirst s = if null s then [] else tail s
tryMDHeader :: String -> String -> [SourceMDElem]
s rtxt =
let (sharps,htxt) = break (==' ') s
level = length sharps
in if null htxt || level>6
then markdownPar s rtxt
else SMDHeader level (dropFirst htxt) : markdownText rtxt
isHRule :: String -> Bool
isHRule l =
(all (\c -> isSpace c || c=='-') l && length (filter (=='-') l) > 3) ||
(all (\c -> isSpace c || c=='*') l && length (filter (=='*') l) > 3)
isUnorderedItemLine :: String -> Int
isUnorderedItemLine s =
let (blanks,nonblanks) = span (==' ') s
in if take 2 nonblanks `elem` ["* ","- ","+ "] then length blanks+2 else 0
isNumberedItemLine :: String -> Int
isNumberedItemLine s =
let (blanks,nonblanks) = span (==' ') s
numblanks = length blanks
in checkNumber numblanks nonblanks
where
checkNumber indt numtxt =
let (ns,brt) = break (==' ') numtxt
(blanks,rtxt) = break (/=' ') brt
nsl = length ns
in if nsl>0 && all isDigit (take (nsl-1) ns) && ns!!(nsl-1)=='.' &&
not (null blanks) && not (null rtxt)
then indt+nsl+length blanks
else 0
isCodeLine :: String -> Int
isCodeLine s =
let (blanks,nonblanks) = span (==' ') s
numblanks = length blanks
in if not (null nonblanks) && numblanks >= 4 then numblanks else 0
markdownPar :: String -> String -> [SourceMDElem]
markdownPar ptxt txt
| null txt || head txt `elem` ['\n'] ||
uitemlen>0 || nitemlen>0
= SMDPar (groupMarkDownElems (outsideMarkdownElem "" ptxt)) : markdownText txt
| null remtxt
= [SMDPar (groupMarkDownElems (outsideMarkdownElem "" (ptxt++'\n':fstline)))]
| otherwise = markdownPar (ptxt++'\n':fstline) (tail remtxt)
where
(fstline,remtxt) = break (=='\n') txt
nitemlen = isNumberedItemLine fstline
uitemlen = isUnorderedItemLine fstline
markdownQuote :: String -> String -> [SourceMDElem]
markdownQuote qtxt alltxt =
let txt = if take 2 alltxt == ">\n"
then "> " ++ drop 1 alltxt
else alltxt
in if take 2 txt == "> "
then let (fstline,remtxt) = break (=='\n') (drop 2 txt)
in if null remtxt
then [SMDQuote (fromMarkdownText (qtxt++'\n':fstline))]
else markdownQuote (qtxt++'\n':fstline) (tail remtxt)
else SMDQuote (fromMarkdownText qtxt) : markdownText txt
markdownCodeBlock :: Int -> String -> String -> [SourceMDElem]
markdownCodeBlock n ctxt txt =
if all (==' ') (take n txt)
then let (fstline,remtxt) = break (=='\n') (drop n txt)
in if null remtxt
then [SMDCodeBlock (ctxt ++ '\n' : fstline)]
else markdownCodeBlock n (ctxt ++ '\n' : fstline)
(tail remtxt)
else SMDCodeBlock ctxt : markdownText txt
markdownItem :: (String -> SourceMDElem) -> Int -> String -> String
-> [SourceMDElem]
markdownItem icons n itxt txt =
if take n txt == take n (repeat ' ')
then let (fstline,remtxt) = break (=='\n') (drop n txt)
in if null remtxt
then [icons (itxt++'\n':fstline)]
else markdownItem icons n (itxt++'\n':fstline) (tail remtxt)
else let (fstline,remtxt) = break (=='\n') txt
in if all isSpace fstline
then if null remtxt
then [icons itxt]
else markdownItem icons n (itxt++"\n") (tail remtxt)
else icons itxt : markdownText txt
removeEscapes :: String -> String
removeEscapes s = case s of
[] -> []
('\\':c:cs) -> if c `elem` markdownEscapeChars
then c : removeEscapes cs
else '\\' : removeEscapes (c:cs)
(c:cs) -> c : removeEscapes cs
markdownEscapeChars :: [Char]
markdownEscapeChars =
['\\','`','*','_','{','}','[',']','(',')','<','>','#','+','-','.',' ','!']
outsideMarkdownElem :: String -> String -> [SourceMDElem]
outsideMarkdownElem txt s = case s of
[] -> addPrevious txt []
('\\':c:cs) -> if c `elem` markdownEscapeChars
then outsideMarkdownElem (c:'\\':txt) cs
else outsideMarkdownElem ('\\':txt) (c:cs)
('*':'*':cs) -> addPrevious txt $ insideMarkdownElem "**" [] cs
('_':'_':cs) -> addPrevious txt $ insideMarkdownElem "__" [] cs
('*':cs) -> addPrevious txt $ insideMarkdownElem "*" [] cs
('_':cs) -> addPrevious txt $ insideMarkdownElem "_" [] cs
('`':cs) -> let (ticks, cs') = span (=='`') cs in
addPrevious txt $ insideMarkdownElem
(replicate (length ticks + 1) '`') [] cs'
('[':cs) -> addPrevious txt $ tryParseLink cs
('<':cs) -> addPrevious txt $ markdownHRef cs
(c:cs) -> outsideMarkdownElem (c:txt) cs
addPrevious :: String -> [SourceMDElem] -> [SourceMDElem]
addPrevious ptxt xs = if null ptxt then xs else SMDText (reverse ptxt) : xs
tryParseLink :: String -> [SourceMDElem]
tryParseLink txt = let (linktxt,rtxt) = break (==']') txt in
if null rtxt || null (tail rtxt) || (rtxt!!1 /= '(')
then outsideMarkdownElem "[" txt
else let (url,mtxt) = break (==')') (drop 2 rtxt)
in if null mtxt
then outsideMarkdownElem "[" txt
else SMDHRef linktxt url : outsideMarkdownElem "" (tail mtxt)
markdownHRef :: String -> [SourceMDElem]
markdownHRef txt = let (url,rtxt) = break (=='>') txt in
if null rtxt
then outsideMarkdownElem "<" txt
else SMDHRef url url : outsideMarkdownElem "" (dropFirst rtxt)
insideMarkdownElem :: String -> String -> String -> [SourceMDElem]
insideMarkdownElem marker etext s =
if marker `isPrefixOf` s
then text2MDElem marker (reverse etext)
: outsideMarkdownElem "" (drop (length marker) s)
else case s of
[] -> [SMDText (marker ++ reverse etext)]
('\\':c:cs) -> if c `elem` markdownEscapeChars
then insideMarkdownElem marker (c:'\\':etext) cs
else insideMarkdownElem marker ('\\':etext) (c:cs)
(c:cs) -> insideMarkdownElem marker (c:etext) cs
text2MDElem :: String -> String -> SourceMDElem
text2MDElem marker txt = case marker of
"**" -> SMDStrong txt
"__" -> SMDStrong txt
"*" -> SMDEmph txt
"_" -> SMDEmph txt
_ | all (=='`') marker -> SMDCode txt
| otherwise -> error $ "Markdown.text2MDElem: unknown marker \"" ++
marker ++ "\""
mdDoc2html :: HTML h => MarkdownDoc -> [h]
mdDoc2html = map mdElem2html
mdtxt2html :: HTML h => String -> h
mdtxt2html = htmlText . htmlQuote . removeEscapes
mdElem2html :: HTML h => MarkdownElem -> h
mdElem2html (Text s) = mdtxt2html s
mdElem2html (Emph s) = emphasize [mdtxt2html s]
mdElem2html (Strong s) = htmlStruct "strong" [] [mdtxt2html s]
mdElem2html (HRef s url) = if s==url
then href url [code [mdtxt2html s]]
else href url [mdtxt2html s]
mdElem2html (Code s) = code [htmlText (htmlQuote s)]
mdElem2html (CodeBlock s) = verbatim s
mdElem2html (Quote md) = htmlStruct "blockquote" [] (mdDoc2html md)
mdElem2html (Par md) = par (mdDoc2html md)
mdElem2html (UList mds) = ulist (map mdDoc2htmlWithoutPar mds)
mdElem2html (OList mds) = olist (map mdDoc2htmlWithoutPar mds)
mdElem2html HRule = hrule
mdElem2html (Header l s) = htmlStruct ('h':show l) [] [mdtxt2html s]
mdDoc2htmlWithoutPar :: HTML h => MarkdownDoc -> [h]
mdDoc2htmlWithoutPar mdoc = case mdoc of
[] -> []
[Par md] -> mdDoc2html md
[md] -> [mdElem2html md]
(Par md1 : md2 : mds) -> mdDoc2html md1 ++ breakline :
mdDoc2htmlWithoutPar (md2:mds)
(md1 : md2 : mds) -> mdElem2html md1 : mdDoc2htmlWithoutPar (md2:mds)
markdownText2HTML :: HTML h => String -> [h]
markdownText2HTML = mdDoc2html . fromMarkdownText
markdownText2CompleteHTML :: String -> String -> String
markdownText2CompleteHTML title mdtxt =
showHtmlPage (page title (markdownText2HTML mdtxt))
mdDoc2latex :: (String->String) -> MarkdownDoc -> String
mdDoc2latex txt2latex = concatMap (mdElem2latex txt2latex)
mdElem2latex :: (String->String) -> MarkdownElem -> String
mdElem2latex txt2latex (Text s) = txt2latex s
mdElem2latex txt2latex (Emph s) = "\\emph{"++txt2latex s++"}"
mdElem2latex txt2latex (Strong s) = "\\textbf{"++txt2latex s++"}"
mdElem2latex txt2latex (HRef s url) =
if s==url then "\\url{"++url++"}"
else "\\href{"++url++"}{"++txt2latex s++"}"
mdElem2latex txt2latex (Code s) = "\\texttt{"++txt2latex (htmlQuote s)++"}"
mdElem2latex _ (CodeBlock s) =
"\\begin{verbatim}\n"++s++"\n\\end{verbatim}\n"
mdElem2latex txt2latex (Quote md) =
"\\begin{quote}\n"++mdDoc2latex txt2latex md++"\\end{quote}\n"
mdElem2latex txt2latex (Par md) = mdDoc2latex txt2latex md++"\n\n"
mdElem2latex txt2latex (UList s) =
"\\begin{itemize}"++
concatMap (\i -> "\n\\item\n"++mdDoc2latex txt2latex i) s ++
"\\end{itemize}\n"
mdElem2latex txt2latex (OList s) =
"\\begin{enumerate}"++
concatMap (\i -> "\n\\item\n"++mdDoc2latex txt2latex i) s ++
"\\end{enumerate}\n"
mdElem2latex _ HRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n"
mdElem2latex txt2latex (Header l s) = case l of
1 -> "\\section{"++txt2latex s++"}\n\n"
2 -> "\\subsection{"++txt2latex s++"}\n\n"
3 -> "\\subsubsection{"++txt2latex s++"}\n\n"
4 -> "\\paragraph{"++txt2latex s++"}\n\n"
5 -> "\\textbf{"++txt2latex s++"}\n\n"
_ -> "\\emph{"++txt2latex s++"}\n\n"
text2latex :: String -> String
text2latex = showLatexExps . (\s -> [htxt s]) . removeEscapes
markdownText2LaTeX :: String -> String
markdownText2LaTeX = mdDoc2latex text2latex . fromMarkdownText
markdownText2LaTeXWithFormat :: (String->String) -> String -> String
markdownText2LaTeXWithFormat txt2latex = mdDoc2latex txt2latex . fromMarkdownText
markdownText2CompleteLaTeX :: String -> String
markdownText2CompleteLaTeX mds =
latexHeader ++ mdDoc2latex text2latex (fromMarkdownText mds) ++
"\\end{document}\n"
latexHeader :: String
= unwords
[ "\\documentclass{article}"
, "\\usepackage[utf8x]{inputenc}"
, "\\usepackage{url}"
, "\\usepackage[breaklinks=true,unicode=true]{hyperref}"
, "\\setlength{\\parindent}{0pt}"
, "\\setlength{\\parskip}{6pt plus 2pt minus 1pt}"
, "\\setcounter{secnumdepth}{0}"
, "\\begin{document}"
]
formatMarkdownInputAsPDF :: String -> IO ()
formatMarkdownInputAsPDF outfile = getContents >>= formatMarkdownAsPDF outfile
formatMarkdownFileAsPDF :: String -> String -> IO ()
formatMarkdownFileAsPDF infile outfile =
readFile infile >>= formatMarkdownAsPDF outfile
formatMarkdownAsPDF :: String -> String -> IO ()
formatMarkdownAsPDF outfile mdstr = do
pid <- getPID
let tmp = "tmp_" ++ show pid
writeFile (tmp ++ ".tex") (markdownText2CompleteLaTeX mdstr)
pdflatexFile tmp outfile
pdflatexFile :: String -> String -> IO ()
pdflatexFile tmp outfile = do
system $ "pdflatex '\\nonstopmode\\input{" ++ tmp ++ ".tex}'"
system $ unwords ["/bin/rm -f", tmp ++ ".tex", tmp ++ ".aux", tmp ++ ".log",
tmp ++ ".out"]
if null outfile
then do system $ "evince " ++ tmppdf
system $ "/bin/rm -f " ++ tmppdf
else system $ "/bin/mv -f " ++ tmppdf ++ " " ++ outfile
return ()
where
tmppdf = tmp ++ ".pdf"
|