sourcecode:
|
module Text.Markdown
( MarkdownDoc, MarkdownElem(..), fromMarkdownText
, removeEscapes, markdownEscapeChars
, markdownText2HTML, markdownText2CompleteHTML
, markdownText2LaTeX, markdownText2LaTeXWithFormat
, markdownText2CompleteLaTeX
, formatMarkdownFileAsPDF, formatMarkdownInputAsPDF
)
where
import Data.Char
import System.IO ( getContents )
import Data.List
import System.Process
import HTML.Base
import HTML.LaTeX
-----------------------------------------------------------------------
--- A markdown document is a list of markdown elements.
type MarkdownDoc = [MarkdownElem]
--- The data type for representing the different elements
--- occurring in a markdown document.
--- @cons Text s - a simple text in a markdown document
--- @cons Emph s - an emphasized text in a markdown document
--- @cons Strong s - a strongly emphaszed text in a markdown document
--- @cons Strong s - a code string in a markdown document
--- @cons Code s - a code string in a markdown document
--- @cons HRef s u - a reference to URL `u` with text `s` in a markdown document
--- @cons Par md - a paragraph in a markdown document
--- @cons CodeBlock s - a code block in a markdown document
--- @cons UList mds - an unordered list in a markdown document
--- @cons OList mds - an ordered list in a markdown document
--- @cons Quote md - a quoted paragraph in a markdown document
--- @cons HRule - a hoirzontal rule in a markdown document
--- @cons Header l s - a level `l` header with title `s`
--- in a markdown document
data MarkdownElem = Text String
| Emph String
| Strong String
| Code String
| HRef String String
| Par MarkdownDoc
| CodeBlock String
| UList [MarkdownDoc]
| OList [MarkdownDoc]
| Quote MarkdownDoc
| HRule
| Header Int String
-----------------------------------------------------------------------
--- The data type for representing the different elements
--- of a source markdown document. Basically, it is the same
--- as the final markdown document execept for single list items
--- that occur in the source but are combined into a list in
--- the final document.
data SourceMDElem = SMDText String
| SMDEmph String
| SMDStrong String
| SMDCode String
| SMDHRef String String
| SMDPar MarkdownDoc
| SMDCodeBlock String
| SMDUItem String
| SMDOItem String
| SMDQuote MarkdownDoc
| SMDHRule
| SMDHeader Int String
isSMDUItem :: SourceMDElem -> Bool
isSMDUItem md = case md of SMDUItem _ -> True
_ -> False
isSMDOItem :: SourceMDElem -> Bool
isSMDOItem md = case md of SMDOItem _ -> True
_ -> False
textOfItem :: SourceMDElem -> String
textOfItem md = case md of SMDUItem txt -> txt
SMDOItem txt -> txt
_ -> ""
-----------------------------------------------------------------------
--- Parse markdown document from its textual representation.
fromMarkdownText :: String -> MarkdownDoc
fromMarkdownText = groupMarkDownElems . markdownText
-- Group adjacent item elements together in a markdown list.
groupMarkDownElems :: [SourceMDElem] -> MarkdownDoc
groupMarkDownElems [] = []
groupMarkDownElems (SMDUItem itxt :mds) = joinItems UList isSMDUItem [itxt] mds
groupMarkDownElems (SMDOItem itxt :mds) = joinItems OList isSMDOItem [itxt] mds
groupMarkDownElems (SMDText s : mds) = Text s : groupMarkDownElems mds
groupMarkDownElems (SMDEmph s : mds) = Emph s : groupMarkDownElems mds
groupMarkDownElems (SMDStrong s : mds) = Strong s : groupMarkDownElems mds
groupMarkDownElems (SMDCode s : mds) = Code s : groupMarkDownElems mds
groupMarkDownElems (SMDHRef s u : mds) = HRef s u : groupMarkDownElems mds
groupMarkDownElems (SMDPar md : mds) = Par md : groupMarkDownElems mds
groupMarkDownElems (SMDCodeBlock s : mds) = CodeBlock s : groupMarkDownElems mds
groupMarkDownElems (SMDQuote md : mds) = Quote md : groupMarkDownElems mds
groupMarkDownElems (SMDHRule : mds) = HRule : groupMarkDownElems mds
groupMarkDownElems (SMDHeader l s : mds) = Header l s : groupMarkDownElems mds
joinItems :: ([[MarkdownElem]] -> MarkdownElem) -> (SourceMDElem -> Bool)
-> [String] -> [SourceMDElem] -> [MarkdownElem]
joinItems mdlcons _ items [] = [mdlcons (reverse (map fromMarkdownText items))]
joinItems mdlcons isitem items (md:mds) =
if isitem md
then joinItems mdlcons isitem (textOfItem md : items) mds
else mdlcons (reverse (map fromMarkdownText items))
: groupMarkDownElems (md:mds)
-- Basic reader for a markdown text.
markdownText :: String -> [SourceMDElem]
markdownText [] = []
markdownText txt@(_:_) = markdownLine fstline (dropFirst remtxt)
where (fstline,remtxt) = break (=='\n') txt
-- Analyze the first line of a markdown text:
markdownLine :: String -> String -> [SourceMDElem]
markdownLine fstline remtxt
| all isSpace fstline
= markdownText remtxt
| blanklen > 0 -- four space indent for code
= markdownCodeBlock blanklen (drop blanklen fstline) remtxt
| isLevel1Line
= SMDHeader 1 fstline : markdownText (dropFirst furtherlines)
| isLevel2Line
= SMDHeader 2 fstline : markdownText (dropFirst furtherlines)
| take 1 fstline == "#"
= tryMDHeader fstline remtxt
| isHRule fstline
= SMDHRule : markdownText remtxt
| take 2 fstline == "> " -- start of a quoted text
= markdownQuote (drop 2 fstline) remtxt
| uitemlen > 0 -- start of an unordered item
= markdownItem SMDUItem uitemlen (drop uitemlen fstline) remtxt
| nitemlen > 0 -- start of a numbered item
= markdownItem SMDOItem nitemlen (drop nitemlen fstline) remtxt
| otherwise = markdownPar fstline remtxt
where
blanklen = isCodeLine fstline
(sndline,furtherlines) = break (=='\n') remtxt
isLevel1Line = not (null sndline) && all (=='=') sndline
isLevel2Line = not (null sndline) && all (=='-') sndline
nitemlen = isNumberedItemLine fstline
uitemlen = isUnorderedItemLine fstline
dropFirst :: [a] -> [a]
dropFirst s = if null s then [] else tail s
-- translate a header line
tryMDHeader :: String -> String -> [SourceMDElem]
tryMDHeader s rtxt =
let (sharps,htxt) = break (==' ') s
level = length sharps
in if null htxt || level>6
then markdownPar s rtxt
else SMDHeader level (dropFirst htxt) : markdownText rtxt
-- is a line a horizontal rule:
isHRule :: String -> Bool
isHRule l =
(all (\c -> isSpace c || c=='-') l && length (filter (=='-') l) > 3) ||
(all (\c -> isSpace c || c=='*') l && length (filter (=='*') l) > 3)
-- check whether a line starts with an unordered item indicator ("* ")
-- and return indent:
isUnorderedItemLine :: String -> Int
isUnorderedItemLine s =
let (blanks,nonblanks) = span (==' ') s
in if take 2 nonblanks `elem` ["* ","- ","+ "] then length blanks+2 else 0
-- check whether a line starts with an indented number and return indent value:
isNumberedItemLine :: String -> Int
isNumberedItemLine s =
let (blanks,nonblanks) = span (==' ') s
numblanks = length blanks
in checkNumber numblanks nonblanks
where
checkNumber indt numtxt =
let (ns,brt) = break (==' ') numtxt
(blanks,rtxt) = break (/=' ') brt
nsl = length ns
in if nsl>0 && all isDigit (take (nsl-1) ns) && ns!!(nsl-1)=='.' &&
not (null blanks) && not (null rtxt)
then indt+nsl+length blanks
else 0
-- Check whether a line starts with at least four blanks and
-- return indent value:
isCodeLine :: String -> Int
isCodeLine s =
let (blanks,nonblanks) = span (==' ') s
numblanks = length blanks
in if not (null nonblanks) && numblanks >= 4 then numblanks else 0
-- parse a paragraph (where the initial part of the paragraph is given
-- as the first argument):
markdownPar :: String -> String -> [SourceMDElem]
markdownPar ptxt txt
| null txt || head txt `elem` ['\n'] ||
uitemlen>0 || nitemlen>0
= SMDPar (groupMarkDownElems (outsideMarkdownElem "" ptxt)) : markdownText txt
| null remtxt
= [SMDPar (groupMarkDownElems (outsideMarkdownElem "" (ptxt++'\n':fstline)))]
| otherwise = markdownPar (ptxt++'\n':fstline) (tail remtxt)
where
(fstline,remtxt) = break (=='\n') txt
nitemlen = isNumberedItemLine fstline
uitemlen = isUnorderedItemLine fstline
-- parse a quoted section:
markdownQuote :: String -> String -> [SourceMDElem]
markdownQuote qtxt alltxt =
let txt = if take 2 alltxt == ">\n" -- allow empty quote lines
then "> " ++ drop 1 alltxt
else alltxt
in if take 2 txt == "> "
then let (fstline,remtxt) = break (=='\n') (drop 2 txt)
in if null remtxt
then [SMDQuote (fromMarkdownText (qtxt++'\n':fstline))]
else markdownQuote (qtxt++'\n':fstline) (tail remtxt)
else SMDQuote (fromMarkdownText qtxt) : markdownText txt
-- parse a program block (where the indent and the initial code block is given):
markdownCodeBlock :: Int -> String -> String -> [SourceMDElem]
markdownCodeBlock n ctxt txt =
if all (==' ') (take n txt)
then let (fstline,remtxt) = break (=='\n') (drop n txt)
in if null remtxt
then [SMDCodeBlock (ctxt ++ '\n' : fstline)]
else markdownCodeBlock n (ctxt ++ '\n' : fstline)
(tail remtxt)
else SMDCodeBlock ctxt : markdownText txt
-- parse a markdown list item:
markdownItem :: (String -> SourceMDElem) -> Int -> String -> String
-> [SourceMDElem]
markdownItem icons n itxt txt =
if take n txt == take n (repeat ' ')
then let (fstline,remtxt) = break (=='\n') (drop n txt)
in if null remtxt
then [icons (itxt++'\n':fstline)]
else markdownItem icons n (itxt++'\n':fstline) (tail remtxt)
else let (fstline,remtxt) = break (=='\n') txt
in if all isSpace fstline
then if null remtxt
then [icons itxt]
else markdownItem icons n (itxt++"\n") (tail remtxt)
else icons itxt : markdownText txt
--- Remove the backlash of escaped markdown characters in a string.
removeEscapes :: String -> String
removeEscapes s = case s of
[] -> []
('\\':c:cs) -> if c `elem` markdownEscapeChars
then c : removeEscapes cs
else '\\' : removeEscapes (c:cs)
(c:cs) -> c : removeEscapes cs
--- Escape characters supported by markdown.
markdownEscapeChars :: [Char]
markdownEscapeChars =
['\\','`','*','_','{','}','[',']','(',')','<','>','#','+','-','.',' ','!']
-- Analyze markdown text outside an element like emphasis, code, strong:
outsideMarkdownElem :: String -> String -> [SourceMDElem]
outsideMarkdownElem txt s = case s of
[] -> addPrevious txt []
('\\':c:cs) -> if c `elem` markdownEscapeChars
then outsideMarkdownElem (c:'\\':txt) cs
else outsideMarkdownElem ('\\':txt) (c:cs)
('*':'*':cs) -> addPrevious txt $ insideMarkdownElem "**" [] cs
('_':'_':cs) -> addPrevious txt $ insideMarkdownElem "__" [] cs
('*':cs) -> addPrevious txt $ insideMarkdownElem "*" [] cs
('_':cs) -> addPrevious txt $ insideMarkdownElem "_" [] cs
('`':cs) -> let (ticks, cs') = span (=='`') cs in
addPrevious txt $ insideMarkdownElem
(replicate (length ticks + 1) '`') [] cs'
('[':cs) -> addPrevious txt $ tryParseLink cs
('<':cs) -> addPrevious txt $ markdownHRef cs
(c:cs) -> outsideMarkdownElem (c:txt) cs
addPrevious :: String -> [SourceMDElem] -> [SourceMDElem]
addPrevious ptxt xs = if null ptxt then xs else SMDText (reverse ptxt) : xs
-- Try to parse a link of the form [link test](url)
tryParseLink :: String -> [SourceMDElem]
tryParseLink txt = let (linktxt,rtxt) = break (==']') txt in
if null rtxt || null (tail rtxt) || (rtxt!!1 /= '(')
then outsideMarkdownElem "[" txt
else let (url,mtxt) = break (==')') (drop 2 rtxt)
in if null mtxt
then outsideMarkdownElem "[" txt
else SMDHRef linktxt url : outsideMarkdownElem "" (tail mtxt)
markdownHRef :: String -> [SourceMDElem]
markdownHRef txt = let (url,rtxt) = break (=='>') txt in
if null rtxt
then outsideMarkdownElem "<" txt
else SMDHRef url url : outsideMarkdownElem "" (dropFirst rtxt)
insideMarkdownElem :: String -> String -> String -> [SourceMDElem]
insideMarkdownElem marker etext s =
if marker `isPrefixOf` s
then text2MDElem marker (reverse etext)
: outsideMarkdownElem "" (drop (length marker) s)
else case s of
[] -> [SMDText (marker ++ reverse etext)] -- end marker missing
('\\':c:cs) -> if c `elem` markdownEscapeChars
then insideMarkdownElem marker (c:'\\':etext) cs
else insideMarkdownElem marker ('\\':etext) (c:cs)
(c:cs) -> insideMarkdownElem marker (c:etext) cs
text2MDElem :: String -> String -> SourceMDElem
text2MDElem marker txt = case marker of
"**" -> SMDStrong txt
"__" -> SMDStrong txt
"*" -> SMDEmph txt
"_" -> SMDEmph txt
_ | all (=='`') marker -> SMDCode txt
| otherwise -> error $ "Markdown.text2MDElem: unknown marker \"" ++
marker ++ "\""
-----------------------------------------------------------------------
-- Translate markdown document to HTML.
mdDoc2html :: HTML h => MarkdownDoc -> [h]
mdDoc2html = map mdElem2html
-- translate markdown special characters in text to HTML
mdtxt2html :: HTML h => String -> h
mdtxt2html = htmlText . htmlQuote . removeEscapes
mdElem2html :: HTML h => MarkdownElem -> h
mdElem2html (Text s) = mdtxt2html s
mdElem2html (Emph s) = emphasize [mdtxt2html s]
mdElem2html (Strong s) = htmlStruct "strong" [] [mdtxt2html s]
mdElem2html (HRef s url) = if s==url
then href url [code [mdtxt2html s]]
else href url [mdtxt2html s]
mdElem2html (Code s) = code [htmlText (htmlQuote s)]
mdElem2html (CodeBlock s) = verbatim s
mdElem2html (Quote md) = htmlStruct "blockquote" [] (mdDoc2html md)
mdElem2html (Par md) = par (mdDoc2html md)
mdElem2html (UList mds) = ulist (map mdDoc2htmlWithoutPar mds)
mdElem2html (OList mds) = olist (map mdDoc2htmlWithoutPar mds)
mdElem2html HRule = hrule
mdElem2html (Header l s) = htmlStruct ('h':show l) [] [mdtxt2html s]
mdDoc2htmlWithoutPar :: HTML h => MarkdownDoc -> [h]
mdDoc2htmlWithoutPar mdoc = case mdoc of
[] -> []
[Par md] -> mdDoc2html md
[md] -> [mdElem2html md]
(Par md1 : md2 : mds) -> mdDoc2html md1 ++ breakline :
mdDoc2htmlWithoutPar (md2:mds)
(md1 : md2 : mds) -> mdElem2html md1 : mdDoc2htmlWithoutPar (md2:mds)
--- Translate a markdown text into a (partial) HTML document.
markdownText2HTML :: HTML h => String -> [h]
markdownText2HTML = mdDoc2html . fromMarkdownText
--- Translate a markdown text into a complete HTML text
--- that can be viewed as a standalone document by a browser.
--- The first argument is the title of the document.
markdownText2CompleteHTML :: String -> String -> String
markdownText2CompleteHTML title mdtxt =
showHtmlPage (page title (markdownText2HTML mdtxt))
-----------------------------------------------------------------------
--- Translate markdown document to a LaTeX string where the first
--- argument is a function to translate the basic text occurring
--- in markdown elements to a LaTeX string.
--- Note that the basic text (execept for code blocks)
--- contains escaped markdown characters
--- that needs also to be removed by the translation function.
mdDoc2latex :: (String->String) -> MarkdownDoc -> String
mdDoc2latex txt2latex = concatMap (mdElem2latex txt2latex)
mdElem2latex :: (String->String) -> MarkdownElem -> String
mdElem2latex txt2latex (Text s) = txt2latex s
mdElem2latex txt2latex (Emph s) = "\\emph{"++txt2latex s++"}"
mdElem2latex txt2latex (Strong s) = "\\textbf{"++txt2latex s++"}"
mdElem2latex txt2latex (HRef s url) =
if s==url then "\\url{"++url++"}"
else "\\href{"++url++"}{"++txt2latex s++"}"
mdElem2latex txt2latex (Code s) = "\\texttt{"++txt2latex (htmlQuote s)++"}"
mdElem2latex _ (CodeBlock s) =
"\\begin{verbatim}\n"++s++"\n\\end{verbatim}\n"
mdElem2latex txt2latex (Quote md) =
"\\begin{quote}\n"++mdDoc2latex txt2latex md++"\\end{quote}\n"
mdElem2latex txt2latex (Par md) = mdDoc2latex txt2latex md++"\n\n"
mdElem2latex txt2latex (UList s) =
"\\begin{itemize}"++
concatMap (\i -> "\n\\item\n"++mdDoc2latex txt2latex i) s ++
"\\end{itemize}\n"
mdElem2latex txt2latex (OList s) =
"\\begin{enumerate}"++
concatMap (\i -> "\n\\item\n"++mdDoc2latex txt2latex i) s ++
"\\end{enumerate}\n"
mdElem2latex _ HRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n"
mdElem2latex txt2latex (Header l s) = case l of
1 -> "\\section{"++txt2latex s++"}\n\n"
2 -> "\\subsection{"++txt2latex s++"}\n\n"
3 -> "\\subsubsection{"++txt2latex s++"}\n\n"
4 -> "\\paragraph{"++txt2latex s++"}\n\n"
5 -> "\\textbf{"++txt2latex s++"}\n\n"
_ -> "\\emph{"++txt2latex s++"}\n\n"
--- Translator for basic text to LaTeX.
--- Markdown escapes are removed and translated to LaTeX.
text2latex :: String -> String
text2latex = showLatexExps . (\s -> [htxt s]) . removeEscapes
--- Translate a markdown text into a (partial) LaTeX document.
--- All characters with a special meaning in LaTeX, like dollar
--- or ampersand signs, are quoted.
markdownText2LaTeX :: String -> String
markdownText2LaTeX = mdDoc2latex text2latex . fromMarkdownText
--- Translate a markdown text into a (partial) LaTeX document
--- where the first argument is a function to translate the basic text
--- occurring in markdown elements to a LaTeX string.
--- For instance, one can use a translation operation
--- that supports passing mathematical formulas in LaTeX style
--- instead of quoting all special characters.
markdownText2LaTeXWithFormat :: (String->String) -> String -> String
markdownText2LaTeXWithFormat txt2latex = mdDoc2latex txt2latex . fromMarkdownText
--- Translate a markdown text into a complete LaTeX document
--- that can be formatted as a standalone document.
markdownText2CompleteLaTeX :: String -> String
markdownText2CompleteLaTeX mds =
latexHeader ++ mdDoc2latex text2latex (fromMarkdownText mds) ++
"\\end{document}\n"
latexHeader :: String
latexHeader = unwords
[ "\\documentclass{article}"
, "\\usepackage[utf8x]{inputenc}"
, "\\usepackage{url}"
, "\\usepackage[breaklinks=true,unicode=true]{hyperref}"
, "\\setlength{\\parindent}{0pt}"
, "\\setlength{\\parskip}{6pt plus 2pt minus 1pt}"
, "\\setcounter{secnumdepth}{0}"
, "\\begin{document}"
]
--- Format the standard input (containing markdown text) as PDF.
formatMarkdownInputAsPDF :: String -> IO ()
formatMarkdownInputAsPDF outfile = getContents >>= formatMarkdownAsPDF outfile
--- Format a file containing markdown text as PDF.
formatMarkdownFileAsPDF :: String -> String -> IO ()
formatMarkdownFileAsPDF infile outfile =
readFile infile >>= formatMarkdownAsPDF outfile
--- Format a file containing markdown text as PDF.
formatMarkdownAsPDF :: String -> String -> IO ()
formatMarkdownAsPDF outfile mdstr = do
pid <- getPID
let tmp = "tmp_" ++ show pid
writeFile (tmp ++ ".tex") (markdownText2CompleteLaTeX mdstr)
pdflatexFile tmp outfile
-- Format a file tmp.tex with pdflatex and show the result
pdflatexFile :: String -> String -> IO ()
pdflatexFile tmp outfile = do
system $ "pdflatex \'\\nonstopmode\\input{" ++ tmp ++ ".tex}\'"
system $ unwords ["/bin/rm -f", tmp ++ ".tex", tmp ++ ".aux", tmp ++ ".log",
tmp ++ ".out"]
if null outfile
then do system $ "evince " ++ tmppdf
system $ "/bin/rm -f " ++ tmppdf
else system $ "/bin/mv -f " ++ tmppdf ++ " " ++ outfile
return ()
where
tmppdf = tmp ++ ".pdf"
-----------------------------------------------------------------------
|