CurryInfo: markdown-3.3.0 / Text.Markdown

classes:
documentation:	------------------------------------------------------------------------------ --- Library to translate --- [markdown documents](http://en.wikipedia.org/wiki/Markdown) --- into HTML or LaTeX. --- The slightly restricted subset of the markdown syntax recognized by --- this implementation is --- [documented in this page](https://github.com/curry-packages/markdown/blob/main/docs/markdown-syntax.md). --- --- @author Michael Hanus --- @version November 2023 ------------------------------------------------------------------------------
name:	Text.Markdown
operations:	formatMarkdownFileAsPDF formatMarkdownInputAsPDF fromMarkdownText markdownEscapeChars markdownText2CompleteHTML markdownText2CompleteLaTeX markdownText2HTML markdownText2LaTeX markdownText2LaTeXWithFormat removeEscapes
sourcecode:	module Text.Markdown ( MarkdownDoc, MarkdownElem(..), fromMarkdownText , removeEscapes, markdownEscapeChars , markdownText2HTML, markdownText2CompleteHTML , markdownText2LaTeX, markdownText2LaTeXWithFormat , markdownText2CompleteLaTeX , formatMarkdownFileAsPDF, formatMarkdownInputAsPDF ) where import Data.Char import System.IO ( getContents ) import Data.List import System.Process import HTML.Base import HTML.LaTeX ----------------------------------------------------------------------- --- A markdown document is a list of markdown elements. type MarkdownDoc = [MarkdownElem] --- The data type for representing the different elements --- occurring in a markdown document. --- @cons Text s - a simple text in a markdown document --- @cons Emph s - an emphasized text in a markdown document --- @cons Strong s - a strongly emphaszed text in a markdown document --- @cons Strong s - a code string in a markdown document --- @cons Code s - a code string in a markdown document --- @cons HRef s u - a reference to URL `u` with text `s` in a markdown document --- @cons Par md - a paragraph in a markdown document --- @cons CodeBlock s - a code block in a markdown document --- @cons UList mds - an unordered list in a markdown document --- @cons OList mds - an ordered list in a markdown document --- @cons Quote md - a quoted paragraph in a markdown document --- @cons HRule - a hoirzontal rule in a markdown document --- @cons Header l s - a level `l` header with title `s` --- in a markdown document data MarkdownElem = Text String \| Emph String \| Strong String \| Code String \| HRef String String \| Par MarkdownDoc \| CodeBlock String \| UList [MarkdownDoc] \| OList [MarkdownDoc] \| Quote MarkdownDoc \| HRule \| Header Int String ----------------------------------------------------------------------- --- The data type for representing the different elements --- of a source markdown document. Basically, it is the same --- as the final markdown document execept for single list items --- that occur in the source but are combined into a list in --- the final document. data SourceMDElem = SMDText String \| SMDEmph String \| SMDStrong String \| SMDCode String \| SMDHRef String String \| SMDPar MarkdownDoc \| SMDCodeBlock String \| SMDUItem String \| SMDOItem String \| SMDQuote MarkdownDoc \| SMDHRule \| SMDHeader Int String isSMDUItem :: SourceMDElem -> Bool isSMDUItem md = case md of SMDUItem _ -> True _ -> False isSMDOItem :: SourceMDElem -> Bool isSMDOItem md = case md of SMDOItem _ -> True _ -> False textOfItem :: SourceMDElem -> String textOfItem md = case md of SMDUItem txt -> txt SMDOItem txt -> txt _ -> "" ----------------------------------------------------------------------- --- Parse markdown document from its textual representation. fromMarkdownText :: String -> MarkdownDoc fromMarkdownText = groupMarkDownElems . markdownText -- Group adjacent item elements together in a markdown list. groupMarkDownElems :: [SourceMDElem] -> MarkdownDoc groupMarkDownElems [] = [] groupMarkDownElems (SMDUItem itxt :mds) = joinItems UList isSMDUItem [itxt] mds groupMarkDownElems (SMDOItem itxt :mds) = joinItems OList isSMDOItem [itxt] mds groupMarkDownElems (SMDText s : mds) = Text s : groupMarkDownElems mds groupMarkDownElems (SMDEmph s : mds) = Emph s : groupMarkDownElems mds groupMarkDownElems (SMDStrong s : mds) = Strong s : groupMarkDownElems mds groupMarkDownElems (SMDCode s : mds) = Code s : groupMarkDownElems mds groupMarkDownElems (SMDHRef s u : mds) = HRef s u : groupMarkDownElems mds groupMarkDownElems (SMDPar md : mds) = Par md : groupMarkDownElems mds groupMarkDownElems (SMDCodeBlock s : mds) = CodeBlock s : groupMarkDownElems mds groupMarkDownElems (SMDQuote md : mds) = Quote md : groupMarkDownElems mds groupMarkDownElems (SMDHRule : mds) = HRule : groupMarkDownElems mds groupMarkDownElems (SMDHeader l s : mds) = Header l s : groupMarkDownElems mds joinItems :: ([[MarkdownElem]] -> MarkdownElem) -> (SourceMDElem -> Bool) -> [String] -> [SourceMDElem] -> [MarkdownElem] joinItems mdlcons _ items [] = [mdlcons (reverse (map fromMarkdownText items))] joinItems mdlcons isitem items (md:mds) = if isitem md then joinItems mdlcons isitem (textOfItem md : items) mds else mdlcons (reverse (map fromMarkdownText items)) : groupMarkDownElems (md:mds) -- Basic reader for a markdown text. markdownText :: String -> [SourceMDElem] markdownText [] = [] markdownText txt@(_:_) = markdownLine fstline (dropFirst remtxt) where (fstline,remtxt) = break (=='\n') txt -- Analyze the first line of a markdown text: markdownLine :: String -> String -> [SourceMDElem] markdownLine fstline remtxt \| all isSpace fstline = markdownText remtxt \| blanklen > 0 -- four space indent for code = markdownCodeBlock blanklen (drop blanklen fstline) remtxt \| isLevel1Line = SMDHeader 1 fstline : markdownText (dropFirst furtherlines) \| isLevel2Line = SMDHeader 2 fstline : markdownText (dropFirst furtherlines) \| take 1 fstline == "#" = tryMDHeader fstline remtxt \| isHRule fstline = SMDHRule : markdownText remtxt \| take 2 fstline == "> " -- start of a quoted text = markdownQuote (drop 2 fstline) remtxt \| uitemlen > 0 -- start of an unordered item = markdownItem SMDUItem uitemlen (drop uitemlen fstline) remtxt \| nitemlen > 0 -- start of a numbered item = markdownItem SMDOItem nitemlen (drop nitemlen fstline) remtxt \| otherwise = markdownPar fstline remtxt where blanklen = isCodeLine fstline (sndline,furtherlines) = break (=='\n') remtxt isLevel1Line = not (null sndline) && all (=='=') sndline isLevel2Line = not (null sndline) && all (=='-') sndline nitemlen = isNumberedItemLine fstline uitemlen = isUnorderedItemLine fstline dropFirst :: [a] -> [a] dropFirst s = if null s then [] else tail s -- translate a header line tryMDHeader :: String -> String -> [SourceMDElem] tryMDHeader s rtxt = let (sharps,htxt) = break (==' ') s level = length sharps in if null htxt \|\| level>6 then markdownPar s rtxt else SMDHeader level (dropFirst htxt) : markdownText rtxt -- is a line a horizontal rule: isHRule :: String -> Bool isHRule l = (all (\c -> isSpace c \|\| c=='-') l && length (filter (=='-') l) > 3) \|\| (all (\c -> isSpace c \|\| c=='') l && length (filter (=='') l) > 3) -- check whether a line starts with an unordered item indicator ("* ") -- and return indent: isUnorderedItemLine :: String -> Int isUnorderedItemLine s = let (blanks,nonblanks) = span (==' ') s in if take 2 nonblanks `elem` ["* ","- ","+ "] then length blanks+2 else 0 -- check whether a line starts with an indented number and return indent value: isNumberedItemLine :: String -> Int isNumberedItemLine s = let (blanks,nonblanks) = span (==' ') s numblanks = length blanks in checkNumber numblanks nonblanks where checkNumber indt numtxt = let (ns,brt) = break (==' ') numtxt (blanks,rtxt) = break (/=' ') brt nsl = length ns in if nsl>0 && all isDigit (take (nsl-1) ns) && ns!!(nsl-1)=='.' && not (null blanks) && not (null rtxt) then indt+nsl+length blanks else 0 -- Check whether a line starts with at least four blanks and -- return indent value: isCodeLine :: String -> Int isCodeLine s = let (blanks,nonblanks) = span (==' ') s numblanks = length blanks in if not (null nonblanks) && numblanks >= 4 then numblanks else 0 -- parse a paragraph (where the initial part of the paragraph is given -- as the first argument): markdownPar :: String -> String -> [SourceMDElem] markdownPar ptxt txt \| null txt \|\| head txt `elem` ['\n'] \|\| uitemlen>0 \|\| nitemlen>0 = SMDPar (groupMarkDownElems (outsideMarkdownElem "" ptxt)) : markdownText txt \| null remtxt = [SMDPar (groupMarkDownElems (outsideMarkdownElem "" (ptxt++'\n':fstline)))] \| otherwise = markdownPar (ptxt++'\n':fstline) (tail remtxt) where (fstline,remtxt) = break (=='\n') txt nitemlen = isNumberedItemLine fstline uitemlen = isUnorderedItemLine fstline -- parse a quoted section: markdownQuote :: String -> String -> [SourceMDElem] markdownQuote qtxt alltxt = let txt = if take 2 alltxt == ">\n" -- allow empty quote lines then "> " ++ drop 1 alltxt else alltxt in if take 2 txt == "> " then let (fstline,remtxt) = break (=='\n') (drop 2 txt) in if null remtxt then [SMDQuote (fromMarkdownText (qtxt++'\n':fstline))] else markdownQuote (qtxt++'\n':fstline) (tail remtxt) else SMDQuote (fromMarkdownText qtxt) : markdownText txt -- parse a program block (where the indent and the initial code block is given): markdownCodeBlock :: Int -> String -> String -> [SourceMDElem] markdownCodeBlock n ctxt txt = if all (==' ') (take n txt) then let (fstline,remtxt) = break (=='\n') (drop n txt) in if null remtxt then [SMDCodeBlock (ctxt ++ '\n' : fstline)] else markdownCodeBlock n (ctxt ++ '\n' : fstline) (tail remtxt) else SMDCodeBlock ctxt : markdownText txt -- parse a markdown list item: markdownItem :: (String -> SourceMDElem) -> Int -> String -> String -> [SourceMDElem] markdownItem icons n itxt txt = if take n txt == take n (repeat ' ') then let (fstline,remtxt) = break (=='\n') (drop n txt) in if null remtxt then [icons (itxt++'\n':fstline)] else markdownItem icons n (itxt++'\n':fstline) (tail remtxt) else let (fstline,remtxt) = break (=='\n') txt in if all isSpace fstline then if null remtxt then [icons itxt] else markdownItem icons n (itxt++"\n") (tail remtxt) else icons itxt : markdownText txt --- Remove the backlash of escaped markdown characters in a string. removeEscapes :: String -> String removeEscapes s = case s of [] -> [] ('\\':c:cs) -> if c `elem` markdownEscapeChars then c : removeEscapes cs else '\\' : removeEscapes (c:cs) (c:cs) -> c : removeEscapes cs --- Escape characters supported by markdown. markdownEscapeChars :: [Char] markdownEscapeChars = ['\\','`','','_','{','}','[',']','(',')','<','>','#','+','-','.',' ','!'] -- Analyze markdown text outside an element like emphasis, code, strong: outsideMarkdownElem :: String -> String -> [SourceMDElem] outsideMarkdownElem txt s = case s of [] -> addPrevious txt [] ('\\':c:cs) -> if c `elem` markdownEscapeChars then outsideMarkdownElem (c:'\\':txt) cs else outsideMarkdownElem ('\\':txt) (c:cs) ('':'':cs) -> addPrevious txt $ insideMarkdownElem "" [] cs ('_':'_':cs) -> addPrevious txt $ insideMarkdownElem "__" [] cs ('':cs) -> addPrevious txt $ insideMarkdownElem "" [] cs ('_':cs) -> addPrevious txt $ insideMarkdownElem "_" [] cs ('`':cs) -> let (ticks, cs') = span (=='`') cs in addPrevious txt $ insideMarkdownElem (replicate (length ticks + 1) '`') [] cs' ('[':cs) -> addPrevious txt $ tryParseLink cs ('<':cs) -> addPrevious txt $ markdownHRef cs (c:cs) -> outsideMarkdownElem (c:txt) cs addPrevious :: String -> [SourceMDElem] -> [SourceMDElem] addPrevious ptxt xs = if null ptxt then xs else SMDText (reverse ptxt) : xs -- Try to parse a link of the form [link test](url) tryParseLink :: String -> [SourceMDElem] tryParseLink txt = let (linktxt,rtxt) = break (==']') txt in if null rtxt \|\| null (tail rtxt) \|\| (rtxt!!1 /= '(') then outsideMarkdownElem "[" txt else let (url,mtxt) = break (==')') (drop 2 rtxt) in if null mtxt then outsideMarkdownElem "[" txt else SMDHRef linktxt url : outsideMarkdownElem "" (tail mtxt) markdownHRef :: String -> [SourceMDElem] markdownHRef txt = let (url,rtxt) = break (=='>') txt in if null rtxt then outsideMarkdownElem "<" txt else SMDHRef url url : outsideMarkdownElem "" (dropFirst rtxt) insideMarkdownElem :: String -> String -> String -> [SourceMDElem] insideMarkdownElem marker etext s = if marker `isPrefixOf` s then text2MDElem marker (reverse etext) : outsideMarkdownElem "" (drop (length marker) s) else case s of [] -> [SMDText (marker ++ reverse etext)] -- end marker missing ('\\':c:cs) -> if c `elem` markdownEscapeChars then insideMarkdownElem marker (c:'\\':etext) cs else insideMarkdownElem marker ('\\':etext) (c:cs) (c:cs) -> insideMarkdownElem marker (c:etext) cs text2MDElem :: String -> String -> SourceMDElem text2MDElem marker txt = case marker of "" -> SMDStrong txt "__" -> SMDStrong txt "" -> SMDEmph txt "_" -> SMDEmph txt _ \| all (=='`') marker -> SMDCode txt \| otherwise -> error $ "Markdown.text2MDElem: unknown marker \"" ++ marker ++ "\"" ----------------------------------------------------------------------- -- Translate markdown document to HTML. mdDoc2html :: HTML h => MarkdownDoc -> [h] mdDoc2html = map mdElem2html -- translate markdown special characters in text to HTML mdtxt2html :: HTML h => String -> h mdtxt2html = htmlText . htmlQuote . removeEscapes mdElem2html :: HTML h => MarkdownElem -> h mdElem2html (Text s) = mdtxt2html s mdElem2html (Emph s) = emphasize [mdtxt2html s] mdElem2html (Strong s) = htmlStruct "strong" [] [mdtxt2html s] mdElem2html (HRef s url) = if s==url then href url [code [mdtxt2html s]] else href url [mdtxt2html s] mdElem2html (Code s) = code [htmlText (htmlQuote s)] mdElem2html (CodeBlock s) = verbatim s mdElem2html (Quote md) = htmlStruct "blockquote" [] (mdDoc2html md) mdElem2html (Par md) = par (mdDoc2html md) mdElem2html (UList mds) = ulist (map mdDoc2htmlWithoutPar mds) mdElem2html (OList mds) = olist (map mdDoc2htmlWithoutPar mds) mdElem2html HRule = hrule mdElem2html (Header l s) = htmlStruct ('h':show l) [] [mdtxt2html s] mdDoc2htmlWithoutPar :: HTML h => MarkdownDoc -> [h] mdDoc2htmlWithoutPar mdoc = case mdoc of [] -> [] [Par md] -> mdDoc2html md [md] -> [mdElem2html md] (Par md1 : md2 : mds) -> mdDoc2html md1 ++ breakline : mdDoc2htmlWithoutPar (md2:mds) (md1 : md2 : mds) -> mdElem2html md1 : mdDoc2htmlWithoutPar (md2:mds) --- Translate a markdown text into a (partial) HTML document. markdownText2HTML :: HTML h => String -> [h] markdownText2HTML = mdDoc2html . fromMarkdownText --- Translate a markdown text into a complete HTML text --- that can be viewed as a standalone document by a browser. --- The first argument is the title of the document. markdownText2CompleteHTML :: String -> String -> String markdownText2CompleteHTML title mdtxt = showHtmlPage (page title (markdownText2HTML mdtxt)) ----------------------------------------------------------------------- --- Translate markdown document to a LaTeX string where the first --- argument is a function to translate the basic text occurring --- in markdown elements to a LaTeX string. --- Note that the basic text (execept for code blocks) --- contains escaped markdown characters --- that needs also to be removed by the translation function. mdDoc2latex :: (String->String) -> MarkdownDoc -> String mdDoc2latex txt2latex = concatMap (mdElem2latex txt2latex) mdElem2latex :: (String->String) -> MarkdownElem -> String mdElem2latex txt2latex (Text s) = txt2latex s mdElem2latex txt2latex (Emph s) = "\\emph{"++txt2latex s++"}" mdElem2latex txt2latex (Strong s) = "\\textbf{"++txt2latex s++"}" mdElem2latex txt2latex (HRef s url) = if s==url then "\\url{"++url++"}" else "\\href{"++url++"}{"++txt2latex s++"}" mdElem2latex txt2latex (Code s) = "\\texttt{"++txt2latex (htmlQuote s)++"}" mdElem2latex _ (CodeBlock s) = "\\begin{verbatim}\n"++s++"\n\\end{verbatim}\n" mdElem2latex txt2latex (Quote md) = "\\begin{quote}\n"++mdDoc2latex txt2latex md++"\\end{quote}\n" mdElem2latex txt2latex (Par md) = mdDoc2latex txt2latex md++"\n\n" mdElem2latex txt2latex (UList s) = "\\begin{itemize}"++ concatMap (\i -> "\n\\item\n"++mdDoc2latex txt2latex i) s ++ "\\end{itemize}\n" mdElem2latex txt2latex (OList s) = "\\begin{enumerate}"++ concatMap (\i -> "\n\\item\n"++mdDoc2latex txt2latex i) s ++ "\\end{enumerate}\n" mdElem2latex _ HRule = "\\begin{center}\\rule{3in}{0.4pt}\\end{center}\n\n" mdElem2latex txt2latex (Header l s) = case l of 1 -> "\\section{"++txt2latex s++"}\n\n" 2 -> "\\subsection{"++txt2latex s++"}\n\n" 3 -> "\\subsubsection{"++txt2latex s++"}\n\n" 4 -> "\\paragraph{"++txt2latex s++"}\n\n" 5 -> "\\textbf{"++txt2latex s++"}\n\n" _ -> "\\emph{"++txt2latex s++"}\n\n" --- Translator for basic text to LaTeX. --- Markdown escapes are removed and translated to LaTeX. text2latex :: String -> String text2latex = showLatexExps . (\s -> [htxt s]) . removeEscapes --- Translate a markdown text into a (partial) LaTeX document. --- All characters with a special meaning in LaTeX, like dollar --- or ampersand signs, are quoted. markdownText2LaTeX :: String -> String markdownText2LaTeX = mdDoc2latex text2latex . fromMarkdownText --- Translate a markdown text into a (partial) LaTeX document --- where the first argument is a function to translate the basic text --- occurring in markdown elements to a LaTeX string. --- For instance, one can use a translation operation --- that supports passing mathematical formulas in LaTeX style --- instead of quoting all special characters. markdownText2LaTeXWithFormat :: (String->String) -> String -> String markdownText2LaTeXWithFormat txt2latex = mdDoc2latex txt2latex . fromMarkdownText --- Translate a markdown text into a complete LaTeX document --- that can be formatted as a standalone document. markdownText2CompleteLaTeX :: String -> String markdownText2CompleteLaTeX mds = latexHeader ++ mdDoc2latex text2latex (fromMarkdownText mds) ++ "\\end{document}\n" latexHeader :: String latexHeader = unwords [ "\\documentclass{article}" , "\\usepackage[utf8x]{inputenc}" , "\\usepackage{url}" , "\\usepackage[breaklinks=true,unicode=true]{hyperref}" , "\\setlength{\\parindent}{0pt}" , "\\setlength{\\parskip}{6pt plus 2pt minus 1pt}" , "\\setcounter{secnumdepth}{0}" , "\\begin{document}" ] --- Format the standard input (containing markdown text) as PDF. formatMarkdownInputAsPDF :: String -> IO () formatMarkdownInputAsPDF outfile = getContents >>= formatMarkdownAsPDF outfile --- Format a file containing markdown text as PDF. formatMarkdownFileAsPDF :: String -> String -> IO () formatMarkdownFileAsPDF infile outfile = readFile infile >>= formatMarkdownAsPDF outfile --- Format a file containing markdown text as PDF. formatMarkdownAsPDF :: String -> String -> IO () formatMarkdownAsPDF outfile mdstr = do pid <- getPID let tmp = "tmp_" ++ show pid writeFile (tmp ++ ".tex") (markdownText2CompleteLaTeX mdstr) pdflatexFile tmp outfile -- Format a file tmp.tex with pdflatex and show the result pdflatexFile :: String -> String -> IO () pdflatexFile tmp outfile = do system $ "pdflatex \'\\nonstopmode\\input{" ++ tmp ++ ".tex}\'" system $ unwords ["/bin/rm -f", tmp ++ ".tex", tmp ++ ".aux", tmp ++ ".log", tmp ++ ".out"] if null outfile then do system $ "evince " ++ tmppdf system $ "/bin/rm -f " ++ tmppdf else system $ "/bin/mv -f " ++ tmppdf ++ " " ++ outfile return () where tmppdf = tmp ++ ".pdf" -----------------------------------------------------------------------
types:	MarkdownDoc MarkdownElem
unsafe:	safe