diff options
author | Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> | 2021-09-16 16:48:03 -0400 |
---|---|---|
committer | Daniel Gnoutcheff <gnoutchd@softwarefreedom.org> | 2021-09-16 16:48:03 -0400 |
commit | 229141a7001de5ca110b79e0f2bc161ab14bfced (patch) | |
tree | a5659757f8572907c7b989a3d9e71ce4ecd1c6d2 | |
parent | 14b60616df95d76b91873a726861b58f3578182b (diff) |
Add tabFilter
The pandoc CLI converts tabs to spaces by default, and for some reason doing
so greatly changes the parsing of tab-indented raw HTML.
-rw-r--r-- | app/Main.hs | 5 |
1 files changed, 3 insertions, 2 deletions
diff --git a/app/Main.hs b/app/Main.hs index 59a9a79..cb924e5 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -25,6 +25,7 @@ import Text.XML.HaXml.XmlContent (Document (..), fromXml) import Text.XML.HaXml.Escape (xmlUnEscape, stdXmlEscaper) import qualified Text.Pandoc as P import qualified Network.XmlRpc.Internals as XRI +import qualified Text.Pandoc.Shared as PS -- Modified version of XMLParse.document that doesn't wait for anything after -- the top-level element @@ -99,6 +100,6 @@ rpcHtmlize args = XRI.renderResponse . XRI.Return . XRI.ValueString . -- leaks memory like crazy. htmlize :: String -> String htmlize mdwn = either (error . show) T.unpack . P.runPure . - (P.writeHtml5String P.def =<<) . P.readMarkdown readOpts . T.pack . - filter (\c -> c `elem` "\t\n\r" || (c>=' ' && c/='\x7f')) $ mdwn + (P.writeHtml5String P.def =<<) . P.readMarkdown readOpts . PS.tabFilter 4 . + T.pack . filter (\c -> c `elem` "\t\n\r" || (c>=' ' && c/='\x7f')) $ mdwn where readOpts = P.def {P.readerExtensions = P.pandocExtensions} |