From 229141a7001de5ca110b79e0f2bc161ab14bfced Mon Sep 17 00:00:00 2001
From: Daniel Gnoutcheff
Date: Thu, 16 Sep 2021 16:48:03 -0400
Subject: Add tabFilter

The pandoc CLI does this by default, and for some reason it greatly
changes the parsing of tab-indented raw HTML.
---
 app/Main.hs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/app/Main.hs b/app/Main.hs
index 59a9a79..cb924e5 100644
--- a/app/Main.hs
+++ b/app/Main.hs
@@ -25,6 +25,7 @@ import Text.XML.HaXml.XmlContent (Document (..), fromXml)
 import Text.XML.HaXml.Escape (xmlUnEscape, stdXmlEscaper)
 import qualified Text.Pandoc as P
 import qualified Network.XmlRpc.Internals as XRI
+import qualified Text.Pandoc.Shared as PS
 
 -- Modified version of XMLParse.document that doesn't wait for anything after
 -- the top-level element
@@ -99,6 +100,6 @@ rpcHtmlize args = XRI.renderResponse . XRI.Return . XRI.ValueString .
 -- leaks memory like crazy.
 htmlize :: String -> String
 htmlize mdwn = either (error . show) T.unpack . P.runPure .
-    (P.writeHtml5String P.def =<<) . P.readMarkdown readOpts . T.pack .
-    filter (\c -> c `elem` "\t\n\r" || (c>=' ' && c/='\x7f')) $ mdwn
+    (P.writeHtml5String P.def =<<) . P.readMarkdown readOpts . PS.tabFilter 4 .
+    T.pack . filter (\c -> c `elem` "\t\n\r" || (c>=' ' && c/='\x7f')) $ mdwn
   where readOpts = P.def {P.readerExtensions = P.pandocExtensions}
-- 
cgit v1.2.1