Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Megaparsec for parsing #3

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion BoarDocs.cabal
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ source-repository head

library
exposed-modules:
Chunks
BoarConfig
FParser
PandocUtils
other-modules:
Paths_BoarDocs
autogen-modules:
Expand All @@ -34,9 +36,12 @@ library
ghc-options: -Wall -Wcompat -Widentities -Wincomplete-record-updates -Wincomplete-uni-patterns -Wmissing-export-lists -Wmissing-home-modules -Wpartial-fields -Wredundant-constraints
build-depends:
base >=4.7 && <5
, containers >=0.6.5
, megaparsec >=9.5
, pandoc >=3.1
, pandoc-types >=1.23
, text
, transformers >=0.6.1
default-language: Haskell2010

executable BoarDocs-exe
Expand All @@ -51,9 +56,12 @@ executable BoarDocs-exe
build-depends:
BoarDocs
, base >=4.7 && <5
, containers >=0.6.5
, megaparsec >=9.5
, pandoc >=3.1
, pandoc-types >=1.23
, text
, transformers >=0.6.1
default-language: Haskell2010

test-suite BoarDocs-test
Expand All @@ -69,8 +77,11 @@ test-suite BoarDocs-test
build-depends:
BoarDocs
, base >=4.7 && <5
, containers >=0.6.5
, megaparsec >=9.5
, pandoc >=3.1
, pandoc-types >=1.23
, text
, transformers >=0.6.1
, HUnit >=1.6
default-language: Haskell2010
80 changes: 10 additions & 70 deletions app/Main.hs
Original file line number Diff line number Diff line change
@@ -1,73 +1,12 @@
{-# LANGUAGE OverloadedStrings #-}

module Main (main) where

import Control.Monad (void, foldM)
import Data.Maybe (listToMaybe, fromJust)
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import System.Environment (getArgs)
import Text.Pandoc.Class (PandocIO, PandocMonad, runIOorExplode)
import Text.Pandoc.Definition (Pandoc)
import Text.Pandoc.Options (ReaderOptions, def)
import Text.Pandoc.Readers (Reader (TextReader), readers)

import Chunks (CodeType (..), Chunk, separateText)

type TxtReader = ReaderOptions -> Text -> PandocIO Pandoc

-- Gets the reader associated with a name (Ex: "markdown")
getPandocTxtReader :: Text -> Maybe TxtReader
getPandocTxtReader k =
case listToMaybe $ fmap snd $ filter ((== k) . fst) readers of
Just (TextReader r) -> Just r
_ -> Nothing


-- MovingConfig is called this way because it keeps moving along the code. As
-- the Chunks are parsed this config gets recreated (when Cfg is found)
data MovingConfig = MovingConfig { markup :: Maybe (Text, TxtReader) }

defaultMovingConfig :: MovingConfig
defaultMovingConfig = MovingConfig { markup = Nothing }

data Accumulator = Accumulator { movingCfg :: MovingConfig
, pChunks :: [Pandoc]
}
defaultAccumulator :: Accumulator
defaultAccumulator = Accumulator { movingCfg = defaultMovingConfig
, pChunks = []
}


setMarkupPrefix :: Text
setMarkupPrefix = "boar set markup "

parseChunks :: [Chunk] -> IO Accumulator
parseChunks = foldM parseChunk defaultAccumulator

parseChunk :: Accumulator -> Chunk -> IO Accumulator
parseChunk acc (Cfg, s) =
return acc { movingCfg = foldl parseCfgLine (movingCfg acc) $ T.lines s }
parseChunk acc (Lit, s) = do
pc <- executeConfig (movingCfg acc) s
return acc { pChunks = (pChunks acc) ++ [pc] }
parseChunk acc (Code, _) = return acc -- TODO

parseCfgLine :: MovingConfig -> Text -> MovingConfig
parseCfgLine cfg ln
| setMarkupPrefix `T.isPrefixOf` ln =
let name = T.drop (T.length setMarkupPrefix) ln
in case getPandocTxtReader name of
Just reader -> cfg { markup = Just (name, reader) }
Nothing -> error $ "Unknown markup " ++ (T.unpack name)
| otherwise = error $ "unknown cfg " ++ (T.unpack ln)

executeConfig :: MovingConfig -> Text -> IO Pandoc
executeConfig cfg t = runIOorExplode $ (snd $ fromJust $ markup cfg) def t

import Text.Megaparsec (runParserT, errorBundlePretty)
import Control.Monad.Trans.State (runState)

import FParser (FState (..), defaultFState, parseFile)

singleArgument :: [String] -> String
singleArgument [arg] = arg
Expand All @@ -77,13 +16,14 @@ main :: IO ()
main = do
arg <- fmap singleArgument getArgs
text <- TIO.readFile arg
let chunks = separateText $ text
void $ sequence $ fmap (\(x, y) -> print x >> TIO.putStrLn y) chunks

parsedChunks <- parseChunks chunks
print $ fmap fst $ markup $ movingCfg parsedChunks
print $ pChunks parsedChunks
print text
let (r, s) = runState (runParserT parseFile arg text) defaultFState
case r of
Right x -> print x
Left e -> putStrLn $ errorBundlePretty e

print s

TIO.putStrLn $ T.unlines $ literature s


3 changes: 3 additions & 0 deletions package.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,9 @@ dependencies:
- text
- pandoc >= 3.1
- pandoc-types >= 1.23
- megaparsec >= 9.5
- transformers >= 0.6.1
- containers >= 0.6.5

ghc-options:
- -Wall
Expand Down
14 changes: 14 additions & 0 deletions src/BoarConfig.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
module BoarConfig ( BoarConfig (..)
, defaultBoarConfig
) where

import Data.Text (Text)

import PandocUtils (TxtReader)

data BoarConfig = BoarConfig { markup :: Maybe (Text, TxtReader)
} deriving Show
defaultBoarConfig :: BoarConfig
defaultBoarConfig = BoarConfig { markup = Nothing
}

66 changes: 0 additions & 66 deletions src/Chunks.hs

This file was deleted.

140 changes: 140 additions & 0 deletions src/FParser.hs
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
{-# LANGUAGE OverloadedStrings #-}

module FParser ( FState (..)
, defaultFState
, FParser
, parseFile
) where

import Control.Monad (void)
import Control.Monad.Trans.Class (lift)
import Control.Monad.Trans.State (State, modify, get)
import qualified Data.List.NonEmpty as NE
import qualified Data.Set as Set
import Data.Text (Text)
import qualified Data.Text as T
import Data.Void (Void)
import Text.Megaparsec hiding (State)
import Text.Megaparsec.Char

import BoarConfig (BoarConfig (..), defaultBoarConfig)
import PandocUtils (getPandocTxtReader)

-- The state the parser maintains and mutates while parsing
data FState = FState { bCfg :: BoarConfig
, codeBlocks :: [Text]
, literature :: [Text]
} deriving Show
defaultFState :: FState
defaultFState = FState { bCfg = defaultBoarConfig
, codeBlocks = []
, literature = []
}

-- Parser for the files, maintains a state inside
type FParser = ParsecT Void Text (State FState)

-- Parses the input file
parseFile :: FParser ()
parseFile = void $ many $ try parseLine -- w/o try it would complain about the
-- last line being empty

-- A single line, either Literature (markup), Config (metadata) or Code (the
-- code itself)
parseLine :: FParser ()
parseLine = do
code <- many parseCodeLine
case code of
[] -> return ()
_ -> lift
$ addCodeBlock
$ T.dropAround (`elem` ("\r\n" :: String)) $ T.unlines code
ln <- parseNonCodeLine
void eol
case ln of
Nothing -> return ()
Just l -> lift $ modify (\s -> s { literature = literature s ++ [l] })

addCodeBlock :: Text -> State FState ()
addCodeBlock code = do
index <- fmap (length . codeBlocks) get
modify (\s -> s { codeBlocks = codeBlocks s ++ [code]
, literature = literature s ++ [boarDocsStr index]
})

boarDocsStr :: Int -> Text
boarDocsStr index = T.pack $ "\nBOARDOCSCODEBLOCK" ++ show index ++ "\n"

-- Parses a line of code, failing if it's not a line of code
parseCodeLine :: FParser Text
parseCodeLine = do
notFollowedBy parseNonCodeLine
t <- tillLineEnd
void eol
return t

-- Parses a line of literature or config
parseNonCodeLine :: FParser (Maybe Text)
parseNonCodeLine = try $ do -- w/o try it would fail consuming whitespace
void space
(fmap Just parseLiterature) <|> (Nothing <$ parseConfig)

-- Parses a line of literature/markup
parseLiterature :: FParser Text
parseLiterature = do
_ <- string "////"
tillLineEnd


-- Parses a config line ("///!")
parseConfig :: FParser ()
parseConfig = do
_ <- string "///!"
-- tries to parse a markup definition or throws an error while skipping the
-- line
parseSetMarkup <|> tillLineEndFail "valid config"

-- Modifies the configuration, setting the markup
-- In case of invalid markup, it reports an error but doesn't fail
parseSetMarkup :: FParser ()
parseSetMarkup = do
-- default prefix for markup definition
_ <- string "boar set markup "
-- no trimming is being done
m <- tillLineEnd
case getPandocTxtReader m of
Just r -> do
-- modify the configuration, setting the new markup
lift $ modify (\s -> s { bCfg = (bCfg s) { markup = Just (m, r) } })
Nothing -> do
-- fails if the markup wasn't found
betterFailureReg (T.unpack m) "valid markup"


-- Returns the rest of the line as a string, but not consuming the eol
-- Usually you will prefer to use tillLineEnd >>= eol
tillLineEnd :: FParser Text
tillLineEnd = do
s <- takeWhileP Nothing (\x -> not $ x `elem` ("\r\n" :: String))
return s

-- Fails showing the rest of the line as unexpected input and the String
-- argument as the label of the expected one
tillLineEndFail :: String -> FParser ()
tillLineEndFail s = do
rest <- tillLineEnd
betterFailureReg (T.unpack rest) s

-- Wrapper to make registerFailure easier to use, by restricting the types
-- The first string is the unexpected input, second is a label describing what
-- was expected
-- Any of them can be an empty string
betterFailureReg :: String -> String -> FParser ()
betterFailureReg u e = registerFailure un ex
where
un = fmap Tokens $ NE.nonEmpty u
ex = case NE.nonEmpty e of
Just ne -> Set.singleton (Label ne)
Nothing -> Set.empty


Loading