Indentation parsing is nice.


Intro

This could be the topic of a first blog post that I yet have to write: indentation parsing in Haskell with trifecta, and indentation-trifecta.

Below is a toy example that allows to parse case expressions with matches indented, but all ligning up, like so:

case ex of
  foo -> bar
  baz -> baaz

Likewise indentation parsing is used to distinguish declarations starting dedented on new lines from expressions (application f a eg.) continuing indented on new lines possibly.

Example

Given a file foo.mini eg.

-- hi there foo

x : hi there foo

y : stuff
y = asdf asdfa



-- case x of 
--   h -> foo hi
--   k -> bar
--     next -> other


foo = case x of 
  h -> foo hi
  k -> bar
  next -> other



foo2 = case x of 
  h -> foo  {- c -} hi
  -- some comment
  k -> bar
    next -> other





asd flas laks dfsa
   asdkf asldkf 

we can parse (and print) its decls:

*Mini > parsefilep (decls ) "samples/foo.mini" >>= mapM_ print 
Sign (Pat "x") ((Con "hi" :@ Con "there") :@ V "foo")
Sign (Pat "y") (V "stuff")
Def (Pat "y") (V "asdf" :@ V "asdfa")
Def (Pat "foo") (Case (V "x") [Match (Pat "h") (V "foo" :@ Con "hi"),Match (Pat "k") (V "bar"),Match (Pat "next") (V "other")])
Def (Pat "foo2") (Case (V "x") [Match (Pat "h") (V "foo" :@ Con "hi"),Match (Pat "k") (V "bar" :@ V "next")])

Parsing stops at bar next which is understood as an application (because of the indentation), and thus the following -> other... makes no sense any more.

We could be stricter by requiring no junk (eof) at the end of parsing:

*Mini > parsefile (decls <* eof) "samples/foo.mini" >>= mapM_ print 

Note that while (the monad class) IndentationParsing m uses some state internally, we are using our own state here as well: MonadState TTState m, to distinguish between ordinary identifiers: foo, bar, and constructors in the prelude: hi, there.

This distinction is not visible when parsing with the ordinary parse function:

*Mini > parse ex "foo"
V "foo"
*Mini > parse ex "there"
V "there"

but only when parsing with a prelude: parsep (and likewise: parsefilep)

*Mini > parsep ex "foo"
V "foo"
*Mini > parsep ex "there"
Con "there"

Code

Without further ado, here is the example code (mini.hs):

-- for deriving Functor
{-# LANGUAGE GeneralizedNewtypeDeriving #-}


{-# LANGUAGE FlexibleContexts #-}

-- {-# LANGUAGE FlexibleInstances #-}
-- {-# LANGUAGE MultiParamTypeClasses #-}




{- |

Copyright :  (c) Andreas Reuleaux 2018
License   :  BSD2
Maintainer:  Andreas Reuleaux <rx [at] a-rx [dot] info>
Stability :  experimental
Portability: non-portable
toying with indentation parsing


contains the original parens/brackets example from the
paper / testsuite as wel:
https://bitbucket.org/adamsmd/indentation.git
(BSD3)

-}







module Mini where

import Data.Char (isSpace)


import qualified Data.ByteString.Char8 as BS
import Control.Applicative
import Text.Trifecta hiding ((:@))
import Text.Trifecta.Indentation as I
import Text.Trifecta.Delta
import Text.Parser.Token.Style


-- import qualified Data.HashSet as HashSet
import qualified Data.HashSet as HS

import Control.Lens

import Data.List


import Text.Parser.LookAhead

import Control.Monad.Except



-- MonadPlus
-- import Control.Monad


import Control.Monad.State
-- import Control.Monad.State.Strict




main :: IO ()
main = return ()







idStyle :: TokenParsing m => IdentifierStyle m
idStyle =
  styleStart .~ letter
  $ styleReserved .~  HS.fromList (tail
                                   [ 
                                     undefined
                                   , "let"
                                   , "in"
                                   , "letrec"
                                   , "rec"


                                   -- !!!
                                   , "of"

                                   ])

  $ emptyIdents






newtype InnerParser a = InnerParser { runInnerParser :: Parser a }
                      deriving (Functor
                               , Monad
                               , Applicative
                               , Alternative
                               , Parsing
                               , CharParsing
                               , MonadPlus
                                 -- , TokenParsing
                               , DeltaParsing
                               , LookAheadParsing
                               )


instance TokenParsing InnerParser where
  someSpace = buildSomeSpaceParser (skipSome (satisfy isSpace))
              $ commentStart .~ "{-"
              $ commentEnd .~ "-}"
              $ commentLine .~ "--"
              $ commentNesting .~ True
              $ emptyCommentStyle






-- cf. either / fromRight'
fromSuccess :: Result b -> b
fromSuccess (Success x) = x
fromSuccess (Failure m) = error $ show m



beginning = Lines 0 0 0 0
directed = (Directed BS.empty 0 0 0 0)



data TTState = TTState {

  freshVars :: [String] 
  , constrNames :: [String]

  } deriving (Show)


ttdefault =  TTState {
  freshVars = ["a", "b", "c", "d"]
  , constrNames = []
  }


prelude :: TTState
prelude = ttdefault { constrNames = ["hi", "there"] }











-- --------------------------------------------------



-- pattern P w/ parser p
data Pat a
  = Pat a
  deriving (Show)

pat :: (TokenParsing m
       , Monad m
       ) => m (Pat String)
pat = Pat <$> ide



-- match M w/ parser m
data Match a
  = Match (Pat a) (Exp a)
  deriving (Show)


match :: (TokenParsing m
       , Monad m
       , IndentationParsing m
       , MonadState TTState m
       ) => m (Match String)
match = Match <$> pat <* symbol "->" <*> ex




data Exp a 

  -- | a variable
  = V a

  | Case (Exp a) [Match a]

    -- | a constructor
  | Con a

  | (Exp a) :@ (Exp a)

  | Stuff (Exp a)

  | Fancy [Exp a]

  deriving (Show)




ex :: (TokenParsing m
       , Monad m
       , IndentationParsing m
       , MonadState TTState m
       ) => m (Exp String)
ex = term




term :: (
  -- Monad m
  TokenParsing m
  -- , LookAheadParsing m
  -- , DeltaParsing m
  , MonadState TTState m
  , IndentationParsing m
  )        
  => m (Exp String)
term = funapp




-- absolutionIndentation normally defaults to the first token of the parent. 

-- we need to use absolute indentation. This mode says indentation is defined in terms of the first token parsed, 
-- and all indentation rules apply in terms of where that first token is found.

funapp :: (
  -- Monad m
  TokenParsing m
  -- , LookAheadParsing m
  -- , DeltaParsing m
  , MonadState TTState m
  , IndentationParsing m
  )
  => m (Exp String)
funapp = factor >>= \f -> foldl' (:@) f <$> many factor
-- funapp = factor




factor :: (TokenParsing m
          -- , LookAheadParsing m
          -- , DeltaParsing m
          , IndentationParsing m
          , MonadState TTState m
          )
          => m (Exp String)

factor = choice $ tail [

  undefined

  , parens ex

  , _case

  -- , var 
  , varOrCon

  ]




-- case
_case :: (TokenParsing m
       , Monad m
       , IndentationParsing m
       , MonadState TTState m
       ) => m (Exp String)

-- _case = symbol "case" >> Case <$> ex <* symbol "of" <*> localIndentation Gt (many $ try $ absoluteIndentation match)

_case = symbol "case" >> Case <$> ex <* symbol "of" <*> localIndentation Gt (many $ absoluteIndentation match)



ide :: (TokenParsing m, Monad m) => m String
ide = token $ ident $ idStyle





var :: (TokenParsing m
       , Monad m
       ) => m (Exp String)
var = V <$> ide


varOrCon :: (
  TokenParsing m
  -- LookAheadParsing m
  -- , DeltaParsing m
  , MonadState TTState m
  , IndentationParsing m
  ) => m (Exp String)
varOrCon = do 
  -- _ <- (localTokenMode (const Eq) $ symbolic ':')
  p <- ide
  st <- get

  let r = go p
        where
          go q 
            | q `elem` constrNames st = Con q
            | otherwise = V q

  return r


data Decl a 
  = Def (Pat a) (Exp a)
  | Sign (Pat a) (Exp a)
  deriving (Show)

decl :: (
  TokenParsing m
  -- LookAheadParsing m
  -- , DeltaParsing m
  , MonadState TTState m
  , IndentationParsing m
  ) => m (Decl String)


decl = try (Def <$> (pat <* symbol "=") <*> ex)
       <|> Sign <$> (pat <* symbol ":") <*> ex



decls :: (
  TokenParsing m
  -- LookAheadParsing m
  -- , DeltaParsing m
  , MonadState TTState m
  , IndentationParsing m
  ) => m [Decl String]


decls = many $ absoluteIndentation decl



-- --------------------------------------------------


data A
  = Par A   -- '(' A ')'
  | Bra A   -- '[' A ']'
  | Seq A A -- A A
  | Nil     -- epsilon
  deriving (Show, Eq)

-- a :: (Monad m, Stream s m (Char, Indentation)) => ParsecT (IndentStream s) () m A
a :: (Applicative m, TokenParsing m, IndentationParsing m) => m A
a = choice [ Seq <$> a' <*> a, a', pure Nil ]


-- difference: [] is allowed, but not ()
-- only
-- (
-- )



-- a' :: (Monad m, Stream s m (Char, Indentation)) => ParsecT (IndentStream s) () m A
a' :: (TokenParsing m, IndentationParsing m) => m A
a' = choice
    [ Par <$>
        between (localTokenMode (const Eq) $ symbolic '(')
                (localTokenMode (const Eq) $ symbolic ')')
                (localIndentation Gt a)
    , Bra <$>
        between (localTokenMode (const Ge) $ symbolic '[')
                (localTokenMode (const Ge) $ symbolic ']')
                (localIndentation Gt a)
    ]


parL = Par . listToSeq
braL = Bra . listToSeq

listToSeq [] = Nil
listToSeq (x:xs) = Seq x $ listToSeq xs


-- -- > parseFromFileEx (runInnerParser $ evalCharIndentationParserT (evalStateT a prelude) indentst) "samples/foo.tt" >>= return . fromSuccess
-- -- Seq (Par (Seq (Bra (Seq (Par Nil) Nil)) Nil)) Nil


-- for the second example
-- -- *Problem > parseFromFileEx (runInnerParser $ evalCharIndentationParserT   (evalStateT (whiteSpace *> a) prelude) indentst) "samples/foo.tt" >>= return . fromSuccess
-- -- Seq (Par (Seq (Bra (Seq (Par Nil) (Seq (Bra Nil) Nil))) (Seq (Par Nil) Nil))) Nil
-- -- *Problem >








-- default
indentst = mkIndentationState 0 infIndentation True Gt



evalCharIndentationParserT :: Monad m => IndentationParserT Char m a -> IndentationState -> m a
evalCharIndentationParserT = evalIndentationParserT

evalTokenIndentationParserT :: Monad m => IndentationParserT Token m a -> IndentationState -> m a
evalTokenIndentationParserT = evalIndentationParserT





oldparse p s = fromSuccess $ parseString (runInnerParser $ evalStateT p prelude) beginning s



-- parse    p s = fromSuccess $ parseString (runInnerParser $ evalCharIndentationParserT (evalStateT p prelude) indentst) beginning s

-- maybe better directed
parse    p s = fromSuccess $ parseString (runInnerParser $ evalCharIndentationParserT (evalStateT p prelude) indentst) directed s






parsefile p fn = parseFromFileEx (runInnerParser $ evalCharIndentationParserT (evalStateT (whiteSpace *> p) prelude) indentst) fn >>= return . fromSuccess



oldparsefile p f = parseFromFileEx (runInnerParser $ evalStateT (whiteSpace *> p) prelude) f  >>= return . fromSuccess





--



{-
 this  works

parseFromFileEx (runInnerParser $ evalCharIndentationParserT    (evalStateT (whiteSpace *> a) prelude) (mkIndentationState 0 infIndentation True Gt)) "samples/foo.mini" 



parseFromFileEx (runInnerParser $ evalCharIndentationParserT (evalStateT (whiteSpace *> ex) prelude) (mkIndentationState 0 infIndentation True Gt)) "samples/foo.mini" >>= return . fromSuccess 


and now w/ decls

parsefile decls "samples/foo.mini"



 -}