Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Contexts that can be serialized + deserialized while retaining and explicitly representing sharing #2202

Draft
wants to merge 17 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
292 changes: 259 additions & 33 deletions src/swarm-lang/Swarm/Language/Context.hs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DerivingVia #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE ViewPatterns #-}

-- |
-- SPDX-License-Identifier: BSD-3-Clause
Expand All @@ -10,72 +9,243 @@
-- types, values, or capability sets) used throughout the codebase.
module Swarm.Language.Context where

import Control.Algebra (Has)
import Control.Algebra (Has, run)
import Control.Carrier.State.Strict (execState)
import Control.Effect.Reader (Reader, ask, local)
import Control.Lens.Empty (AsEmpty (..), pattern Empty)
import Control.Effect.State (State, get, modify)
import Control.Lens.Empty (AsEmpty (..))
import Control.Lens.Prism (prism)
import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToJSON)
import Data.Aeson (FromJSON (..), FromJSONKey, ToJSON (..), ToJSONKey, genericParseJSON, genericToJSON, withText)
import Data.Data (Data)
import Data.Function (on)
import Data.Functor.Const
import Data.Hashable
import Data.List.NonEmpty qualified as NE
import Data.Map (Map)
import Data.Map qualified as M
import Data.Semigroup (Sum (..))
import Data.Text (Text)
import Data.Text qualified as T
import GHC.Generics (Generic)
import Prettyprinter (brackets, emptyDoc, hsep, punctuate)
import Swarm.Pretty (PrettyPrec (..), prettyBinding)
import Swarm.Util.JSON (optionsUnwrapUnary)
import Swarm.Pretty (PrettyPrec (..))
import Swarm.Util (failT, showT)
import Swarm.Util.JSON (optionsMinimize)
import Swarm.Util.Yaml (FromJSONE, getE, liftE, parseJSONE)
import Text.Printf (printf)
import Text.Read (readMaybe)
import Prelude hiding (lookup)

-- | We use 'Text' values to represent variables.
type Var = Text

-- | A context is a mapping from variable names to things.
newtype Ctx t = Ctx {unCtx :: Map Var t}
------------------------------------------------------------
-- Context hash

-- | A context hash is a hash value used to identify contexts without
-- having to compare them for equality. Hash values are computed
-- homomorphically, so that two equal contexts will be guaranteed to
-- have the same hash value, even if they were constructed with a
-- different sequence of operations.
--
-- The downside of this approach is that, /in theory/, there could
-- be hash collisions, that is, two different contexts which
-- nonetheless have the same hash value. However, this is extremely
-- unlikely. The benefit is that everything can be purely
-- functional, without the need to thread around some kind of
-- globally unique ID generation effect.
newtype CtxHash = CtxHash {getCtxHash :: Int}
deriving (Eq, Ord, Data, Generic, ToJSONKey, FromJSONKey)
deriving (Semigroup, Monoid) via Sum Int
deriving (Num) via Int

instance Show CtxHash where
show (CtxHash h) = printf "%016x" h

instance ToJSON CtxHash where
toJSON h = toJSON (show h)

instance FromJSON CtxHash where
parseJSON = withText "hash" $ \t -> case readMaybe ("0x" ++ T.unpack t) of
Nothing -> fail "Could not parse CtxHash"
Just h -> pure (CtxHash h)

-- | The hash for a single variable -> value binding.
singletonHash :: Hashable t => Var -> t -> CtxHash
singletonHash x t = CtxHash $ hashWithSalt (hash x) t

-- | The hash for an entire Map's worth of bindings.
mapHash :: Hashable t => Map Var t -> CtxHash
mapHash = M.foldMapWithKey singletonHash

------------------------------------------------------------
-- Context structure

-- | 'CtxF' represents one level of structure of a context: a context
-- is either empty, a singleton, or built via deletion or union.
data CtxF f t
= CtxEmpty
| CtxSingle Var t
| CtxDelete Var t (f t)
| CtxUnion (f t) (f t)
deriving (Eq, Show, Functor, Foldable, Traversable, Data, Generic)

instance ToJSON t => ToJSON (Ctx t) where
toJSON = genericToJSON optionsUnwrapUnary
instance (ToJSON t, ToJSON (f t)) => ToJSON (CtxF f t) where
toJSON = genericToJSON optionsMinimize

instance (FromJSON t, FromJSON (f t)) => FromJSON (CtxF f t) where
parseJSON = genericParseJSON optionsMinimize

-- | Map over the recursive structure stored in a 'CtxF'.
restructure :: (f t -> g t) -> CtxF f t -> CtxF g t
restructure _ CtxEmpty = CtxEmpty
restructure _ (CtxSingle x t) = CtxSingle x t
restructure h (CtxDelete x t f1) = CtxDelete x t (h f1)
restructure h (CtxUnion f1 f2) = CtxUnion (h f1) (h f2)

-- | A 'CtxTree' is one possible representation of a context,
-- consisting of a structured record of the process by which a
-- context was constructed. This representation would be terrible
-- for doing efficient variable lookups, but it can be used to
-- efficiently serialize/deserialize the context while recovering
-- sharing.
--
-- It stores a top-level hash of the context, along with a recursive
-- tree built via 'CtxF'.
data CtxTree t = CtxTree CtxHash (CtxF CtxTree t)
deriving (Eq, Functor, Foldable, Traversable, Data, Generic, ToJSON, FromJSON, Show)

-- | A 'CtxNode' is just a single level of structure for a context,
-- with any recursive contexts replaced by their hash.
--
-- For example, a 'CtxNode' could look something like @CtxUnion
-- (Const 0fe5b299) (Const abcdef12)@.
type CtxNode t = CtxF (Const CtxHash) t

-- | Roll up one level of context structure while building a new
-- top-level Map and computing an appropriate top-level hash.
rollCtx :: Hashable t => CtxF Ctx t -> Ctx t
rollCtx s = Ctx m (CtxTree h (restructure ctxStruct s))
where
(m, h) = case s of
CtxEmpty -> (M.empty, 0)
CtxSingle x t -> (M.singleton x t, singletonHash x t)
CtxDelete x _ (Ctx m1 (CtxTree h1 _)) -> case M.lookup x m1 of
Nothing -> (m1, h1)
Just t' -> (M.delete x m1, h1 - singletonHash x t')
CtxUnion (Ctx m1 (CtxTree h1 _)) (Ctx m2 (CtxTree h2 _)) -> (m2 `M.union` m1, h')
where
-- `Data.Map.intersection l r` returns a map with common keys,
-- but values from `l`. The values in m1 are the ones we want
-- to subtract from the hash, since they are the ones that will
-- be overwritten.
overwritten = M.intersection m1 m2
h' = h1 + h2 - mapHash overwritten

------------------------------------------------------------
-- Contexts

-- | A context is a mapping from variable names to things. We store
-- both a 'Map' (for efficient lookup) as well as a 'CtxTree' for
-- sharing-aware serializing/deserializing of contexts.
data Ctx t = Ctx {unCtx :: Map Var t, ctxStruct :: CtxTree t}
deriving (Functor, Traversable, Data, Generic)

instance FromJSON t => FromJSON (Ctx t) where
parseJSON = genericParseJSON optionsUnwrapUnary
-- | Get the top-level hash of a context.
ctxHash :: Ctx t -> CtxHash
ctxHash (Ctx _ (CtxTree h _)) = h

instance Show (Ctx t) where
show _ = "<Ctx>"

instance Eq (Ctx t) where
(==) = (==) `on` ctxHash

instance Hashable t => Hashable (Ctx t) where
hash = getCtxHash . ctxHash
hashWithSalt s = hashWithSalt s . getCtxHash . ctxHash

instance Foldable Ctx where
foldMap f = foldMap f . unCtx

-- | Rebuild a complete 'Ctx' from a 'CtxTree'.
ctxFromTree :: CtxTree t -> Ctx t
ctxFromTree tree = Ctx (varMap tree) tree
where
varMap (CtxTree _ s) = case s of
CtxEmpty -> M.empty
CtxSingle x t -> M.singleton x t
CtxDelete x _ s1 -> M.delete x (varMap s1)
CtxUnion s1 s2 -> varMap s2 `M.union` varMap s1

------------------------------------------------------------
-- Context instances

-- | Serialize a context simply as its hash; we assume that a
-- top-level CtxMap has been seralized somewhere, from which we can
-- recover this context by looking it up.
instance ToJSON (Ctx t) where
toJSON = toJSON . ctxHash

-- | Deserialize a context. We expect to see a hash, and look it up
-- in the provided CtxMap.
instance FromJSONE (CtxMap CtxTree t) (Ctx t) where
parseJSONE v = do
h <- liftE $ parseJSON @CtxHash v
m <- getE
case getCtx h m of
Nothing -> failT ["Encountered unknown context hash", showT h]
Just ctx -> pure ctx

instance (PrettyPrec t) => PrettyPrec (Ctx t) where
prettyPrec _ Empty = emptyDoc
prettyPrec _ (assocs -> bs) = brackets (hsep (punctuate "," (map prettyBinding bs)))
prettyPrec _ _ = "<Ctx>"

-- | The semigroup operation for contexts is /right/-biased union.
instance Semigroup (Ctx t) where
instance Hashable t => Semigroup (Ctx t) where
(<>) = union

instance Monoid (Ctx t) where
instance Hashable t => Monoid (Ctx t) where
mempty = empty
mappend = (<>)

instance AsEmpty (Ctx t) where
_Empty = prism (const empty) isEmpty
where
isEmpty (Ctx c)
| M.null c = Right ()
| otherwise = Left (Ctx c)
isEmpty c
| M.null (unCtx c) = Right ()
| otherwise = Left c

------------------------------------------------------------
-- Context operations

-- | The empty context.
empty :: Ctx t
empty = Ctx M.empty
-- We could also define empty = rollCtx CtxEmpty but that would introduce an
-- unnecessary Hashable t constraint.
empty = Ctx M.empty (CtxTree mempty CtxEmpty)

-- | A singleton context.
singleton :: Var -> t -> Ctx t
singleton x t = Ctx (M.singleton x t)
singleton :: Hashable t => Var -> t -> Ctx t
singleton x t = rollCtx $ CtxSingle x t

-- | Create a 'Ctx' from a 'Map'.
fromMap :: Hashable t => Map Var t -> Ctx t
fromMap m = case NE.nonEmpty (M.assocs m) of
Nothing -> empty
Just ne -> foldr1 union (NE.map (uncurry singleton) ne)

-- | Look up a variable in a context.
lookup :: Var -> Ctx t -> Maybe t
lookup x (Ctx c) = M.lookup x c
lookup x = M.lookup x . unCtx

-- | Look up a variable in a context in an ambient Reader effect.
lookupR :: Has (Reader (Ctx t)) sig m => Var -> m (Maybe t)
lookupR x = lookup x <$> ask

-- | Delete a variable from a context.
delete :: Var -> Ctx t -> Ctx t
delete x (Ctx c) = Ctx (M.delete x c)
delete :: Hashable t => Var -> Ctx t -> Ctx t
delete x ctx@(Ctx m _) = case M.lookup x m of
Nothing -> ctx
Just t -> rollCtx $ CtxDelete x t ctx

-- | Get the list of key-value associations from a context.
assocs :: Ctx t -> [(Var, t)]
Expand All @@ -87,18 +257,74 @@ vars = M.keys . unCtx

-- | Add a key-value binding to a context (overwriting the old one if
-- the key is already present).
addBinding :: Var -> t -> Ctx t -> Ctx t
addBinding x t (Ctx c) = Ctx (M.insert x t c)
addBinding :: Hashable t => Var -> t -> Ctx t -> Ctx t
addBinding x t ctx = ctx `union` singleton x t

-- | /Right/-biased union of contexts.
union :: Ctx t -> Ctx t -> Ctx t
union (Ctx c1) (Ctx c2) = Ctx (c2 `M.union` c1)
union :: Hashable t => Ctx t -> Ctx t -> Ctx t
union ctx1 ctx2 = rollCtx $ CtxUnion ctx1 ctx2

-- | Locally extend the context with an additional binding.
withBinding :: Has (Reader (Ctx t)) sig m => Var -> t -> m a -> m a
withBinding :: (Has (Reader (Ctx t)) sig m, Hashable t) => Var -> t -> m a -> m a
withBinding x ty = local (addBinding x ty)

-- | Locally extend the context with an additional context of
-- bindings.
withBindings :: Has (Reader (Ctx t)) sig m => Ctx t -> m a -> m a
withBindings :: (Has (Reader (Ctx t)) sig m, Hashable t) => Ctx t -> m a -> m a
withBindings ctx = local (`union` ctx)

------------------------------------------------------------
-- Context serializing/deserializing

-- | A 'CtxMap' maps context hashes to context structures. Those
-- structures could either be complete context trees, or just a
-- single level of structure containing more hashes.
type CtxMap f t = Map CtxHash (CtxF f t)

-- | Read a context from a context map.
getCtx :: CtxHash -> CtxMap CtxTree t -> Maybe (Ctx t)
getCtx h m = case M.lookup h m of
Nothing -> Nothing
Just tree -> Just $ ctxFromTree (CtxTree h tree)

-- | Turn a context into a context map containing every subtree of its
-- structure.
toCtxMap :: Ctx t -> CtxMap CtxTree t
toCtxMap (Ctx m s) = run $ execState M.empty (buildCtxMap m s)

-- | Build a context map by keeping track of the incrementally built
-- map in a state effect, and traverse the given context structure
-- to add all subtrees to the map---but, of course, stopping without
-- recursing further whenever we see a hash that is already in the
-- map.
buildCtxMap :: forall t m sig. Has (State (CtxMap CtxTree t)) sig m => Map Var t -> CtxTree t -> m ()
buildCtxMap m (CtxTree h s) = do
cm <- get @(CtxMap CtxTree t)
case h `M.member` cm of
True -> pure ()
False -> do
modify (M.insert h s)
case s of
CtxEmpty -> pure ()
CtxSingle {} -> pure ()
CtxDelete x t s1 -> buildCtxMap (M.insert x t m) s1
CtxUnion s1 s2 -> buildCtxMap m s1 *> buildCtxMap m s2

-- | "Dehydrate" a context map by replacing the actual context trees
-- with single structure layers containing only hashes. A
-- dehydrated context map is very suitable for serializing, since it
-- makes sharing completely explicit---even if a given context is
-- referenced multiple times, the references are simply hash values,
-- and the context is stored only once, under its hash.
dehydrate :: CtxMap CtxTree t -> CtxMap (Const CtxHash) t
dehydrate = M.map (restructure (\(CtxTree h1 _) -> Const h1))

-- | "Rehydrate" a dehydrated context map by replacing every hash with
-- an actual context structure. We do this by building the result
-- as a lazy, recursive map, replacing each hash by the result we
-- get when looking it up in the map being built. A context which
-- is referenced multiple times will thus be shared in memory.
rehydrate :: CtxMap (Const CtxHash) t -> CtxMap CtxTree t
rehydrate m = m'
where
m' = M.map (restructure (\(Const h) -> CtxTree h (m' M.! h))) m
14 changes: 8 additions & 6 deletions src/swarm-lang/Swarm/Language/JSON.hs
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,13 @@
-- to put them all here to avoid circular module dependencies.
module Swarm.Language.JSON where

import Data.Aeson (FromJSON (..), ToJSON (..), genericParseJSON, genericToJSON, withText)
import Data.Aeson (FromJSON (..), ToJSON (..), withText)
import Data.Aeson qualified as Ae
import Swarm.Language.Pipeline (processTermEither)
import Swarm.Language.Syntax (Term)
import Swarm.Language.Syntax.Pattern (Syntax, TSyntax)
import Swarm.Language.Value (Env, Value)
import Swarm.Pretty (prettyText)
import Swarm.Util.JSON (optionsMinimize)
import Witch (into)

instance FromJSON TSyntax where
Expand All @@ -30,10 +29,13 @@
instance ToJSON Syntax

instance ToJSON Value where
toJSON = genericToJSON optionsMinimize
toJSON = undefined

Check warning on line 32 in src/swarm-lang/Swarm/Language/JSON.hs

View workflow job for this annotation

GitHub Actions / HLint

Warning in module Swarm.Language.JSON: Avoid restricted function ▫︎ Found: "undefined" ▫︎ Note: may break the code

instance FromJSON Value where
parseJSON = genericParseJSON optionsMinimize
parseJSON = undefined

Check warning on line 35 in src/swarm-lang/Swarm/Language/JSON.hs

View workflow job for this annotation

GitHub Actions / HLint

Warning in module Swarm.Language.JSON: Avoid restricted function ▫︎ Found: "undefined" ▫︎ Note: may break the code

deriving instance FromJSON Env
deriving instance ToJSON Env
instance ToJSON Env where
toJSON = undefined

Check warning on line 38 in src/swarm-lang/Swarm/Language/JSON.hs

View workflow job for this annotation

GitHub Actions / HLint

Warning in module Swarm.Language.JSON: Avoid restricted function ▫︎ Found: "undefined" ▫︎ Note: may break the code

instance FromJSON Env where
parseJSON = undefined

Check warning on line 41 in src/swarm-lang/Swarm/Language/JSON.hs

View workflow job for this annotation

GitHub Actions / HLint

Warning in module Swarm.Language.JSON: Avoid restricted function ▫︎ Found: "undefined" ▫︎ Note: may break the code
Loading
Loading