Skip to content

Commit

Permalink
Simplify parsing of bytes literals.
Browse files Browse the repository at this point in the history
We now outsource this to a runtime function to make the generated code
a bit simpler. As a side effect this also provides more informative
error messages when a literal isn't found.
  • Loading branch information
rsmmr committed Aug 19, 2024
1 parent 6a12134 commit 6369bff
Show file tree
Hide file tree
Showing 28 changed files with 435 additions and 346 deletions.
1 change: 1 addition & 0 deletions spicy/lib/spicy_rt.hlt
Original file line number Diff line number Diff line change
Expand Up @@ -94,5 +94,6 @@ declare public void backtrack() &cxxname="spicy::rt::detail::backtrack" &have_pr
declare public void initializeParsedUnit(inout ParsedUnit punit, any unit, TypeInfo ti) &cxxname="spicy::rt::ParsedUnit::initialize" &have_prototype;

declare public bytes extractBytes(inout value_ref<stream> data, view<stream> cur, uint<64> n, bool eod_ok, string location, inout strong_ref<Filters> filters) &cxxname="spicy::rt::detail::extractBytes" &have_prototype;
declare public void expectBytesLiteral(inout value_ref<stream> data, view<stream> cur, bytes literal, string location, inout strong_ref<Filters> filters) &cxxname="spicy::rt::detail::expectBytesLiteral" &have_prototype;

}
19 changes: 18 additions & 1 deletion spicy/runtime/include/parser.h
Original file line number Diff line number Diff line change
Expand Up @@ -534,12 +534,29 @@ std::optional<hilti::rt::stream::SafeConstIterator> unitFind(
* @param data stream view to extract from
* @param cur view of *data* that's being parsed
* @param size number of bytes to extract
* @param size eod_ok if true, will be ok if end-of-data is reached without *size* behind reached
* @param eod_ok if true, will be ok if end-of-data is reached without *size* being reached
* @param location location associated with the situation
* @param filters filter state associated with current unit instance (which may be null)
* @returns extracted bytes
* @throws ParseError if not enough data is available
*/
hilti::rt::Bytes extractBytes(hilti::rt::ValueReference<hilti::rt::Stream>& data, const hilti::rt::stream::View& cur,
uint64_t size, bool eod_ok, std::string_view location,
const hilti::rt::StrongReference<spicy::rt::filter::detail::Filters>& filters);

/**
* Confirms that a stream view begins with a given bytes literal. This is a
* pure check: nothing is returned, and a mismatch is reported by throwing.
*
* @param data stream underlying *cur*
* @param cur view of *data* that's being parsed; expected to begin with *literal*
* @param literal raw bytes representation of the literal to expect
* @param location location associated with the situation
* @param filters filter state associated with current unit instance (which may be null)
* @throws ParseError if the literal isn't found at the beginning of *cur*
*/
void expectBytesLiteral(hilti::rt::ValueReference<hilti::rt::Stream>& data, const hilti::rt::stream::View& cur,
const hilti::rt::Bytes& literal, std::string_view location,
const hilti::rt::StrongReference<spicy::rt::filter::detail::Filters>& filters);

} // namespace detail
} // namespace spicy::rt
14 changes: 14 additions & 0 deletions spicy/runtime/src/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -199,3 +199,17 @@ hilti::rt::Bytes detail::extractBytes(hilti::rt::ValueReference<hilti::rt::Strea

return cur.sub(cur.begin() + size).data();
}

// Checks that *cur* begins with *literal*; throws a `ParseError` carrying
// both the expected literal and the actual leading input if it does not.
void detail::expectBytesLiteral(hilti::rt::ValueReference<hilti::rt::Stream>& data, const hilti::rt::stream::View& cur,
                                const hilti::rt::Bytes& literal, std::string_view location,
                                const hilti::rt::StrongReference<spicy::rt::filter::detail::Filters>& filters) {
    // Message passed along to waitForInput(), describing the input we need.
    const auto wait_msg =
        hilti::rt::fmt("expected %" PRIu64 R"( bytes for bytes literal "%s")", literal.size(), literal);

    detail::waitForInput(data, cur, literal.size(), wait_msg, location, filters);

    if ( cur.startsWith(literal) )
        return;

    // Report the leading stretch of input, of the literal's length, that failed to match.
    const auto actual = cur.sub(cur.begin() + literal.size()).data();
    throw ParseError(hilti::rt::fmt(R"(expected bytes literal "%s" but input starts with "%s")", literal, actual),
                     location);
}
15 changes: 15 additions & 0 deletions spicy/runtime/src/tests/parser.cc
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,21 @@ TEST_CASE("extractBytes") {
}
}

TEST_CASE("expectBytesLiteral") {
    // Most of the work in expectBytesLiteral() is done through the waitFor...()
    // function, which we test separately.

    auto data = hilti::rt::ValueReference<hilti::rt::Stream>();
    data->append("12345");
    data->freeze();

    // A literal matching the beginning of the input is accepted silently.
    CHECK_NOTHROW(detail::expectBytesLiteral(data, data->view(), "123", "<location>", {}));

    // A mismatch raises a parse error naming both the expected literal and
    // the input actually found in its place.
    CHECK_THROWS_WITH_AS(detail::expectBytesLiteral(data, data->view(), "abc", "<location>", {}),
                         "expected bytes literal \"abc\" but input starts with \"123\" (<location>)",
                         const spicy::rt::ParseError&);
}

TEST_CASE("unitFind") {
// We just test the argument forwarding here, the matching itself is
// covered by hilti::rt::stream::View::find().
Expand Down
4 changes: 2 additions & 2 deletions spicy/toolchain/include/ast/types/unit.h
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ class Unit : public UnqualifiedType {

void setAttributes(ASTContext* ctx, AttributeSet* attrs) { setChild(ctx, 1, attrs); }
void setContextType(ASTContext* ctx, UnqualifiedType* type) { setChild(ctx, 2, type); }
void setGrammar(std::shared_ptr<spicy::detail::codegen::Grammar> g) { _grammar = std::move(g); }
void setGrammar(spicy::detail::codegen::Grammar* g) { _grammar = g; }
void setPublic(bool p) { _public = p; }

std::string_view typeClass() const final { return "unit"; }
Expand Down Expand Up @@ -170,7 +170,7 @@ class Unit : public UnqualifiedType {

bool _public = false;
bool _may_have_filter = false;
std::shared_ptr<spicy::detail::codegen::Grammar> _grammar;
spicy::detail::codegen::Grammar* _grammar = nullptr;
};

} // namespace type
Expand Down
11 changes: 10 additions & 1 deletion spicy/toolchain/include/compiler/detail/codegen/codegen.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

#include <map>
#include <memory>
#include <set>
#include <string>
#include <unordered_set>
#include <utility>
Expand All @@ -26,7 +27,13 @@ namespace spicy::detail {
namespace codegen {
class GrammarBuilder;
class ParserBuilder;
struct ASTInfo;

// Information collected from the AST in an initial pass for any code generation.
struct ASTInfo {
std::set<ID> uses_sync_advance; // type ID of units implementing %sync_advance
std::set<uint64_t> look_aheads_in_use; // token IDs of look-ahead symbols gathered from the units' grammars
};

} // namespace codegen

/**
Expand All @@ -44,6 +51,7 @@ class CodeGen {
auto driver() const { return context()->driver(); }
const auto& compilerContext() const { return driver()->context(); }
const auto& options() const { return compilerContext()->options(); }
const auto& astInfo() const { return _ast_info; }

/** Entry point for transformation from a Spicy AST to a HILTI AST. */
bool compileAST(hilti::ASTRoot* root);
Expand Down Expand Up @@ -102,6 +110,7 @@ class CodeGen {
Builder* _builder;
codegen::GrammarBuilder _gb;
codegen::ParserBuilder _pb;
codegen::ASTInfo _ast_info;

std::vector<hilti::declaration::Property> _properties;
std::map<UnqualifiedType*, UnqualifiedType*> _type_mappings;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,12 @@ class GrammarBuilder {
hilti::Result<hilti::Nothing> run(type::Unit* unit);

/**
* Returns the grammar for a unit type. The type must have been computed
* through `run()` already, otherwise this will abort That's generally
* done for all AST unit types at the beginning of code generation.
* Returns the grammar for a unit type. The type's grammar must have been
* computed through `run()` already, which is generally done for all AST
* unit types at the beginning of code generation. If the grammar hasn't
* been computed yet, this will return null.
*/
const Grammar& grammar(const type::Unit& unit);
const Grammar* grammar(const type::Unit& unit);

private:
CodeGen* _cg;
Expand Down
5 changes: 4 additions & 1 deletion spicy/toolchain/include/compiler/detail/codegen/grammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,9 @@ class Grammar {
*
* @note will always return false until the root production gets set.
*/
bool needsLookAhead() const { return _needs_look_ahead; }
bool needsLookAhead() const { return _needs_look_ahead; } // XXX I believe we can remove this

const auto& lookAheadsInUse() const { return _look_aheads_in_use; }

/**
* Prints the grammar in a (somewhat) human readable form. This is for
Expand Down Expand Up @@ -147,6 +149,7 @@ class Grammar {
std::map<std::string, bool> _nullable;
std::map<std::string, std::set<std::string>> _first;
std::map<std::string, std::set<std::string>> _follow;
std::set<uint64_t> _look_aheads_in_use;
};

} // namespace spicy::detail::codegen
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ struct ParserState {
ID unit_id;

/** True if the current grammar needs look-ahead tracking. */
bool needs_look_ahead;
bool needs_look_ahead; // XXX I believe we can remove this

/**< Expression* referencing the current parse object. */
Expression* self = nullptr;
Expand Down
68 changes: 42 additions & 26 deletions spicy/toolchain/src/compiler/codegen/codegen.cc
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
// Copyright (c) 2020-2023 by the Zeek Project. See LICENSE for details.

#include <utility>

#include <hilti/ast/builder/builder.h>
#include <hilti/ast/ctors/coerced.h>
#include <hilti/ast/ctors/tuple.h>
Expand All @@ -27,6 +25,8 @@
#include <spicy/compiler/detail/codegen/grammar-builder.h>
#include <spicy/compiler/detail/codegen/grammar.h>

#include "compiler/detail/codegen/productions/ctor.h"

using namespace spicy;
using namespace spicy::detail;
using namespace spicy::detail::codegen;
Expand All @@ -37,38 +37,59 @@ namespace spicy::logging::debug {
inline const hilti::logging::DebugStream CodeGen("spicy-codegen");
} // namespace spicy::logging::debug

namespace spicy::detail::codegen {

// Information collected from the AST in an initial pass for any code generation.
struct ASTInfo {
std::set<ID> uses_sync_advance; // type ID of units implementing %sync_advance
};

} // namespace spicy::detail::codegen

namespace {

// Read-only visitor collecting information from the AST that's needed for
// subsequent code generation.
struct VisitorASTInfo : public visitor::PreOrder {
VisitorASTInfo(ASTContext* ctx, ASTInfo* info) : context(ctx), info(info) {}
VisitorASTInfo(CodeGen* cg, ASTInfo* info) : cg(cg), info(info) {}

ASTContext* context;
CodeGen* cg;
ASTInfo* info;

void operator()(declaration::UnitHook* n) final {
if ( n->id().local() == ID("0x25_sync_advance") ) {
const auto& unit = context->lookup(n->hook()->unitTypeIndex());
const auto& unit = cg->context()->lookup(n->hook()->unitTypeIndex());
info->uses_sync_advance.insert(unit->typeID());
}
}

void operator()(type::unit::item::UnitHook* n) final {
if ( n->id() == ID("0x25_sync_advance") ) {
const auto& unit = context->lookup(n->hook()->unitTypeIndex());
const auto& unit = cg->context()->lookup(n->hook()->unitTypeIndex());
info->uses_sync_advance.insert(unit->typeID());
}
}

// For each non-alias unit type declaration, computes the unit's grammar and
// records the token IDs of the look-ahead symbols it uses into `info`.
// Grammar construction errors are logged against the declaration's location.
void operator()(hilti::declaration::Type* n) final {
    auto unit = n->type()->type()->tryAs<type::Unit>();
    if ( ! unit )
        return;

    if ( n->type()->alias() )
        return;

    if ( auto r = cg->grammarBuilder()->run(unit); ! r ) {
        hilti::logger().error(r.error().description(), n->location());
        return;
    }

    // Bind by reference: the look-ahead set is only read here, so copying
    // it (as a plain `auto` would) is unnecessary.
    auto&& grammar = unit->grammar();
    const auto& grammar_lahs = grammar.lookAheadsInUse();
    info->look_aheads_in_use.insert(grammar_lahs.begin(), grammar_lahs.end());

    // Additionally record the look-ahead tokens of productions belonging to
    // fields marked `&synchronize`; only ctor productions carry a token ID
    // we can record.
    for ( const auto& [id, p] : grammar.productions() ) {
        auto field = p->meta().field();
        if ( ! field || ! field->attributes()->has("&synchronize") )
            continue;

        auto field_lahs = grammar.lookAheadsForProduction(p);
        if ( ! field_lahs )
            continue;

        for ( const auto* lah_prod : *field_lahs ) {
            if ( const auto* ctor = lah_prod->tryAs<production::Ctor>() )
                info->look_aheads_in_use.insert(ctor->tokenID());
        }
    }
}
};

// Visitor that runs over each module's AST at the beginning of their
Expand Down Expand Up @@ -109,12 +130,6 @@ struct VisitorPass1 : public visitor::MutatingPostOrder {
}

// Replace unit type with compiled struct type.

if ( auto r = cg->grammarBuilder()->run(u); ! r ) {
hilti::logger().error(r.error().description(), n->location());
return;
}

bool declare_only = false;
if ( auto m = n->parent<hilti::declaration::Module>(); m && m->skipImplementation() )
declare_only = true;
Expand Down Expand Up @@ -567,13 +582,14 @@ bool CodeGen::compileAST(hilti::ASTRoot* root) {
}
};

ASTInfo info;
visitor::visit(VisitorASTInfo(context(), &info), root, ".spicy");
visitor::visit(VisitorASTInfo(this, &_ast_info), root, ".spicy");

auto modified =
visitor::visit(VisitorModule(this, 1, &info), root, ".spicy", [](const auto& v) { return v.modified; });
modified |= visitor::visit(VisitorModule(this, 2, &info), root, ".spicy", [](const auto& v) { return v.modified; });
modified |= visitor::visit(VisitorModule(this, 3, &info), root, ".spicy", [](const auto& v) { return v.modified; });
visitor::visit(VisitorModule(this, 1, &_ast_info), root, ".spicy", [](const auto& v) { return v.modified; });
modified |=
visitor::visit(VisitorModule(this, 2, &_ast_info), root, ".spicy", [](const auto& v) { return v.modified; });
modified |=
visitor::visit(VisitorModule(this, 3, &_ast_info), root, ".spicy", [](const auto& v) { return v.modified; });

// Update the context with type changes recorded by any of the passes.
for ( auto [old, new_] : _type_mappings )
Expand Down
12 changes: 6 additions & 6 deletions spicy/toolchain/src/compiler/codegen/grammar-builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -352,15 +352,15 @@ hilti::Result<hilti::Nothing> GrammarBuilder::run(type::Unit* unit) {
return r.error();

_grammars[id] = std::move(g);
unit->setGrammar(&_grammars[id]);
return hilti::Nothing();
}

const Grammar& GrammarBuilder::grammar(const type::Unit& unit) {
const Grammar* GrammarBuilder::grammar(const type::Unit& unit) {
assert(unit.canonicalID());
auto id = unit.canonicalID();
if ( _grammars.find(id) == _grammars.end() )
hilti::logger().internalError(fmt("grammar for unit %s accessed before it's been computed", id),
unit.meta().location());

return _grammars[id];
if ( _grammars.find(id) != _grammars.end() )
return &_grammars[id];
else
return nullptr;
}
6 changes: 6 additions & 0 deletions spicy/toolchain/src/compiler/codegen/grammar.cc
Original file line number Diff line number Diff line change
Expand Up @@ -302,6 +302,12 @@ hilti::Result<hilti::Nothing> Grammar::_computeTables() {
continue;

lap->setLookAheads(std::make_pair(*v0, *v1));

// Add v0 and v1 to the set of look-ahead tokens in use.
for ( const auto& v : {v0, v1} ) {
for ( const auto& x : *v )
_look_aheads_in_use.insert(x->tokenID());
}
}

return _check();
Expand Down
Loading

0 comments on commit 6369bff

Please sign in to comment.