Skip to content

Commit

Permalink
Add 3 vector shorthand
Browse files Browse the repository at this point in the history
  • Loading branch information
mustafaquraish committed Dec 1, 2024
1 parent 4ff1ed9 commit 63422c6
Show file tree
Hide file tree
Showing 14 changed files with 315 additions and 130 deletions.
140 changes: 51 additions & 89 deletions compiler/ast/nodes.oc
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,9 @@ enum ASTType {
Match
Defer
Specialization
ArrayLiteral
CreateClosure
ArrayLiteral
VectorLiteral // $[1, 2, 3]

// Used for heap-allocating the result of an expression
// @new XYZ => { let a = mem::alloc<typeof(XYZ)>(); *a = XYZ; yield a }
Expand All @@ -51,6 +52,49 @@ enum ASTType {
BinaryOp
}

// Payload storage for an AST node; an `AST` holds one of these in its `u` field.
// Being a union, the members overlap, so only one of them is meaningful for
// any given node.
// NOTE(review): the active member is presumably selected by the node's
// `ASTType` tag (e.g. `vec_literal` for a `$[1, 2, 3]` vector literal) —
// confirm against the code that reads these members.
union ASTUnion {
assertion: Assertion
binary: Binary
block: Block
bool_literal: bool
call: FuncCall
cast: Cast
ident: Identifier
if_stmt: IfStatement
import_path: Import
lookup: NSLookup
loop: Loop
member: Member
num_literal: NumLiteral
string_literal: str
char_literal: str
unary: Unary
operator_span: Span
var_decl: &Variable
fmt_str: FormatString
size_of_type: &Type
match_stmt: Match
spec: Specialization
array_literal: ArrayLiteral
vec_literal: VectorLiteral
child: &AST
ret: Return
closure: &Function
}

// A single syntax-tree node: a kind tag (`type`), the source span it covers,
// a kind-specific payload (`u`), and the fields below that are filled in
// during type checking.
struct AST {
type: ASTType
span: Span
u: ASTUnion

// Resolved by type checker
etype: &Type
hint: &Type // For LSP
resolved_symbol: &Symbol
returns: bool
}


struct Variable {
sym: &Symbol
type: &Type
Expand Down Expand Up @@ -465,99 +509,17 @@ struct ArrayLiteral {
elements: &Vector<&AST>
}

// Payload for a vector-literal node (the `$[1, 2, 3]` shorthand); stored in
// the `vec_literal` member of `ASTUnion`.
struct VectorLiteral {
// NOTE(review): presumably one AST node per element expression inside the brackets — confirm in the parser.
elements: &Vector<&AST>
vec_type: &Type // Type for Vector<X>
vec_struc: &Structure // Structure for Vector<X>
}

// Payload for a return node; stored in the `ret` member of `ASTUnion`.
struct Return {
// NOTE(review): presumably the expression being returned — confirm whether it may be null for a bare return.
expr: &AST
// NOTE(review): presumably the span of the `return` keyword itself — confirm in the parser.
return_span: Span
}

// Payload storage for an AST node; an `AST` holds one of these in its `u` field.
// Being a union, the members overlap, so only one of them is meaningful for
// any given node.
// NOTE(review): the active member is presumably selected by the node's
// `ASTType` tag — confirm against the code that reads these members.
union ASTUnion {
assertion: Assertion
binary: Binary
block: Block
bool_literal: bool
call: FuncCall
cast: Cast
ident: Identifier
if_stmt: IfStatement
import_path: Import
lookup: NSLookup
loop: Loop
member: Member
num_literal: NumLiteral
string_literal: str
char_literal: str
unary: Unary
operator_span: Span
var_decl: &Variable
fmt_str: FormatString
size_of_type: &Type
match_stmt: Match
spec: Specialization
array_literal: ArrayLiteral
child: &AST
ret: Return
closure: &Function
}

// A single syntax-tree node: a kind tag (`type`), the source span it covers,
// a kind-specific payload (`u`), and the fields below that are filled in
// during type checking.
struct AST {
type: ASTType
span: Span
u: ASTUnion

// Resolved by type checker
etype: &Type
hint: &Type // For LSP
resolved_symbol: &Symbol
returns: bool
}

// enum AST2 {
// span: Span
// etype: &Type = null
// hint: &Type = null
// resolved_symbol: &Symbol = null
// returns: bool = false

// Assert(assertion: Assertion)
// Block(block: Block)
// BoolLiteral(value: bool)
// Break
// Call(call: FuncCall)
// Continue
// Error
// Identifier(ident: Identifier)
// If(if_stmt: IfStatement)
// Import(import_path: Import)
// IntLiteral(value: NumLiteral)
// Member(member: Member)
// NSLookup(lookup: NSLookup)
// OverloadedOperator(symbol: &Symbol)
// Return(expr: &AST2)
// Yield(expr: &AST2)
// StringLiteral(value: str)
// SizeOf(type: &Type)
// VarDeclaration(var: &Variable)
// While(loop: Loop)
// For(loop: Loop)
// CharLiteral(value: str)
// FloatLiteral(value: NumLiteral)
// FormatStringLiteral(fmt_str: FormatString)
// Cast(cast: Cast)
// Null
// Match(match_stmt: Match)
// Defer(stmt: &AST2)
// Specialization(spec: Specialization)
// ArrayLiteral(arr: ArrayLiteral)
// CreateClosure(func: &Function)

// // Used for heap-allocating the result of an expression
// // @new XYZ => { let a = mem::alloc<typeof(XYZ)>(); *a = XYZ; yield a }
// CreateNew(expr: &AST2)

// UnaryOp(unop: Unary)
// BinaryOp(binop: Binary)
// }

def AST::new(type: ASTType, span: Span): &AST {
let ast = mem::alloc<AST>()
ast.type = type
Expand Down
1 change: 1 addition & 0 deletions compiler/ast/program.oc
Original file line number Diff line number Diff line change
Expand Up @@ -366,6 +366,7 @@ struct CachedSymbols {
fmt_string_fn: &Symbol
mem_alloc_fn: &Symbol
mem_allocator: &Symbol
std_vector: &Symbol
}

def Program::iter_namespaces(&this): NSIterator {
Expand Down
21 changes: 18 additions & 3 deletions compiler/ast/scopes.oc
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,13 @@ import std::mem
import std::span::{ Span, Location }
import std::map::Map
import std::vector::Vector
import std::buffer::{ Buffer }

import @ast::nodes::{ AST, Structure, Variable, Function }
import @ast::nodes::{ Enum, EnumVariant }
import @ast::program::Namespace
import @types::Type
import @lexer::{ is_valid_utf8_start }

enum SymbolType {
Function
Expand Down Expand Up @@ -143,11 +145,24 @@ def Symbol::new(type: SymbolType, ns: &Namespace, name: str, display: str, full_

// Replace invalid characters in full_name, since we codegen it to C
let full_name_len = full_name.len()
for let i = 0; i < full_name_len; i++ {
full_name = full_name.copy()
for let i = 0; i < full_name_len; {
let c = full_name[i]
if (i == 0 and c.is_digit()) or (not c.is_alnum() and c != '_') {
full_name[i] = '_'
let ln = 1
assert is_valid_utf8_start(c, &ln)
match ln {
1 => {
if (i == 0 and c.is_digit()) or (not c.is_alnum() and c != '_') {
full_name[i] = '_'
}
}
else => {
for let j = 0; j < ln; j++ {
full_name[i + j] = (('a' as u8) + (full_name[i + j] as u8)%26) as char
}
}
}
i += ln
}
item.full_name = full_name

Expand Down
2 changes: 2 additions & 0 deletions compiler/errors.oc
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,8 @@ def display_message_span(type: MessageType, span: Span, msg: str, line_after: bo
if line_no > max_line break
if line_no >= min_line {
print(f"{line_no:4d} | ")
// FIXME: Properly handle utf-8 characters; we can't count bytes as columns
if line_no == span.start.line {
let start_col = span.start.col - 1
let end_col = span.end.col - 1
Expand Down
55 changes: 43 additions & 12 deletions compiler/lexer.oc
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,25 @@ struct Lexer {
loc: Location
seen_newline: bool
tokens: &Vector<&Token>

errors: &Vector<&Error>

in_comment: bool
comment: Buffer
comment_start: Location
}

def Lexer::make(source: str, filename: str): Lexer {
def Lexer::make(source: str, filename: str, errors: &Vector<&Error> = null): Lexer {
let start_loc = Location(filename, 1, 1, 0)
if not errors? {
errors = Vector<&Error>::new()
}
return Lexer(
source,
source_len: source.len(),
i: 0,
loc: start_loc,
seen_newline: false,
tokens: Vector<&Token>::new(),
errors: Vector<&Error>::new(),
errors: errors,
in_comment: false,
comment: Buffer::make(),
comment_start: start_loc,
Expand Down Expand Up @@ -61,17 +62,43 @@ def Lexer::push_type(&this, type: TokenType, len: u32 = 1) {

def Lexer::cur(&this): char => .source[.i]

// Reports whether the byte `c` may appear inside an identifier.
// Accepted: ASCII letters and digits, the underscore, and any byte whose
// top bit is set (this is what lets unicode characters through).
def is_valid_ident_char(c: char): bool {
    if c.is_alnum() or c == '_' return true

    // Allow unicode characters in identifiers: every non-ASCII byte has bit 7 set.
    let top_bit = ((c as u8) >> 7) & 1
    return top_bit == 1
}

// Classifies `c` as the first byte of a UTF-8 sequence and, when `out_sz`
// is non-null, stores the sequence length in bytes through it:
//   0xxxxxxx -> 1,  110xxxxx -> 2,  1110xxxx -> 3,  11110xxx -> 4
// Any other byte pattern is reported with a length of 1.
//
// NOTE(review): this always returns true, including for bytes that match
// none of the lead-byte patterns above, so the return value cannot be used
// to reject malformed input as written.
def is_valid_utf8_start(c: char, out_sz: &u32 = null): bool {
    let lead = c as u8

    // Start from the single-byte length; it covers both plain ASCII and the
    // fall-through case, since none of the masks below can match when the
    // top bit is clear.
    let width = 1
    if lead & 0b11100000 == 0b11000000 {
        width = 2
    } else if lead & 0b11110000 == 0b11100000 {
        width = 3
    } else if lead & 0b11111000 == 0b11110000 {
        width = 4
    }

    if out_sz? then *out_sz = width
    return true
}

def Lexer::inc(&this) {
match .cur() {
'\n' => {
let c = .cur()
let cu8 = c as u8
if {
c == '\n' => {
.loc.line += 1
.loc.col = 1
.seen_newline = true
.loc.index += 1
.i += 1
}
else => {
let ln = 1
assert is_valid_utf8_start(c, out_sz: &ln), "Invalid UTF-8 character"
.loc.col += 1
.loc.index += ln
.i += ln
}
else => .loc.col += 1
}
.i += 1
.loc.index += 1
}

def Lexer::peek(&this, offset: u32 = 1): char {
Expand Down Expand Up @@ -133,6 +160,7 @@ def Lexer::lex_string_literal(&this, has_seen_f: bool) {
.errors.push(Error::new(Span(.loc, .loc), "Unterminated string literal"))
}

let span = Span(start_loc, .loc)
match end_char == '`' or has_seen_f {
true => .push(Token::new(FormatStringLiteral, Span(start_loc, .loc), text))
false => .push(Token::new(StringLiteral, Span(start_loc, .loc), text))
Expand Down Expand Up @@ -293,6 +321,7 @@ def Lexer::lex(&this): &Vector<&Token> {
'|' => .push_type(Line)
'?' => .push_type(Question)
'~' => .push_type(Tilde)
'$' => .push_type(Dollar)
'.' => match .peek(1) == '.' and .peek(2) == '.' {
true => .push_type(Ellipsis, len: 3)
false => .push_type(Dot)
Expand Down Expand Up @@ -352,9 +381,9 @@ def Lexer::lex(&this): &Vector<&Token> {
.lex_raw_string_literal()
}
c.is_digit() => .lex_numeric_literal()
c.is_alpha() or c == '_' => {
is_valid_ident_char(c) and not c.is_digit() => {
let start = .i
while .cur().is_alnum() or .cur() == '_' {
while is_valid_ident_char(.cur()) {
.inc()
}
let len = .i - start
Expand All @@ -363,8 +392,10 @@ def Lexer::lex(&this): &Vector<&Token> {
.push(Token::from_ident(text, Span(start_loc, .loc)))
}
else => {
.errors.push(Error::new(Span(.loc, .loc), `Unrecognized char in lexer: '{c}'`))
println(``)
let start = .loc
.inc()
.errors.push(Error::new(Span(start, .loc), `Unrecognized char in lexer: '{c}'`))
}
}
}
Expand Down
4 changes: 4 additions & 0 deletions compiler/main.oc
Original file line number Diff line number Diff line change
Expand Up @@ -212,3 +212,7 @@ def main(argc: i32, argv: &str) {
if run_after_compile then run_executable(argc, argv)
}
}


/// Assorted Fix-mes:
// FIXME: `let 👀` does not properly highlight `let`
Loading

0 comments on commit 63422c6

Please sign in to comment.