Initial commit

This commit is contained in:
2026-02-23 08:51:37 -05:00
commit 757a132930
25 changed files with 1780 additions and 0 deletions

6
aurac_lexer/Cargo.toml Normal file
View File

@@ -0,0 +1,6 @@
[package]
name = "aurac_lexer"
version = "0.1.0"
edition = "2021"
[dependencies]

203
aurac_lexer/src/lexer.rs Normal file
View File

@@ -0,0 +1,203 @@
use crate::token::{Span, Token, TokenKind};
use std::str::CharIndices;
/// Hand-written lexer producing zero-copy tokens over `input`.
///
/// Indentation is significant: the lexer keeps a stack of open indent widths
/// and emits `Indent`/`Dedent` tokens (Python-style) as line depth changes.
pub struct Lexer<'a> {
    input: &'a str,                 // full source text; token slices borrow from it
    chars: CharIndices<'a>,         // iterator positioned one char AHEAD of `current`
    current: Option<(usize, char)>, // one-character lookahead: (byte offset, char)
    line: usize,                    // 1-based line of `current`
    column: usize,                  // 1-based column (in chars) of `current`
    indent_stack: Vec<usize>,       // open indentation widths; starts as [0]
    pending_dedents: usize,         // Dedents still owed from a multi-level indent drop
    emitted_eof: bool,              // set once Eof has been produced
}
impl<'a> Lexer<'a> {
pub fn new(input: &'a str) -> Self {
let mut chars = input.char_indices();
let current = chars.next();
Self {
input,
chars,
current,
line: 1,
column: 1,
indent_stack: vec![0], // base level indentation
pending_dedents: 0,
emitted_eof: false,
}
}
fn advance(&mut self) -> Option<(usize, char)> {
let result = self.current;
if let Some((_, c)) = result {
if c == '\n' {
self.line += 1;
self.column = 1;
} else {
self.column += 1;
}
}
self.current = self.chars.next();
result
}
fn peek(&self) -> Option<(usize, char)> {
self.current
}
pub fn next_token(&mut self) -> Token<'a> {
if self.pending_dedents > 0 {
self.pending_dedents -= 1;
return Token::new(
TokenKind::Dedent,
Span { line: self.line, column: 1, offset: self.current.map(|(o,_)| o).unwrap_or(self.input.len()), len: 0 }
);
}
if let Some((offset, c)) = self.current {
// Indentation mapping at the start of lines
if self.column == 1 && c != '\n' && c != '\r' {
let mut spaces = 0;
while let Some((_, pc)) = self.current {
if pc == ' ' {
spaces += 1;
self.advance();
} else {
break;
}
}
if self.current.map_or(false, |(_, c)| c == '\n' || c == '\r') {
// Empty/whitespace-only line: proceed to standard token matching
// which will hit the '\n' matcher below.
} else {
let current_indent = *self.indent_stack.last().unwrap_or(&0);
if spaces > current_indent {
self.indent_stack.push(spaces);
return Token::new(
TokenKind::Indent,
Span { line: self.line, column: 1, offset, len: spaces }
);
} else if spaces < current_indent {
let mut dedents = 0;
while let Some(&last) = self.indent_stack.last() {
if last > spaces {
self.indent_stack.pop();
dedents += 1;
} else {
break;
}
}
if dedents > 0 {
self.pending_dedents = dedents - 1;
return Token::new(
TokenKind::Dedent,
Span { line: self.line, column: 1, offset, len: spaces }
);
}
}
}
}
// Normal matching path
let (start_offset, c) = self.advance().unwrap();
let start_col = self.column - 1;
match c {
' ' | '\r' => self.next_token(),
'\n' => Token::new(TokenKind::Newline, Span { line: self.line - 1, column: start_col, offset: start_offset, len: 1 }),
':' => {
if self.peek().map(|(_, pc)| pc) == Some(':') {
let _ = self.advance();
Token::new(TokenKind::DoubleColon, Span { line: self.line, column: start_col, offset: start_offset, len: 2 })
} else {
Token::new(TokenKind::Colon, Span { line: self.line, column: start_col, offset: start_offset, len: 1 })
}
}
'-' => {
if self.peek().map(|(_, pc)| pc) == Some('>') {
let _ = self.advance();
Token::new(TokenKind::Arrow, Span { line: self.line, column: start_col, offset: start_offset, len: 2 })
} else {
Token::new(TokenKind::Minus, Span { line: self.line, column: start_col, offset: start_offset, len: 1 })
}
}
'+' => Token::new(TokenKind::Plus, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'*' => Token::new(TokenKind::Star, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'/' => Token::new(TokenKind::Slash, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'(' => Token::new(TokenKind::OpenParen, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
')' => Token::new(TokenKind::CloseParen, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'{' => Token::new(TokenKind::OpenBrace, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'}' => Token::new(TokenKind::CloseBrace, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
',' => Token::new(TokenKind::Comma, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'=' => {
if self.peek().map(|(_, pc)| pc) == Some('=') {
let _ = self.advance();
Token::new(TokenKind::EqualEqual, Span { line: self.line, column: start_col, offset: start_offset, len: 2 })
} else {
Token::new(TokenKind::Equal, Span { line: self.line, column: start_col, offset: start_offset, len: 1 })
}
}
'|' => Token::new(TokenKind::Pipe, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'>' => Token::new(TokenKind::Greater, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
'<' => Token::new(TokenKind::Less, Span { line: self.line, column: start_col, offset: start_offset, len: 1 }),
_ if c.is_alphabetic() => {
while let Some((_, pc)) = self.peek() {
if pc.is_alphanumeric() || pc == '_' {
self.advance();
} else {
break;
}
}
let end_offset = self.current.map(|(o, _)| o).unwrap_or(self.input.len());
let ident_str = &self.input[start_offset..end_offset];
let kind = match ident_str {
"struct" => TokenKind::Struct,
"fn" => TokenKind::Fn,
"pure" => TokenKind::Pure,
"actor" => TokenKind::Actor,
"let" => TokenKind::Let,
"if" => TokenKind::If,
"else" => TokenKind::Else,
"match" => TokenKind::Match,
"return" => TokenKind::Return,
"type" => TokenKind::Type,
"gpu" => TokenKind::Gpu,
"i8" | "i16" | "i32" | "i64" | "u8" | "u16" | "u32" | "u64" | "f32" | "f64" | "bool" | "str" => TokenKind::BaseType(ident_str),
_ => TokenKind::Ident(ident_str),
};
Token::new(kind, Span { line: self.line, column: start_col, offset: start_offset, len: end_offset - start_offset })
}
_ if c.is_ascii_digit() => {
while let Some((_, pc)) = self.peek() {
if pc.is_ascii_digit() || pc == '.' {
self.advance();
} else {
break;
}
}
let end_offset = self.current.map(|(o, _)| o).unwrap_or(self.input.len());
let num_str = &self.input[start_offset..end_offset];
Token::new(TokenKind::Number(num_str), Span { line: self.line, column: start_col, offset: start_offset, len: end_offset - start_offset })
}
_ => Token::new(TokenKind::Error(c), Span { line: self.line, column: start_col, offset: start_offset, len: c.len_utf8() }),
}
} else {
if self.indent_stack.len() > 1 {
self.indent_stack.pop();
Token::new(TokenKind::Dedent, Span { line: self.line, column: self.column, offset: self.input.len(), len: 0 })
} else if !self.emitted_eof {
self.emitted_eof = true;
Token::new(TokenKind::Eof, Span { line: self.line, column: self.column, offset: self.input.len(), len: 0 })
} else {
Token::new(TokenKind::Eof, Span { line: self.line, column: self.column, offset: self.input.len(), len: 0 })
}
}
}
}

43
aurac_lexer/src/lib.rs Normal file
View File

@@ -0,0 +1,43 @@
pub mod lexer;
pub mod token;
#[cfg(test)]
mod tests {
    use super::lexer::Lexer;
    use super::token::TokenKind::*;

    /// A `struct` body introduced by ':' and one-space indentation should lex
    /// to an Indent/Dedent-bracketed field list ending in Eof.
    #[test]
    fn test_struct_indentation() {
        let input = "struct Position:\n x: f32\n y: f32\n";
        let mut lexer = Lexer::new(input);

        // Drain the lexer, keeping every kind up to and including Eof.
        let mut actual_tokens = Vec::new();
        loop {
            let kind = lexer.next_token().kind.clone();
            let done = kind == Eof;
            actual_tokens.push(kind);
            if done {
                break;
            }
        }

        let expected_tokens = vec![
            Struct, Ident("Position"), Colon, Newline,
            Indent, Ident("x"), Colon, BaseType("f32"), Newline,
            Ident("y"), Colon, BaseType("f32"), Newline,
            Dedent, Eof,
        ];
        assert_eq!(actual_tokens, expected_tokens);
    }
}

45
aurac_lexer/src/token.rs Normal file
View File

@@ -0,0 +1,45 @@
/// Source location of a token: 1-based line/column plus byte offset/length.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Span {
    pub line: usize,   // 1-based line number
    pub column: usize, // 1-based column number (counted in chars)
    pub offset: usize, // byte offset into the source string
    pub len: usize,    // length in bytes; 0 for synthetic tokens (Dedent, Eof)
}
/// Every kind of token the lexer can produce.
///
/// String-carrying variants borrow from the source text (lifetime `'a`)
/// rather than allocating.
#[derive(Debug, Clone, PartialEq)]
pub enum TokenKind<'a> {
    // Keywords
    Struct, Fn, Pure, Actor, Let, If, Else, Match, Return, Type, Gpu,
    // Identifiers and Literals bound to the input lifetime ('a) for zero-copy
    Ident(&'a str),
    Number(&'a str),
    StringLit(&'a str), // NOTE(review): not yet produced by the lexer in this commit
    // Base Types (e.g. "i32", "f64", "bool", "str")
    BaseType(&'a str),
    // Symbols & Operators
    Colon, DoubleColon, Comma, Arrow, Equal, Pipe,
    Plus, Minus, Star, Slash,
    // NOTE(review): OpenAngle/CloseAngle are never emitted; '<' and '>' lex as Less/Greater.
    OpenParen, CloseParen, OpenBrace, CloseBrace, OpenAngle, CloseAngle,
    Greater, Less, EqualEqual,
    // Significant Whitespace (Python-style indentation)
    Indent, Dedent, Newline,
    Eof,
    Error(char), // any character the lexer does not recognize
}
/// A lexed token: its kind plus where it came from in the source.
#[derive(Debug, Clone, PartialEq)]
pub struct Token<'a> {
    pub kind: TokenKind<'a>, // what was matched
    pub span: Span,          // where it was matched
}
impl<'a> Token<'a> {
pub fn new(kind: TokenKind<'a>, span: Span) -> Self {
Self { kind, span }
}
}