From cfa44907065eb10e3b990506881b30c5891f0af2 Mon Sep 17 00:00:00 2001 From: Mica White Date: Sun, 7 Dec 2025 14:25:35 -0500 Subject: First commit --- .gitignore | 2 + Cargo.toml | 8 + rustfmt.toml | 3 + src/ast.rs | 343 +++++++++++++++++++++++++++++++ src/builtins.rs | 300 +++++++++++++++++++++++++++ src/builtins/lists.rs | 170 ++++++++++++++++ src/builtins/numbers.rs | 227 +++++++++++++++++++++ src/builtins/pairlists.rs | 66 ++++++ src/interpreter.rs | 293 +++++++++++++++++++++++++++ src/ir.rs | 294 +++++++++++++++++++++++++++ src/lib.rs | 5 + src/tokens.rs | 501 ++++++++++++++++++++++++++++++++++++++++++++++ 12 files changed, 2212 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.toml create mode 100644 rustfmt.toml create mode 100644 src/ast.rs create mode 100644 src/builtins.rs create mode 100644 src/builtins/lists.rs create mode 100644 src/builtins/numbers.rs create mode 100644 src/builtins/pairlists.rs create mode 100644 src/interpreter.rs create mode 100644 src/ir.rs create mode 100644 src/lib.rs create mode 100644 src/tokens.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..96ef6c0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/target +Cargo.lock diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..b716b6d --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,8 @@ +[package] +name = "delsh" +version = "0.1.0" +edition = "2021" + +[dependencies] +rust_decimal = { version = "1", features=["maths"] } +snob = "0.1.0" diff --git a/rustfmt.toml b/rustfmt.toml new file mode 100644 index 0000000..d3d0d1e --- /dev/null +++ b/rustfmt.toml @@ -0,0 +1,3 @@ +edition = "2024" +hard_tabs = true +newline_style = "Unix" diff --git a/src/ast.rs b/src/ast.rs new file mode 100644 index 0000000..5584975 --- /dev/null +++ b/src/ast.rs @@ -0,0 +1,343 @@ +use std::iter::Peekable; +use std::sync::Arc; + +use rust_decimal::Decimal; + +use crate::tokens::{Lexer, Token}; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Program { + pub commands: Arc<[Command]>, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Command { + Statement(Statement), + Expression(Expression), + PanicMode(Arc<[Token]>), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Statement { + pub function_name: Arc, + pub function_token: Token, + pub args: Arc<[Expression]>, + pub newline: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Expression { + pub prefix: Arc<[Token]>, + pub suffix: ExpressionSuffix, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExpressionSuffix { + Nothing, + Number { value: Decimal, token: Token }, + String { value: Arc, token: Token }, + Identifier { name: Arc, token: Token }, + List(List), +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct List { + pub left_paren: Token, + pub items: Arc<[ListItem]>, + pub right_paren: Option, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ListItem { + Dot(Token), + Expression(Expression), +} + +impl Command { + pub fn is_empty(&self) -> bool { + if let Self::PanicMode(tokens) = self { + tokens.is_empty() + } else { + false + } + } + + pub fn expression(&self) -> Option<&Expression> { + if let Self::Expression(expr) = self { + Some(expr) + } else { + None + } + } + + pub fn statement(&self) -> Option<&Statement> { + if let Self::Statement(statement) = self { + Some(statement) + } else { + None + } + } +} + +impl Expression { + pub fn is_quoted(&self) -> bool { + self.prefix.len() == 1 && self.prefix[0].is_apostrophe() + } +} + +impl ExpressionSuffix { + pub fn is_empty(&self) -> bool { + matches!(self, Self::Nothing) + } + + pub fn number(&self) -> Option<&Decimal> { + if let Self::Number { value, .. } = self { + Some(value) + } else { + None + } + } + + pub fn string(&self) -> Option<&Arc> { + if let Self::String { value, .. } = self { + Some(value) + } else { + None + } + } + + pub fn identifier(&self) -> Option<&Arc> { + if let Self::Identifier { name, .. } = self { + Some(name) + } else { + None + } + } + + pub fn list(&self) -> Option<&List> { + if let Self::List(list) = self { + Some(list) + } else { + None + } + } +} + +impl List { + pub fn contains_dot(&self) -> bool { + self.items.iter().any(|item| item.dot().is_some()) + } +} + +impl ListItem { + pub fn dot(&self) -> Option<&Token> { + if let Self::Dot(token) = self { + Some(token) + } else { + None + } + } + + pub fn expression(&self) -> Option<&Expression> { + if let Self::Expression(expression) = self { + Some(expression) + } else { + None + } + } +} + +fn parse_many( + lexer: &mut Peekable, + predicate: fn(&Token) -> bool, + parser: fn(&mut Peekable) -> Result, +) -> Result, String> { + let mut items = Vec::new(); + while let Some(next) = lexer.peek() { + if !predicate(next) { + break; + } + + items.push(parser(lexer)?) + } + + Ok(items) +} + +fn parse_if( + lexer: &mut Peekable, + predicate: fn(&Token) -> bool, + parser: fn(&mut Peekable) -> Result, +) -> Option> { + let next = lexer.peek()?; + predicate(next).then(|| parser(lexer)) +} + +fn skip_whitespace_and_comments(lexer: &mut Peekable) { + while lexer + .next_if(|token| token.is_comment() || token.is_whitespace()) + .is_some() + {} +} + +fn skip_spaces_and_comments(lexer: &mut Peekable) { + while lexer + .next_if(|token: &Token| { + (token.is_whitespace() && !token.contains_newline()) || token.is_comment() + }) + .is_some() + {} +} + +fn skip_non_expression_tokens(lexer: &mut Peekable) -> Vec { + let mut tokens = Vec::new(); + while let Some(token) = lexer.next_if(|token| !begins_expression(token)) { + tokens.push(token); + } + + tokens +} + +fn begins_expression(token: &Token) -> bool { + match &token.ty { + crate::tokens::TokenType::Whitespace(_) => false, + crate::tokens::TokenType::LineComment(_) => false, + crate::tokens::TokenType::BlockComment { .. } => false, + crate::tokens::TokenType::LeftParenthesis => true, + crate::tokens::TokenType::RightParenthesis => false, + crate::tokens::TokenType::Apostrophe => true, + crate::tokens::TokenType::Pound => true, + crate::tokens::TokenType::Dot => false, + crate::tokens::TokenType::Identifier(_) => true, + crate::tokens::TokenType::String { .. } => true, + crate::tokens::TokenType::Number(_) => true, + } +} + +fn begins_list_item(token: &Token) -> bool { + begins_expression(token) || token.is_dot() +} + +pub fn parse_program(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + let mut commands = parse_many(lexer, |_| true, parse_command)?; + if commands.last().map(|c| c.is_empty()).unwrap_or(false) { + commands.pop(); + } + + let commands = commands.into(); + Ok(Program { commands }) +} + +fn parse_command(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + if let Some(result) = parse_if(lexer, |token| token.is_identifier(), parse_statement) { + result.map(Command::Statement) + } else if let Some(result) = parse_if(lexer, begins_expression, parse_expression) { + result.map(Command::Expression) + } else { + Ok(Command::PanicMode(skip_non_expression_tokens(lexer).into())) + } +} + +fn parse_statement(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + let function_token = lexer + .next() + .ok_or_else(|| String::from("expected a function name"))?; + let function_name = function_token + .identifier() + .ok_or_else(|| String::from("expected the function name to be an identifier"))? + .into(); + skip_spaces_and_comments(lexer); + let args = parse_many(lexer, begins_expression, parse_expression)?.into(); + skip_spaces_and_comments(lexer); + let newline = lexer.next_if(|token| token.contains_newline()); + + Ok(Statement { + function_name, + function_token, + args, + newline, + }) +} + +fn parse_expression(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + let prefix = parse_many( + lexer, + |token| token.is_pound() || token.is_apostrophe(), + |parser| { + let result = parser.next().ok_or(String::new()); + skip_whitespace_and_comments(parser); + result + }, + )? + .into(); + skip_whitespace_and_comments(lexer); + let suffix = parse_expression_suffix(lexer)?; + skip_spaces_and_comments(lexer); + + Ok(Expression { prefix, suffix }) +} + +fn parse_expression_suffix(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + if let Some(list) = parse_if(lexer, |token| token.is_left_parenthesis(), parse_list) { + list.map(ExpressionSuffix::List) + } else if let Some(identifier) = lexer.next_if(|token| token.is_identifier()) { + Ok(ExpressionSuffix::Identifier { + name: identifier + .identifier() + .ok_or_else(|| String::from("we just checked for an identifier"))? + .into(), + token: identifier, + }) + } else if let Some(string) = lexer.next_if(|token| token.is_string()) { + Ok(ExpressionSuffix::String { + value: string + .computed_string() + .ok_or_else(|| String::from("we just checked for an string"))? + .into(), + token: string, + }) + } else if let Some(number) = lexer.next_if(|token| token.is_number()) { + Ok(ExpressionSuffix::Number { + value: *number + .number() + .ok_or_else(|| String::from("we just checked for a number"))?, + token: number, + }) + } else { + Ok(ExpressionSuffix::Nothing) + } +} + +fn parse_list(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + let left_paren = lexer + .next_if(|token| token.is_left_parenthesis()) + .ok_or_else(|| String::from("Unexpected token. Expected a left parenthesis"))?; + skip_whitespace_and_comments(lexer); + let items = parse_many(lexer, begins_list_item, parse_list_item)?.into(); + skip_whitespace_and_comments(lexer); + let right_paren = lexer.next_if(|token| token.is_right_parenthesis()); + + Ok(List { + left_paren, + items, + right_paren, + }) +} + +fn parse_list_item(lexer: &mut Peekable) -> Result { + skip_whitespace_and_comments(lexer); + if let Some(dot) = lexer.next_if(|token| token.is_dot()) { + skip_whitespace_and_comments(lexer); + Ok(ListItem::Dot(dot)) + } else { + let result = parse_expression(lexer).map(ListItem::Expression); + skip_whitespace_and_comments(lexer); + result + } +} diff --git a/src/builtins.rs b/src/builtins.rs new file mode 100644 index 0000000..201cd89 --- /dev/null +++ b/src/builtins.rs @@ -0,0 +1,300 @@ +use std::sync::{Arc, LazyLock}; + +use crate::interpreter::{self, Interpreter, Value, wrap_in_quote}; + +mod lists; +mod numbers; +mod pairlists; + +pub use lists::*; +pub use numbers::*; +pub use pairlists::*; + +pub static T: LazyLock> = LazyLock::new(|| Arc::new(Value::Identifier("T".into()))); +pub static F: LazyLock> = LazyLock::new(|| Arc::new(Value::Identifier("F".into()))); +pub static NIL: LazyLock> = LazyLock::new(|| Arc::new(Value::Identifier("NIL".into()))); + +fn delsh_bool(value: bool) -> Arc { + match value { + true => T.clone(), + false => NIL.clone(), + } +} + +pub fn quote(_interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + args[0].clone() +} + +pub fn eval(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let form = interpreter.eval(args[0].clone()); + let variables = args + .get(1) + .map(|value| interpreter.eval(value.clone()).list().expect("a pair list")); + + interpreter.push_frame(); + + if let Some(variables) = variables { + for variable in variables { + let Some((name, value)) = variable.pair() else { + continue; + }; + let Some(name) = name.string() else { + continue; + }; + interpreter.set_atom(name.clone(), value.clone()); + } + } + + let result = interpreter.eval(form); + + interpreter.pop_frame(); + result +} + +pub fn apply(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let function = interpreter.eval(args[0].clone()); + let arguments = interpreter + .eval(args[1].clone()) + .list() + .expect("a list of arguments") + .into_iter() + .map(wrap_in_quote) + .collect::>(); + let variables = args.get(2).map(|value| { + interpreter + .eval(value.clone()) + .list() + .expect("a list of variables") + }); + + interpreter.push_frame(); + + if let Some(variables) = variables { + for variable in variables { + let Some((name, value)) = variable.pair() else { + continue; + }; + let Some(name) = name.string() else { + continue; + }; + interpreter.set_atom(name.clone(), value.clone()); + } + } + + let result = interpreter.run_function(&function, &arguments); + + interpreter.pop_frame(); + result +} + +pub fn define(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let definitions = args[0].list().expect("a list of definitions"); + for definition in definitions { + let pair = definition.list().expect("a name-value pair"); + let Some(name) = pair[0].string() else { + continue; + }; + + let value = interpreter.eval(pair[1].clone()); + interpreter.set_atom(name.clone(), value); + } + + NIL.clone() +} + +pub fn set(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let lhs = args[0] + .identifier() + .expect("an identifier on the left side"); + let rhs = interpreter.eval(args[1].clone()); + interpreter.set_atom(lhs.clone(), rhs.clone()); + + rhs +} + +pub fn lambda(_interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + Arc::new(Value::DelshFn { + args: args[0] + .list() + .expect("a list of argument names") + .into_iter() + .filter_map(|value| value.identifier().cloned()) + .collect(), + command: args[1].clone(), + }) +} + +pub fn defun(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let Value::Identifier(function_name) = &*args[0] else { + return NIL.clone(); + }; + + let function = Value::DelshFn { + args: args[1] + .list() + .expect("a list of parameter names") + .into_iter() + .filter_map(|value| value.identifier().cloned()) + .collect(), + command: args[2].clone(), + }; + + let function = Arc::new(function); + interpreter.set_atom(function_name.clone(), function.clone()); + function +} + +pub fn do_function(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let Some(last_arg) = args.last() else { + return NIL.clone(); + }; + + for arg in &args[..args.len() - 1] { + interpreter.eval(arg.clone()); + } + + interpreter.eval(last_arg.clone()) +} + +pub fn if_function(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let condition = interpreter.eval(args[0].clone()); + let if_block = args[1].clone(); + let else_block = args[2].clone(); + + if condition != *NIL { + interpreter.eval(if_block) + } else { + interpreter.eval(else_block) + } +} + +pub fn cond_function(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + for case in args { + let case = case.list().expect("a list of cases"); + let condition = interpreter.eval(case[0].clone()); + if condition != *NIL { + let mut result = NIL.clone(); + for command in &case[1..] { + result = interpreter.eval(command.clone()); + } + return result; + } + } + + NIL.clone() +} + +pub fn while_function(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let condition = args[0].clone(); + let while_block = args[1].clone(); + + while interpreter.eval(condition.clone()) != NIL.clone() { + interpreter.eval(while_block.clone()); + } + + NIL.clone() +} + +pub fn loop_function(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let expression = args[0].clone(); + + loop { + interpreter.eval(expression.clone()); + } +} + +pub fn cons(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + Arc::new(Value::Pair( + interpreter.eval(args[0].clone()), + interpreter.eval(args[1].clone()), + )) +} + +pub fn ff(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + fn ff_inner(arg: Arc) -> Arc { + if let Value::Pair(a, _b) = &*arg { + ff_inner(a.clone()) + } else { + arg + } + } + + ff_inner(interpreter.eval(args[0].clone())) +} + +pub fn subst(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + fn subst_inner(replacement: Arc, pattern: Arc, full: Arc) -> Arc { + if full == pattern { + return replacement; + } + + if let Value::Pair(a, b) = &*full { + return Arc::new(Value::Pair( + subst_inner(replacement.clone(), pattern.clone(), a.clone()), + subst_inner(replacement, pattern, b.clone()), + )); + } + + full + } + + let arg1 = interpreter.eval(args[0].clone()); + let arg2 = interpreter.eval(args[1].clone()); + let arg3 = interpreter.eval(args[2].clone()); + + subst_inner(arg1, arg2, arg3) +} + +pub fn is_atom(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + match &*interpreter.eval(args[0].clone()) { + Value::Identifier(_) => T.clone(), + Value::Number(_) => T.clone(), + Value::String(_) => T.clone(), + Value::RustFn(_) => T.clone(), + Value::DelshFn { .. } => T.clone(), + Value::Pair(..) => NIL.clone(), + } +} + +pub fn is_nil(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let x = interpreter.eval(args[0].clone()); + + delsh_bool(x == *NIL) +} + +pub fn maplist(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let list = interpreter.eval(args[0].clone()).list().expect("a list"); + let mapper = interpreter.eval(args[1].clone()); + + let mapped = list + .into_iter() + .map(|i| interpreter.run_function(&mapper, &[i])) + .collect::>(); + interpreter::vec_to_value(&mapped) +} + +pub fn and(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + for arg in args { + let arg = interpreter.eval(arg.clone()); + if arg != *NIL { + return arg; + } + } + + NIL.clone() +} + +pub fn or(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + for arg in args { + let arg = interpreter.eval(arg.clone()); + if arg == *NIL { + return arg; + } + } + + NIL.clone() +} + +pub fn not(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + is_nil(interpreter, args) +} diff --git a/src/builtins/lists.rs b/src/builtins/lists.rs new file mode 100644 index 0000000..5ecf415 --- /dev/null +++ b/src/builtins/lists.rs @@ -0,0 +1,170 @@ +use std::{collections::HashSet, sync::Arc}; + +use rust_decimal::Decimal; + +use crate::{ + builtins::delsh_bool, + interpreter::{self, Interpreter, Value, vec_to_value}, +}; + +use super::NIL; + +fn car_inner(list: Arc) -> Arc { + let Value::Pair(a, _b) = &*list else { + panic!("expected a list") + }; + + a.clone() +} + +fn cdr_inner(list: Arc) -> Arc { + let Value::Pair(_a, b) = &*list else { + panic!("expected a list") + }; + + b.clone() +} + +pub fn is_member(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let pattern = interpreter.eval(args[0].clone()); + let full = interpreter.eval(args[1].clone()); + + fn is_among_inner(pattern: Arc, full: Arc) -> bool { + if pattern == full { + return true; + } + + if let Some((a, b)) = full.pair() { + return is_among_inner(pattern.clone(), a.clone()) + || is_among_inner(pattern, b.clone()); + } + + false + } + + delsh_bool(is_among_inner(pattern, full)) +} + +macro_rules! cr { + ($name: ident => $($cr: expr),*) => { + pub fn $name(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let list = interpreter.eval(args[0].clone()); + $(let list = $cr(list);)* + list + } + }; +} + +cr!(car => car_inner); +cr!(cdr => cdr_inner); +cr!(caar => car_inner, car_inner); +cr!(cadr => cdr_inner, car_inner); +cr!(cdar => car_inner, cdr_inner); +cr!(cddr => cdr_inner, cdr_inner); +cr!(caaar => car_inner, car_inner, car_inner); +cr!(caadr => cdr_inner, car_inner, car_inner); +cr!(cadar => car_inner, cdr_inner, car_inner); +cr!(caddr => cdr_inner, cdr_inner, car_inner); +cr!(cdaar => car_inner, car_inner, cdr_inner); +cr!(cdadr => cdr_inner, car_inner, cdr_inner); +cr!(cddar => car_inner, cdr_inner, cdr_inner); +cr!(cdddr => cdr_inner, cdr_inner, cdr_inner); + +macro_rules! index { + (fn $fn_name: ident => $index: expr) => { + pub fn $fn_name(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let list = interpreter + .eval(args[0].clone()) + .list() + .expect("a list to index"); + list[$index].clone() + } + }; +} + +index!(fn first => 0); +index!(fn second => 1); +index!(fn third => 2); +index!(fn fourth => 3); +index!(fn fifth => 4); +index!(fn sixth => 5); +index!(fn seventh => 6); +index!(fn eighth => 7); +index!(fn ninth => 8); +index!(fn tenth => 9); + +pub fn list(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + vec_to_value( + &(args + .iter() + .map(|a| interpreter.eval(a.clone())) + .collect::>()), + ) +} + +pub fn reverse(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let mut list = interpreter.eval(args[0].clone()).list().expect("a list"); + list.reverse(); + + interpreter::vec_to_value(&list) +} + +pub fn append(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let mut new_list = Vec::new(); + + for arg in args { + let arg = interpreter.eval(arg.clone()).list().expect("a list"); + new_list.extend(arg); + } + + vec_to_value(&new_list) +} + +pub fn length(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let list = interpreter.eval(args[0].clone()).list().expect("a list"); + Arc::new(Value::Number(Decimal::from(list.len()))) +} + +pub fn efface(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let element = interpreter.eval(args[0].clone()); + let mut list = interpreter.eval(args[1].clone()).list().expect("a list"); + + let Some(index_to_remove) = list.iter().position(|i| *i == element) else { + return NIL.clone(); + }; + + list.remove(index_to_remove); + interpreter::vec_to_value(&list) +} + +pub fn union(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let mut unique_values = HashSet::new(); + + for arg in args { + let arg = interpreter + .eval(arg.clone()) + .set() + .expect("all arguments to be lists"); + for value in arg { + unique_values.insert(value); + } + } + + interpreter::vec_to_value(&unique_values.into_iter().collect::>()) +} + +pub fn intersection(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let mut common_values = Vec::new(); + + for arg in args { + let arg = interpreter + .eval(arg.clone()) + .list() + .expect("all arguments to be lists"); + let arg = arg.into_iter().collect::>(); + + common_values.retain(|element| arg.contains(element)); + } + + interpreter::vec_to_value(&common_values.into_iter().collect::>()) +} diff --git a/src/builtins/numbers.rs b/src/builtins/numbers.rs new file mode 100644 index 0000000..86e159a --- /dev/null +++ b/src/builtins/numbers.rs @@ -0,0 +1,227 @@ +use std::sync::Arc; + +use rust_decimal::{Decimal, MathematicalOps}; + +use crate::interpreter::{Interpreter, Value}; + +use super::{NIL, delsh_bool}; + +pub fn is_number(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let arg = interpreter.eval(args[0].clone()); + delsh_bool(arg.number().is_some()) +} + +pub fn is_int(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let arg = interpreter.eval(args[0].clone()); + delsh_bool(arg.number().is_some_and(|num| num.is_integer())) +} + +pub fn is_equal(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let a = interpreter.eval(args[0].clone()); + let b = interpreter.eval(args[1].clone()); + + delsh_bool(a == b) +} + +pub fn is_less(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let a = *interpreter + .eval(args[0].clone()) + .number() + .expect("two numbers"); + let b = *interpreter + .eval(args[1].clone()) + .number() + .expect("two numbers"); + + delsh_bool(a < b) +} + +pub fn is_greater(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let a = *interpreter + .eval(args[0].clone()) + .number() + .expect("two numbers"); + let b = *interpreter + .eval(args[1].clone()) + .number() + .expect("two numbers"); + + delsh_bool(a > b) +} + +pub fn is_zero(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let arg = interpreter.eval(args[0].clone()); + delsh_bool(arg.number().is_some_and(|num| *num == Decimal::ZERO)) +} + +pub fn is_one(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let arg = interpreter.eval(args[0].clone()); + delsh_bool(arg.number().is_some_and(|num| *num == Decimal::ONE)) +} + +pub fn is_negative(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let arg = interpreter.eval(args[0].clone()); + delsh_bool(arg.number().is_some_and(|num| num.is_sign_negative())) +} + +pub fn plus(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let mut total = Decimal::ZERO; + for arg in args { + let arg = interpreter.eval(arg.clone()); + match &*arg { + Value::Number(num) => total += num, + _ => panic!("Expected a number"), + } + } + + Arc::new(Value::Number(total)) +} + +pub fn minus(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + if args.len() == 1 { + let Value::Number(arg) = &*interpreter.eval(args[0].clone()) else { + panic!("expected a number") + }; + Arc::new(Value::Number(-arg)) + } else if args.len() == 2 { + let Value::Number(a) = &*interpreter.eval(args[0].clone()) else { + panic!("expected a number") + }; + let Value::Number(b) = &*interpreter.eval(args[1].clone()) else { + panic!("expected a number") + }; + return Arc::new(Value::Number(a - b)); + } else { + panic!("expected one or two arguments") + } +} + +pub fn negate(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let Value::Number(arg) = &*interpreter.eval(args[0].clone()) else { + panic!("expected a number") + }; + + Arc::new(Value::Number(-arg)) +} + +pub fn times(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let mut total = Decimal::ONE; + + for arg in args { + let Value::Number(arg) = &*interpreter.eval(arg.clone()) else { + panic!("expected a number") + }; + + total *= arg; + } + + Arc::new(Value::Number(total)) +} + +pub fn divide(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let Value::Number(arg1) = &*interpreter.eval(args[0].clone()) else { + panic!("expected a number") + }; + let Value::Number(arg2) = &*interpreter.eval(args[1].clone()) else { + panic!("expected a number") + }; + + Arc::new(Value::Number(arg1 / arg2)) +} + +pub fn quotient(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let numerator = interpreter.eval(args[0].clone()); + let numerator = numerator.number().expect("two numbers"); + let denominator = interpreter.eval(args[1].clone()); + let denominator = denominator.number().expect("two numbers"); + + Arc::new(Value::Number((numerator / denominator).floor())) +} + +pub fn remainder(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let Value::Number(arg1) = &*interpreter.eval(args[0].clone()) else { + panic!("expected a number") + }; + let Value::Number(arg2) = &*interpreter.eval(args[1].clone()) else { + panic!("expected a number") + }; + + Arc::new(Value::Number(arg1 % arg2)) +} + +pub fn add1(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + Arc::new(Value::Number( + interpreter + .eval(args[0].clone()) + .number() + .expect("a number") + + Decimal::ONE, + )) +} + +pub fn sub1(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + Arc::new(Value::Number( + interpreter + .eval(args[0].clone()) + .number() + .expect("a number") + - Decimal::ONE, + )) +} + +pub fn max(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let max = interpreter.eval(args[0].clone()); + let mut max = *max.number().expect("all args to be numbers"); + + for arg in &args[1..] { + let arg = interpreter.eval(arg.clone()); + let arg = arg.number().expect("all args to be numbers"); + max = max.max(*arg); + } + + Arc::new(Value::Number(max)) +} + +pub fn min(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let min = interpreter.eval(args[0].clone()); + let mut min = *min.number().expect("all args to be numbers"); + + for arg in &args[1..] { + let arg = interpreter.eval(arg.clone()); + let arg = arg.number().expect("all args to be numbers"); + min = min.min(*arg); + } + + Arc::new(Value::Number(min)) +} + +pub fn recip(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let number = interpreter.eval(args[0].clone()); + let number = number.number().expect("a number"); + Arc::new(Value::Number(Decimal::ONE / number)) +} + +pub fn expt(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let x = *interpreter + .eval(args[0].clone()) + .number() + .expect("two numbers"); + let y = *interpreter + .eval(args[0].clone()) + .number() + .expect("two numbers"); + + Arc::new(Value::Number(x.powd(y))) +} + +pub fn sqrt(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let Value::Number(arg) = &*interpreter.eval(args[0].clone()) else { + panic!("expected a number") + }; + + let Some(sqrt) = arg.sqrt() else { + return NIL.clone(); + }; + + Arc::new(Value::Number(sqrt)) +} diff --git a/src/builtins/pairlists.rs b/src/builtins/pairlists.rs new file mode 100644 index 0000000..1c18810 --- /dev/null +++ b/src/builtins/pairlists.rs @@ -0,0 +1,66 @@ +use std::sync::Arc; + +use crate::interpreter::{self, Interpreter, Value}; + +pub fn pair(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let keys = interpreter + .eval(args[0].clone()) + .list() + .expect("a list of keys"); + let vals = interpreter + .eval(args[1].clone()) + .list() + .expect("a list of values"); + + let pairs = keys + .into_iter() + .zip(vals) + .map(|(key, value)| Arc::new(Value::Pair(key, value))) + .collect::>(); + + interpreter::vec_to_value(&pairs) +} + +fn assoc_inner(key: Arc, pairs: &[Arc]) -> Option> { + let get_value = |pair: &Arc| { + if let Value::Pair(k, v) = &**pair { + (k == &key).then(|| v.clone()) + } else { + None + } + }; + + pairs.iter().filter_map(get_value).next() +} + +pub fn assoc(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + let key = interpreter.eval(args[0].clone()); + let pairs = interpreter.eval(args[1].clone()).list().expect("a list"); + + assoc_inner(key, &pairs).unwrap() +} + +pub fn sublis(interpreter: &mut Interpreter, args: &[Arc]) -> Arc { + fn sublis_inner(expression: Arc, replacements: &[Arc]) -> Arc { + if let Some(value) = assoc_inner(expression.clone(), replacements) { + return value.clone(); + } + + if let Value::Pair(a, b) = &*expression { + return Arc::new(Value::Pair( + sublis_inner(a.clone(), replacements), + sublis_inner(b.clone(), replacements), + )); + } + + expression + } + + let expression = interpreter.eval(args[1].clone()); + let replacements = interpreter + .eval(args[0].clone()) + .list() + .expect("a list of replacements"); + + sublis_inner(expression, &replacements) +} diff --git a/src/interpreter.rs b/src/interpreter.rs new file mode 100644 index 0000000..264e634 --- /dev/null +++ b/src/interpreter.rs @@ -0,0 +1,293 @@ +use std::{ + collections::{HashMap, HashSet}, + fmt::Debug, + sync::Arc, +}; + +use rust_decimal::Decimal; + +use crate::{ + ast::{self, Expression, List}, + builtins::NIL, +}; + +type NativeFunction = fn(&mut Interpreter, &[Arc]) -> Arc; + +#[derive(Debug, Default)] +pub struct Interpreter { + stack: Vec, Arc>>, +} + +#[derive(Clone, PartialEq, Eq, Hash)] +pub enum Value { + Identifier(Arc), + Number(Decimal), + String(Arc), + Pair(Arc, Arc), + RustFn(NativeFunction), + DelshFn { + args: Arc<[Arc]>, + command: Arc, + }, +} + +impl Debug for Value { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + fn fmt_pair( + f: &mut std::fmt::Formatter<'_>, + a: Arc, + b: Arc, + ) -> std::fmt::Result { + a.fmt(f)?; + + if let Value::Pair(a, b) = &*b { + f.write_str(" ")?; + fmt_pair(f, a.clone(), b.clone()) + } else if b == NIL.clone() { + f.write_str(")") + } else { + f.write_str(" . ")?; + b.fmt(f)?; + f.write_str(")") + } + } + + match self { + Value::Identifier(identifier) => f.write_str(identifier), + Value::Number(number) => write!(f, "{number}"), + Value::String(string) => write!(f, "\"{string}\""), + Value::Pair(a, b) => { + f.write_str("(")?; + fmt_pair(f, a.clone(), b.clone()) + } + Value::RustFn(_) => f.write_str("builtin function"), + Value::DelshFn { .. } => f.write_str("function"), + } + } +} + +impl Value { + pub fn identifier(&self) -> Option<&Arc> { + if let Value::Identifier(identifier) = self { + Some(identifier) + } else { + None + } + } + + pub fn number(&self) -> Option<&Decimal> { + if let Value::Number(number) = self { + Some(number) + } else { + None + } + } + + pub fn string(&self) -> Option<&Arc> { + if let Value::String(string) = self { + Some(string) + } else { + None + } + } + + pub fn pair(&self) -> Option<(&Arc, &Arc)> { + if let Value::Pair(a, b) = self { + Some((a, b)) + } else { + None + } + } + + pub fn list(&self) -> Option>> { + let mut list = self; + let mut builder = Vec::new(); + loop { + if let Value::Pair(head, rest) = &list { + builder.push(head.clone()); + list = rest; + } else if list == &**NIL { + return Some(builder); + } else { + return None; + } + } + } + + pub fn set(&self) -> Option>> { + let mut head = self; + let mut builder = HashSet::new(); + loop { + if let Value::Pair(car, cdr) = &head { + builder.insert(car.clone()); + head = cdr; + } else if head == &**NIL { + return Some(builder); + } else { + return None; + } + } + } + + pub fn rust_fn(&self) -> Option<&NativeFunction> { + if let Value::RustFn(func) = self { + Some(func) + } else { + None + } + } + + pub fn is_function(&self) -> bool { + matches!(self, Value::RustFn(_) | Value::DelshFn { .. }) + } +} + +pub fn wrap_in_quote(value: Arc) -> Arc { + Arc::new(Value::Pair( + Arc::new(Value::Identifier("QUOTE".into())), + Arc::new(Value::Pair(value, NIL.clone())), + )) +} + +pub fn vec_to_value(list: &[Arc]) -> Arc { + if list.is_empty() { + NIL.clone() + } else if list.len() == 1 { + Arc::new(Value::Pair(list[0].clone(), NIL.clone())) + } else { + Arc::new(Value::Pair(list[0].clone(), vec_to_value(&list[1..]))) + } +} + +impl Interpreter { + pub fn new() -> Self { + Self { + stack: vec![HashMap::new()], + } + } + + fn current_stack_frame_mut(&mut self) -> &mut HashMap, Arc> { + self.stack + .last_mut() + .expect("there should always be at least one stack frame") + } + + pub fn push_frame(&mut self) { + self.stack.push(HashMap::new()); + } + + pub fn pop_frame(&mut self) { + if self.stack.len() == 1 { + return; + } + + self.stack.pop(); + } + + pub fn get_atom(&self, name: &str) -> Option> { + for frame in self.stack.iter().rev() { + if let Some(value) = frame.get(name) { + return Some(value.clone()); + } + } + + None + } + + pub fn set_atom(&mut self, name: Arc, value: Arc) { + self.current_stack_frame_mut() + .insert(name.to_uppercase().into(), value); + } + + pub fn run_function(&mut self, function: &Value, arguments: &[Arc]) -> Arc { + match function { + Value::RustFn(func) => func(self, arguments), + Value::DelshFn { args, command } => { + self.push_frame(); + for (name, value) in args.iter().zip(arguments) { + let value = self.eval(value.clone()); + self.set_atom(name.clone(), value); + } + + let return_value = self.eval(command.clone()); + self.pop_frame(); + return_value + } + _ => panic!("expected a function"), + } + } + + pub fn eval(&mut self, value: Arc) -> Arc { + match &*value { + Value::Identifier(identifier) => { + self.get_atom(identifier).expect("a defined atom").clone() + } + Value::Pair(function, arguments) => { + let function = self.eval(function.clone()); + let arguments = arguments.list().expect("arguments to be a list"); + self.run_function(&function, &arguments) + } + _ => value, + } + } + + fn ast_list_to_value(&mut self, list: &List) -> Arc { + let contains_dot = list.items.iter().any(|item| item.dot().is_some()); + let mut head = (!contains_dot).then(|| NIL.clone()); + for item in list.items.iter().rev() { + let Some(item) = item.expression() else { + continue; + }; + let Some(head) = head.as_mut() else { + head = Some(self.ast_expression_value(item)); + continue; + }; + + *head = Arc::new(Value::Pair(self.ast_expression_value(item), head.clone())); + } + + head.unwrap_or_else(|| NIL.clone()) + } + + pub fn ast_expression_value(&mut self, expr: &Expression) -> Arc { + let suffix_value = match &expr.suffix { + ast::ExpressionSuffix::Nothing => NIL.clone(), + ast::ExpressionSuffix::Number { value, .. } => Arc::new(Value::Number(*value)), + ast::ExpressionSuffix::String { value, .. } => Arc::new(Value::String(value.clone())), + ast::ExpressionSuffix::Identifier { name, .. } => { + Arc::new(Value::Identifier(name.clone())) + } + ast::ExpressionSuffix::List(list) => self.ast_list_to_value(list), + }; + + if expr.is_quoted() { + wrap_in_quote(suffix_value) + } else { + suffix_value + } + } + + pub fn run_ast_command(&mut self, command: &ast::Command) -> Arc { + match command { + ast::Command::Statement(statement) => { + let function_name = &statement.function_name; + let function = self + .get_atom(function_name) + .expect("expected a defined value") + .clone(); + let arguments = statement + .args + .iter() + .map(|arg| self.ast_expression_value(arg)) + .collect::>(); + + self.run_function(&function, &arguments) + } + ast::Command::Expression(expression) => { + let value = self.ast_expression_value(expression); + self.eval(value) + } + ast::Command::PanicMode(_) => NIL.clone(), + } + } +} diff --git a/src/ir.rs b/src/ir.rs new file mode 100644 index 0000000..eb08775 --- /dev/null +++ b/src/ir.rs @@ -0,0 +1,294 @@ +use rust_decimal::Decimal; + +use crate::{ + ast::{Command, Expression, ExpressionSuffix, List, ListItem, Program, Statement}, + tokens::{Token, TokenType}, +}; + +#[derive(Debug, Clone, PartialEq)] +pub enum Ty { + Any, + Union(Vec), + Pair(Box, Box), + Function { + args: Vec, + return_ty: Box, + }, + String { + value: Option, + }, + Number { + minimum: Option, + maximum: Option, + }, + Atom { + value: Option, + }, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct Register { + id: usize, + ty: Ty, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum IrStatement { + LoadImmediateNumber { + destination: Register, + immediate: Decimal, + }, + LoadImmediateString { + destination: Register, + immediate: String, + }, + LoadImmediateAtom { + destination: Register, + immediate: String, + }, + CreateConsPair { + destination: Register, + left: Register, + right: Register, + }, + Copy { + destination: Register, + source: Register, + }, + EvaluateAtom { + destination: Register, + atom: Register, + }, + CallFunction { + destination: Register, + function: Register, + args: Vec, + }, +} + +pub struct Scope { + registers: Vec, + statements: Vec, +} + +fn new_register(scope: &mut Scope, ty: Ty) -> Register { + let id = scope.registers.len(); + let register = Register { id, ty }; + scope.registers.push(register.clone()); + register +} + +pub fn compile_program(program: &Program) -> Scope { + let mut scope = Scope { + registers: Vec::new(), + statements: Vec::new(), + }; + + for command in &*program.commands { + compile_command(&mut scope, command); + } + + scope +} + +fn compile_command(scope: &mut Scope, command: &Command) { + match command { + Command::Statement(statement) => compile_statement(scope, statement), + Command::Expression(_) => unimplemented!("top-level lists are not currently supported"), + }; +} + +fn compile_statement(scope: &mut Scope, statement: &Statement) { + let args = statement + .args + .iter() + .map(|expr| compile_expression(scope, expr)) + .collect(); + + let function_atom = new_register(scope, Ty::Atom { value: None }); + let function = new_register(scope, Ty::Any); + let destination = new_register(scope, Ty::Any); + + let TokenType::Identifier(function_name) = &statement.function_token.ty else { + panic!("expected a function name") + }; + + scope.statements.push(IrStatement::LoadImmediateAtom { + destination: function_atom.clone(), + immediate: function_name.to_string(), + }); + + scope.statements.push(IrStatement::EvaluateAtom { + destination: function.clone(), + atom: function_atom, + }); + + scope.statements.push(IrStatement::CallFunction { + destination, + function, + args, + }); +} + +fn compile_expression(scope: &mut Scope, expression: &Expression) -> Register { + let expression_suffix = compile_expression_suffix(scope, &expression.suffix); + + if expression.is_quoted() { + let head = new_register( + scope, + Ty::Pair( + Box::new(Ty::Function { + args: vec![Ty::Any], + return_ty: Box::new(Ty::Any), + }), + Box::new(Ty::Any), + ), + ); + let tail = new_register(scope, Ty::Any); + let quote = new_register( + scope, + Ty::Atom { + value: Some("QUOTE".to_string()), + }, + ); + let null = new_register( + scope, + Ty::Atom { + value: Some("NIL".to_string()), + }, + ); + + scope.statements.push(IrStatement::LoadImmediateAtom { + destination: quote.clone(), + immediate: "QUOTE".to_string(), + }); + scope.statements.push(IrStatement::LoadImmediateAtom { + destination: null.clone(), + immediate: "NIL".to_string(), + }); + scope.statements.push(IrStatement::CreateConsPair { + destination: tail.clone(), + left: expression_suffix, + right: null, + }); + scope.statements.push(IrStatement::CreateConsPair { + destination: head.clone(), + left: quote, + right: tail, + }); + + head + } else { + expression_suffix + } +} + +fn compile_expression_suffix(scope: &mut Scope, suffix: &ExpressionSuffix) -> Register { + match suffix { + ExpressionSuffix::Nothing => panic!("invalid value"), + ExpressionSuffix::Atom(atom) => compile_atom(scope, atom), + ExpressionSuffix::List(list) => compile_list(scope, list), + } +} + +fn compile_atom(scope: &mut Scope, suffix: &Token) -> Register { + match &suffix.ty { + TokenType::Identifier(atom) => { + let register = new_register( + scope, + Ty::Atom { + value: Some(atom.to_string()), + }, + ); + + scope.statements.push(IrStatement::LoadImmediateAtom { + destination: register.clone(), + immediate: atom.to_string(), + }); + + register + } + TokenType::String { + content, + terminated: _, + } => { + let register = new_register( + scope, + Ty::String { + value: Some(content.to_string()), + }, + ); + + scope.statements.push(IrStatement::LoadImmediateString { + destination: register.clone(), + immediate: content.to_string(), + }); + + register + } + TokenType::Number(number) => { + let register = new_register( + scope, + Ty::Number { + minimum: Some(*number), + maximum: Some(*number), + }, + ); + + scope.statements.push(IrStatement::LoadImmediateNumber { + destination: register.clone(), + immediate: *number, + }); + + register + } + _ => { + panic!("expected a string, number, or identifier"); + } + } +} + +fn compile_list(scope: &mut Scope, list: &List) -> Register { + let contains_dot = list + .items + .iter() + .any(|item| matches!(item, ListItem::Dot(_))); + let mut head = (!contains_dot).then(|| { + let register = new_register( + scope, + Ty::Atom { + value: Some("NIL".to_string()), + }, + ); + scope.statements.push(IrStatement::LoadImmediateAtom { + destination: register.clone(), + immediate: "NIL".to_string(), + }); + register + }); + + for item in list.items.iter().rev() { + let ListItem::Expression(item) = item else { + continue; + }; + let Some(head) = head.as_mut() else { + head = Some(compile_expression(scope, item)); + continue; + }; + + let expression = compile_expression(scope, item); + let register = new_register( + scope, + Ty::Pair(Box::new(expression.ty.clone()), Box::new(head.ty.clone())), + ); + + scope.statements.push(IrStatement::CreateConsPair { + destination: register.clone(), + left: expression, + right: head.clone(), + }); + *head = register; + } + + head.expect("list should be non-empty") +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..7f3fbdb --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,5 @@ +pub mod ast; +pub mod builtins; +pub mod interpreter; +//pub mod ir; +pub mod tokens; diff --git a/src/tokens.rs b/src/tokens.rs new file mode 100644 index 0000000..4654ddf --- /dev/null +++ b/src/tokens.rs @@ -0,0 +1,501 @@ +use std::fmt::Display; +use std::sync::Arc; + +use rust_decimal::{Decimal, MathematicalOps}; +use snob::csets; + +#[derive(Debug)] +pub struct Lexer { + scanner: snob::Scanner, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct Token { + pub span: Span, + pub ty: TokenType, +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct Span { + pub start: usize, + pub end: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum TokenType { + Whitespace(Arc), + LineComment(Arc), + BlockComment { comment: Arc, terminated: bool }, + + LeftParenthesis, + RightParenthesis, + + Apostrophe, + Pound, + Dot, + + Identifier(Arc), + String { content: Arc, terminated: bool }, + Number(Decimal), +} + +impl Display for TokenType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Self::Whitespace(string) => f.write_str(string), + Self::LineComment(string) => f.write_str(string), + Self::BlockComment { + comment, + terminated: _, + } => f.write_str(comment), + Self::LeftParenthesis => f.write_str("("), + Self::RightParenthesis => f.write_str(")"), + Self::Apostrophe => f.write_str("'"), + Self::Pound => f.write_str("#"), + Self::Dot => f.write_str("."), + Self::Identifier(ident) => f.write_str(ident), + Self::String { + content, + terminated: _, + } => f.write_str(content), + Self::Number(number) => f.write_str(&number.to_string()), + } + } +} + +impl Display for Token { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.ty.fmt(f) + } +} + +impl Token { + fn new(start: usize, end: usize, ty: TokenType) -> Self { + Self { + span: Span { start, end }, + ty, + } + } + + pub fn is_whitespace(&self) -> bool { + matches!(self.ty, TokenType::Whitespace(_)) + } + + pub fn contains_newline(&self) -> bool { + if let TokenType::Whitespace(space) = &self.ty { + space.contains('\n') + } else { + false + } + } + + pub fn is_comment(&self) -> bool { + matches!( + self.ty, + TokenType::LineComment(_) | TokenType::BlockComment { .. } + ) + } + + pub fn is_left_parenthesis(&self) -> bool { + self.ty == TokenType::LeftParenthesis + } + + pub fn is_right_parenthesis(&self) -> bool { + self.ty == TokenType::RightParenthesis + } + + pub fn is_apostrophe(&self) -> bool { + self.ty == TokenType::Apostrophe + } + + pub fn is_pound(&self) -> bool { + self.ty == TokenType::Pound + } + + pub fn is_dot(&self) -> bool { + self.ty == TokenType::Dot + } + + pub fn is_identifier(&self) -> bool { + self.raw_identifier().is_some() + } + + pub fn is_number(&self) -> bool { + self.number().is_some() + } + + pub fn is_string(&self) -> bool { + self.raw_string().is_some() + } + + pub fn raw_identifier(&self) -> Option<&str> { + if let TokenType::Identifier(identifier) = &self.ty { + Some(identifier) + } else { + None + } + } + + pub fn identifier(&self) -> Option { + Some(self.raw_identifier()?.to_uppercase()) + } + + pub fn number(&self) -> Option<&Decimal> { + if let TokenType::Number(number) = &self.ty { + Some(number) + } else { + None + } + } + + pub fn raw_string(&self) -> Option<&str> { + if let TokenType::String { content, .. } = &self.ty { + Some(content) + } else { + None + } + } + + pub fn computed_string(&self) -> Option { + enum State { + Default, + Backslash, + Unicode { + remaining_chars: usize, + current: u32, + }, + } + + fn handle_default_state(char: char, computed: &mut String, state: &mut State) { + if char == '\\' { + *state = State::Backslash; + } else { + computed.push(char); + } + } + + fn handle_backslash_state(char: char, computed: &mut String, state: &mut State) { + match char { + 'u' => { + *state = State::Unicode { + remaining_chars: 6, + current: 0, + } + } + 'x' => { + *state = State::Unicode { + remaining_chars: 2, + current: 0, + } + } + 'n' => { + *state = State::Default; + computed.push('\n'); + } + 't' => { + *state = State::Default; + computed.push('\t'); + } + _ => { + *state = State::Default; + computed.push(char); + } + } + } + + fn handle_unicode_state( + char: char, + remaining_chars: usize, + current: u32, + computed: &mut String, + state: &mut State, + ) { + let digit = match char { + '0' => 0, + '1' => 1, + '2' => 2, + '3' => 3, + '4' => 4, + '5' => 5, + '6' => 6, + '7' => 7, + '8' => 8, + '9' => 9, + 'a' | 'A' => 10, + 'b' | 'B' => 11, + 'c' | 'C' => 12, + 'd' | 'D' => 13, + 'e' | 'E' => 14, + 'f' | 'F' => 15, + _ => { + *state = State::Default; + return; + } + }; + + if remaining_chars == 0 { + let charcode = current * 16 + digit; + computed.push(char::from_u32(charcode).unwrap_or(char::REPLACEMENT_CHARACTER)); + *state = State::Default; + } else { + *state = State::Unicode { + remaining_chars: remaining_chars - 1, + current: current * 16 + digit, + }; + } + } + + let TokenType::String { content, .. } = &self.ty else { + return None; + }; + + let mut computed = String::new(); + let mut state = State::Default; + for char in content.chars() { + match state { + State::Default => handle_default_state(char, &mut computed, &mut state), + State::Backslash => handle_backslash_state(char, &mut computed, &mut state), + State::Unicode { + remaining_chars, + current, + } => handle_unicode_state(char, remaining_chars, current, &mut computed, &mut state), + } + } + + Some(computed) + } +} + +impl Lexer { + pub fn new(str: &str) -> Self { + Self { + scanner: snob::Scanner::new(str), + } + } + + fn goto(&mut self, position: usize) -> String { + self.scanner + .goto(position) + .expect("The position should be valid") + } + + fn simple_token(&self, start: usize, ty: TokenType) -> Token { + Token::new(start, self.scanner.position(), ty) + } + + fn scan_block_comment(&mut self, start: usize) -> Token { + self.scanner.advance_if_starts_with("#|"); + let mut comment = String::new(); + let mut terminated = false; + + while let Some(position) = self.scanner.upto('|') { + comment.push_str(&self.goto(position)); + + if self.scanner.advance_if_starts_with("|#").is_some() { + terminated = true; + break; + } + } + + if !terminated { + comment.push_str(&self.goto(self.scanner.len())); + } + + self.simple_token( + start, + TokenType::BlockComment { + comment: comment.into(), + terminated, + }, + ) + } + + fn scan_string(&mut self, start: usize) -> Token { + let mut content = String::new(); + let mut terminated = false; + + if let Some(position) = self.scanner.any('"') { + self.goto(position); + } + + while let Some(position) = self.scanner.upto("\\\"") { + content.push_str(&self.goto(position)); + + if self.scanner.advance_if_starts_with("\"").is_some() { + terminated = true; + break; + } + + let backslash = self.scanner.advance(1).expect("we found a backslash"); + content.push_str(&backslash); + if let Some(c) = self.scanner.advance(1) { + content.push_str(&c) + } + } + + if !terminated { + content.push_str(&self.goto(self.scanner.len())); + } + + self.simple_token( + start, + TokenType::String { + content: content.into(), + terminated, + }, + ) + } + + fn scan_digit(&mut self) -> Option { + let digit = self.scanner.advance(1)?; + let digit = match digit.as_str() { + "0" => Decimal::from(0), + "1" => Decimal::from(1), + "2" => Decimal::from(2), + "3" => Decimal::from(3), + "4" => Decimal::from(4), + "5" => Decimal::from(5), + "6" => Decimal::from(6), + "7" => Decimal::from(7), + "8" => Decimal::from(8), + "9" => Decimal::from(9), + _ => return None, + }; + + Some(digit) + } + + fn scan_decimal_number(&mut self) -> Decimal { + let mut number = Decimal::ZERO; + while self.scanner.any(csets::AsciiDigits).is_some() { + let digit = self.scan_digit().expect("we saw there's a digit here"); + number = number * Decimal::TEN + digit; + } + + number + } + + fn scan_octal_number(&mut self) -> Decimal { + let mut number = Decimal::ZERO; + while self.scanner.any("01234567").is_some() { + let digit = self.scan_digit().expect("we saw there's a digit here"); + number = number * Decimal::TEN + digit; + } + + number + } + + fn scan_number(&mut self, start: usize) -> Token { + let mut sign = Decimal::ONE; + let mut fraction_numerator = Decimal::ZERO; + let mut fraction_denominator = Decimal::ONE; + let mut exponent = Decimal::ZERO; + let mut octal_exponent = Decimal::ZERO; + + self.scanner.advance_if_starts_with("+"); + if self.scanner.advance_if_starts_with("-").is_some() { + sign = Decimal::NEGATIVE_ONE; + } + + let whole_part = if self.scanner.advance_if_starts_with("0o").is_some() { + self.scan_octal_number() + } else { + self.scan_decimal_number() + }; + + if self.scanner.advance_if_starts_with(".").is_some() { + while self.scanner.any(csets::AsciiDigits).is_some() { + let digit = self.scan_digit().expect("we saw that there's a digit here"); + fraction_numerator = fraction_numerator * Decimal::TEN + digit; + fraction_denominator *= Decimal::TEN; + } + } + + if let Some(position) = self.scanner.any("eE") { + let mut is_negative = false; + + self.goto(position); + self.scanner.advance_if_starts_with("+"); + if self.scanner.advance_if_starts_with("-").is_some() { + is_negative = true; + } + + exponent = self.scan_decimal_number(); + if is_negative { + exponent *= Decimal::NEGATIVE_ONE; + } + } + + if let Some(position) = self.scanner.any("qQ") { + let mut is_negative = false; + + self.goto(position); + self.scanner.advance_if_starts_with("+"); + if self.scanner.advance_if_starts_with("-").is_some() { + is_negative = true; + } + + octal_exponent = self.scan_decimal_number(); + if is_negative { + octal_exponent *= Decimal::NEGATIVE_ONE; + } + } + + let number = sign + * (whole_part + fraction_numerator / fraction_denominator) + * (Decimal::TEN.powd(exponent)) + * (Decimal::from(8).powd(octal_exponent)); + self.simple_token(start, TokenType::Number(number)) + } +} + +impl Iterator for Lexer { + type Item = Token; + + fn next(&mut self) -> Option { + let start = self.scanner.position(); + + if self.scanner.is_at_end() { + return None; + } + + Some(if let Some(whitespace) = self.scanner.many(" \t\r\n\x11") { + let whitespace = self.goto(whitespace); + self.simple_token(start, TokenType::Whitespace(whitespace.into())) + } else if let Some(semicolon) = self.scanner.any(';') { + self.goto(semicolon); + let position = self.scanner.upto('\n').unwrap_or(self.scanner.len()); + let comment = self.goto(position); + self.simple_token(start, TokenType::LineComment(comment.into())) + } else if self.scanner.starts_with("#|").is_some() { + self.scan_block_comment(start) + } else if self.scanner.advance_if_starts_with("(").is_some() { + self.simple_token(start, TokenType::LeftParenthesis) + } else if self.scanner.advance_if_starts_with(")").is_some() { + self.simple_token(start, TokenType::RightParenthesis) + } else if self.scanner.advance_if_starts_with("'").is_some() { + self.simple_token(start, TokenType::Apostrophe) + } else if self.scanner.advance_if_starts_with("#").is_some() { + self.simple_token(start, TokenType::Pound) + } else if self.scanner.advance_if_starts_with(".").is_some() { + self.simple_token(start, TokenType::Dot) + } else if self.scanner.any('"').is_some() { + self.scan_string(start) + } else if self.scanner.any(csets::AsciiDigits).is_some() + || (self.scanner.any("+-").is_some() + && self + .scanner + .char_at(self.scanner.position() + 1) + .is_some_and(|char| char.is_ascii_digit())) + { + self.scan_number(start) + } else { + let position = self + .scanner + .upto(" \t\r\n\x11;#()'#.\"") + .unwrap_or(self.scanner.len()); + let identifier = self.goto(position); + self.simple_token(start, TokenType::Identifier(identifier.into())) + }) + } +} -- cgit v1.2.3