diff options
| author | Micha White <botahamec@outlook.com> | 2023-10-03 18:21:04 -0400 |
|---|---|---|
| committer | Micha White <botahamec@outlook.com> | 2023-10-03 18:21:04 -0400 |
| commit | 9d12d498a91caeb37ec5f7c7b20f5be6d5513119 (patch) | |
| tree | 0f90a5a9f70524f366ba1a5921c745a44db114b0 /pdn | |
| parent | 7f70505a5ed087c718a79edcc910867ce944a308 (diff) | |
First attempt at PDN implementation
Diffstat (limited to 'pdn')
| -rw-r--r-- | pdn/Cargo.toml | 1 | ||||
| -rw-r--r-- | pdn/src/grammar.rs | 443 | ||||
| -rw-r--r-- | pdn/src/lib.rs | 16 | ||||
| -rw-r--r-- | pdn/src/tokens.rs | 284 |
4 files changed, 730 insertions, 14 deletions
diff --git a/pdn/Cargo.toml b/pdn/Cargo.toml index 1ed6d4b..032e20d 100644 --- a/pdn/Cargo.toml +++ b/pdn/Cargo.toml @@ -6,3 +6,4 @@ edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +snob = "0.1" diff --git a/pdn/src/grammar.rs b/pdn/src/grammar.rs new file mode 100644 index 0000000..9529b59 --- /dev/null +++ b/pdn/src/grammar.rs @@ -0,0 +1,443 @@ +use std::{iter::Peekable, sync::Arc}; + +use crate::tokens::{Color, PdnToken, PdnTokenBody, TokenHeader}; + +#[derive(Debug, Clone)] +pub struct PdnFile { + games: Vec<Game>, + game_separators: Vec<TokenHeader>, +} + +#[derive(Debug, Clone)] +pub struct Game { + header: Vec<PdnTag>, + body: Vec<BodyPart>, +} + +#[derive(Debug, Clone)] +pub struct PdnTag { + left_bracket: TokenHeader, + identifier_token: TokenHeader, + string_token: TokenHeader, + right_bracket: TokenHeader, + + identifier: Arc<str>, + string: Arc<str>, +} + +#[derive(Debug, Clone)] +pub enum BodyPart { + Move(GameMove), + Variation(Variation), + Comment(TokenHeader, Arc<str>), + Setup(TokenHeader, Arc<str>), + Nag(TokenHeader, usize), +} + +#[derive(Debug, Clone)] +pub struct Variation { + left_parenthesis: TokenHeader, + body: Vec<BodyPart>, + right_parenthesis: TokenHeader, +} + +#[derive(Debug, Clone)] +pub struct GameMove { + move_number: Option<(TokenHeader, usize, Color)>, + game_move: Move, + move_strength: Option<(TokenHeader, Arc<str>)>, +} + +#[derive(Debug, Clone)] +pub enum Move { + Normal(Square, TokenHeader, Square), + Capture(Square, Vec<(TokenHeader, Square)>), +} + +#[derive(Debug, Clone)] +pub enum Square { + Alpha(TokenHeader, char, char), + Num(TokenHeader, u8), +} + +/// Returns `Ok` if parsed successfully. If there are no tokens left, +/// `Err(None)` is returned. If the next token is not a square position, then +/// `Err(Some(token))` is returned. +fn parse_square(scanner: &mut impl Iterator<Item = PdnToken>) -> Result<Square, Option<PdnToken>> { + let Some(token) = scanner.next() else { + return Err(None); + }; + let header = token.header; + let body = &token.body; + + match *body { + PdnTokenBody::AlphaSquare(letter, number) => Ok(Square::Alpha(header, letter, number)), + PdnTokenBody::NumSquare(number) => Ok(Square::Num(header, number)), + _ => Err(Some(token)), + } +} + +#[derive(Debug, Clone)] +pub enum MoveError { + EndOfFile, + NoStartSquare(Option<PdnToken>), + NoEndSquare(Option<PdnToken>), + InvalidCaptureSquares(Vec<Option<PdnToken>>), + NoMoveSeparator, +} + +fn parse_normal_move( + first_square: Square, + scanner: &mut impl Iterator<Item = PdnToken>, +) -> Result<Move, MoveError> { + let Some(separator) = scanner.next() else { + return Err(MoveError::NoMoveSeparator); + }; + let square = match parse_square(scanner) { + Ok(square) => square, + Err(error) => return Err(MoveError::NoEndSquare(error)), + }; + Ok(Move::Normal(first_square, separator.header, square)) +} + +fn parse_capture_move( + first_square: Square, + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Result<Move, MoveError> { + let mut captures = Vec::new(); + let mut errors = Vec::new(); + + while let Some(token) = scanner.peek() { + if token.body != PdnTokenBody::CaptureSeparator { + break; + } + + let separator = scanner.next().expect("separator should be next"); + match parse_square(scanner) { + Ok(square) => captures.push((separator.header, square)), + Err(error) => errors.push(error), + } + } + + if !errors.is_empty() { + Err(MoveError::InvalidCaptureSquares(errors)) + } else { + Ok(Move::Capture(first_square, captures)) + } +} + +fn parse_move(scanner: &mut Peekable<impl Iterator<Item = PdnToken>>) -> Result<Move, MoveError> { + let square = match parse_square(scanner) { + Ok(square) => square, + Err(error) => return Err(MoveError::NoStartSquare(error)), + }; + + let Some(token) = scanner.peek() else { + return Err(MoveError::NoMoveSeparator); + }; + let body = &token.body; + + match body { + PdnTokenBody::MoveSeparator => parse_normal_move(square, scanner), + PdnTokenBody::CaptureSeparator => parse_capture_move(square, scanner), + _ => Err(MoveError::NoMoveSeparator), + } +} + +#[derive(Debug, Clone)] +pub enum GameMoveError { + EndOfFile, + BadMove(MoveError), +} + +fn whitespace_if_found( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Option<TokenHeader> { + let token = scanner.peek()?; + if let PdnTokenBody::Space(_) = token.body { + Some(scanner.next()?.header) + } else { + None + } +} + +fn parse_game_move( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Result<GameMove, GameMoveError> { + let Some(next_token) = scanner.peek() else { + return Err(GameMoveError::EndOfFile); + }; + + let move_number = match next_token.body { + PdnTokenBody::MoveNumber(number, color) => Some((next_token.header, number, color)), + _ => None, + }; + + if move_number.is_some() { + scanner.next(); + } + + whitespace_if_found(scanner); + + let game_move = parse_move(scanner); + + let move_strength = if let Some(token) = scanner.peek() { + if let PdnTokenBody::MoveStrength(string) = &token.body { + Some((token.header, string.clone())) + } else { + None + } + } else { + None + }; + + if move_strength.is_some() { + scanner.next(); + } + + match game_move { + Ok(game_move) => Ok(GameMove { + move_number, + game_move, + move_strength, + }), + Err(error) => Err(GameMoveError::BadMove(error)), + } +} + +#[derive(Debug, Clone)] +pub enum VariationError { + UnexpectedEnd(BodyError), + BadBody(BodyError), +} + +fn parse_variation( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Result<Variation, VariationError> { + let left_parenthesis = scanner.next().expect("should start with left paren").header; + let body = parse_body_until(scanner, PdnTokenBody::RightParenthesis)?; + let right_parenthesis = scanner.next().expect("should end with right paren").header; + + Ok(Variation { + left_parenthesis, + body, + right_parenthesis, + }) +} + +#[derive(Debug, Clone)] +pub enum BodyPartError { + EndOfFile, + InvalidToken(PdnToken), + BadMove(GameMoveError), + BadVariation(VariationError), +} + +fn parse_body_part( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Result<BodyPart, BodyPartError> { + let Some(token) = scanner.peek() else { + return Err(BodyPartError::EndOfFile); + }; + + match &token.body { + PdnTokenBody::MoveNumber(..) + | PdnTokenBody::AlphaSquare(..) + | PdnTokenBody::NumSquare(..) => match parse_game_move(scanner) { + Ok(mov) => Ok(BodyPart::Move(mov)), + Err(error) => Err(BodyPartError::BadMove(error)), + }, + PdnTokenBody::LeftParenthesis => match parse_variation(scanner) { + Ok(variation) => Ok(BodyPart::Variation(variation)), + Err(error) => Err(BodyPartError::BadVariation(error)), + }, + PdnTokenBody::Comment(string) => Ok(BodyPart::Comment(token.header, string.clone())), + PdnTokenBody::Setup(string) => Ok(BodyPart::Setup(token.header, string.clone())), + PdnTokenBody::Nag(number) => Ok(BodyPart::Nag(token.header, *number)), + _ => Err(BodyPartError::InvalidToken(token.clone())), + } +} + +pub type BodyError = Vec<Result<BodyPart, BodyPartError>>; + +fn parse_body_until( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, + until: PdnTokenBody, +) -> Result<Vec<BodyPart>, VariationError> { + let mut parts = Vec::new(); + + loop { + whitespace_if_found(scanner); + + let Some(token) = scanner.peek() else { + return Err(VariationError::UnexpectedEnd(parts)); + }; + + if token.body == until { + break; + } + + parts.push(parse_body_part(scanner)); + whitespace_if_found(scanner); + } + + if parts.iter().any(|r| r.is_err()) { + Err(VariationError::BadBody(parts)) + } else { + Ok(parts.iter().map(|r| r.as_ref().cloned().unwrap()).collect()) + } +} + +#[derive(Debug, Clone)] +pub enum PdnTagError { + EndOfFile, + NoStartBracket(PdnToken), + Unterminated(Vec<PdnToken>), + NoIdentifier, + NoString, + NoEndBracket, +} + +fn parse_pdn_tag( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Result<PdnTag, PdnTagError> { + whitespace_if_found(scanner); + + let Some(left_bracket) = scanner.next() else { + return Err(PdnTagError::EndOfFile); + }; + + if left_bracket.body != PdnTokenBody::LeftBracket { + return Err(PdnTagError::NoStartBracket(left_bracket)); + } + + whitespace_if_found(scanner); + + let Some(identifier_token) = scanner.next() else { + return Err(PdnTagError::Unterminated(vec![left_bracket])); + }; + + let PdnTokenBody::Identifier(identifier) = &identifier_token.body else { + return Err(PdnTagError::NoIdentifier); + }; + + whitespace_if_found(scanner); + + let Some(value_token) = scanner.next() else { + return Err(PdnTagError::Unterminated(vec![ + left_bracket, + identifier_token, + ])); + }; + + let PdnTokenBody::String(value) = &value_token.body else { + return Err(PdnTagError::NoIdentifier); + }; + + whitespace_if_found(scanner); + + let Some(right_bracket) = scanner.next() else { + return Err(PdnTagError::Unterminated(vec![ + left_bracket, + identifier_token, + value_token, + ])); + }; + + if right_bracket.body != PdnTokenBody::RightBracket { + return Err(PdnTagError::NoEndBracket); + } + + whitespace_if_found(scanner); + + Ok(PdnTag { + left_bracket: left_bracket.header, + identifier_token: identifier_token.header, + string_token: value_token.header, + right_bracket: right_bracket.header, + identifier: identifier.clone(), + string: value.clone(), + }) +} + +pub type HeaderError = Vec<Result<PdnTag, PdnTagError>>; + +fn parse_header( + scanner: &mut Peekable<impl Iterator<Item = PdnToken>>, +) -> Result<Vec<PdnTag>, HeaderError> { + let mut tags = Vec::new(); + + loop { + let Some(token) = scanner.peek() else { + break; + }; + + if token.body != PdnTokenBody::LeftBracket { + break; + } + + tags.push(parse_pdn_tag(scanner)); + } + + if tags.iter().any(|r| r.is_err()) { + Err(tags) + } else { + Ok(tags.iter().map(|r| r.as_ref().cloned().unwrap()).collect()) + } +} + +#[derive(Debug, Clone)] +pub struct GameError { + header: Result<Vec<PdnTag>, HeaderError>, + body: Result<Vec<BodyPart>, VariationError>, +} + +fn parse_game(scanner: &mut Peekable<impl Iterator<Item = PdnToken>>) -> Result<Game, GameError> { + let header = parse_header(scanner); + let body = parse_body_until(scanner, PdnTokenBody::Asterisk); + whitespace_if_found(scanner); + + if let Ok(header) = header { + if let Ok(body) = body { + Ok(Game { header, body }) + } else { + Err(GameError { + header: Ok(header), + body, + }) + } + } else { + Err(GameError { header, body }) + } +} + +pub type PdnError = Vec<Result<Game, GameError>>; + +fn parse(scanner: &mut impl Iterator<Item = PdnToken>) -> Result<PdnFile, PdnError> { + let mut scanner = scanner.peekable(); + let mut games = Vec::new(); + let mut game_separators = Vec::new(); + + loop { + let Some(token) = scanner.peek() else { + break; + }; + + if token.body != PdnTokenBody::LeftBracket { + break; + } + + games.push(parse_game(&mut scanner)); + game_separators.push(scanner.next().unwrap().header); + } + + if games.iter().any(|r| r.is_err()) { + Err(games) + } else { + let games = games.iter().map(|r| r.as_ref().cloned().unwrap()).collect(); + Ok(PdnFile { + games, + game_separators, + }) + } +} diff --git a/pdn/src/lib.rs b/pdn/src/lib.rs index 06d268d..099a9d0 100644 --- a/pdn/src/lib.rs +++ b/pdn/src/lib.rs @@ -1,14 +1,2 @@ -pub fn add(left: usize, right: usize) -> usize { - left + right -} - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn it_works() { - let result = add(2, 2); - assert_eq!(result, 4); - } -} +mod grammar; +mod tokens; diff --git a/pdn/src/tokens.rs b/pdn/src/tokens.rs new file mode 100644 index 0000000..d37d910 --- /dev/null +++ b/pdn/src/tokens.rs @@ -0,0 +1,284 @@ +use std::sync::Arc; + +use snob::{csets, csets::CharacterSet, Scanner}; + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub enum Color { + White, + Black, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum PdnTokenBody { + MoveNumber(usize, Color), + MoveSeparator, + CaptureSeparator, + AlphaSquare(char, char), + NumSquare(u8), + MoveStrength(Arc<str>), + Nag(usize), + LeftParenthesis, + RightParenthesis, + LeftBracket, + RightBracket, + Asterisk, + Setup(Arc<str>), + String(Arc<str>), + Comment(Arc<str>), + Identifier(Arc<str>), + Space(Arc<str>), +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] +pub struct TokenHeader { + start: usize, + len: usize, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct PdnToken { + pub header: TokenHeader, + pub body: PdnTokenBody, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum TokenErrorType { + InvalidNumber(usize), + InvalidNag, + InvalidSquare, + UnterminatedSetup, + UnterminatedComment, + UnterminatedString, + InvalidToken, +} + +pub struct TokenError { + header: TokenHeader, + ty: TokenErrorType, +} + +pub struct PdnScanner { + scanner: Scanner, +} + +impl PdnScanner { + fn scan_string(&mut self) -> Option<String> { + let mut string = String::new(); + loop { + if let Some(position) = self.scanner.many("\\\"".complement()) { + let part = self + .scanner + .goto(position) + .expect("position should be valid"); + string.push_str(&part); + } else if let Some(position) = self.scanner.starts_with("\\\"") { + self.scanner.goto(position); + string.push('"'); + } else { + break; + } + } + + if let Some(position) = self.scanner.any('"') { + self.scanner.goto(position); + Some(string) + } else { + None + } + } + + fn scan_unescaped_string(&mut self, terminator: char) -> Option<String> { + let position = self.scanner.upto(terminator)?; + let string = self + .scanner + .goto(position) + .expect("position should be valid"); + let position = self + .scanner + .any(terminator) + .expect("there should be a terminator next"); + self.scanner.goto(position); + Some(string) + } + + fn scan_number(&mut self) -> Option<usize> { + let position = self.scanner.many(csets::AsciiDigits)?; + let number = self + .scanner + .goto(position) + .expect("position should be valid"); + let number: usize = number.parse().expect("should be a valid number"); + Some(number) + } + + fn scan_identifier(&mut self) -> Option<String> { + let position = self + .scanner + .many(csets::AsciiLetters.union(csets::AsciiDigits).union('_'))?; + let identifier = self + .scanner + .goto(position) + .expect("position should be valid"); + Some(identifier) + } + + fn next_token(&mut self) -> Option<Result<PdnTokenBody, TokenErrorType>> { + if self.scanner.is_at_end() { + return None; + } + + let token = if let Some(position) = self.scanner.any('-') { + self.scanner.goto(position); + Ok(PdnTokenBody::MoveSeparator) + } else if let Some(position) = self.scanner.any('x') { + self.scanner.goto(position); + Ok(PdnTokenBody::CaptureSeparator) + } else if let Some(position) = self.scanner.any('(') { + self.scanner.goto(position); + + // try a move strength token + if let Some(position) = self.scanner.many("?!") { + let char = self + .scanner + .char_at(position) + .expect("position should be valid"); + if char == ')' { + let strength = self + .scanner + .goto(position) + .expect("position should be valid"); + let position = self + .scanner + .any(')') + .expect("move strength should terminate"); + self.scanner.goto(position); + return Some(Ok(PdnTokenBody::MoveStrength(strength.into()))); + } + } + + Ok(PdnTokenBody::LeftParenthesis) + } else if let Some(position) = self.scanner.any(')') { + self.scanner.goto(position); + Ok(PdnTokenBody::RightParenthesis) + } else if let Some(position) = self.scanner.any('[') { + self.scanner.goto(position); + Ok(PdnTokenBody::LeftBracket) + } else if let Some(position) = self.scanner.any(']') { + self.scanner.goto(position); + Ok(PdnTokenBody::RightBracket) + } else if let Some(position) = self.scanner.any('*') { + self.scanner.goto(position); + Ok(PdnTokenBody::Asterisk) + } else if let Some(position) = self.scanner.any('$') { + self.scanner.goto(position); + match self.scan_number() { + Some(number) => Ok(PdnTokenBody::Nag(number)), + None => Err(TokenErrorType::InvalidNag), + } + } else if let Some(position) = self.scanner.any('/') { + self.scanner.goto(position); + match self.scan_unescaped_string('/') { + Some(string) => Ok(PdnTokenBody::Setup(string.into())), + None => Err(TokenErrorType::UnterminatedSetup), + } + } else if let Some(position) = self.scanner.any('{') { + self.scanner.goto(position); + match self.scan_unescaped_string('}') { + Some(string) => Ok(PdnTokenBody::Comment(string.into())), + None => Err(TokenErrorType::UnterminatedComment), + } + } else if let Some(position) = self.scanner.any('"') { + self.scanner.goto(position); + match self.scan_string() { + Some(string) => Ok(PdnTokenBody::String(string.into())), + None => Err(TokenErrorType::UnterminatedString), + } + } else if let Some(position) = self.scanner.many("?!") { + let strength = self + .scanner + .goto(position) + .expect("position should be valid"); + Ok(PdnTokenBody::MoveStrength(strength.into())) + } else if let Some(position) = self.scanner.any("abcdefgh") { + let letter = self + .scanner + .goto(position) + .expect("position should be valid") + .chars() + .next() + .expect("should contain one letter"); + if let Some(position) = self.scanner.any("12345678") { + let number = self + .scanner + .goto(position) + .expect("position should be valid") + .chars() + .next() + .expect("should contain one letter"); + Ok(PdnTokenBody::AlphaSquare(letter, number)) + } else { + self.scanner.advance(1); // skip over second character + Err(TokenErrorType::InvalidSquare) + } + } else if self.scanner.any(csets::AsciiUppercase).is_some() { + let identifier = self + .scan_identifier() + .expect("should be a valid identifier"); + Ok(PdnTokenBody::Identifier(identifier.into())) + } else if self.scanner.any(csets::AsciiDigits).is_some() { + let number = self.scan_number().expect("should be a valid number"); + if let Some(position) = self.scanner.starts_with("...") { + self.scanner.goto(position); + Ok(PdnTokenBody::MoveNumber(number, Color::Black)) + } else if let Some(position) = self.scanner.any('.') { + self.scanner.goto(position); + Ok(PdnTokenBody::MoveNumber(number, Color::White)) + } else if number < 100 { + Ok(PdnTokenBody::NumSquare(number as u8)) + } else { + Err(TokenErrorType::InvalidNumber(number)) + } + } else if let Some(position) = self.scanner.many(csets::AsciiWhitespace) { + let whitespace = self + .scanner + .goto(position) + .expect("position should be valid"); + Ok(PdnTokenBody::Space(whitespace.into())) + } else { + let position = self + .scanner + .upto(csets::AsciiLetters.union(csets::AsciiDigits.union("-x(?!)[]"))) + .unwrap_or_else(|| self.scanner.len()); + + self.scanner + .goto(position) + .expect("position should be valid"); + + Err(TokenErrorType::InvalidToken) + }; + + Some(token) + } +} + +impl Iterator for PdnScanner { + type Item = Result<PdnToken, TokenError>; + + fn next(&mut self) -> Option<Self::Item> { + let start = self.scanner.position(); + let token = self.next_token()?; + let end = self.scanner.position(); + let len = end - start; + let header = TokenHeader { start, len }; + + let token = match token { + Ok(token) => Ok(PdnToken { + header, + body: token, + }), + Err(error) => Err(TokenError { header, ty: error }), + }; + + Some(token) + } +} |
