summaryrefslogtreecommitdiff
path: root/pdn/src
diff options
context:
space:
mode:
author Micha White <botahamec@outlook.com> 2023-10-03 18:21:04 -0400
committer Micha White <botahamec@outlook.com> 2023-10-03 18:21:04 -0400
commit 9d12d498a91caeb37ec5f7c7b20f5be6d5513119 (patch)
tree 0f90a5a9f70524f366ba1a5921c745a44db114b0 /pdn/src
parent 7f70505a5ed087c718a79edcc910867ce944a308 (diff)
First attempt at PDN implementation
Diffstat (limited to 'pdn/src')
-rw-r--r-- pdn/src/grammar.rs 443
-rw-r--r-- pdn/src/lib.rs 16
-rw-r--r-- pdn/src/tokens.rs 284
3 files changed, 729 insertions, 14 deletions
diff --git a/pdn/src/grammar.rs b/pdn/src/grammar.rs
new file mode 100644
index 0000000..9529b59
--- /dev/null
+++ b/pdn/src/grammar.rs
@@ -0,0 +1,443 @@
+use std::{iter::Peekable, sync::Arc};
+
+use crate::tokens::{Color, PdnToken, PdnTokenBody, TokenHeader};
+
+/// A whole parsed PDN file, as produced by `parse`.
+#[derive(Debug, Clone)]
+pub struct PdnFile {
+ /// The games, in the order they were parsed.
+ games: Vec<Game>,
+ /// Header of the token that `parse` consumed after each game.
+ game_separators: Vec<TokenHeader>,
+}
+
+/// One game: a tag section followed by the move text.
+#[derive(Debug, Clone)]
+pub struct Game {
+ /// Tags collected by `parse_header`.
+ header: Vec<PdnTag>,
+ /// Body parts collected by `parse_body_until` up to the asterisk token.
+ body: Vec<BodyPart>,
+}
+
+/// A single header tag, built by `parse_pdn_tag` from the token sequence
+/// left bracket, identifier, string, right bracket.
+#[derive(Debug, Clone)]
+pub struct PdnTag {
+ /// Header of the left-bracket token.
+ left_bracket: TokenHeader,
+ /// Header of the identifier token.
+ identifier_token: TokenHeader,
+ /// Header of the string (value) token.
+ string_token: TokenHeader,
+ /// Header of the right-bracket token.
+ right_bracket: TokenHeader,
+
+ /// Text carried by the identifier token.
+ identifier: Arc<str>,
+ /// Text carried by the string token.
+ string: Arc<str>,
+}
+
+/// One element of a game body or of a variation body.
+#[derive(Debug, Clone)]
+pub enum BodyPart {
+ /// A move, with its optional move number and strength annotation.
+ Move(GameMove),
+ /// A parenthesised nested body.
+ Variation(Variation),
+ /// A comment token: its header and its text.
+ Comment(TokenHeader, Arc<str>),
+ /// A setup token: its header and its text.
+ Setup(TokenHeader, Arc<str>),
+ /// A NAG token: its header and its number.
+ Nag(TokenHeader, usize),
+}
+
+/// A parenthesised sequence of body parts, built by `parse_variation`.
+#[derive(Debug, Clone)]
+pub struct Variation {
+ /// Header of the opening parenthesis token.
+ left_parenthesis: TokenHeader,
+ /// The parts found between the parentheses.
+ body: Vec<BodyPart>,
+ /// Header of the closing parenthesis token.
+ right_parenthesis: TokenHeader,
+}
+
+/// A move together with the optional tokens that surround it, built by
+/// `parse_game_move`.
+#[derive(Debug, Clone)]
+pub struct GameMove {
+ /// Header, number and color of a leading move-number token, if present.
+ move_number: Option<(TokenHeader, usize, Color)>,
+ /// The move itself.
+ game_move: Move,
+ /// Header and text of a trailing move-strength token, if present.
+ move_strength: Option<(TokenHeader, Arc<str>)>,
+}
+
+/// The squares of a move and the separator tokens between them.
+#[derive(Debug, Clone)]
+pub enum Move {
+ /// Start square, header of the move-separator token, end square.
+ Normal(Square, TokenHeader, Square),
+ /// First square, then one `(capture-separator header, square)` pair for
+ /// every square that follows it.
+ Capture(Square, Vec<(TokenHeader, Square)>),
+}
+
+/// A board square as written in the source, built by `parse_square`.
+#[derive(Debug, Clone)]
+pub enum Square {
+ /// Token header plus the two characters of an `AlphaSquare` token.
+ Alpha(TokenHeader, char, char),
+ /// Token header plus the number of a `NumSquare` token.
+ Num(TokenHeader, u8),
+}
+
+/// Consumes one token and interprets it as a board square.
+///
+/// # Errors
+///
+/// * `Err(None)` - the scanner had no tokens left.
+/// * `Err(Some(token))` - the consumed token was not a square; the token is
+///   handed back to the caller inside the error.
+fn parse_square(scanner: &mut impl Iterator<Item = PdnToken>) -> Result<Square, Option<PdnToken>> {
+    let token = match scanner.next() {
+        Some(token) => token,
+        None => return Err(None),
+    };
+
+    match token.body {
+        PdnTokenBody::AlphaSquare(letter, number) => {
+            Ok(Square::Alpha(token.header, letter, number))
+        }
+        PdnTokenBody::NumSquare(number) => Ok(Square::Num(token.header, number)),
+        _ => Err(Some(token)),
+    }
+}
+
+/// Reasons a move could not be parsed.
+#[derive(Debug, Clone)]
+pub enum MoveError {
+ /// Input ended. NOTE(review): not constructed anywhere in this file.
+ EndOfFile,
+ /// The first square failed to parse; carries `parse_square`'s error.
+ NoStartSquare(Option<PdnToken>),
+ /// The square after a move separator failed to parse; carries
+ /// `parse_square`'s error.
+ NoEndSquare(Option<PdnToken>),
+ /// One entry per square that failed to parse after a capture separator.
+ InvalidCaptureSquares(Vec<Option<PdnToken>>),
+ /// The first square was not followed by a move or capture separator.
+ NoMoveSeparator,
+}
+
+/// Finishes a non-capturing move whose start square is already parsed.
+///
+/// The next token is consumed as the separator without inspecting its body
+/// (the caller is expected to have peeked it), then the end square is parsed.
+///
+/// # Errors
+///
+/// * `MoveError::NoMoveSeparator` - there was no token to use as separator.
+/// * `MoveError::NoEndSquare` - the end square did not parse.
+fn parse_normal_move(
+    first_square: Square,
+    scanner: &mut impl Iterator<Item = PdnToken>,
+) -> Result<Move, MoveError> {
+    let separator = scanner.next().ok_or(MoveError::NoMoveSeparator)?;
+    let end_square = parse_square(scanner).map_err(MoveError::NoEndSquare)?;
+
+    Ok(Move::Normal(first_square, separator.header, end_square))
+}
+
+/// Finishes a capturing move whose first square is already parsed.
+///
+/// Repeatedly consumes a capture separator followed by a square for as long
+/// as the next token is a capture separator. A square that fails to parse
+/// does not stop the loop; its error is recorded and parsing carries on.
+///
+/// # Errors
+///
+/// `MoveError::InvalidCaptureSquares` with every recorded square error when
+/// at least one square failed to parse.
+fn parse_capture_move(
+    first_square: Square,
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Result<Move, MoveError> {
+    let mut jumps = Vec::new();
+    let mut failures = Vec::new();
+
+    while let Some(separator) =
+        scanner.next_if(|token| token.body == PdnTokenBody::CaptureSeparator)
+    {
+        match parse_square(scanner) {
+            Ok(square) => jumps.push((separator.header, square)),
+            Err(error) => failures.push(error),
+        }
+    }
+
+    if failures.is_empty() {
+        Ok(Move::Capture(first_square, jumps))
+    } else {
+        Err(MoveError::InvalidCaptureSquares(failures))
+    }
+}
+
+/// Parses one move: a start square followed by either a normal-move tail or
+/// a capture tail, chosen by peeking at the token after the square.
+///
+/// # Errors
+///
+/// * `MoveError::NoStartSquare` - the first token was not a square.
+/// * `MoveError::NoMoveSeparator` - the square was followed by nothing, or by
+///   a token that is neither separator (that token is left unconsumed).
+/// * Whatever `parse_normal_move` / `parse_capture_move` report.
+fn parse_move(scanner: &mut Peekable<impl Iterator<Item = PdnToken>>) -> Result<Move, MoveError> {
+    let start = parse_square(scanner).map_err(MoveError::NoStartSquare)?;
+
+    // Decide on the tail first so the peek borrow has ended before the
+    // scanner is handed on.
+    let is_capture = match scanner.peek() {
+        Some(PdnToken {
+            body: PdnTokenBody::MoveSeparator,
+            ..
+        }) => false,
+        Some(PdnToken {
+            body: PdnTokenBody::CaptureSeparator,
+            ..
+        }) => true,
+        _ => return Err(MoveError::NoMoveSeparator),
+    };
+
+    if is_capture {
+        parse_capture_move(start, scanner)
+    } else {
+        parse_normal_move(start, scanner)
+    }
+}
+
+/// Reasons `parse_game_move` can fail.
+#[derive(Debug, Clone)]
+pub enum GameMoveError {
+ /// There were no tokens left when a move was expected.
+ EndOfFile,
+ /// The move itself did not parse.
+ BadMove(MoveError),
+}
+
+/// Consumes the next token if, and only if, it is a `Space` token.
+///
+/// Returns the header of the consumed token, or `None` when the scanner is
+/// exhausted or the next token is something else (which is left in place).
+fn whitespace_if_found(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Option<TokenHeader> {
+    scanner
+        .next_if(|token| matches!(token.body, PdnTokenBody::Space(_)))
+        .map(|space| space.header)
+}
+
+/// Parses a move together with its optional surroundings:
+/// `[move number] [whitespace] move [move strength]`.
+///
+/// A trailing move-strength token is consumed even when the move itself
+/// failed to parse.
+///
+/// # Errors
+///
+/// * `GameMoveError::EndOfFile` - no tokens were left on entry.
+/// * `GameMoveError::BadMove` - `parse_move` failed.
+fn parse_game_move(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Result<GameMove, GameMoveError> {
+    let move_number = match scanner.peek() {
+        None => return Err(GameMoveError::EndOfFile),
+        Some(PdnToken {
+            header,
+            body: PdnTokenBody::MoveNumber(number, color),
+        }) => Some((*header, *number, *color)),
+        Some(_) => None,
+    };
+    if move_number.is_some() {
+        // The number was only peeked; step past it.
+        scanner.next();
+    }
+
+    whitespace_if_found(scanner);
+
+    let parsed_move = parse_move(scanner);
+
+    let move_strength = match scanner.peek() {
+        Some(PdnToken {
+            header,
+            body: PdnTokenBody::MoveStrength(text),
+        }) => Some((*header, text.clone())),
+        _ => None,
+    };
+    if move_strength.is_some() {
+        // Likewise only peeked so far.
+        scanner.next();
+    }
+
+    parsed_move
+        .map(|game_move| GameMove {
+            move_number,
+            game_move,
+            move_strength,
+        })
+        .map_err(GameMoveError::BadMove)
+}
+
+/// Reasons `parse_body_until` (and therefore `parse_variation`) can fail.
+#[derive(Debug, Clone)]
+pub enum VariationError {
+ /// Tokens ran out before the terminating token was seen; carries every
+ /// part result collected up to that point.
+ UnexpectedEnd(BodyError),
+ /// The terminator was reached but at least one part failed; carries every
+ /// part result, successful or not.
+ BadBody(BodyError),
+}
+
+/// Parses a parenthesised variation: `(` body `)`.
+///
+/// The closing parenthesis is consumed whenever it was found, including when
+/// the body contained errors, so that the enclosing parser resumes *after*
+/// the variation instead of tripping over a stray `)`.
+///
+/// # Errors
+///
+/// * `VariationError::UnexpectedEnd` - input ended before the `)`.
+/// * `VariationError::BadBody` - the `)` was found (and consumed) but some
+///   body part failed to parse.
+///
+/// # Panics
+///
+/// Panics if the scanner is empty on entry; the caller must have peeked the
+/// left parenthesis.
+fn parse_variation(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Result<Variation, VariationError> {
+    let left_parenthesis = scanner.next().expect("should start with left paren").header;
+
+    let body = match parse_body_until(scanner, PdnTokenBody::RightParenthesis) {
+        // Nothing is left to consume, so report straight away.
+        Err(error @ VariationError::UnexpectedEnd(_)) => return Err(error),
+        other => other,
+    };
+
+    // `parse_body_until` only yields `Ok` or `BadBody` after peeking the
+    // terminator, so the right parenthesis is guaranteed to be next.
+    let right_parenthesis = scanner.next().expect("should end with right paren").header;
+
+    Ok(Variation {
+        left_parenthesis,
+        body: body?,
+        right_parenthesis,
+    })
+}
+
+/// Reasons `parse_body_part` can fail.
+#[derive(Debug, Clone)]
+pub enum BodyPartError {
+ /// There were no tokens left.
+ EndOfFile,
+ /// The next token cannot start a body part; carries a copy of it.
+ InvalidToken(PdnToken),
+ /// A move was started but did not parse.
+ BadMove(GameMoveError),
+ /// A variation was started but did not parse.
+ BadVariation(VariationError),
+}
+
+/// Parses a single body part, dispatching on the next token:
+///
+/// * move number or square - a move (`parse_game_move`),
+/// * left parenthesis - a variation (`parse_variation`),
+/// * comment, setup or NAG token - the corresponding single-token part.
+///
+/// Every path consumes at least one token, so a caller looping over this
+/// function always makes progress.
+///
+/// # Errors
+///
+/// * `BodyPartError::EndOfFile` - no tokens were left.
+/// * `BodyPartError::InvalidToken` - the next token cannot start a body part;
+///   it is consumed and returned inside the error.
+/// * `BodyPartError::BadMove` / `BodyPartError::BadVariation` - the nested
+///   parser failed.
+fn parse_body_part(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Result<BodyPart, BodyPartError> {
+    let Some(token) = scanner.peek() else {
+        return Err(BodyPartError::EndOfFile);
+    };
+
+    let part = match &token.body {
+        // Multi-token parts: the nested parsers consume their own tokens.
+        PdnTokenBody::MoveNumber(..)
+        | PdnTokenBody::AlphaSquare(..)
+        | PdnTokenBody::NumSquare(..) => {
+            return parse_game_move(scanner)
+                .map(BodyPart::Move)
+                .map_err(BodyPartError::BadMove);
+        }
+        PdnTokenBody::LeftParenthesis => {
+            return parse_variation(scanner)
+                .map(BodyPart::Variation)
+                .map_err(BodyPartError::BadVariation);
+        }
+        // Single-token parts: built from the peeked token, consumed below.
+        PdnTokenBody::Comment(string) => Ok(BodyPart::Comment(token.header, string.clone())),
+        PdnTokenBody::Setup(string) => Ok(BodyPart::Setup(token.header, string.clone())),
+        PdnTokenBody::Nag(number) => Ok(BodyPart::Nag(token.header, *number)),
+        _ => Err(BodyPartError::InvalidToken(token.clone())),
+    };
+
+    // The token was only peeked. Without stepping past it the caller's loop
+    // would see the very same token again and never terminate.
+    scanner.next();
+
+    part
+}
+
+/// Per-part outcomes of a body parse, in source order; used as the payload of
+/// `VariationError`.
+pub type BodyError = Vec<Result<BodyPart, BodyPartError>>;
+
+/// Parses body parts until the next token's body equals `until`.
+///
+/// The terminating token is left unconsumed. `Space` tokens before and after
+/// each part are skipped. A failing part does not stop the loop.
+///
+/// # Errors
+///
+/// * `VariationError::UnexpectedEnd` - tokens ran out before `until` was
+///   seen; carries the results gathered so far.
+/// * `VariationError::BadBody` - `until` was reached but at least one part
+///   failed; carries every result.
+fn parse_body_until(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+    until: PdnTokenBody,
+) -> Result<Vec<BodyPart>, VariationError> {
+    let mut results = Vec::new();
+
+    loop {
+        whitespace_if_found(scanner);
+
+        match scanner.peek() {
+            None => return Err(VariationError::UnexpectedEnd(results)),
+            Some(token) if token.body == until => break,
+            Some(_) => {}
+        }
+
+        results.push(parse_body_part(scanner));
+        whitespace_if_found(scanner);
+    }
+
+    if results.iter().any(|result| result.is_err()) {
+        return Err(VariationError::BadBody(results));
+    }
+
+    // Every entry is `Ok` at this point, so nothing is dropped by the filter.
+    Ok(results.into_iter().filter_map(Result::ok).collect())
+}
+
+/// Reasons a header tag can fail to parse.
+#[derive(Debug, Clone)]
+pub enum PdnTagError {
+ /// There were no tokens left where the tag should start.
+ EndOfFile,
+ /// The first token was not a left bracket; carries that token.
+ NoStartBracket(PdnToken),
+ /// Tokens ran out part-way through the tag; carries the tokens of the tag
+ /// consumed so far.
+ Unterminated(Vec<PdnToken>),
+ /// The token after the left bracket was not an identifier.
+ NoIdentifier,
+ /// The token after the identifier was not a string.
+ NoString,
+ /// The token after the string was not a right bracket.
+ NoEndBracket,
+}
+
+/// Parses one header tag: `[` identifier string `]`.
+///
+/// `Space` tokens are skipped before every element and after the closing
+/// bracket.
+///
+/// # Errors
+///
+/// * `PdnTagError::EndOfFile` - no tokens were left on entry.
+/// * `PdnTagError::NoStartBracket` - the first token was not `[`.
+/// * `PdnTagError::Unterminated` - tokens ran out inside the tag; carries the
+///   tokens consumed so far.
+/// * `PdnTagError::NoIdentifier` - the token after `[` was not an identifier.
+/// * `PdnTagError::NoString` - the token after the identifier was not a
+///   string.
+/// * `PdnTagError::NoEndBracket` - the token after the string was not `]`.
+///
+/// In the last three cases the offending token has already been consumed.
+fn parse_pdn_tag(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Result<PdnTag, PdnTagError> {
+    whitespace_if_found(scanner);
+
+    let Some(left_bracket) = scanner.next() else {
+        return Err(PdnTagError::EndOfFile);
+    };
+
+    if left_bracket.body != PdnTokenBody::LeftBracket {
+        return Err(PdnTagError::NoStartBracket(left_bracket));
+    }
+
+    whitespace_if_found(scanner);
+
+    let Some(identifier_token) = scanner.next() else {
+        return Err(PdnTagError::Unterminated(vec![left_bracket]));
+    };
+
+    let PdnTokenBody::Identifier(identifier) = &identifier_token.body else {
+        return Err(PdnTagError::NoIdentifier);
+    };
+
+    whitespace_if_found(scanner);
+
+    let Some(value_token) = scanner.next() else {
+        return Err(PdnTagError::Unterminated(vec![
+            left_bracket,
+            identifier_token,
+        ]));
+    };
+
+    // A missing value is a string problem, not an identifier problem.
+    let PdnTokenBody::String(value) = &value_token.body else {
+        return Err(PdnTagError::NoString);
+    };
+
+    whitespace_if_found(scanner);
+
+    let Some(right_bracket) = scanner.next() else {
+        return Err(PdnTagError::Unterminated(vec![
+            left_bracket,
+            identifier_token,
+            value_token,
+        ]));
+    };
+
+    if right_bracket.body != PdnTokenBody::RightBracket {
+        return Err(PdnTagError::NoEndBracket);
+    }
+
+    whitespace_if_found(scanner);
+
+    Ok(PdnTag {
+        left_bracket: left_bracket.header,
+        identifier_token: identifier_token.header,
+        string_token: value_token.header,
+        right_bracket: right_bracket.header,
+        identifier: identifier.clone(),
+        string: value.clone(),
+    })
+}
+
+/// Per-tag outcomes of a header parse, in source order; returned by
+/// `parse_header` when at least one tag failed.
+pub type HeaderError = Vec<Result<PdnTag, PdnTagError>>;
+
+/// Parses consecutive header tags for as long as the next token is a left
+/// bracket.
+///
+/// A failing tag does not stop the loop; parsing resumes with whatever token
+/// follows the point of failure.
+///
+/// # Errors
+///
+/// Returns every tag result (successes included) when at least one tag
+/// failed to parse.
+fn parse_header(
+    scanner: &mut Peekable<impl Iterator<Item = PdnToken>>,
+) -> Result<Vec<PdnTag>, HeaderError> {
+    let mut results = Vec::new();
+
+    while matches!(
+        scanner.peek(),
+        Some(token) if token.body == PdnTokenBody::LeftBracket
+    ) {
+        results.push(parse_pdn_tag(scanner));
+    }
+
+    if results.iter().any(|result| result.is_err()) {
+        return Err(results);
+    }
+
+    // Every entry is `Ok` at this point, so nothing is dropped by the filter.
+    Ok(results.into_iter().filter_map(Result::ok).collect())
+}
+
+/// Outcome of a game whose header, body, or both failed to parse.
+///
+/// Both halves are kept so the successfully parsed half is not lost; when
+/// produced by `parse_game` at least one of the two is `Err`.
+#[derive(Debug, Clone)]
+pub struct GameError {
+ /// Result of `parse_header`.
+ header: Result<Vec<PdnTag>, HeaderError>,
+ /// Result of `parse_body_until` for the game body.
+ body: Result<Vec<BodyPart>, VariationError>,
+}
+
+/// Parses one game: the header tags followed by the body up to (but not
+/// including) the asterisk token.
+///
+/// The body is parsed even when the header failed, so both sets of errors
+/// are reported together.
+///
+/// # Errors
+///
+/// `GameError` holding both results when either half failed.
+fn parse_game(scanner: &mut Peekable<impl Iterator<Item = PdnToken>>) -> Result<Game, GameError> {
+    let header = parse_header(scanner);
+    let body = parse_body_until(scanner, PdnTokenBody::Asterisk);
+    whitespace_if_found(scanner);
+
+    match (header, body) {
+        (Ok(header), Ok(body)) => Ok(Game { header, body }),
+        (header, body) => Err(GameError { header, body }),
+    }
+}
+
+/// Per-game outcomes of a file parse, in source order; returned by `parse`
+/// when at least one game failed.
+pub type PdnError = Vec<Result<Game, GameError>>;
+
+/// Parses a token stream into a `PdnFile`.
+///
+/// Games are read for as long as the next non-whitespace token is a left
+/// bracket. After each game the following token is consumed as the game
+/// separator and its header recorded.
+///
+/// If the input ends right after a game (no separator token), parsing stops
+/// without recording a separator for that game, so `game_separators` may be
+/// one entry shorter than `games`.
+///
+/// # Errors
+///
+/// Returns every game result (successes included) when at least one game
+/// failed to parse.
+fn parse(scanner: &mut impl Iterator<Item = PdnToken>) -> Result<PdnFile, PdnError> {
+    let mut scanner = scanner.peekable();
+    let mut games = Vec::new();
+    let mut game_separators = Vec::new();
+
+    loop {
+        // Whitespace usually follows a separator (and may lead the file);
+        // without this skip only the first game would ever be parsed.
+        whitespace_if_found(&mut scanner);
+
+        let starts_game = matches!(
+            scanner.peek(),
+            Some(token) if token.body == PdnTokenBody::LeftBracket
+        );
+        if !starts_game {
+            break;
+        }
+
+        games.push(parse_game(&mut scanner));
+
+        // A truncated file has no separator after its last game; stop
+        // instead of panicking on the missing token.
+        match scanner.next() {
+            Some(separator) => game_separators.push(separator.header),
+            None => break,
+        }
+    }
+
+    if games.iter().any(|game| game.is_err()) {
+        return Err(games);
+    }
+
+    Ok(PdnFile {
+        // Every entry is `Ok` at this point, so nothing is dropped.
+        games: games.into_iter().filter_map(Result::ok).collect(),
+        game_separators,
+    })
+}
diff --git a/pdn/src/lib.rs b/pdn/src/lib.rs
index 06d268d..099a9d0 100644
--- a/pdn/src/lib.rs
+++ b/pdn/src/lib.rs
@@ -1,14 +1,2 @@
-pub fn add(left: usize, right: usize) -> usize {
- left + right
-}
-
-#[cfg(test)]
-mod tests {
- use super::*;
-
- #[test]
- fn it_works() {
- let result = add(2, 2);
- assert_eq!(result, 4);
- }
-}
+mod grammar;
+mod tokens;
diff --git a/pdn/src/tokens.rs b/pdn/src/tokens.rs
new file mode 100644
index 0000000..d37d910
--- /dev/null
+++ b/pdn/src/tokens.rs
@@ -0,0 +1,284 @@
+use std::sync::Arc;
+
+use snob::{csets, csets::CharacterSet, Scanner};
+
+/// Side to move, as attached to a `MoveNumber` token.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum Color {
+ /// Used when the move number is followed by a single `.`.
+ White,
+ /// Used when the move number is followed by `...`.
+ Black,
+}
+
+/// The kind of a token together with its payload, as produced by
+/// `PdnScanner::next_token`.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum PdnTokenBody {
+ /// A number followed by `.` or `...`; carries the number and the color.
+ MoveNumber(usize, Color),
+ /// `-`
+ MoveSeparator,
+ /// `x`
+ CaptureSeparator,
+ /// A character from `abcdefgh` followed by a character from `12345678`.
+ AlphaSquare(char, char),
+ /// A number below 100 that is not followed by `.`.
+ NumSquare(u8),
+ /// Text made of `?` and `!`, optionally wrapped in parentheses (the
+ /// parentheses are not part of the payload).
+ MoveStrength(Arc<str>),
+ /// `$` followed by a number.
+ Nag(usize),
+ /// `(`
+ LeftParenthesis,
+ /// `)`
+ RightParenthesis,
+ /// `[`
+ LeftBracket,
+ /// `]`
+ RightBracket,
+ /// `*`
+ Asterisk,
+ /// Text between two `/` characters.
+ Setup(Arc<str>),
+ /// Text between double quotes, with `\"` turned into `"`.
+ String(Arc<str>),
+ /// Text between `{` and `}`.
+ Comment(Arc<str>),
+ /// Starts with an ASCII uppercase letter; continues with ASCII letters,
+ /// digits and `_`.
+ Identifier(Arc<str>),
+ /// A run of ASCII whitespace.
+ Space(Arc<str>),
+}
+
+/// Location of a token in the scanned input.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub struct TokenHeader {
+ /// Scanner position at which the token starts.
+ /// NOTE(review): unit (bytes vs. characters) depends on `snob` - confirm.
+ start: usize,
+ /// Difference between the scanner position after and before the token.
+ len: usize,
+}
+
+/// A successfully scanned token: where it was found and what it is.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct PdnToken {
+ /// Location of the token in the input.
+ pub header: TokenHeader,
+ /// Kind and payload of the token.
+ pub body: PdnTokenBody,
+}
+
+/// Reasons `PdnScanner::next_token` can reject a piece of input.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub enum TokenErrorType {
+ /// A number of 100 or more that is not followed by `.`; carries the
+ /// number.
+ InvalidNumber(usize),
+ /// `$` that is not followed by a number.
+ InvalidNag,
+ /// A character from `abcdefgh` that is not followed by one from
+ /// `12345678`.
+ InvalidSquare,
+ /// `/` without a closing `/`.
+ UnterminatedSetup,
+ /// `{` without a closing `}`.
+ UnterminatedComment,
+ /// `"` without a closing `"`.
+ UnterminatedString,
+ /// Input that matches none of the token rules.
+ InvalidToken,
+}
+
+/// A tokenizer failure: where in the input it happened and what went wrong.
+///
+/// Derives the same traits as `PdnToken`; `Debug` in particular is needed to
+/// call `unwrap`/`expect` on the `Result<PdnToken, TokenError>` items that
+/// `PdnScanner` yields.
+#[derive(Debug, Clone, PartialEq, Eq, Hash)]
+pub struct TokenError {
+    /// Location of the rejected input.
+    header: TokenHeader,
+    /// The kind of failure.
+    ty: TokenErrorType,
+}
+
+/// Tokenizer for PDN text; iterating over it yields
+/// `Result<PdnToken, TokenError>` items.
+pub struct PdnScanner {
+ /// Underlying `snob` scanner that holds the input and the current position.
+ scanner: Scanner,
+}
+
+impl PdnScanner {
+    /// Scans the rest of a double-quoted string; the opening quote must
+    /// already be consumed.
+    ///
+    /// `\"` is turned into `"`. Returns `None` when no closing quote follows
+    /// the scanned text.
+    fn scan_string(&mut self) -> Option<String> {
+        let mut string = String::new();
+        loop {
+            // Either a run of characters that are neither `\` nor `"` ...
+            if let Some(position) = self.scanner.many("\\\"".complement()) {
+                let part = self
+                    .scanner
+                    .goto(position)
+                    .expect("position should be valid");
+                string.push_str(&part);
+            // ... or an escaped quote ...
+            } else if let Some(position) = self.scanner.starts_with("\\\"") {
+                self.scanner.goto(position);
+                string.push('"');
+            // ... or the end of the string body.
+            } else {
+                break;
+            }
+        }
+
+        if let Some(position) = self.scanner.any('"') {
+            self.scanner.goto(position);
+            Some(string)
+        } else {
+            None
+        }
+    }
+
+    /// Scans text up to `terminator` and consumes the terminator as well.
+    ///
+    /// Returns the text before the terminator, or `None` when `upto` finds
+    /// no terminator.
+    fn scan_unescaped_string(&mut self, terminator: char) -> Option<String> {
+        let position = self.scanner.upto(terminator)?;
+        let string = self
+            .scanner
+            .goto(position)
+            .expect("position should be valid");
+        let position = self
+            .scanner
+            .any(terminator)
+            .expect("there should be a terminator next");
+        self.scanner.goto(position);
+        Some(string)
+    }
+
+    /// Scans a run of ASCII digits and parses it as a `usize`.
+    ///
+    /// Returns `None` when no digits are next, or when the digits were
+    /// consumed but do not fit in a `usize`. The latter is input-dependent,
+    /// so it is reported to the caller instead of panicking.
+    fn scan_number(&mut self) -> Option<usize> {
+        let position = self.scanner.many(csets::AsciiDigits)?;
+        let digits = self
+            .scanner
+            .goto(position)
+            .expect("position should be valid");
+        digits.parse().ok()
+    }
+
+    /// Scans a run of ASCII letters, ASCII digits and `_`.
+    ///
+    /// Returns `None` when no such character is next.
+    fn scan_identifier(&mut self) -> Option<String> {
+        let position = self
+            .scanner
+            .many(csets::AsciiLetters.union(csets::AsciiDigits).union('_'))?;
+        let identifier = self
+            .scanner
+            .goto(position)
+            .expect("position should be valid");
+        Some(identifier)
+    }
+
+    /// Scans the next token body.
+    ///
+    /// Returns `None` at the end of the input, `Some(Ok(body))` for a
+    /// recognised token and `Some(Err(kind))` for rejected input. The rules
+    /// are tried in the order written below, and the order matters: for
+    /// example `x` is claimed by the capture separator and `(` tries a
+    /// parenthesised move strength before falling back to a plain
+    /// parenthesis.
+    fn next_token(&mut self) -> Option<Result<PdnTokenBody, TokenErrorType>> {
+        if self.scanner.is_at_end() {
+            return None;
+        }
+
+        let token = if let Some(position) = self.scanner.any('-') {
+            self.scanner.goto(position);
+            Ok(PdnTokenBody::MoveSeparator)
+        } else if let Some(position) = self.scanner.any('x') {
+            self.scanner.goto(position);
+            Ok(PdnTokenBody::CaptureSeparator)
+        } else if let Some(position) = self.scanner.any('(') {
+            self.scanner.goto(position);
+
+            // try a move strength token
+            if let Some(position) = self.scanner.many("?!") {
+                // NOTE(review): if the `?!` run reaches the end of the input
+                // this `expect` presumably panics - confirm `char_at`'s
+                // behaviour against the snob documentation.
+                let char = self
+                    .scanner
+                    .char_at(position)
+                    .expect("position should be valid");
+                if char == ')' {
+                    let strength = self
+                        .scanner
+                        .goto(position)
+                        .expect("position should be valid");
+                    let position = self
+                        .scanner
+                        .any(')')
+                        .expect("move strength should terminate");
+                    self.scanner.goto(position);
+                    return Some(Ok(PdnTokenBody::MoveStrength(strength.into())));
+                }
+            }
+
+            Ok(PdnTokenBody::LeftParenthesis)
+        } else if let Some(position) = self.scanner.any(')') {
+            self.scanner.goto(position);
+            Ok(PdnTokenBody::RightParenthesis)
+        } else if let Some(position) = self.scanner.any('[') {
+            self.scanner.goto(position);
+            Ok(PdnTokenBody::LeftBracket)
+        } else if let Some(position) = self.scanner.any(']') {
+            self.scanner.goto(position);
+            Ok(PdnTokenBody::RightBracket)
+        } else if let Some(position) = self.scanner.any('*') {
+            self.scanner.goto(position);
+            Ok(PdnTokenBody::Asterisk)
+        } else if let Some(position) = self.scanner.any('$') {
+            self.scanner.goto(position);
+            match self.scan_number() {
+                Some(number) => Ok(PdnTokenBody::Nag(number)),
+                None => Err(TokenErrorType::InvalidNag),
+            }
+        } else if let Some(position) = self.scanner.any('/') {
+            self.scanner.goto(position);
+            match self.scan_unescaped_string('/') {
+                Some(string) => Ok(PdnTokenBody::Setup(string.into())),
+                None => Err(TokenErrorType::UnterminatedSetup),
+            }
+        } else if let Some(position) = self.scanner.any('{') {
+            self.scanner.goto(position);
+            match self.scan_unescaped_string('}') {
+                Some(string) => Ok(PdnTokenBody::Comment(string.into())),
+                None => Err(TokenErrorType::UnterminatedComment),
+            }
+        } else if let Some(position) = self.scanner.any('"') {
+            self.scanner.goto(position);
+            match self.scan_string() {
+                Some(string) => Ok(PdnTokenBody::String(string.into())),
+                None => Err(TokenErrorType::UnterminatedString),
+            }
+        } else if let Some(position) = self.scanner.many("?!") {
+            let strength = self
+                .scanner
+                .goto(position)
+                .expect("position should be valid");
+            Ok(PdnTokenBody::MoveStrength(strength.into()))
+        } else if let Some(position) = self.scanner.any("abcdefgh") {
+            let letter = self
+                .scanner
+                .goto(position)
+                .expect("position should be valid")
+                .chars()
+                .next()
+                .expect("should contain one letter");
+            if let Some(position) = self.scanner.any("12345678") {
+                let number = self
+                    .scanner
+                    .goto(position)
+                    .expect("position should be valid")
+                    .chars()
+                    .next()
+                    .expect("should contain one letter");
+                Ok(PdnTokenBody::AlphaSquare(letter, number))
+            } else {
+                self.scanner.advance(1); // skip over second character
+                Err(TokenErrorType::InvalidSquare)
+            }
+        } else if self.scanner.any(csets::AsciiUppercase).is_some() {
+            let identifier = self
+                .scan_identifier()
+                .expect("should be a valid identifier");
+            Ok(PdnTokenBody::Identifier(identifier.into()))
+        } else if self.scanner.any(csets::AsciiDigits).is_some() {
+            match self.scan_number() {
+                Some(number) => {
+                    if let Some(position) = self.scanner.starts_with("...") {
+                        self.scanner.goto(position);
+                        Ok(PdnTokenBody::MoveNumber(number, Color::Black))
+                    } else if let Some(position) = self.scanner.any('.') {
+                        self.scanner.goto(position);
+                        Ok(PdnTokenBody::MoveNumber(number, Color::White))
+                    } else if number < 100 {
+                        Ok(PdnTokenBody::NumSquare(number as u8))
+                    } else {
+                        Err(TokenErrorType::InvalidNumber(number))
+                    }
+                }
+                // Digits are next, so `None` means they overflowed `usize`;
+                // there is no value to put into `InvalidNumber`.
+                None => Err(TokenErrorType::InvalidToken),
+            }
+        } else if let Some(position) = self.scanner.many(csets::AsciiWhitespace) {
+            let whitespace = self
+                .scanner
+                .goto(position)
+                .expect("position should be valid");
+            Ok(PdnTokenBody::Space(whitespace.into()))
+        } else {
+            // Nothing matched: skip ahead to the next character that could
+            // start a token (or to the end) and report the skipped span.
+            let position = self
+                .scanner
+                .upto(csets::AsciiLetters.union(csets::AsciiDigits.union("-x(?!)[]")))
+                .unwrap_or_else(|| self.scanner.len());
+
+            self.scanner
+                .goto(position)
+                .expect("position should be valid");
+
+            Err(TokenErrorType::InvalidToken)
+        };
+
+        Some(token)
+    }
+}
+
+/// Yields one item per token, attaching the token's location to the body (or
+/// to the error) produced by `next_token`.
+impl Iterator for PdnScanner {
+    type Item = Result<PdnToken, TokenError>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let start = self.scanner.position();
+        let outcome = self.next_token()?;
+        // The scanner has moved past the token, so the distance travelled is
+        // the token's length.
+        let header = TokenHeader {
+            start,
+            len: self.scanner.position() - start,
+        };
+
+        Some(
+            outcome
+                .map(|body| PdnToken { header, body })
+                .map_err(|ty| TokenError { header, ty }),
+        )
+    }
+}