436 lines
16 KiB
C
436 lines
16 KiB
C
/*
|
|
* Program: pgn-extract: a Portable Game Notation (PGN) extractor.
|
|
* Copyright (C) 1994-2022 David Barnes
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation; either version 1, or (at your option)
|
|
* any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*
|
|
* David Barnes may be contacted as D.J.Barnes@kent.ac.uk
|
|
* https://www.cs.kent.ac.uk/people/staff/djb/
|
|
*
|
|
*/
|
|
|
|
/* Type definitions required by multiple files. */
|
|
|
|
/* Define a type to represent different output formats.
|
|
* Currently represented are:
|
|
* SOURCE: the original source notation.
|
|
* SAN: SAN.
|
|
* CM: Chess Master input format.
|
|
* LALG: Long-algebraic, e.g. e2e4.
|
|
* HALG: Hyphenated long-algebraic, e.g. e2-e4.
|
|
* ELALG: Enhanced long-algebraic. Includes piece names, e.g. Ng1f3,
|
|
* and en-passant notation.
|
|
* XLALG: Enhanced long-algebraic. Includes piece names, e.g. Ng1f3,
|
|
* en-passant notation and either - or x between squares for
|
|
* non-capture and capture moves respectively.
|
|
* XOLALG: As XLALG but with O-O and O-O-O for castling moves.
|
|
* UCI: UCI-compatible format - actually LALG.
|
|
*/
|
|
#ifndef TYPEDEF_H
|
|
#define TYPEDEF_H
|
|
|
|
typedef enum { SOURCE, SAN, EPD, FEN, CM, LALG, HALG, ELALG, XLALG, XOLALG, UCI } OutputFormat;
|
|
|
|
/* Define a type to specify whether a move gives check, checkmate,
|
|
* or nocheck.
|
|
* checkmate implies check, but check does not imply that a move
|
|
* is not checkmate.
|
|
*/
|
|
typedef enum { NOCHECK, CHECK, CHECKMATE } CheckStatus;
|
|
|
|
/* Permit lists of strings, e.g. lists of comments,
|
|
* list of NAGs, etc.
|
|
*/
|
|
typedef struct string_list {
|
|
const char *str;
|
|
struct string_list *next;
|
|
} StringList;
|
|
|
|
typedef struct comment_list {
|
|
StringList *comment;
|
|
struct comment_list *next;
|
|
} CommentList;
|
|
|
|
typedef struct variation {
|
|
CommentList *prefix_comment;
|
|
struct move *moves;
|
|
/* The role of a suffix_comment is not entirely clear.
|
|
* It is a comment that immediately follows a variation
|
|
* so its role could either be a comment on the variation,
|
|
* a preceding comment for the first move following the variation or
|
|
* a following comment for the move immediately preceding the variation.
|
|
*
|
|
* NB: @@@ With both -V and --splitvariants, suffix_comments are output
|
|
* following the move immediately preceding the variation.
|
|
*/
|
|
CommentList *suffix_comment;
|
|
struct variation *next;
|
|
} Variation;
|
|
|
|
typedef struct nag {
|
|
StringList *text;
|
|
CommentList *comments;
|
|
struct nag *next;
|
|
} Nag;
|
|
|
|
/* Define a maximum length for the text of moves.
|
|
* This is generous.
|
|
*/
|
|
#define MAX_MOVE_LEN 15
|
|
|
|
/* Retain the text of a move and any associated
|
|
* NAGs and comments.
|
|
*/
|
|
typedef struct move {
|
|
/* This array is of type unsigned char,
|
|
* in order to accommodate full 8-bit letters without
|
|
* sign extension.
|
|
*/
|
|
unsigned char move[MAX_MOVE_LEN+1];
|
|
/* Class of move, e.g. PAWN_MOVE, PIECE_MOVE. */
|
|
MoveClass class;
|
|
Col from_col;
|
|
Rank from_rank;
|
|
Col to_col;
|
|
Rank to_rank;
|
|
Piece piece_to_move;
|
|
/* captured_piece is EMPTY if there is no capture. */
|
|
Piece captured_piece;
|
|
/* promoted_piece is EMPTY if class is not PAWN_MOVE_WITH_PROMOTION. */
|
|
Piece promoted_piece;
|
|
/* Whether this move gives check. */
|
|
CheckStatus check_status;
|
|
/* An EPD representation of the board immediately before this move
|
|
* has been played.
|
|
*/
|
|
char *epd;
|
|
/* The move count additions to the EPD representation to complete
|
|
* a FEN description. Only relevant if (epd != NULL).
|
|
*/
|
|
char *fen_suffix;
|
|
/* zobrist hash code of the position after this move has been played.
|
|
* Only set if GlobalState.add_hashcode_comments.
|
|
*/
|
|
uint64_t zobrist;
|
|
/* Evaluation of the position after this move has been played.
|
|
* This is primarily a hook for anyone wanting to build a proper
|
|
* evaluation function (see apply.c) or interface to an external
|
|
* engine, say.
|
|
*/
|
|
double evaluation;
|
|
Nag *NAGs;
|
|
CommentList *comment_list;
|
|
/* terminating_result holds the result of the current list of moves. */
|
|
char *terminating_result;
|
|
Variation *Variants;
|
|
/* Pointers to the previous and next move.
|
|
* The extraction program does not need the prev field, but
|
|
* interfaces that might need it.
|
|
* For instance, a game viewer would need to be able to move backwards
|
|
* and forwards through a game.
|
|
*/
|
|
struct move *prev, *next;
|
|
} Move;
|
|
|
|
typedef struct {
|
|
/* Tags for this game. */
|
|
char **tags;
|
|
/* The maximum number of strings in tags. */
|
|
int tags_length;
|
|
/* Any comment prefixing the game, between
|
|
* the tags and the moves.
|
|
*/
|
|
CommentList *prefix_comment;
|
|
/* The hash value of the final position. */
|
|
HashCode final_hash_value;
|
|
/* An accumulated hash value, used to disambiguate false clashes
|
|
* of final_hash_value.
|
|
*/
|
|
HashCode cumulative_hash_value;
|
|
/* Board hash value at fuzzy_move_depth, if required. */
|
|
HashCode fuzzy_duplicate_hash;
|
|
/* The move list of the game. */
|
|
Move *moves;
|
|
/* Whether the moves have been checked, or not. */
|
|
Boolean moves_checked;
|
|
/* Whether the moves are ok, or not. */
|
|
Boolean moves_ok;
|
|
/* if !moves_ok, the first ply at which an error was found.
|
|
* 0 => no error found.
|
|
*/
|
|
int error_ply;
|
|
/* Counts of the number of times each position has been reached.
|
|
* Used for repetition detection, if required.
|
|
*/
|
|
struct PositionCount *position_counts;
|
|
/* Line numbers of the start and end of the game in the input file. */
|
|
unsigned long start_line, end_line;
|
|
} Game;
|
|
|
|
/* Define a type to distinguish between CHECK files, NORMAL files,
|
|
* and ECO files.
|
|
* CHECKFILEs are those whose contents are not output.
|
|
* Their contents are used to check for duplicates in NORMALFILEs.
|
|
* An ECOFILE consists of ECO lines for classification.
|
|
*/
|
|
typedef enum { NORMALFILE, CHECKFILE, ECOFILE } SourceFileType;
|
|
|
|
/* 0 = don't divide on ECO code.
|
|
* 1 = divide by letter.
|
|
* 2 = divide by letter and single digit.
|
|
* N > 1 = divide by letter and N-1 digits.
|
|
* In principle, it should be possible to expand the ECO classification
|
|
* with an arbitrary number of digits.
|
|
*/
|
|
typedef enum {
|
|
DONT_DIVIDE = 0, MIN_ECO_LEVEL = 1, MAX_ECO_LEVEL = 10
|
|
} EcoDivision;
|
|
|
|
/* Define a type to describe which tags are to be output.
|
|
*/
|
|
typedef enum {
|
|
ALL_TAGS = 0, SEVEN_TAG_ROSTER = 1, NO_TAGS = 2,
|
|
} TagOutputForm;
|
|
|
|
/* Whether games with a SETUP_TAG should be kept. */
|
|
typedef enum {
|
|
SETUP_TAG_OK, NO_SETUP_TAG, SETUP_TAG_ONLY,
|
|
} SetupOutputStatus;
|
|
|
|
/* A type to support the storing of a list of game numbers.
|
|
* Used to support the --selectonly and --skip arguments.
|
|
*/
|
|
typedef struct game_number {
|
|
unsigned long min, max;
|
|
struct game_number *next;
|
|
} game_number;
|
|
|
|
/* This structure holds details of the program state.
|
|
* Most of these fields are set from the program's arguments.
|
|
*/
|
|
typedef struct {
|
|
/* Whether we are skipping the current game - typically because
|
|
* of an error in its text.
|
|
*/
|
|
Boolean skipping_current_game;
|
|
/* Whether to check, but not write the converted output. */
|
|
Boolean check_only;
|
|
/* Verbosity level.
|
|
* 0 -> nothing at all.
|
|
* 1 -> only the number of games processed.
|
|
* 2 -> a running commentary to logfile.
|
|
*/
|
|
int verbosity;
|
|
/* Whether to keep NAGs along with moves. */
|
|
Boolean keep_NAGs;
|
|
/* Whether to keep comments along with moves. */
|
|
Boolean keep_comments;
|
|
/* Whether to keep variations along with moves. */
|
|
Boolean keep_variations;
|
|
/* Which tags are to be output. */
|
|
TagOutputForm tag_output_format;
|
|
/* Whether to match permutations of textual variations or not. */
|
|
Boolean match_permutations;
|
|
/* Whether we are matching positional variations or not. */
|
|
Boolean positional_variations;
|
|
/* Whether we are using Soundex matching or not. */
|
|
Boolean use_soundex;
|
|
/* Whether to suppress duplicate game scores. */
|
|
Boolean suppress_duplicates;
|
|
/* Whether to suppress unique game scores. */
|
|
Boolean suppress_originals;
|
|
/* Whether to use fuzzy matching for duplicates. */
|
|
Boolean fuzzy_match_duplicates;
|
|
/* At what depth to use fuzzy matching. */
|
|
unsigned fuzzy_match_depth;
|
|
/* Whether to check the tags for matches. */
|
|
Boolean check_tags;
|
|
/* Whether to add ECO codes. */
|
|
Boolean add_ECO;
|
|
/* Whether an ECO file is currently being parsed. */
|
|
Boolean parsing_ECO_file;
|
|
|
|
/* Which level to divide the output. */
|
|
EcoDivision ECO_level;
|
|
/* What form to write the output in. */
|
|
OutputFormat output_format;
|
|
/* Maximum output line length. */
|
|
unsigned max_line_length;
|
|
|
|
/* Whether to use a virtual hash table or not. */
|
|
Boolean use_virtual_hash_table;
|
|
/* Whether to match on the number of moves in a game. */
|
|
Boolean check_move_bounds;
|
|
/* Whether to match only games ending in checkmate. */
|
|
Boolean match_only_checkmate;
|
|
/* Whether to match only games ending in stalemate. */
|
|
Boolean match_only_stalemate;
|
|
/* Whether to output move numbers in the output. */
|
|
Boolean keep_move_numbers;
|
|
/* Whether to output results in the output. */
|
|
Boolean keep_results;
|
|
/* Whether to keep check and mate characters in the output. */
|
|
Boolean keep_checks;
|
|
/* Whether to output an evaluation value after each move. */
|
|
Boolean output_evaluation;
|
|
/* Whether to keep games which have incorrect moves. */
|
|
Boolean keep_broken_games;
|
|
/* Whether to suppress irrelevant ep info in EPD and FEN output. */
|
|
Boolean suppress_redundant_ep_info;
|
|
/* Whether the output should be in JSON format. */
|
|
Boolean json_format;
|
|
/* Whether tag matches can be made other than at the start of the tag. */
|
|
Boolean tag_match_anywhere;
|
|
/* Whether to match only games involving underpromotion. */
|
|
Boolean match_underpromotion;
|
|
|
|
/* Maximum ply depth to search for positional variations (-x).
|
|
* This is picked up from the length of variations in the positional
|
|
* variations file.
|
|
* Also set by the --maxply argument.
|
|
*/
|
|
unsigned depth_of_positional_search;
|
|
/* Number of games processed so far. */
|
|
unsigned long num_games_processed;
|
|
/* Number of games matched so far. */
|
|
unsigned long num_games_matched;
|
|
/* How many games to store in each file. */
|
|
unsigned games_per_file;
|
|
/* Which is the next file number. */
|
|
unsigned next_file_number;
|
|
/* Lower and upper bounds for moves if check_move_bounds.
|
|
* From v17-33 these values are ply rather than moves.
|
|
*/
|
|
unsigned lower_move_bound, upper_move_bound;
|
|
/* Limit to the number of plies to appear in the output. */
|
|
int output_ply_limit;
|
|
/* How quiescent the game needs to be for it to be output. */
|
|
unsigned quiescence_threshold;
|
|
/* First game number to process. */
|
|
unsigned long first_game_number;
|
|
/* Last game number to process. */
|
|
unsigned long game_limit;
|
|
/* Maximum number to output (maximum_matches > 0) */
|
|
unsigned long maximum_matches;
|
|
/* Number of ply to drop at the start (+ve) or end (-ve)
|
|
* of the game.
|
|
*/
|
|
int drop_ply_number;
|
|
/* Starting ply for looking for matches. */
|
|
unsigned startply;
|
|
/* Which type of repetition to check for if > 0 (3 or 5). */
|
|
unsigned check_for_repetition;
|
|
/* Check for N-move draw games if check > 0. */
|
|
unsigned check_for_N_move_rule;
|
|
|
|
/* Whether to output a FEN string. Either at the end of the game
|
|
* or replacing a matching comment (see FEN_comment_pattern). */
|
|
Boolean output_FEN_string;
|
|
/* Whether to add a FEN comment after every move. */
|
|
Boolean add_FEN_comments;
|
|
/* Whether to add a hashcode comment after every move. */
|
|
Boolean add_hashcode_comments;
|
|
/* Whether to add a 'matching position' comment. */
|
|
Boolean add_position_match_comments;
|
|
/* Whether to include a PlyCount tag. */
|
|
Boolean output_plycount;
|
|
/* Whether to include a tag with the total ply count of the game. */
|
|
Boolean output_total_plycount;
|
|
/* Whether to add a HashCode tag. */
|
|
Boolean add_hashcode_tag;
|
|
/* Whether to fix a Result tag that does not match the game outcome. */
|
|
Boolean fix_result_tags;
|
|
/* Whether to attempt to fix broken tag strings. */
|
|
Boolean fix_tag_strings;
|
|
/* Whether to add assumed castling rights to a FEN tag that does not include them. */
|
|
Boolean add_fen_castling;
|
|
/* Whether comments should appear on separate lines. */
|
|
Boolean separate_comment_lines;
|
|
/* Whether to output each variation as a separate game. */
|
|
Boolean split_variants;
|
|
/* Whether to reject games with inconsistent result indications. */
|
|
Boolean reject_inconsistent_results;
|
|
/* Whether to allow NULL moves in the main line. */
|
|
Boolean allow_null_moves;
|
|
/* Whether to allow nested comments. */
|
|
Boolean allow_nested_comments;
|
|
/* Whether to add a MaterialMatch tag with -z. */
|
|
Boolean add_match_tag;
|
|
/* Whether to add a MatchLabel tag with FENPattern */
|
|
Boolean add_matchlabel_tag;
|
|
/* Whether to only output tags that are explicitly wanted; e.g.,
|
|
* via -7 or -R.
|
|
*/
|
|
Boolean only_output_wanted_tags;
|
|
/* Delete games with the same starting position as any seen before. */
|
|
Boolean delete_same_setup;
|
|
/* Move comments at the start of a variation to after the first move
|
|
* of the variation.
|
|
* This is a lichess-specific fix as it otherwise deletes prefix
|
|
* comments in variations..
|
|
*/
|
|
Boolean lichess_comment_fix;
|
|
|
|
/* The depth limit for splitting variations.
|
|
* 0 => no limit.
|
|
*/
|
|
unsigned split_depth_limit;
|
|
/* Whether this is a CHECKFILE or a NORMALFILE. */
|
|
SourceFileType current_file_type;
|
|
/* Whether SETUP_TAGs are ok in extracted games. */
|
|
SetupOutputStatus setup_status;
|
|
/* For positional matches, whether the player to move matters. */
|
|
WhoseMove whose_move;
|
|
|
|
/* The comment to use for position matches, if required. */
|
|
const char *position_match_comment;
|
|
/* The comment pattern to match for FEN comments (see output_FEN_string) */
|
|
const char *FEN_comment_pattern;
|
|
/* The comment pattern to match for dropping ply */
|
|
const char *drop_comment_pattern;
|
|
/* The comment marker to use for input line numbers, if required. */
|
|
const char *line_number_marker;
|
|
/* Current input file name. */
|
|
const char *current_input_file;
|
|
/* File of ECO lines. */
|
|
const char *eco_file;
|
|
/* Where to write the extracted games. */
|
|
FILE *outputfile;
|
|
/* Output file name. */
|
|
const char *output_filename;
|
|
/* Where to write errors and running commentary. */
|
|
FILE *logfile;
|
|
/* Where to write duplicate games. */
|
|
FILE *duplicate_file;
|
|
/* Where to write games that don't match the criteria. */
|
|
FILE *non_matching_file;
|
|
/* Which game numbers to output (matching_game_numbers != NULL) */
|
|
game_number *matching_game_numbers;
|
|
/* Which game number to output next (matching_game_numbers != NULL) */
|
|
game_number *next_game_number_to_output;
|
|
/* Which game numbers to skip (skip_game_numbers != NULL) */
|
|
game_number *skip_game_numbers;
|
|
/* Which game number to skip next (skip_game_numbers != NULL) */
|
|
game_number *next_game_number_to_skip;
|
|
} StateInfo;
|
|
|
|
/* Provide access to the global state that has been set
|
|
* through command line arguments.
|
|
*/
|
|
extern StateInfo GlobalState;
|
|
FILE *must_open_file(const char *filename,const char *mode);
|
|
|
|
#endif // TYPEDEF_H
|
|
|