1683 lines
52 KiB
C
1683 lines
52 KiB
C
/*
|
|
* This file is part of pgn-extract: a Portable Game Notation (PGN) extractor.
|
|
* Copyright (C) 1994-2022 David J. Barnes
|
|
*
|
|
* pgn-extract is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License as published by
|
|
* the Free Software Foundation, either version 3 of the License, or
|
|
* (at your option) any later version.
|
|
*
|
|
* pgn-extract is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with pgn-extract. If not, see <http://www.gnu.org/licenses/>.
|
|
*
|
|
* David J. Barnes may be contacted as d.j.barnes@kent.ac.uk
|
|
* https://www.cs.kent.ac.uk/people/staff/djb/
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <ctype.h>
|
|
#if defined(__BORLANDC__) || defined(_MSC_VER)
|
|
#include <io.h>
|
|
#ifndef R_OK
|
|
#define R_OK 0
|
|
#endif
|
|
#else
|
|
#include <unistd.h>
|
|
#endif
|
|
#include "bool.h"
|
|
#include "mymalloc.h"
|
|
#include "defs.h"
|
|
#include "typedef.h"
|
|
#include "tokens.h"
|
|
#include "taglist.h"
|
|
#include "lex.h"
|
|
#include "moves.h"
|
|
#include "lists.h"
|
|
#include "decode.h"
|
|
#include "lines.h"
|
|
#include "grammar.h"
|
|
#include "apply.h"
|
|
#include "output.h"
|
|
|
|
/* Prototypes for the file-local (static) functions used in this file. */
static Boolean extract_yytext(const unsigned char *symbol_start,
                              const unsigned char *linep);
static int identify_tag(const char *tag_string);
static TagName make_new_tag(const char *tag);
static Boolean open_input(const char *infile);
static Boolean open_input_file(int file_number);
/* When a move is saved, what is known of its source and destination coordinates
 * should also be saved.
 */
static void save_k_castle(void);
static void save_move(const unsigned char *move);
static void save_q_castle(void);
static void save_string(const char *result);
static void terminate_input(void);
|
|
|
|
/* Line number quoted in lexer diagnostics (see the "Unmatched comment end"
 * message in get_next_symbol).
 */
static unsigned long line_number = 0;
/* Keep track of the Recursive Annotation Variation level. */
static unsigned RAV_level = 0;
/* Keep track of the last move found. */
static unsigned char last_move[MAX_MOVE_LEN + 1];
/* How many games we have extracted from this file. */
static unsigned games_in_file = 0;

/* Provide an input file pointer.
 * This is initialised in init_lex_tables.
 */
static FILE *yyin = NULL;

/* Define space for holding matched tokens.
 * yytext has room for MAX_YYTEXT characters plus the terminating NUL.
 */
#define MAX_YYTEXT 100
static unsigned char yytext[MAX_YYTEXT + 1];
YYSTYPE yylval;

/* Number of entries in the per-character lookup tables below. */
#define MAX_CHAR 256
/* Distance between an upper-case letter and its lower-case equivalent. */
#define ALPHA_DIST ('a'-'A')
/* Table of symbol classifications. */
static TokenType ChTab[MAX_CHAR];
/* A boolean array as to whether a character is allowed in a move or not. */
static short MoveChars[MAX_CHAR];

/* Define a table to hold the list of tag strings.
 * This is initialised in init_list_of_known_tags().
 * As new tags are encountered, the list is expanded,
 * and tag_list_length increased.
 */
static const char **TagList;
static unsigned tag_list_length = 0;
/* Which tags, if any, are to be suppressed in the output.
 * The indices are the same as for TagList.
 */
static Boolean *suppressed_tags;
/* Nested comment depth, maintained by gather_comment.
 * Nested openers/closers are only counted when
 * GlobalState.allow_nested_comments is set.
 */
static unsigned comment_depth = 0;
|
|
|
|
/* Initialise the TagList. This should be stored in alphabetical order,
|
|
* by virtue of the order in which the _TAG values are defined.
|
|
*/
|
|
static void
|
|
init_list_of_known_tags(void)
|
|
{
|
|
unsigned i;
|
|
tag_list_length = ORIGINAL_NUMBER_OF_TAGS;
|
|
TagList = (const char **) malloc_or_die(tag_list_length * sizeof (*TagList));
|
|
/* FALSE by default. */
|
|
suppressed_tags = (Boolean *) malloc_or_die(tag_list_length * sizeof(*suppressed_tags));
|
|
/* Be paranoid and put a string in every entry. */
|
|
for (i = 0; i < tag_list_length; i++) {
|
|
TagList[i] = "";
|
|
suppressed_tags[i] = FALSE;
|
|
}
|
|
TagList[ANNOTATOR_TAG] = "Annotator";
|
|
TagList[BLACK_TAG] = "Black";
|
|
TagList[BLACK_ELO_TAG] = "BlackElo";
|
|
TagList[BLACK_NA_TAG] = "BlackNA";
|
|
TagList[BLACK_TITLE_TAG] = "BlackTitle";
|
|
TagList[BLACK_TYPE_TAG] = "BlackType";
|
|
TagList[BLACK_USCF_TAG] = "BlackUSCF";
|
|
TagList[BOARD_TAG] = "Board";
|
|
TagList[DATE_TAG] = "Date";
|
|
TagList[ECO_TAG] = "ECO";
|
|
TagList[PSEUDO_ELO_TAG] = "Elo";
|
|
TagList[EVENT_TAG] = "Event";
|
|
TagList[EVENT_DATE_TAG] = "EventDate";
|
|
TagList[EVENT_SPONSOR_TAG] = "EventSponsor";
|
|
TagList[FEN_TAG] = "FEN";
|
|
TagList[PSEUDO_FEN_PATTERN_TAG] = "FENPattern";
|
|
TagList[PSEUDO_FEN_PATTERN_I_TAG] = "FENPatternI";
|
|
TagList[HASHCODE_TAG] = "HashCode";
|
|
TagList[LONG_ECO_TAG] = "LongECO";
|
|
TagList[MATCHLABEL_TAG] = "MatchLabel";
|
|
TagList[MATERIAL_MATCH_TAG] = "MaterialMatch";
|
|
TagList[MODE_TAG] = "Mode";
|
|
TagList[NIC_TAG] = "NIC";
|
|
TagList[OPENING_TAG] = "Opening";
|
|
TagList[PSEUDO_PLAYER_TAG] = "Player";
|
|
TagList[PLY_COUNT_TAG] = "PlyCount";
|
|
TagList[RESULT_TAG] = "Result";
|
|
TagList[ROUND_TAG] = "Round";
|
|
TagList[SECTION_TAG] = "Section";
|
|
TagList[SETUP_TAG] = "SetUp";
|
|
TagList[SITE_TAG] = "Site";
|
|
TagList[STAGE_TAG] = "Stage";
|
|
TagList[SUB_VARIATION_TAG] = "SubVariation";
|
|
TagList[TERMINATION_TAG] = "Termination";
|
|
TagList[TIME_TAG] = "Time";
|
|
TagList[TIME_CONTROL_TAG] = "TimeControl";
|
|
TagList[TOTAL_PLY_COUNT_TAG] = "TotalPlyCount";
|
|
TagList[UTC_DATE_TAG] = "UTCDate";
|
|
TagList[UTC_TIME_TAG] = "UTCTime";
|
|
TagList[VARIANT_TAG] = "Variant";
|
|
TagList[VARIATION_TAG] = "Variation";
|
|
TagList[WHITE_TAG] = "White";
|
|
TagList[WHITE_ELO_TAG] = "WhiteElo";
|
|
TagList[WHITE_NA_TAG] = "WhiteNA";
|
|
TagList[WHITE_TITLE_TAG] = "WhiteTitle";
|
|
TagList[WHITE_TYPE_TAG] = "WhiteType";
|
|
TagList[WHITE_USCF_TAG] = "WhiteUSCF";
|
|
}
|
|
|
|
/* Extend TagList to accomodate a new tag string.
|
|
* Return the current value of tag_list_length as its
|
|
* index, having incremented its value.
|
|
*/
|
|
static TagName
|
|
make_new_tag(const char *tag)
|
|
{
|
|
unsigned tag_index = tag_list_length;
|
|
tag_list_length++;
|
|
TagList = (const char **) realloc_or_die((void *) TagList,
|
|
tag_list_length * sizeof (*TagList));
|
|
suppressed_tags = (Boolean *) realloc_or_die(
|
|
(void *) suppressed_tags,
|
|
tag_list_length * sizeof(*suppressed_tags));
|
|
TagList[tag_index] = copy_string(tag);
|
|
suppressed_tags[tag_index] = FALSE;
|
|
/* Ensure that the game header's tags array can accommodate
|
|
* the new tag.
|
|
*/
|
|
increase_game_header_tags_length(tag_list_length);
|
|
return tag_index;
|
|
}
|
|
|
|
const char *
|
|
tag_header_string(TagName tag)
|
|
{
|
|
if (tag < tag_list_length) {
|
|
return TagList[tag];
|
|
}
|
|
else {
|
|
fprintf(GlobalState.logfile, "Internal error in tag_header_string(%d)\n",
|
|
tag);
|
|
exit(1);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
Boolean
|
|
is_suppressed_tag(TagName tag)
|
|
{
|
|
if (tag < tag_list_length) {
|
|
return suppressed_tags[tag];
|
|
}
|
|
else {
|
|
fprintf(GlobalState.logfile, "Internal error in is_suppressed_tag(%d)\n",
|
|
tag);
|
|
exit(1);
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
/* Don't include the given tag on output. */
|
|
void
|
|
suppress_tag(const char *tag_string)
|
|
{
|
|
int tag_item = identify_tag(tag_string);
|
|
if (tag_item < 0) {
|
|
tag_item = make_new_tag(tag_string);
|
|
}
|
|
suppressed_tags[tag_item] = TRUE;
|
|
}
|
|
|
|
/* Initialise ChTab[], the classification of the initial characters
|
|
* of symbols.
|
|
* Initialise MoveChars, the classification of secondary characters
|
|
* of moves.
|
|
*/
|
|
void
|
|
init_lex_tables(void)
|
|
{
|
|
int i;
|
|
|
|
/* Assume standard input will be used, until we know otherwise. */
|
|
yyin = stdin;
|
|
init_list_of_known_tags();
|
|
/* Initialise ChTab[]. */
|
|
for (i = 0; i < MAX_CHAR; i++) {
|
|
ChTab[i] = ERROR_TOKEN;
|
|
}
|
|
ChTab[' '] = WHITESPACE;
|
|
ChTab['\t'] = WHITESPACE;
|
|
ChTab['\r'] = WHITESPACE;
|
|
ChTab['['] = TAG_START;
|
|
ChTab[']'] = TAG_END;
|
|
ChTab['"'] = DOUBLE_QUOTE;
|
|
ChTab['{'] = COMMENT_START;
|
|
ChTab['}'] = COMMENT_END;
|
|
ChTab['$'] = NAG;
|
|
ChTab['!'] = ANNOTATE;
|
|
ChTab['?'] = ANNOTATE;
|
|
ChTab['+'] = CHECK_SYMBOL;
|
|
ChTab['#'] = CHECK_SYMBOL;
|
|
ChTab['.'] = DOT;
|
|
ChTab['('] = RAV_START;
|
|
ChTab[')'] = RAV_END;
|
|
ChTab['%'] = PERCENT;
|
|
ChTab[';'] = SEMICOLON;
|
|
ChTab['\\'] = ESCAPE;
|
|
ChTab['\0'] = EOS;
|
|
ChTab['*'] = STAR;
|
|
ChTab['-'] = DASH;
|
|
ChTab['/'] = SLASH;
|
|
|
|
/* Operators allowed only in the tag file. */
|
|
ChTab['<'] = OPERATOR;
|
|
ChTab['>'] = OPERATOR;
|
|
ChTab['='] = OPERATOR; /* Overloaded in MoveChars. */
|
|
|
|
for (i = '0'; i <= '9'; i++) {
|
|
ChTab[i] = DIGIT;
|
|
}
|
|
for (i = 'A'; i <= 'Z'; i++) {
|
|
ChTab[i] = ALPHA;
|
|
ChTab[i + ALPHA_DIST] = ALPHA;
|
|
}
|
|
ChTab['_'] = ALPHA;
|
|
|
|
/* Classify the Russian piece letters as ALPHA. */
|
|
ChTab[RUSSIAN_KNIGHT_OR_KING] = ALPHA; /* King and Knight. */
|
|
ChTab[RUSSIAN_KING_SECOND_LETTER] = ALPHA; /* King (second character). */
|
|
ChTab[RUSSIAN_QUEEN] = ALPHA; /* Queen. */
|
|
ChTab[RUSSIAN_ROOK] = ALPHA; /* Rook. */
|
|
ChTab[RUSSIAN_BISHOP] = ALPHA; /* Bishop. */
|
|
|
|
/* Initialise MoveChars[]. */
|
|
for (i = 0; i < MAX_CHAR; i++) {
|
|
MoveChars[i] = 0;
|
|
}
|
|
/* Files. */
|
|
for (i = 'a'; i <= 'h'; i++) {
|
|
MoveChars[i] = 1;
|
|
}
|
|
/* Ranks. */
|
|
for (i = '1'; i <= '8'; i++) {
|
|
MoveChars[i] = 1;
|
|
}
|
|
/* Upper-case pieces. */
|
|
MoveChars['K'] = 1;
|
|
MoveChars['Q'] = 1;
|
|
MoveChars['R'] = 1;
|
|
MoveChars['N'] = 1;
|
|
MoveChars['B'] = 1;
|
|
/* Lower-case pieces. */
|
|
MoveChars['k'] = 1;
|
|
MoveChars['q'] = 1;
|
|
MoveChars['r'] = 1;
|
|
MoveChars['n'] = 1;
|
|
MoveChars['b'] = 1;
|
|
/* Other u-c Dutch/German characters. */
|
|
MoveChars['D'] = 1; /* Queen. */
|
|
MoveChars['T'] = 1; /* Rook. */
|
|
MoveChars['S'] = 1; /* Knight. */
|
|
MoveChars['P'] = 1; /* Knight. */
|
|
MoveChars['L'] = 1; /* Bishop. */
|
|
/* Russian characters. */
|
|
MoveChars[RUSSIAN_KNIGHT_OR_KING] = 1; /* King and Knight. */
|
|
MoveChars[RUSSIAN_KING_SECOND_LETTER] = 1; /* King (second character). */
|
|
MoveChars[RUSSIAN_QUEEN] = 1; /* Queen. */
|
|
MoveChars[RUSSIAN_ROOK] = 1; /* Rook. */
|
|
MoveChars[RUSSIAN_BISHOP] = 1; /* Bishop. */
|
|
|
|
/* Capture and square separators. */
|
|
MoveChars['x'] = 1;
|
|
MoveChars['X'] = 1;
|
|
MoveChars[':'] = 1;
|
|
MoveChars['-'] = 1;
|
|
/* Promotion character. */
|
|
MoveChars['='] = 1;
|
|
/* Castling. */
|
|
MoveChars['O'] = 1;
|
|
MoveChars['o'] = 1;
|
|
MoveChars['0'] = 1;
|
|
/* Allow a trailing p for ep. */
|
|
MoveChars['p'] = 1;
|
|
}
|
|
|
|
/* Starting from linep in line, gather up the string until
|
|
* the closing quote. Skip over the closing quote.
|
|
* NB: This token is only used for tags, which are notoriously
|
|
* error prone, so there is some code attempting recovery
|
|
* if requested.
|
|
*/
|
|
LinePair
|
|
gather_string(char *line, unsigned char *linep)
|
|
{
|
|
LinePair resulting_line;
|
|
char ch;
|
|
unsigned len = 0;
|
|
char *str;
|
|
Boolean end_of_string = FALSE;
|
|
|
|
do {
|
|
ch = *linep++;
|
|
len++;
|
|
if (ch == '\\') {
|
|
/* Escape the next character. */
|
|
ch = *linep++;
|
|
len++;
|
|
if(ch == '\0') {
|
|
fprintf(GlobalState.logfile, "Missing escaped character in string.\n");
|
|
print_error_context(GlobalState.logfile);
|
|
end_of_string = TRUE;
|
|
}
|
|
}
|
|
else if(ch == '"' || ch == '\0') {
|
|
end_of_string = TRUE;
|
|
}
|
|
else {
|
|
/* Ordinary character. */
|
|
}
|
|
} while (!end_of_string);
|
|
|
|
if(GlobalState.fix_tag_strings && ch == '"') {
|
|
/* Look for potentially badly formatted tag strings.
|
|
* Don't assume that the second double-quote character
|
|
* is the termination point.
|
|
*/
|
|
unsigned char *lookahead = linep;
|
|
Boolean malformed = FALSE;
|
|
while(*lookahead != '\0' && ChTab[*lookahead] != TAG_END) {
|
|
TokenType tt = ChTab[*lookahead];
|
|
if(tt != WHITESPACE) {
|
|
malformed = TRUE;
|
|
}
|
|
lookahead++;
|
|
}
|
|
if(malformed) {
|
|
fprintf(GlobalState.logfile, "Malformed tag string.\n");
|
|
print_error_context(GlobalState.logfile);
|
|
lookahead--;
|
|
while(lookahead > linep && ChTab[*lookahead] == WHITESPACE) {
|
|
lookahead--;
|
|
}
|
|
if(*lookahead == '"') {
|
|
/* Likely intended end of string. */
|
|
ch = *lookahead;
|
|
len += lookahead - linep;
|
|
linep = lookahead + 1;
|
|
}
|
|
else {
|
|
/* The closing quote appears to be missing. */
|
|
lookahead++;
|
|
ch = *lookahead;
|
|
len += lookahead - linep;
|
|
linep = lookahead;
|
|
}
|
|
/* Replace any previous closing double quotes with single quotes. */
|
|
str = (char *) malloc_or_die(len + 1);
|
|
unsigned char *p = linep - len - 1;
|
|
int i = 0;
|
|
while(p < linep - 1) {
|
|
if(*p == '"') {
|
|
str[i++] = '\'';
|
|
p++;
|
|
}
|
|
else if(*p == '\\') {
|
|
str[i++] = *p++;
|
|
str[i++] = *p++;
|
|
}
|
|
else {
|
|
str[i++] = *p++;
|
|
}
|
|
}
|
|
str[i] = '\0';
|
|
}
|
|
else {
|
|
/* The last one doesn't belong in the string. */
|
|
len--;
|
|
str = (char *) malloc_or_die(len + 1);
|
|
strncpy(str, (const char *) (linep - len - 1), len);
|
|
str[len] = '\0';
|
|
}
|
|
}
|
|
else {
|
|
/* The last one doesn't belong in the string. */
|
|
len--;
|
|
/* Allocate space for the result. */
|
|
str = (char *) malloc_or_die(len + 1);
|
|
strncpy(str, (const char *) (linep - len - 1), len);
|
|
str[len] = '\0';
|
|
}
|
|
/* Store it in yylval. */
|
|
yylval.token_string = str;
|
|
|
|
/* Make sure that the string was properly terminated, by
|
|
* looking at the last character examined.
|
|
*/
|
|
if (ch == '\0') {
|
|
/* Too far. */
|
|
if (!GlobalState.skipping_current_game) {
|
|
fprintf(GlobalState.logfile, "Missing closing quote in %s\n", line);
|
|
}
|
|
if (len > 1) {
|
|
/* Move back to the null. */
|
|
linep--;
|
|
str[len - 1] = '\0';
|
|
}
|
|
}
|
|
else {
|
|
/* We have already skipped over the closing quote. */
|
|
}
|
|
resulting_line.line = line;
|
|
resulting_line.linep = linep;
|
|
resulting_line.token = STRING;
|
|
return resulting_line;
|
|
}
|
|
|
|
/*
|
|
* Is ch of the given character class?
|
|
* External access to ChTab.
|
|
*/
|
|
Boolean
|
|
is_character_class(unsigned char ch, TokenType character_class)
|
|
{
|
|
return ChTab[ch] == character_class;
|
|
}
|
|
|
|
/* Starting from linep in line, gather up a comment until
|
|
* the END_COMMENT. Skip over the END_COMMENT.
|
|
*/
|
|
static LinePair
|
|
gather_comment(char *line, unsigned char *linep)
|
|
{
|
|
LinePair resulting_line;
|
|
char ch;
|
|
unsigned len = 0;
|
|
/* The string list in which the current comment will be gathered. */
|
|
StringList *current_comment = NULL;
|
|
/* The pointer to be returned. */
|
|
CommentList *comment;
|
|
|
|
/* GlobalState.allow_nested_comments. */
|
|
comment_depth++;
|
|
|
|
do {
|
|
/* Restart a new segment. */
|
|
len = 0;
|
|
do {
|
|
ch = *linep++;
|
|
len++;
|
|
if(ch == '{') {
|
|
if(GlobalState.allow_nested_comments) {
|
|
comment_depth++;
|
|
}
|
|
}
|
|
else if(ch == '}') {
|
|
if(GlobalState.allow_nested_comments) {
|
|
if(comment_depth > 1) {
|
|
comment_depth--;
|
|
/* Prevent this terminating the outer level. */
|
|
ch = ' ';
|
|
}
|
|
}
|
|
}
|
|
else {
|
|
/* No further action. */
|
|
}
|
|
} while ((ch != '}') && (ch != '\0'));
|
|
if(ch == '}') {
|
|
comment_depth--;
|
|
}
|
|
/* The last character doesn't belong in the comment. */
|
|
len--;
|
|
if (GlobalState.keep_comments) {
|
|
char *comment_str;
|
|
|
|
unsigned const char *str = linep - len - 1;
|
|
int numchars = len;
|
|
/* Trim spaces from the end.*/
|
|
int end = numchars - 1;
|
|
while(end >= 0 && str[end] == ' ') {
|
|
end--;
|
|
}
|
|
end++;
|
|
/* Trim spaces from the start. */
|
|
int start = 0;
|
|
while(start < end && str[start] == ' ') {
|
|
start++;
|
|
}
|
|
/* Allocate space for the result. */
|
|
comment_str = (char *) malloc_or_die(end - start + 1);
|
|
strncpy(comment_str, (const char *) (str + start), end - start);
|
|
comment_str[end - start] = '\0';
|
|
current_comment = save_string_list_item(current_comment, comment_str);
|
|
}
|
|
if (ch == '\0') {
|
|
line = next_input_line(yyin);
|
|
linep = (unsigned char *) line;
|
|
}
|
|
} while ((ch != '}') && (line != NULL));
|
|
if(comment_depth > 0) {
|
|
fprintf(GlobalState.logfile, "Missing end of a nested comment.\n");
|
|
report_details(GlobalState.logfile);
|
|
}
|
|
|
|
/* Set up the structure to be returned. */
|
|
comment = (CommentList *) malloc_or_die(sizeof (*comment));
|
|
comment->comment = current_comment;
|
|
comment->next = NULL;
|
|
yylval.comment = comment;
|
|
|
|
resulting_line.line = line;
|
|
resulting_line.linep = linep;
|
|
resulting_line.token = COMMENT;
|
|
return resulting_line;
|
|
}
|
|
|
|
/* Starting from linep in line, gather up a comment until
|
|
* the END_COMMENT. Skip over the END_COMMENT.
|
|
*/
|
|
static LinePair
|
|
gather_single_line_comment(char *line, unsigned char *linep)
|
|
{
|
|
LinePair resulting_line;
|
|
|
|
if (GlobalState.keep_comments) {
|
|
/* The string list in which the current comment will be gathered. */
|
|
StringList *current_comment = NULL;
|
|
/* The pointer to be returned. */
|
|
CommentList *comment;
|
|
char *comment_str;
|
|
int numchars = strlen(line) - (linep - (unsigned char *) line);
|
|
unsigned const char *str = linep;
|
|
|
|
/* Trim spaces from the end.*/
|
|
int end = numchars - 1;
|
|
while(end >= 0 && str[end] == ' ') {
|
|
end--;
|
|
}
|
|
end++;
|
|
/* Trim spaces from the start. */
|
|
int start = 0;
|
|
while(start < end && str[start] == ' ') {
|
|
start++;
|
|
}
|
|
|
|
/* Allocate space for the result. */
|
|
comment_str = (char *) malloc_or_die(end - start + 1);
|
|
/* NB: Single-line comments are currently converted to multi-line
|
|
* comment format.
|
|
* On the off-chance that one might contain a curly bracket, 'escape'
|
|
* those characters by replacing with square brackets.
|
|
*/
|
|
char *cp = comment_str;
|
|
for(int i = start; i < end; i++) {
|
|
char ch = str[i];
|
|
if(ch == '{') {
|
|
ch = '[';
|
|
}
|
|
else if(ch == '}') {
|
|
ch = ']';
|
|
}
|
|
*cp++ = ch;
|
|
}
|
|
*cp = '\0';
|
|
current_comment = save_string_list_item(current_comment, comment_str);
|
|
|
|
/* Set up the comment structure to be returned. */
|
|
comment = (CommentList *) malloc_or_die(sizeof (*comment));
|
|
comment->comment = current_comment;
|
|
comment->next = NULL;
|
|
yylval.comment = comment;
|
|
resulting_line.token = COMMENT;
|
|
}
|
|
else {
|
|
resulting_line.token = NO_TOKEN;
|
|
}
|
|
|
|
resulting_line.line = next_input_line(yyin);
|
|
resulting_line.linep = (unsigned char *) resulting_line.line;
|
|
return resulting_line;
|
|
}
|
|
|
|
/* Remember that 0 can start 0-1 and 0-0.
|
|
* Remember that 1 can start 1-0 and 1/2.
|
|
*/
|
|
static LinePair
|
|
gather_possible_numeric(char *line, unsigned char *linep, char initial_digit)
|
|
{
|
|
LinePair resulting_line;
|
|
TokenType token = MOVE_NUMBER;
|
|
/* Keep a record of where this token started. */
|
|
const unsigned char *symbol_start = linep - 1;
|
|
|
|
if (initial_digit == '0') {
|
|
/* Could be castling or a result. */
|
|
if (strncmp((const char *) linep, "-1", 2) == 0) {
|
|
token = TERMINATING_RESULT;
|
|
save_string("0-1");
|
|
linep += 2;
|
|
}
|
|
else if (strncmp((const char *) linep, "-0-0", 4) == 0) {
|
|
token = MOVE;
|
|
save_q_castle();
|
|
linep += 4;
|
|
}
|
|
else if (strncmp((const char *) linep, "-0", 2) == 0) {
|
|
token = MOVE;
|
|
save_k_castle();
|
|
linep += 2;
|
|
}
|
|
else {
|
|
/* MOVE_NUMBER */
|
|
}
|
|
}
|
|
else if (initial_digit == '1') {
|
|
if (strncmp((const char *) linep, "-0", 2) == 0) {
|
|
token = TERMINATING_RESULT;
|
|
save_string("1-0");
|
|
linep += 2;
|
|
}
|
|
else if (strncmp((const char *) linep, "/2", 2) == 0) {
|
|
token = TERMINATING_RESULT;
|
|
linep += 2;
|
|
/* Check for the full form. */
|
|
if (strncmp((const char *) linep, "-1/2", 4) == 0) {
|
|
token = TERMINATING_RESULT;
|
|
linep += 4;
|
|
}
|
|
/* Make sure that the full form of the draw result
|
|
* is saved.
|
|
*/
|
|
save_string("1/2-1/2");
|
|
}
|
|
else {
|
|
/* MOVE_NUMBER */
|
|
}
|
|
}
|
|
else {
|
|
/* MOVE_NUMBER */
|
|
}
|
|
if (token == MOVE_NUMBER) {
|
|
/* Gather the remaining digits. */
|
|
while (isdigit((unsigned) *linep)) {
|
|
linep++;
|
|
}
|
|
}
|
|
if (token == MOVE_NUMBER) {
|
|
/* Fill out the fields of yylval. */
|
|
if (extract_yytext(symbol_start, linep)) {
|
|
yylval.move_number = 0;
|
|
(void) sscanf((const char *) yytext, "%u", &yylval.move_number);
|
|
/* Skip any trailing dots. */
|
|
while (*linep == '.') {
|
|
linep++;
|
|
}
|
|
}
|
|
else {
|
|
token = NO_TOKEN;
|
|
}
|
|
}
|
|
else {
|
|
/* TERMINATING_RESULT and MOVE have already been dealt with. */
|
|
}
|
|
resulting_line.line = line;
|
|
resulting_line.linep = linep;
|
|
resulting_line.token = token;
|
|
return resulting_line;
|
|
}
|
|
|
|
/* Look up tag_string in TagList[] and return its _TAG
|
|
* value or -1 if it isn't there.
|
|
* Although the strings are sorted initially, further
|
|
* tags identified in the source files will be appended
|
|
* without further sorting. So we cannot use a binary
|
|
* search on the list.
|
|
*/
|
|
static int
|
|
identify_tag(const char *tag_string)
|
|
{
|
|
unsigned tag_index;
|
|
|
|
for (tag_index = 0; tag_index < tag_list_length; tag_index++) {
|
|
if (strcmp(tag_string, TagList[tag_index]) == 0) {
|
|
return tag_index;
|
|
}
|
|
}
|
|
/* Not found. */
|
|
return -1;
|
|
}
|
|
|
|
/* Starting from linep in line, gather up the tag name.
|
|
* Skip over any preceding white space.
|
|
*/
|
|
LinePair
|
|
gather_tag(char *line, unsigned char *linep)
|
|
{
|
|
LinePair resulting_line;
|
|
char ch;
|
|
unsigned len = 0;
|
|
|
|
do {
|
|
/* Check for end of line while skipping white space. */
|
|
if (*linep == '\0') {
|
|
line = next_input_line(yyin);
|
|
linep = (unsigned char *) line;
|
|
}
|
|
if (line != NULL) {
|
|
while (ChTab[(unsigned) *linep] == WHITESPACE) {
|
|
linep++;
|
|
}
|
|
}
|
|
} while ((line != NULL) && (ChTab[(unsigned) *linep] == '\0'));
|
|
|
|
if (line != NULL) {
|
|
ch = *linep++;
|
|
while (isalpha((unsigned) ch) || isdigit((unsigned) ch) || (ch == '_')) {
|
|
len++;
|
|
ch = *linep++;
|
|
}
|
|
/* The last one wasn't part of the tag. */
|
|
linep--;
|
|
if (len > 0) {
|
|
int tag_item;
|
|
char *tag_string;
|
|
|
|
/* Allocate space for the result. */
|
|
tag_string = (char *) malloc_or_die(len + 1);
|
|
strncpy((char *) tag_string, (const char *) (linep - len), len);
|
|
tag_string[len] = '\0';
|
|
tag_item = identify_tag(tag_string);
|
|
if (tag_item < 0) {
|
|
tag_item = make_new_tag(tag_string);
|
|
}
|
|
if (tag_item >= 0 && ((unsigned) tag_item) < tag_list_length) {
|
|
yylval.tag_index = tag_item;
|
|
resulting_line.token = TAG;
|
|
(void) free((void *) tag_string);
|
|
}
|
|
else {
|
|
fprintf(GlobalState.logfile,
|
|
"Internal error: invalid tag index %d in gather_tag.\n",
|
|
tag_item);
|
|
exit(1);
|
|
}
|
|
}
|
|
else {
|
|
resulting_line.token = NO_TOKEN;
|
|
}
|
|
}
|
|
else {
|
|
resulting_line.token = NO_TOKEN;
|
|
}
|
|
resulting_line.line = line;
|
|
resulting_line.linep = linep;
|
|
return resulting_line;
|
|
}
|
|
|
|
static Boolean
|
|
extract_yytext(const unsigned char *symbol_start, const unsigned char *linep)
|
|
{ /* Whether the string fitted. */
|
|
Boolean Ok = TRUE;
|
|
long len = linep - symbol_start;
|
|
|
|
if (len < MAX_YYTEXT) {
|
|
strncpy((char *) yytext, (const char *) symbol_start, len);
|
|
yytext[len] = '\0';
|
|
}
|
|
else {
|
|
strncpy((char *) yytext, (const char *) symbol_start, MAX_YYTEXT);
|
|
yytext[MAX_YYTEXT] = '\0';
|
|
if (!GlobalState.skipping_current_game)
|
|
fprintf(GlobalState.logfile, "Symbol %s exceeds length of %u.\n",
|
|
yytext, MAX_YYTEXT);
|
|
Ok = FALSE;
|
|
}
|
|
return Ok;
|
|
}
|
|
|
|
/* Identify the next symbol.
|
|
* Don't take any action on EOF -- leave that to next_token.
|
|
*/
|
|
static TokenType
|
|
get_next_symbol(void)
|
|
{
|
|
static char *line = NULL;
|
|
static unsigned char *linep = NULL;
|
|
/* The token to be returned. */
|
|
TokenType token;
|
|
LinePair resulting_line;
|
|
|
|
do {
|
|
/* Remember where in line the current symbol starts. */
|
|
const unsigned char *symbol_start;
|
|
|
|
/* Clear any remaining symbol. */
|
|
*yytext = '\0';
|
|
if (line == NULL) {
|
|
line = next_input_line(yyin);
|
|
linep = (unsigned char *) line;
|
|
if (line != NULL) {
|
|
token = NO_TOKEN;
|
|
}
|
|
else {
|
|
token = EOF_TOKEN;
|
|
}
|
|
}
|
|
else {
|
|
int next_char = *linep & 0x0ff;
|
|
|
|
/* Remember where we start. */
|
|
symbol_start = linep;
|
|
linep++;
|
|
token = ChTab[next_char];
|
|
|
|
switch (token) {
|
|
case WHITESPACE:
|
|
while (ChTab[(unsigned) *linep] == WHITESPACE)
|
|
linep++;
|
|
token = NO_TOKEN;
|
|
break;
|
|
case TAG_START:
|
|
resulting_line = gather_tag(line, linep);
|
|
/* Pick up where we are now. */
|
|
line = resulting_line.line;
|
|
linep = resulting_line.linep;
|
|
token = resulting_line.token;
|
|
break;
|
|
case TAG_END:
|
|
token = NO_TOKEN;
|
|
break;
|
|
case DOUBLE_QUOTE:
|
|
resulting_line = gather_string(line, linep);
|
|
/* Pick up where we are now. */
|
|
line = resulting_line.line;
|
|
linep = resulting_line.linep;
|
|
token = resulting_line.token;
|
|
break;
|
|
case COMMENT_START:
|
|
resulting_line = gather_comment(line, linep);
|
|
/* Pick up where we are now. */
|
|
line = resulting_line.line;
|
|
linep = resulting_line.linep;
|
|
token = resulting_line.token;
|
|
break;
|
|
case COMMENT_END:
|
|
if (!GlobalState.skipping_current_game) {
|
|
fprintf(GlobalState.logfile, "Unmatched comment end on line %lu.\n", line_number);
|
|
}
|
|
token = NO_TOKEN;
|
|
break;
|
|
case NAG:
|
|
while (isdigit((unsigned) *linep)) {
|
|
linep++;
|
|
}
|
|
if (extract_yytext(symbol_start, linep)) {
|
|
save_string((const char *) yytext);
|
|
}
|
|
else {
|
|
token = NO_TOKEN;
|
|
}
|
|
break;
|
|
case ANNOTATE:
|
|
/* Don't return anything in case of error. */
|
|
token = NO_TOKEN;
|
|
while (ChTab[(unsigned) *linep] == ANNOTATE) {
|
|
linep++;
|
|
}
|
|
if (extract_yytext(symbol_start, linep)) {
|
|
switch (yytext[0]) {
|
|
case '!':
|
|
switch (yytext[1]) {
|
|
case '!':
|
|
save_string("$3");
|
|
break;
|
|
case '?':
|
|
save_string("$5");
|
|
break;
|
|
default:
|
|
save_string("$1");
|
|
break;
|
|
}
|
|
token = NAG;
|
|
break;
|
|
case '?':
|
|
switch (yytext[1]) {
|
|
case '!':
|
|
save_string("$6");
|
|
break;
|
|
case '?':
|
|
save_string("$4");
|
|
break;
|
|
default:
|
|
save_string("$2");
|
|
break;
|
|
}
|
|
token = NAG;
|
|
break;
|
|
}
|
|
}
|
|
break;
|
|
case CHECK_SYMBOL:
|
|
/* Allow ++ */
|
|
while (ChTab[(unsigned) *linep] == CHECK_SYMBOL) {
|
|
linep++;
|
|
}
|
|
break;
|
|
case DOT:
|
|
while (ChTab[(unsigned) *linep] == DOT)
|
|
linep++;
|
|
token = NO_TOKEN;
|
|
break;
|
|
case SEMICOLON:
|
|
resulting_line = gather_single_line_comment(line, linep);
|
|
/* Pick up where we are now. */
|
|
line = resulting_line.line;
|
|
linep = resulting_line.linep;
|
|
token = resulting_line.token;
|
|
break;
|
|
case PERCENT:
|
|
if(symbol_start == (const unsigned char *) line) {
|
|
/* Discard the rest of the line. */
|
|
line = next_input_line(yyin);
|
|
linep = (unsigned char *) line;
|
|
token = NO_TOKEN;
|
|
}
|
|
else {
|
|
/* Prior to v22-02 the position of % was not checked. */
|
|
}
|
|
break;
|
|
case ESCAPE:
|
|
/* @@@ What to do about this? */
|
|
if (*linep != '\0') {
|
|
linep++;
|
|
}
|
|
token = NO_TOKEN;
|
|
break;
|
|
case ALPHA:
|
|
/* Not all ALPHAs are move characters. */
|
|
if (MoveChars[next_char]) {
|
|
/* Scan through the possible move characters. */
|
|
while (MoveChars[*linep & 0x0ff]) {
|
|
linep++;
|
|
}
|
|
if (extract_yytext(symbol_start, linep)) {
|
|
/* Only classify it as a move if it
|
|
* seems to be a complete move.
|
|
*/
|
|
Boolean ok;
|
|
if (move_seems_valid(yytext)) {
|
|
save_move(yytext);
|
|
token = MOVE;
|
|
ok = TRUE;
|
|
}
|
|
else if(next_char == 'e') {
|
|
/* Consider for possible en passant notation. */
|
|
const int num_ep_strings = 2;
|
|
const char *ep[] = { "e.p.", "ep", };
|
|
int epi = 0;
|
|
while(epi < num_ep_strings &&
|
|
strncmp((const char *) symbol_start, ep[epi], strlen(ep[epi])) != 0) {
|
|
epi++;
|
|
}
|
|
if(epi < num_ep_strings) {
|
|
/* Accept. */
|
|
/* PGN has no representation for ep, so just accept without checking. */
|
|
ok = TRUE;
|
|
token = NO_TOKEN;
|
|
linep = ((unsigned char *) symbol_start) + strlen(ep[epi]);
|
|
}
|
|
else {
|
|
ok = FALSE;
|
|
}
|
|
}
|
|
else {
|
|
ok = FALSE;
|
|
}
|
|
if(! ok) {
|
|
if (!GlobalState.skipping_current_game) {
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile,
|
|
"Unknown move text %s.\n", yytext);
|
|
}
|
|
token = NO_TOKEN;
|
|
}
|
|
}
|
|
else {
|
|
token = NO_TOKEN;
|
|
}
|
|
}
|
|
else if (next_char == 'Z' && *linep == '0') {
|
|
linep++;
|
|
save_move((const unsigned char *) NULL_MOVE_STRING);
|
|
token = MOVE;
|
|
}
|
|
else {
|
|
if (!GlobalState.skipping_current_game) {
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile,
|
|
"Unknown character %c (Hex: %x).\n",
|
|
next_char, next_char);
|
|
fprintf(GlobalState.logfile, "%s\n", line);
|
|
unsigned pos = linep - (unsigned char *) line - 1;
|
|
for(unsigned i = 0; i < pos; i++) {
|
|
fputc(' ', GlobalState.logfile);
|
|
}
|
|
fputc('^', GlobalState.logfile);
|
|
fputc('\n', GlobalState.logfile);
|
|
}
|
|
/* Skip any sequence of them. */
|
|
while (ChTab[(unsigned) *linep] == ERROR_TOKEN) {
|
|
linep++;
|
|
}
|
|
}
|
|
break;
|
|
case DIGIT:
|
|
/* Remember that 0 can start 0-1 and 0-0.
|
|
* Remember that 1 can start 1-0 and 1/2.
|
|
*/
|
|
resulting_line = gather_possible_numeric(
|
|
line, linep, next_char);
|
|
/* Pick up where we are now. */
|
|
line = resulting_line.line;
|
|
linep = resulting_line.linep;
|
|
token = resulting_line.token;
|
|
break;
|
|
case EOF_TOKEN:
|
|
break;
|
|
case RAV_START:
|
|
RAV_level++;
|
|
break;
|
|
case RAV_END:
|
|
if (RAV_level > 0) {
|
|
RAV_level--;
|
|
}
|
|
else {
|
|
if (!GlobalState.skipping_current_game) {
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile, "Too many ')' found.\n");
|
|
}
|
|
token = NO_TOKEN;
|
|
}
|
|
break;
|
|
case STAR:
|
|
save_string("*");
|
|
token = TERMINATING_RESULT;
|
|
break;
|
|
case DASH:
|
|
if (ChTab[(unsigned) *linep] == DASH) {
|
|
linep++;
|
|
save_move((const unsigned char *) NULL_MOVE_STRING);
|
|
token = MOVE;
|
|
}
|
|
else {
|
|
fprintf(GlobalState.logfile, "Single '-' not allowed.\n");
|
|
print_error_context(GlobalState.logfile);
|
|
token = NO_TOKEN;
|
|
}
|
|
break;
|
|
case SLASH:
|
|
/* Possible /ep annotation. */
|
|
if(linep[0] == 'e' && linep[1] == 'p') {
|
|
/* PGN has no representation for ep, so just accept without checking. */
|
|
linep += 2;
|
|
token = NO_TOKEN;
|
|
}
|
|
else {
|
|
token = NO_TOKEN;
|
|
if (!GlobalState.skipping_current_game) {
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile,
|
|
"Single '/' not allowed.");
|
|
}
|
|
}
|
|
break;
|
|
case EOS:
|
|
/* End of the string. */
|
|
line = next_input_line(yyin);
|
|
linep = (unsigned char *) line;
|
|
token = NO_TOKEN;
|
|
break;
|
|
case ERROR_TOKEN:
|
|
if (!GlobalState.skipping_current_game) {
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile,
|
|
"Unknown character %c (Hex: %x).\n",
|
|
next_char, next_char);
|
|
}
|
|
/* Skip any sequence of them. */
|
|
while (ChTab[(unsigned) *linep] == ERROR_TOKEN) {
|
|
linep++;
|
|
}
|
|
break;
|
|
case OPERATOR:
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile,
|
|
"Operator in illegal context: %c.\n", *symbol_start);
|
|
/* Skip any sequence of them. */
|
|
while (ChTab[(unsigned) *linep] == OPERATOR)
|
|
linep++;
|
|
token = NO_TOKEN;
|
|
break;
|
|
default:
|
|
if (!GlobalState.skipping_current_game) {
|
|
print_error_context(GlobalState.logfile);
|
|
fprintf(GlobalState.logfile,
|
|
"Internal error: Missing case for %d on char %x.\n",
|
|
token, next_char);
|
|
}
|
|
token = NO_TOKEN;
|
|
break;
|
|
}
|
|
}
|
|
} while (token == NO_TOKEN);
|
|
return token;
|
|
}
|
|
|
|
TokenType
|
|
next_token(void)
|
|
{
|
|
TokenType token = get_next_symbol();
|
|
|
|
/* Don't call yywrap if parsing the ECO file. */
|
|
while ((token == EOF_TOKEN) && !GlobalState.parsing_ECO_file &&
|
|
!yywrap()) {
|
|
token = get_next_symbol();
|
|
}
|
|
return token;
|
|
}
|
|
|
|
/* Return TRUE if token is one to skip when looking for
|
|
* the start or end of a game.
|
|
*/
|
|
static Boolean
|
|
skip_token(TokenType token)
|
|
{
|
|
switch (token) {
|
|
case TERMINATING_RESULT:
|
|
case TAG:
|
|
case MOVE:
|
|
case EOF_TOKEN:
|
|
return FALSE;
|
|
default:
|
|
return TRUE;
|
|
}
|
|
}
|
|
|
|
/* Skip tokens until the next game looks like it is
|
|
* about to start. This is signalled by
|
|
* a tag section a terminating result from the
|
|
* previous game, or a move.
|
|
*/
|
|
TokenType
|
|
skip_to_next_game(TokenType token)
|
|
{
|
|
if (skip_token(token)) {
|
|
GlobalState.skipping_current_game = TRUE;
|
|
do {
|
|
if (token == COMMENT) {
|
|
/* Free the space. */
|
|
if ((yylval.comment != NULL) &&
|
|
(yylval.comment->comment != NULL)) {
|
|
free_string_list(yylval.comment->comment);
|
|
free((void *) yylval.comment);
|
|
yylval.comment = NULL;
|
|
}
|
|
}
|
|
token = next_token();
|
|
} while (skip_token(token));
|
|
GlobalState.skipping_current_game = FALSE;
|
|
}
|
|
return token;
|
|
}
|
|
|
|
/* Save a queen-side castling move in its standard form. */
static void
save_q_castle(void)
{
    static const unsigned char queen_side[] = "O-O-O";

    save_move(queen_side);
}
|
|
|
|
/* Save a king-side castling move in its standard form. */
static void
save_k_castle(void)
{
    static const unsigned char king_side[] = "O-O";

    save_move(king_side);
}
|
|
|
|
/* Make a copy of the matched text of the move. */
|
|
static void
|
|
save_move(const unsigned char *move)
|
|
{
|
|
if(strlen((char *) move) > MAX_MOVE_LEN) {
|
|
fprintf(stderr, "Internal error: cannot handle %s (too long)\n", move);
|
|
exit(1);
|
|
}
|
|
/* Decode the move into its components. */
|
|
yylval.move_details = decode_move(move);
|
|
/* Remember the last move. */
|
|
strcpy((char *) last_move, (const char *) move);
|
|
}
|
|
|
|
void
|
|
restart_lex_for_new_game(void)
|
|
{
|
|
*last_move = '\0';
|
|
RAV_level = 0;
|
|
}
|
|
|
|
/* Make it possible to read multiple input files.
|
|
* These are held in list_of_files. The list
|
|
* is built up from the program's arguments.
|
|
*/
|
|
static int current_file_num = 0;
|
|
/* Keep track of the list of PGN files. These will either be the
|
|
* remaining arguments once flags have been dealt with, or
|
|
* those read from -c and -f arguments.
|
|
*/
|
|
static FILE_LIST list_of_files = {
|
|
(const char **) NULL,
|
|
(SourceFileType *) NULL,
|
|
0, 0
|
|
};
|
|
|
|
/* Return the index number of the current input file in list_of_files. */
|
|
unsigned
|
|
current_file_number(void)
|
|
{
|
|
return current_file_num;
|
|
}
|
|
|
|
/* Buffer I/O because it does seem to make a difference to the
|
|
* processing speed of games.
|
|
*/
|
|
|
|
/* It doesn't appear to be necessary for this to be
|
|
* particularly big to make a significant difference
|
|
* to I/O efficiency.
|
|
*/
|
|
#define INPUT_BUFFER_LEN 500

/* Position of the next unread character in input_buffer. */
static size_t input_buffer_index = 0;
/* Number of valid characters currently held in input_buffer. */
static size_t input_buffer_limit = 0;
static char input_buffer[INPUT_BUFFER_LEN];

/* Fill the input buffer to its limit, if possible.
 * Afterwards input_buffer_limit is the number of characters read
 * (zero at end of file or on a read error) and the read position
 * is back at the start of the buffer.
 */
static void fill_input_buffer(FILE *fpin)
{
    if (!feof(fpin)) {
        input_buffer_limit = fread(input_buffer, sizeof(*input_buffer),
                                   INPUT_BUFFER_LEN, fpin);
    }
    else {
        input_buffer_limit = 0;
    }
    input_buffer_index = 0;
}

/* Return the next input character, as an int to
 * support EOF.
 * In the manner of fgetc(), the character is returned as an
 * unsigned char converted to int, so that a byte such as 0xFF
 * can never be mistaken for EOF on platforms where plain char
 * is signed.
 */
static int get_next_char(FILE *fpin)
{
    if (input_buffer_index == input_buffer_limit) {
        /* Everything buffered has been consumed. */
        fill_input_buffer(fpin);
    }

    if (input_buffer_index != input_buffer_limit) {
        return (unsigned char) input_buffer[input_buffer_index++];
    }
    else {
        return EOF;
    }
}
|
|
|
|
/* Unget the previous input character. */
|
|
static void unget_char(int c, FILE *fpin)
|
|
{
|
|
if(input_buffer_index > 0) {
|
|
if(c != EOF) {
|
|
input_buffer_index--;
|
|
}
|
|
}
|
|
else {
|
|
fprintf(GlobalState.logfile, "Internal error: unget_char(%c)\n", c);
|
|
report_details(GlobalState.logfile);
|
|
exit(1);
|
|
}
|
|
}
|
|
|
|
/* Read a single line of input from fpin.
 * The result is a freshly allocated, NUL-terminated string that
 * does not include the line terminator, or NULL if there is no
 * more input. A line is ended by '\n', '\r', "\r\n" or the end
 * of the input.
 * The storage starts at INIT_LINE_LENGTH characters and grows by
 * LINE_INCREMENT characters whenever it is full.
 */
#define INIT_LINE_LENGTH 100
#define LINE_INCREMENT 100

char *read_line(FILE *fpin)
{
    char *text;
    unsigned used = 0;
    unsigned capacity = INIT_LINE_LENGTH;
    int c = get_next_char(fpin);

    if (c == EOF) {
        /* Nothing left to read. */
        return NULL;
    }

    /* There is always one extra byte for the terminating NUL. */
    text = (char *) malloc_or_die(INIT_LINE_LENGTH + 1);
    for (; (c != '\n') && (c != '\r') && (c != EOF); c = get_next_char(fpin)) {
        if (used == capacity) {
            /* Make room for another character. */
            text = (char *) realloc_or_die((void *) text,
                                           capacity + LINE_INCREMENT + 1);
            if (text == NULL) {
                return NULL;
            }
            capacity += LINE_INCREMENT;
        }
        text[used++] = c;
    }
    text[used] = '\0';

    if (c == '\r') {
        /* Try to avoid double counting lines in dos-format files:
         * swallow a '\n' that follows, but give back anything else.
         */
        c = get_next_char(fpin);
        if (c != '\n' && c != EOF) {
            unget_char(c, fpin);
        }
    }
    return text;
}
|
|
|
|
/* Read a list of lines from fp. These are the names of files
|
|
* to be added to the existing list_of_files.
|
|
* list_of_files.list must have a (char *)NULL on the end.
|
|
*/
|
|
void
|
|
add_filename_list_from_file(FILE *fp, SourceFileType file_type)
|
|
{
|
|
if ((list_of_files.files == NULL) || (list_of_files.max_files == 0)) {
|
|
/* Allocate an initial number of pointers for the lines.
|
|
* This must always include an extra one for terminating NULL.
|
|
*/
|
|
list_of_files.files = (const char **) malloc_or_die((INIT_LIST_SPACE + 1) *
|
|
sizeof (const char *));
|
|
list_of_files.file_type = (SourceFileType *) malloc_or_die((INIT_LIST_SPACE + 1) *
|
|
sizeof (SourceFileType));
|
|
list_of_files.max_files = INIT_LIST_SPACE;
|
|
list_of_files.num_files = 0;
|
|
}
|
|
if (list_of_files.files != NULL) {
|
|
/* Find the first line. */
|
|
char *line = read_line(fp);
|
|
|
|
while (line != NULL) {
|
|
if (non_blank_line(line)) {
|
|
add_filename_to_source_list(line, file_type);
|
|
}
|
|
else {
|
|
(void) free((void *) line);
|
|
}
|
|
line = read_line(fp);
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
add_filename_to_source_list(const char *filename, SourceFileType file_type)
|
|
{ /* Where to put it. */
|
|
unsigned location = list_of_files.num_files;
|
|
|
|
if (access(filename, R_OK) != 0) {
|
|
fprintf(GlobalState.logfile, "Unable to find %s\n", filename);
|
|
exit(1);
|
|
}
|
|
else {
|
|
/* Ok. */
|
|
}
|
|
/* See if there is room. */
|
|
if (list_of_files.num_files == list_of_files.max_files) {
|
|
/* There isn't, so increase the amount of available space,
|
|
* ensuring that there is always an extra slot for the terminating
|
|
* NULL.
|
|
*/
|
|
if ((list_of_files.files == NULL) || (list_of_files.max_files == 0)) {
|
|
/* Allocate an initial number of pointers for the lines.
|
|
* This must always include an extra one for terminating NULL.
|
|
*/
|
|
list_of_files.files = (const char **) malloc_or_die((INIT_LIST_SPACE + 1) *
|
|
sizeof (const char *));
|
|
list_of_files.file_type = (SourceFileType *)
|
|
malloc_or_die((INIT_LIST_SPACE + 1) *
|
|
sizeof (SourceFileType));
|
|
list_of_files.max_files = INIT_LIST_SPACE;
|
|
list_of_files.num_files = 0;
|
|
}
|
|
else {
|
|
list_of_files.files = (const char **) realloc_or_die((void *) list_of_files.files,
|
|
(list_of_files.max_files + MORE_LIST_SPACE + 1) *
|
|
sizeof (const char *));
|
|
list_of_files.file_type = (SourceFileType *)
|
|
realloc_or_die((void *) list_of_files.file_type,
|
|
(list_of_files.max_files + MORE_LIST_SPACE + 1) *
|
|
sizeof (SourceFileType));
|
|
list_of_files.max_files += MORE_LIST_SPACE;
|
|
if ((list_of_files.files == NULL) && (list_of_files.file_type == NULL)) {
|
|
perror("");
|
|
abort();
|
|
}
|
|
}
|
|
}
|
|
/* We know that there is space. Ensure that CHECKFILEs are all
|
|
* stored before NORMALFILEs.
|
|
*/
|
|
if (file_type == CHECKFILE) {
|
|
|
|
for (location = 0; (location < list_of_files.num_files) &&
|
|
(list_of_files.file_type[location] == CHECKFILE); location++) {
|
|
/* Do nothing. */
|
|
}
|
|
if (location < list_of_files.num_files) {
|
|
/* Put the new one here.
|
|
* Move the rest down.
|
|
*/
|
|
unsigned j;
|
|
|
|
for (j = list_of_files.num_files; j > location; j--) {
|
|
list_of_files.files[j] = list_of_files.files[j - 1];
|
|
list_of_files.file_type[j] = list_of_files.file_type[j - 1];
|
|
}
|
|
}
|
|
}
|
|
list_of_files.files[location] = copy_string(filename);
|
|
list_of_files.file_type[location] = file_type;
|
|
list_of_files.num_files++;
|
|
/* Keep the list properly terminated. */
|
|
list_of_files.files[list_of_files.num_files] = (char *) NULL;
|
|
}
|
|
|
|
/* Use infile as the input source. */
|
|
static Boolean
|
|
open_input(const char *infile)
|
|
{
|
|
yyin = fopen(infile, "rb");
|
|
if (yyin != NULL) {
|
|
GlobalState.current_input_file = infile;
|
|
if (GlobalState.verbosity > 1) {
|
|
fprintf(GlobalState.logfile, "Processing %s\n",
|
|
GlobalState.current_input_file);
|
|
}
|
|
}
|
|
return yyin != NULL;
|
|
}
|
|
|
|
/* Simple interface to open_input for the ECO file. */
|
|
Boolean
|
|
open_eco_file(const char *eco_file)
|
|
{
|
|
return open_input(eco_file);
|
|
}
|
|
|
|
/* Open the input file whose number is the argument. */
|
|
static Boolean
|
|
open_input_file(int file_number)
|
|
{
|
|
/* Depending on the type of file, ensure that the
|
|
* current_file_type is set correctly.
|
|
*/
|
|
if (open_input(list_of_files.files[file_number])) {
|
|
GlobalState.current_file_type = list_of_files.file_type[file_number];
|
|
return TRUE;
|
|
}
|
|
else {
|
|
return FALSE;
|
|
}
|
|
}
|
|
|
|
/* Open the first input file. */
|
|
Boolean
|
|
open_first_file(void)
|
|
{
|
|
Boolean ok = TRUE;
|
|
|
|
if (list_of_files.num_files == 0) {
|
|
/* Use standard input. */
|
|
yyin = stdin;
|
|
GlobalState.current_input_file = "stdin";
|
|
/* @@@ Should this be set?
|
|
GlobalState.current_file_type = NORMALFILE;
|
|
*/
|
|
if (GlobalState.verbosity > 1) {
|
|
fprintf(GlobalState.logfile, "Processing %s\n",
|
|
GlobalState.current_input_file);
|
|
}
|
|
}
|
|
else if (open_input_file(0)) {
|
|
}
|
|
else {
|
|
fprintf(GlobalState.logfile,
|
|
"Unable to open the PGN file: %s\n", input_file_name(0));
|
|
ok = FALSE;
|
|
}
|
|
return ok;
|
|
}
|
|
|
|
/* Return the name of the file corresponding to the given
|
|
* file number.
|
|
*/
|
|
const char *
|
|
input_file_name(unsigned file_number)
|
|
{
|
|
if (file_number >= list_of_files.num_files) {
|
|
return NULL;
|
|
}
|
|
else {
|
|
return list_of_files.files[file_number];
|
|
}
|
|
}
|
|
|
|
/* Give some error information. */
|
|
void
|
|
print_error_context(FILE *fp)
|
|
{
|
|
if (GlobalState.current_input_file != NULL) {
|
|
fprintf(fp, "File %s: ", GlobalState.current_input_file);
|
|
}
|
|
fprintf(fp, "Line number: %lu\n", line_number);
|
|
}
|
|
|
|
/* Make the given str accessible. */
|
|
static void
|
|
save_string(const char *str)
|
|
{
|
|
const size_t len = strlen(str);
|
|
char *token;
|
|
|
|
token = (char *) malloc_or_die(len + 1);
|
|
strcpy(token, str);
|
|
yylval.token_string = token;
|
|
}
|
|
|
|
/* Return the next line of input from fp. */
|
|
char *
|
|
next_input_line(FILE *fp)
|
|
{ /* Retain each line in turn, so as to be able to free it. */
|
|
static char *line = NULL;
|
|
|
|
if (line != NULL) {
|
|
(void) free((void *) line);
|
|
}
|
|
|
|
line = read_line(fp);
|
|
|
|
if (line != NULL) {
|
|
line_number++;
|
|
}
|
|
return line;
|
|
}
|
|
|
|
/* Handle the end of a file. */
|
|
int
|
|
yywrap(void)
|
|
{
|
|
int time_to_exit;
|
|
|
|
/* Beware of this being called in inappropriate circumstances. */
|
|
if (list_of_files.files == NULL) {
|
|
/* There are no files. */
|
|
time_to_exit = 1;
|
|
}
|
|
else if (input_file_name(current_file_num) == NULL) {
|
|
/* There was no last file! */
|
|
time_to_exit = 1;
|
|
}
|
|
else {
|
|
/* Close the input files. */
|
|
terminate_input();
|
|
/* See if there is another. */
|
|
current_file_num++;
|
|
if (input_file_name(current_file_num) == NULL) {
|
|
/* We have processed the last file. */
|
|
time_to_exit = 1;
|
|
}
|
|
else if (!open_input_file(current_file_num)) {
|
|
fprintf(GlobalState.logfile, "Unable to open the PGN file: %s\n",
|
|
input_file_name(current_file_num));
|
|
time_to_exit = 1;
|
|
}
|
|
else {
|
|
/* Ok, we opened it. */
|
|
time_to_exit = 0;
|
|
/* Set everything up for a new file. */
|
|
/* Depending on the type of file, ensure that the
|
|
* current_file_type is set correctly.
|
|
*/
|
|
GlobalState.current_file_type =
|
|
list_of_files.file_type[current_file_num];
|
|
restart_lex_for_new_game();
|
|
games_in_file = 0;
|
|
reset_line_number();
|
|
}
|
|
}
|
|
return time_to_exit;
|
|
}
|
|
|
|
/* Return the current line number. */
|
|
unsigned long
|
|
get_line_number(void)
|
|
{
|
|
return line_number;
|
|
}
|
|
|
|
/* Reset the file's line number. */
|
|
void
|
|
reset_line_number(void)
|
|
{
|
|
line_number = 0;
|
|
}
|
|
|
|
static void
|
|
terminate_input(void)
|
|
{
|
|
if ((yyin != stdin) && (yyin != NULL)) {
|
|
(void) fclose(yyin);
|
|
yyin = NULL;
|
|
}
|
|
}
|
|
|