more cleanPgn() cleanup: trim lines, del initial blanks, one space only...
This commit is contained in:
@@ -4,6 +4,7 @@
|
|||||||
class PgnGameParser{
|
class PgnGameParser{
|
||||||
|
|
||||||
private $pgnGame;
|
private $pgnGame;
|
||||||
|
private $moveBuilder;
|
||||||
private $defaultFen = 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1';
|
private $defaultFen = 'rnbqkbnr/pppppppp/8/8/8/8/PPPPPPPP/RNBQKBNR w KQkq - 0 1';
|
||||||
|
|
||||||
private $gameData = array();
|
private $gameData = array();
|
||||||
|
@@ -75,9 +75,21 @@ class PgnParser
|
|||||||
$c = str_replace("0-0-0", "O-O-O", $c);
|
$c = str_replace("0-0-0", "O-O-O", $c);
|
||||||
$c = str_replace("0-0", "O-O", $c);
|
$c = str_replace("0-0", "O-O", $c);
|
||||||
|
|
||||||
|
/* basic trimming:
|
||||||
|
* - remove initial blanks
|
||||||
|
* - keep only one space
|
||||||
|
* - trim lines
|
||||||
|
* - max 2 consecutive '\n'
|
||||||
|
*/
|
||||||
|
$c = preg_replace("/^\s+([^\s])/", "$1", $c);
|
||||||
|
$c = preg_replace("/ +/", " ", $c);
|
||||||
|
$c = preg_replace("/\s*\n\s*/", "\n", $c);
|
||||||
|
$c = preg_replace("/\n{3,}/s", "\n\n", $c);
|
||||||
|
|
||||||
/* replace '[' between '{' and '}' with '//--SB--//'
|
/* replace '[' and ']' between '{' and '}' with '//--SB--//' and '//--SE--//'
|
||||||
*/
|
*/
|
||||||
$c = preg_replace('/(?:\G(?!\A)|\{)[^}[]*\K\[/', "//--SB--//", $c);
|
$c = preg_replace('/(?:\G(?!\A)|\{)[^\}\[]*\K\[/', "//--SB--//", $c);
|
||||||
|
$c = preg_replace('/(?:\G(?!\A)|\{)[^\}\]]*\K\]/', "//--SE--//", $c);
|
||||||
|
|
||||||
/* set one '\n' between tags.
|
/* set one '\n' between tags.
|
||||||
* This is possible because brackets within comments are protected above
|
* This is possible because brackets within comments are protected above
|
||||||
@@ -85,25 +97,15 @@ class PgnParser
|
|||||||
$c = preg_replace('/"\]\s*\[/s', "\"]\n[", $c);
|
$c = preg_replace('/"\]\s*\[/s', "\"]\n[", $c);
|
||||||
|
|
||||||
/* set '\n\n' between tags and movetext section
|
/* set '\n\n' between tags and movetext section
|
||||||
|
* set '\n\n' between non tag line and tag line (between two games)
|
||||||
* This is possible because brackets within comments are protected above
|
* This is possible because brackets within comments are protected above
|
||||||
*/
|
*/
|
||||||
$c = preg_replace('/"\]\s*([\.0-9]|{)/s', "\"]\n\n$1", $c);
|
$c = preg_replace('/"\]\s*([\.0-9]|{)/s', "\"]\n\n$1", $c);
|
||||||
|
$c = preg_replace("/([^\]])\n+\[/", "$1\n\n[", $c);
|
||||||
/* remove space before '['
|
|
||||||
* This is possible because brackets within comments are protected above
|
|
||||||
*/
|
|
||||||
$c = str_replace(" +[", "[", $c);
|
|
||||||
|
|
||||||
/* set '\n\n' between non tag line and tag line (between two games)
|
|
||||||
* This is possible because brackets within comments are protected above
|
|
||||||
*/
|
|
||||||
$c = preg_replace("/([^\]])(\n+)\[/si", "$1\n\n[", $c);
|
|
||||||
|
|
||||||
/* revert brackets within movetext comments */
|
/* revert brackets within movetext comments */
|
||||||
$c = str_replace("//--SB--//", "[", $c);
|
$c = str_replace("//--SB--//", "[", $c);
|
||||||
|
$c = str_replace("//--SE--//", "]", $c);
|
||||||
/* max 2 consecutive '\n' */
|
|
||||||
$c = preg_replace("/\n{3,}/s", "\n\n", $c);
|
|
||||||
|
|
||||||
return $c;
|
return $c;
|
||||||
}
|
}
|
||||||
@@ -124,6 +126,8 @@ class PgnParser
|
|||||||
$content = "\n\n" . $pgn;
|
$content = "\n\n" . $pgn;
|
||||||
$games = preg_split("/\n\n\[/s", $content, -1, PREG_SPLIT_DELIM_CAPTURE);
|
$games = preg_split("/\n\n\[/s", $content, -1, PREG_SPLIT_DELIM_CAPTURE);
|
||||||
|
|
||||||
|
//file_put_contents("/tmp/parsed.pgn", $content);
|
||||||
|
|
||||||
for ($i = 1, $count = count($games); $i < $count; $i++) {
|
for ($i = 1, $count = count($games); $i < $count; $i++) {
|
||||||
$gameContent = trim("[" . $games[$i]);
|
$gameContent = trim("[" . $games[$i]);
|
||||||
if (strlen($gameContent) > 10) {
|
if (strlen($gameContent) > 10) {
|
||||||
|
Reference in New Issue
Block a user