From 125c4280d6e8c0d1ca16e9ee630f274d25b798e6 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Thu, 30 Mar 2017 12:00:54 +0200 Subject: [PATCH 1/3] WIP|FEATURE: Try to add tokenizer * First try to add tokenizer to code sniffer Relates: #38 --- composer.json | 8 +- src/CodeSniffer/Tokenizers/TypoScript.php | 179 ++++++++++++++++++++++ 2 files changed, 185 insertions(+), 2 deletions(-) create mode 100644 src/CodeSniffer/Tokenizers/TypoScript.php diff --git a/composer.json b/composer.json index 4f488fb..6ab0404 100644 --- a/composer.json +++ b/composer.json @@ -5,10 +5,14 @@ "autoload": { "psr-4": { "Typo3Update\\": "src/Standards/Typo3Update/" - } + }, + "files": [ + "src/CodeSniffer/Tokenizers/TypoScript.php" + ] }, "require": { - "squizlabs/php_codesniffer": "2.8.*" + "squizlabs/php_codesniffer": "2.8.*", + "helmich/typo3-typoscript-parser": "1.0.*" }, "license": "GPL-2.0+", "authors": [ diff --git a/src/CodeSniffer/Tokenizers/TypoScript.php b/src/CodeSniffer/Tokenizers/TypoScript.php new file mode 100644 index 0000000..e40a795 --- /dev/null +++ b/src/CodeSniffer/Tokenizers/TypoScript.php @@ -0,0 +1,179 @@ + + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version 2 + * of the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA + * 02110-1301, USA. + */ + +use Helmich\TypoScriptParser\Tokenizer\TokenInterface; +use Helmich\TypoScriptParser\Tokenizer\Tokenizer; + +/** + * Tokenizes a string of TypoScript. + */ +class PHP_CodeSniffer_Tokenizers_TYPOSCRIPT +{ + /** + * If TRUE, files that appear to be minified will not be processed. + * + * @var boolean + */ + public $skipMinified = false; + + /** + * Creates an array of tokens when given some TypoScript code. + * + * @param string $string The string to tokenize. + * @param string $eolChar The EOL character to use for splitting strings. + * + * @return array + */ + public function tokenizeString($string, $eolChar='\n') + { + if (PHP_CODESNIFFER_VERBOSITY > 1) { + echo "\t*** START TypoScript TOKENIZING ***" . PHP_EOL; + } + + $tokenizer = new Tokenizer(); + $tokens = $tokenizer->tokenizeString($string); + $finalTokens = []; + $numTokens = count($tokens); + + /** + * Each token within the stack contains information about itself: + * + * + * array( + * 'code' => 301, // the token type code (see token_get_all()) + * 'content' => 'if', // the token content + * 'type' => 'T_IF', // the token name + * 'line' => 56, // the line number when the token is located + * 'column' => 12, // the column in the line where this token + * // starts (starts from 1) + * 'level' => 2 // the depth a token is within the scopes open + * 'conditions' => array( // a list of scope condition token + * // positions => codes that + * 2 => 50, // opened the scopes that this token exists + * 9 => 353, // in (see conditional tokens section below) + * ), + * ); + * + * + * Conditional Tokens + * + * In addition to the standard token fields, conditions contain information to + * determine where their scope begins and ends: + * + * + * array( + * 'scope_condition' => 38, // the token position of the condition + * 'scope_opener' => 41, // the token position that started the scope + * 'scope_closer' => 70, // the token position that ended the scope + * ); + * + * + * The condition, the scope opener and the scope closer each contain this + * information. + * + * Parenthesis Tokens + * + * Each parenthesis token (T_OPEN_PARENTHESIS and T_CLOSE_PARENTHESIS) has a + * reference to their opening and closing parenthesis, one being itself, the + * other being its opposite. + * + * + * array( + * 'parenthesis_opener' => 34, + * 'parenthesis_closer' => 40, + * ); + * + * + * Some tokens can "own" a set of parenthesis. For example a T_FUNCTION token + * has parenthesis around its argument list. These tokens also have the + * parenthesis_opener and and parenthesis_closer indices. Not all parenthesis + * have owners, for example parenthesis used for arithmetic operations and + * function calls. The parenthesis tokens that have an owner have the following + * auxiliary array indices. + * + * + * array( + * 'parenthesis_opener' => 34, + * 'parenthesis_closer' => 40, + * 'parenthesis_owner' => 33, + * ); + * + * + * Each token within a set of parenthesis also has an array index + * 'nested_parenthesis' which is an array of the + * left parenthesis => right parenthesis token positions. + * + * + * 'nested_parenthesis' => array( + * 12 => 15 + * 11 => 14 + * ); + * + */ + + $level = 0; + for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) { + $token = $tokens[$stackPtr]; + $finalTokens[$stackPtr] = [ + 'code' => $this->mapTypeToCode($token), + 'type' => $token->getType(), + 'line' => $token->getLine(), + 'column' => 0, + 'content' => $token->getValue(), + 'level' => $level, + ]; + } + + return $finalTokens; + } + + /** + * Allow the tokenizer to do additional processing if required. + * + * @param array $tokens The array of tokens to process. + * @param string $eolChar The EOL character to use for splitting strings. + * + * @return void + */ + public function processAdditional(&$tokens, $eolChar) + { + return; + } + + /** + * Returns mapped PHP code equivalent for token. + * + * @param Token $token + * @return int + */ + protected function mapTypeToCode(TokenInterface $token) + { + $tokenType = $token->getType(); + $mapping = [ + TokenInterface::TYPE_COMMENT_ONELINE => T_COMMENT, + ]; + + if (!isset($mapping[$tokenType])) { + // TODO: Throw exception?! + } + return $mapping[$tokenType]; + } +} From 55bdb39fa5e563498d5aa34e349051303d0bd603 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Tue, 4 Apr 2017 13:31:39 +0200 Subject: [PATCH 2/3] WIP|FEATURE: Integrate TypoScript * Provide tokens for TypoScript. * Provide first basic example of a working sniff. Relates: #38 --- composer.json | 8 +- src/CodeSniffer/Tokenizers/TypoScript.php | 113 +----------------- .../TypoScript/ViewConfigurationSniff.php | 55 +++++++++ 3 files changed, 66 insertions(+), 110 deletions(-) create mode 100644 src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php diff --git a/composer.json b/composer.json index 6ab0404..aa052de 100644 --- a/composer.json +++ b/composer.json @@ -10,9 +10,15 @@ "src/CodeSniffer/Tokenizers/TypoScript.php" ] }, + "repositories": [ + { + "type": "vcs", + "url": "https://github.com/DanielSiepmann/typo3-typoscript-parser" + } + ], "require": { "squizlabs/php_codesniffer": "2.8.*", - "helmich/typo3-typoscript-parser": "1.0.*" + "helmich/typo3-typoscript-parser": "dev-feature/allow-eol-handling" }, "license": "GPL-2.0+", "authors": [ diff --git a/src/CodeSniffer/Tokenizers/TypoScript.php b/src/CodeSniffer/Tokenizers/TypoScript.php index e40a795..98e2520 100644 --- a/src/CodeSniffer/Tokenizers/TypoScript.php +++ b/src/CodeSniffer/Tokenizers/TypoScript.php @@ -42,103 +42,17 @@ class PHP_CodeSniffer_Tokenizers_TYPOSCRIPT * * @return array */ - public function tokenizeString($string, $eolChar='\n') + public function tokenizeString($string, $eolChar = '\n') { - if (PHP_CODESNIFFER_VERBOSITY > 1) { - echo "\t*** START TypoScript TOKENIZING ***" . PHP_EOL; - } - - $tokenizer = new Tokenizer(); - $tokens = $tokenizer->tokenizeString($string); $finalTokens = []; - $numTokens = count($tokens); + $tokenizer = new Tokenizer($eolChar, false); - /** - * Each token within the stack contains information about itself: - * - * - * array( - * 'code' => 301, // the token type code (see token_get_all()) - * 'content' => 'if', // the token content - * 'type' => 'T_IF', // the token name - * 'line' => 56, // the line number when the token is located - * 'column' => 12, // the column in the line where this token - * // starts (starts from 1) - * 'level' => 2 // the depth a token is within the scopes open - * 'conditions' => array( // a list of scope condition token - * // positions => codes that - * 2 => 50, // opened the scopes that this token exists - * 9 => 353, // in (see conditional tokens section below) - * ), - * ); - * - * - * Conditional Tokens - * - * In addition to the standard token fields, conditions contain information to - * determine where their scope begins and ends: - * - * - * array( - * 'scope_condition' => 38, // the token position of the condition - * 'scope_opener' => 41, // the token position that started the scope - * 'scope_closer' => 70, // the token position that ended the scope - * ); - * - * - * The condition, the scope opener and the scope closer each contain this - * information. - * - * Parenthesis Tokens - * - * Each parenthesis token (T_OPEN_PARENTHESIS and T_CLOSE_PARENTHESIS) has a - * reference to their opening and closing parenthesis, one being itself, the - * other being its opposite. - * - * - * array( - * 'parenthesis_opener' => 34, - * 'parenthesis_closer' => 40, - * ); - * - * - * Some tokens can "own" a set of parenthesis. For example a T_FUNCTION token - * has parenthesis around its argument list. These tokens also have the - * parenthesis_opener and and parenthesis_closer indices. Not all parenthesis - * have owners, for example parenthesis used for arithmetic operations and - * function calls. The parenthesis tokens that have an owner have the following - * auxiliary array indices. - * - * - * array( - * 'parenthesis_opener' => 34, - * 'parenthesis_closer' => 40, - * 'parenthesis_owner' => 33, - * ); - * - * - * Each token within a set of parenthesis also has an array index - * 'nested_parenthesis' which is an array of the - * left parenthesis => right parenthesis token positions. - * - * - * 'nested_parenthesis' => array( - * 12 => 15 - * 11 => 14 - * ); - * - */ - - $level = 0; - for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) { - $token = $tokens[$stackPtr]; + foreach ($tokenizer->tokenizeString($string) as $stackPtr => $token) { $finalTokens[$stackPtr] = [ - 'code' => $this->mapTypeToCode($token), + 'code' => $token->getType(), 'type' => $token->getType(), 'line' => $token->getLine(), - 'column' => 0, 'content' => $token->getValue(), - 'level' => $level, ]; } @@ -157,23 +71,4 @@ class PHP_CodeSniffer_Tokenizers_TYPOSCRIPT { return; } - - /** - * Returns mapped PHP code equivalent for token. - * - * @param Token $token - * @return int - */ - protected function mapTypeToCode(TokenInterface $token) - { - $tokenType = $token->getType(); - $mapping = [ - TokenInterface::TYPE_COMMENT_ONELINE => T_COMMENT, - ]; - - if (!isset($mapping[$tokenType])) { - // TODO: Throw exception?! - } - return $mapping[$tokenType]; - } } diff --git a/src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php b/src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php new file mode 100644 index 0000000..e101fa4 --- /dev/null +++ b/src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php @@ -0,0 +1,55 @@ + + */ + public function register() + { + return [ + TokenInterface::TYPE_OBJECT_IDENTIFIER, + ]; + } + + /** + * Processes the tokens that this sniff is interested in. + * + * This is the default implementation, as most of the time next T_STRING is + * the class name. This way only the register method has to be registered + * in default cases. + * + * @param PhpCsFile $phpcsFile The file where the token was found. + * @param int $stackPtr The position in the stack where + * the token was found. + * + * @return void + */ + public function process(PhpCsFile $phpcsFile, $stackPtr) + { + $tokens = $phpcsFile->getTokens(); + $token = $tokens[$stackPtr]; + + if ($token['content'] === 'layoutRootPath') { + $phpcsFile->addWarning( + 'Do not use %s anymore, use %s instead.', + $stackPtr, + 'legacy', + [ + $token['content'], + 'layoutRootPaths' + ] + ); + } + } +} From 300e3511be942740bb80efc47c226a030a8ff557 Mon Sep 17 00:00:00 2001 From: Daniel Siepmann Date: Tue, 4 Apr 2017 14:45:40 +0200 Subject: [PATCH 3/3] TASK: Remove example sniff * As this is just an example and we want to have a clean state. * Also cleanup imported interface which is not in use anymore. --- src/CodeSniffer/Tokenizers/TypoScript.php | 1 - .../TypoScript/ViewConfigurationSniff.php | 55 ------------------- 2 files changed, 56 deletions(-) delete mode 100644 src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php diff --git a/src/CodeSniffer/Tokenizers/TypoScript.php b/src/CodeSniffer/Tokenizers/TypoScript.php index 98e2520..a6b8670 100644 --- a/src/CodeSniffer/Tokenizers/TypoScript.php +++ b/src/CodeSniffer/Tokenizers/TypoScript.php @@ -19,7 +19,6 @@ * 02110-1301, USA. */ -use Helmich\TypoScriptParser\Tokenizer\TokenInterface; use Helmich\TypoScriptParser\Tokenizer\Tokenizer; /** diff --git a/src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php b/src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php deleted file mode 100644 index e101fa4..0000000 --- a/src/Standards/Typo3Update/Sniffs/TypoScript/ViewConfigurationSniff.php +++ /dev/null @@ -1,55 +0,0 @@ - - */ - public function register() - { - return [ - TokenInterface::TYPE_OBJECT_IDENTIFIER, - ]; - } - - /** - * Processes the tokens that this sniff is interested in. - * - * This is the default implementation, as most of the time next T_STRING is - * the class name. This way only the register method has to be registered - * in default cases. - * - * @param PhpCsFile $phpcsFile The file where the token was found. - * @param int $stackPtr The position in the stack where - * the token was found. - * - * @return void - */ - public function process(PhpCsFile $phpcsFile, $stackPtr) - { - $tokens = $phpcsFile->getTokens(); - $token = $tokens[$stackPtr]; - - if ($token['content'] === 'layoutRootPath') { - $phpcsFile->addWarning( - 'Do not use %s anymore, use %s instead.', - $stackPtr, - 'legacy', - [ - $token['content'], - 'layoutRootPaths' - ] - ); - } - } -}