WIP|FEATURE: Try to add tokenizer
* First try to add tokenizer to code sniffer Relates: #38
This commit is contained in:
parent
6eeeb50ecb
commit
125c4280d6
2 changed files with 185 additions and 2 deletions
|
@ -5,10 +5,14 @@
|
||||||
"autoload": {
|
"autoload": {
|
||||||
"psr-4": {
|
"psr-4": {
|
||||||
"Typo3Update\\": "src/Standards/Typo3Update/"
|
"Typo3Update\\": "src/Standards/Typo3Update/"
|
||||||
}
|
},
|
||||||
|
"files": [
|
||||||
|
"src/CodeSniffer/Tokenizers/TypoScript.php"
|
||||||
|
]
|
||||||
},
|
},
|
||||||
"require": {
|
"require": {
|
||||||
"squizlabs/php_codesniffer": "2.8.*"
|
"squizlabs/php_codesniffer": "2.8.*",
|
||||||
|
"helmich/typo3-typoscript-parser": "1.0.*"
|
||||||
},
|
},
|
||||||
"license": "GPL-2.0+",
|
"license": "GPL-2.0+",
|
||||||
"authors": [
|
"authors": [
|
||||||
|
|
179
src/CodeSniffer/Tokenizers/TypoScript.php
Normal file
179
src/CodeSniffer/Tokenizers/TypoScript.php
Normal file
|
@ -0,0 +1,179 @@
|
||||||
|
<?php
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2017 Daniel Siepmann <coding@daniel-siepmann.de>
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or
|
||||||
|
* modify it under the terms of the GNU General Public License
|
||||||
|
* as published by the Free Software Foundation; either version 2
|
||||||
|
* of the License, or (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
|
||||||
|
* 02110-1301, USA.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use Helmich\TypoScriptParser\Tokenizer\TokenInterface;
|
||||||
|
use Helmich\TypoScriptParser\Tokenizer\Tokenizer;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Tokenizes a string of TypoScript.
|
||||||
|
*/
|
||||||
|
class PHP_CodeSniffer_Tokenizers_TYPOSCRIPT
{
    /**
     * If TRUE, files that appear to be minified will not be processed.
     *
     * @var boolean
     */
    public $skipMinified = false;

    /**
     * Creates an array of tokens when given some TypoScript code.
     *
     * The string is passed to the TypoScript parser's tokenizer; each token it
     * returns is converted into an array with the keys 'code', 'type', 'line',
     * 'column', 'content' and 'level'. 'column' and 'level' are always written
     * as 0 by this method.
     *
     * @param string $string  The string to tokenize.
     * @param string $eolChar The EOL character to use for splitting strings.
     *                        Not used by this implementation.
     *
     * @return array
     */
    public function tokenizeString($string, $eolChar='\n')
    {
        if (PHP_CODESNIFFER_VERBOSITY > 1) {
            echo "\t*** START TypoScript TOKENIZING ***" . PHP_EOL;
        }

        $tokenizer = new Tokenizer();
        $tokens = $tokenizer->tokenizeString($string);

        /**
         * Each token within the stack contains information about itself:
         *
         * <code>
         *   array(
         *    'code'       => 301,       // the token type code (see token_get_all())
         *    'content'    => 'if',      // the token content
         *    'type'       => 'T_IF',    // the token name
         *    'line'       => 56,        // the line number when the token is located
         *    'column'     => 12,        // the column in the line where this token
         *                               // starts (starts from 1)
         *    'level'      => 2,         // the depth a token is within the scopes open
         *    'conditions' => array(     // a list of scope condition token
         *                               // positions => codes that
         *                     2 => 50,  // opened the scopes that this token exists
         *                     9 => 353, // in (see conditional tokens section below)
         *                    ),
         *   );
         * </code>
         *
         * <b>Conditional Tokens</b>
         *
         * In addition to the standard token fields, conditions contain information to
         * determine where their scope begins and ends:
         *
         * <code>
         *   array(
         *    'scope_condition' => 38, // the token position of the condition
         *    'scope_opener'    => 41, // the token position that started the scope
         *    'scope_closer'    => 70, // the token position that ended the scope
         *   );
         * </code>
         *
         * The condition, the scope opener and the scope closer each contain this
         * information.
         *
         * <b>Parenthesis Tokens</b>
         *
         * Each parenthesis token (T_OPEN_PARENTHESIS and T_CLOSE_PARENTHESIS) has a
         * reference to their opening and closing parenthesis, one being itself, the
         * other being its opposite.
         *
         * <code>
         *   array(
         *    'parenthesis_opener' => 34,
         *    'parenthesis_closer' => 40,
         *   );
         * </code>
         *
         * Some tokens can "own" a set of parenthesis. For example a T_FUNCTION token
         * has parenthesis around its argument list. These tokens also have the
         * parenthesis_opener and parenthesis_closer indices. Not all parenthesis
         * have owners, for example parenthesis used for arithmetic operations and
         * function calls. The parenthesis tokens that have an owner have the following
         * auxiliary array indices.
         *
         * <code>
         *   array(
         *    'parenthesis_opener' => 34,
         *    'parenthesis_closer' => 40,
         *    'parenthesis_owner'  => 33,
         *   );
         * </code>
         *
         * Each token within a set of parenthesis also has an array index
         * 'nested_parenthesis' which is an array of the
         * left parenthesis => right parenthesis token positions.
         *
         * <code>
         *   'nested_parenthesis' => array(
         *                             12 => 15,
         *                             11 => 14,
         *                            );
         * </code>
         */

        // Nesting depth is not tracked yet, so every token is reported at level 0.
        $level = 0;
        $finalTokens = [];

        foreach ($tokens as $stackPtr => $token) {
            $finalTokens[$stackPtr] = [
                'code' => $this->mapTypeToCode($token),
                'type' => $token->getType(),
                'line' => $token->getLine(),
                'column' => 0,
                'content' => $token->getValue(),
                'level' => $level,
            ];
        }

        return $finalTokens;
    }

    /**
     * Allow the tokenizer to do additional processing if required.
     *
     * This implementation performs no additional processing and leaves the
     * given tokens untouched.
     *
     * @param array  $tokens  The array of tokens to process.
     * @param string $eolChar The EOL character to use for splitting strings.
     *
     * @return void
     */
    public function processAdditional(&$tokens, $eolChar)
    {
        return;
    }

    /**
     * Returns mapped PHP code equivalent for token.
     *
     * Only one-line comments are mapped so far (to T_COMMENT). For every other
     * token type NULL is returned explicitly, instead of reading a missing
     * array key and triggering an "undefined index" notice/warning.
     *
     * @param TokenInterface $token The TypoScript token to map.
     *
     * @return int|null The PHP token code, or NULL if the type is not mapped.
     */
    protected function mapTypeToCode(TokenInterface $token)
    {
        $tokenType = $token->getType();
        $mapping = [
            TokenInterface::TYPE_COMMENT_ONELINE => T_COMMENT,
        ];

        if (!isset($mapping[$tokenType])) {
            // TODO: Throw exception?! Until that is decided, unmapped types
            // yield NULL, which is the value the missing key evaluated to before.
            return null;
        }

        return $mapping[$tokenType];
    }
}
|
Loading…
Reference in a new issue