diff --git a/composer.json b/composer.json
index 4f488fb..6ab0404 100644
--- a/composer.json
+++ b/composer.json
@@ -5,10 +5,14 @@
"autoload": {
"psr-4": {
"Typo3Update\\": "src/Standards/Typo3Update/"
- }
+ },
+ "files": [
+ "src/CodeSniffer/Tokenizers/TypoScript.php"
+ ]
},
"require": {
- "squizlabs/php_codesniffer": "2.8.*"
+ "squizlabs/php_codesniffer": "2.8.*",
+ "helmich/typo3-typoscript-parser": "1.0.*"
},
"license": "GPL-2.0+",
"authors": [
diff --git a/src/CodeSniffer/Tokenizers/TypoScript.php b/src/CodeSniffer/Tokenizers/TypoScript.php
new file mode 100644
index 0000000..e40a795
--- /dev/null
+++ b/src/CodeSniffer/Tokenizers/TypoScript.php
@@ -0,0 +1,179 @@
+
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+use Helmich\TypoScriptParser\Tokenizer\TokenInterface;
+use Helmich\TypoScriptParser\Tokenizer\Tokenizer;
+
+/**
+ * Tokenizes a string of TypoScript.
+ */
+class PHP_CodeSniffer_Tokenizers_TYPOSCRIPT
+{
+ /**
+ * If TRUE, files that appear to be minified will not be processed.
+ *
+ * @var boolean
+ */
+ public $skipMinified = false;
+
+ /**
+ * Creates an array of tokens when given some TypoScript code.
+ *
+ * @param string $string The string to tokenize.
+ * @param string $eolChar The EOL character to use for splitting strings.
+ *
+ * @return array
+ */
+ public function tokenizeString($string, $eolChar='\n')
+ {
+ if (PHP_CODESNIFFER_VERBOSITY > 1) {
+ echo "\t*** START TypoScript TOKENIZING ***" . PHP_EOL;
+ }
+
+ $tokenizer = new Tokenizer();
+ $tokens = $tokenizer->tokenizeString($string);
+ $finalTokens = [];
+ $numTokens = count($tokens);
+
+ /**
+ * Each token within the stack contains information about itself:
+ *
+ *
+ * array(
+ * 'code' => 301, // the token type code (see token_get_all())
+ * 'content' => 'if', // the token content
+ * 'type' => 'T_IF', // the token name
+ * 'line' => 56, // the line number when the token is located
+ * 'column' => 12, // the column in the line where this token
+ * // starts (starts from 1)
+ * 'level' => 2 // the depth a token is within the scopes open
+ * 'conditions' => array( // a list of scope condition token
+ * // positions => codes that
+ * 2 => 50, // opened the scopes that this token exists
+ * 9 => 353, // in (see conditional tokens section below)
+ * ),
+ * );
+ *
+ *
+ * Conditional Tokens
+ *
+ * In addition to the standard token fields, conditions contain information to
+ * determine where their scope begins and ends:
+ *
+ *
+ * array(
+ * 'scope_condition' => 38, // the token position of the condition
+ * 'scope_opener' => 41, // the token position that started the scope
+ * 'scope_closer' => 70, // the token position that ended the scope
+ * );
+ *
+ *
+ * The condition, the scope opener and the scope closer each contain this
+ * information.
+ *
+ * Parenthesis Tokens
+ *
+ * Each parenthesis token (T_OPEN_PARENTHESIS and T_CLOSE_PARENTHESIS) has a
+ * reference to their opening and closing parenthesis, one being itself, the
+ * other being its opposite.
+ *
+ *
+ * array(
+ * 'parenthesis_opener' => 34,
+ * 'parenthesis_closer' => 40,
+ * );
+ *
+ *
+ * Some tokens can "own" a set of parenthesis. For example a T_FUNCTION token
+ * has parenthesis around its argument list. These tokens also have the
+ * parenthesis_opener and and parenthesis_closer indices. Not all parenthesis
+ * have owners, for example parenthesis used for arithmetic operations and
+ * function calls. The parenthesis tokens that have an owner have the following
+ * auxiliary array indices.
+ *
+ *
+ * array(
+ * 'parenthesis_opener' => 34,
+ * 'parenthesis_closer' => 40,
+ * 'parenthesis_owner' => 33,
+ * );
+ *
+ *
+ * Each token within a set of parenthesis also has an array index
+ * 'nested_parenthesis' which is an array of the
+ * left parenthesis => right parenthesis token positions.
+ *
+ *
+ * 'nested_parenthesis' => array(
+ * 12 => 15
+ * 11 => 14
+ * );
+ *
+ */
+
+ $level = 0;
+ for ($stackPtr = 0; $stackPtr < $numTokens; $stackPtr++) {
+ $token = $tokens[$stackPtr];
+ $finalTokens[$stackPtr] = [
+ 'code' => $this->mapTypeToCode($token),
+ 'type' => $token->getType(),
+ 'line' => $token->getLine(),
+ 'column' => 0,
+ 'content' => $token->getValue(),
+ 'level' => $level,
+ ];
+ }
+
+ return $finalTokens;
+ }
+
+ /**
+ * Allow the tokenizer to do additional processing if required.
+ *
+ * @param array $tokens The array of tokens to process.
+ * @param string $eolChar The EOL character to use for splitting strings.
+ *
+ * @return void
+ */
+ public function processAdditional(&$tokens, $eolChar)
+ {
+ return;
+ }
+
+ /**
+ * Returns mapped PHP code equivalent for token.
+ *
+ * @param Token $token
+ * @return int
+ */
+ protected function mapTypeToCode(TokenInterface $token)
+ {
+ $tokenType = $token->getType();
+ $mapping = [
+ TokenInterface::TYPE_COMMENT_ONELINE => T_COMMENT,
+ ];
+
+ if (!isset($mapping[$tokenType])) {
+ // TODO: Throw exception?!
+ }
+ return $mapping[$tokenType];
+ }
+}