WIP|FEATURE: Try to add tokenizer

* First attempt at adding a TypoScript tokenizer to the code sniffer. Relates: #38
2017-03-30 12:00:54 +02:00 · 2017-03-30 12:00:54 +02:00 · 125c4280d6
commit 125c4280d6
parent 6eeeb50ecb
2 changed files with 185 additions and 2 deletions
--- a/composer.json
+++ b/composer.json
@ -5,10 +5,14 @@
    "autoload": {
        "psr-4": {
            "Typo3Update\\": "src/Standards/Typo3Update/"
-        }
+        },
+        "files": [
+            "src/CodeSniffer/Tokenizers/TypoScript.php"
+        ]
    },
    "require": {
-        "squizlabs/php_codesniffer": "2.8.*"
+        "squizlabs/php_codesniffer": "2.8.*",
+        "helmich/typo3-typoscript-parser": "1.0.*"
    },
    "license": "GPL-2.0+",
    "authors": [
--- a/src/CodeSniffer/Tokenizers/TypoScript.php
+++ b/src/CodeSniffer/Tokenizers/TypoScript.php
@ -0,0 +1,179 @@
+<?php
+
+/*
+ * Copyright (C) 2017  Daniel Siepmann <coding@daniel-siepmann.de>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; either version 2
+ * of the License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ * 02110-1301, USA.
+ */
+
+use Helmich\TypoScriptParser\Tokenizer\TokenInterface;
+use Helmich\TypoScriptParser\Tokenizer\Tokenizer;
+
+/**
+ * Tokenizes a string of TypoScript for PHP_CodeSniffer.
+ *
+ * Lexing is delegated to the TypoScript parser's Tokenizer; each token it
+ * returns is converted into the array structure built in tokenizeString().
+ */
+class PHP_CodeSniffer_Tokenizers_TYPOSCRIPT
+{
+    /**
+     * If TRUE, files that appear to be minified will not be processed.
+     *
+     * @var boolean
+     */
+    public $skipMinified = false;
+
+    /**
+     * Creates an array of tokens when given some TypoScript code.
+     *
+     * Work in progress: only 'code', 'type', 'line', 'column', 'content' and
+     * 'level' are filled in, and 'column' and 'level' are always 0 for now.
+     *
+     * @param string $string  The string to tokenize.
+     * @param string $eolChar The EOL character to use for splitting strings.
+     *                        Not used by this implementation; note the default
+     *                        is a literal backslash + "n", not a newline.
+     *
+     * @return array One entry per TypoScript token, indexed from 0.
+     */
+    public function tokenizeString($string, $eolChar='\n')
+    {
+        if (PHP_CODESNIFFER_VERBOSITY > 1) {
+            echo "\t*** START TypoScript TOKENIZING ***" . PHP_EOL;
+        }
+
+        $tokenizer = new Tokenizer();
+        // Re-index so that token positions are guaranteed to run from 0 upwards.
+        $typoScriptTokens = array_values($tokenizer->tokenizeString($string));
+
+        /**
+         * Reference for the full token format; only part of it is produced below.
+         *
+         * Each token within the stack contains information about itself:
+         *
+         * <code>
+         *   array(
+         *    'code'       => 301,       // the token type code (see token_get_all())
+         *    'content'    => 'if',      // the token content
+         *    'type'       => 'T_IF',    // the token name
+         *    'line'       => 56,        // the line number where the token is located
+         *    'column'     => 12,        // the column in the line where this token
+         *                               // starts (starts from 1)
+         *    'level'      => 2,         // the depth a token is within the scopes open
+         *    'conditions' => array(     // a list of scope condition token
+         *                               // positions => codes that
+         *                     2 => 50,  // opened the scopes that this token exists
+         *                     9 => 353, // in (see conditional tokens section below)
+         *                    ),
+         *   );
+         * </code>
+         *
+         * <b>Conditional Tokens</b>
+         *
+         * Conditions additionally record where their scope begins and ends; the
+         * condition, the scope opener and the scope closer each contain:
+         *
+         * <code>
+         *   array(
+         *    'scope_condition' => 38, // the token position of the condition
+         *    'scope_opener'    => 41, // the token position that started the scope
+         *    'scope_closer'    => 70, // the token position that ended the scope
+         *   );
+         * </code>
+         *
+         * <b>Parenthesis Tokens</b>
+         *
+         * Each parenthesis token (T_OPEN_PARENTHESIS and T_CLOSE_PARENTHESIS)
+         * references both itself and its opposite. Tokens that "own" a set of
+         * parenthesis (e.g. T_FUNCTION around its argument list) carry the same
+         * two indices, and owned parenthesis also get 'parenthesis_owner':
+         *
+         * <code>
+         *   array(
+         *    'parenthesis_opener' => 34,
+         *    'parenthesis_closer' => 40,
+         *    'parenthesis_owner'  => 33, // only if the parenthesis have an owner
+         *   );
+         * </code>
+         *
+         * Tokens inside parenthesis also have 'nested_parenthesis', an array of
+         * left parenthesis => right parenthesis positions, e.g. array(12 => 15).
+         */
+
+        // Nesting depth is not tracked yet, so every token gets level 0.
+        $level = 0;
+        $finalTokens = [];
+        foreach ($typoScriptTokens as $stackPtr => $token) {
+            $finalTokens[$stackPtr] = [
+                'code' => $this->mapTypeToCode($token),
+                'type' => $token->getType(),
+                'line' => $token->getLine(),
+                'column' => 0,
+                'content' => $token->getValue(),
+                'level' => $level,
+            ];
+        }
+
+        return $finalTokens;
+    }
+
+    /**
+     * Allow the tokenizer to do additional processing if required.
+     *
+     * Currently a no-op: the given tokens are left untouched.
+     *
+     * @param array  $tokens  The array of tokens to process.
+     * @param string $eolChar The EOL character to use for splitting strings.
+     *
+     * @return void
+     */
+    public function processAdditional(&$tokens, $eolChar)
+    {
+        return;
+    }
+
+    /**
+     * Returns the PHP token code equivalent for a TypoScript token.
+     *
+     * Only one-line comments are mapped so far. For every other type a notice
+     * naming the type is raised and NULL is returned, so the lookup never
+     * reads an undefined array key.
+     *
+     * @param TokenInterface $token The TypoScript token to map.
+     *
+     * @return int|null The PHP token code, or NULL if the type is unmapped.
+     */
+    protected function mapTypeToCode(TokenInterface $token)
+    {
+        $tokenType = $token->getType();
+        $mapping = [
+            TokenInterface::TYPE_COMMENT_ONELINE => T_COMMENT,
+        ];
+
+        if (isset($mapping[$tokenType])) {
+            return $mapping[$tokenType];
+        }
+
+        // Stay non-fatal while the mapping is incomplete (work in progress).
+        trigger_error(
+            'No PHP token code mapped for TypoScript token type "' . $tokenType . '".',
+            E_USER_NOTICE
+        );
+
+        return null;
+    }
+}