From 4f9a84e9bf3df6c5e1ca1544b1b63fcae85f7f9a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=9A=D0=BE=D1=80?=
 =?UTF-8?q?=D0=BC=D0=B0=D0=BD=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?=
 <kormanowsky@gmail.com>
Date: Thu, 13 Mar 2025 00:09:42 +0300
Subject: [PATCH 1/6] feat: added C++ port of TypeScript PythonLexerBase for
 Python 3.13

---
 python/python3_13/Cpp/PythonLexerBase.cpp | 754 ++++++++++++++++++++++
 python/python3_13/Cpp/PythonLexerBase.h   | 121 ++++
 2 files changed, 875 insertions(+)
 create mode 100644 python/python3_13/Cpp/PythonLexerBase.cpp
 create mode 100644 python/python3_13/Cpp/PythonLexerBase.h
diff --git a/python/python3_13/Cpp/PythonLexerBase.cpp b/python/python3_13/Cpp/PythonLexerBase.cpp
new file mode 100644
index 0000000000..e35d8ee58a
--- /dev/null
+++ b/python/python3_13/Cpp/PythonLexerBase.cpp
@@ -0,0 +1,754 @@
+#include "PythonLexerBase.h"
+
+using namespace antlr4;
+
+// Returns the oldest queued token after letting checkNextToken() refill the queue.
+std::unique_ptr<Token> PythonLexerBase::nextToken() {
+    this->checkNextToken();
+
+    // checkNextToken() queues nothing more once EOF has been queued, so the queue can
+    // be empty here; a null pointer is returned in that case.
+    if (this->pendingTokens.empty()) {
+        return nullptr;
+    }
+    auto oldest = this->pendingTokens.begin();
+    std::unique_ptr<Token> head = std::move(*oldest); // add the queued token to the token stream
+    this->pendingTokens.erase(oldest);
+    return head;
+}
+
+void PythonLexerBase::reset() { // puts the lexer back into its initial state
+    this->init(); // clear the token queue and the indentation / f-string bookkeeping of this class
+    Lexer::reset(); // then run the reset of the Lexer base class
+}
+
+// Creates a token with the given channel, text and type that takes its start/stop index,
+// line and column from `source`.
+std::unique_ptr<Token> PythonLexerBase::cloneToken(
+    const std::unique_ptr<Token> &source,
+    size_t channel,
+    const std::string &text,
+    size_t type
+) {
+    const size_t startIndex = source->getStartIndex();
+    const size_t stopIndex = source->getStopIndex();
+    const size_t lineNumber = source->getLine();
+    const size_t column = source->getCharPositionInLine();
+
+    return this->_factory->create(
+        { this, this->_input }, type, text, channel, startIndex, stopIndex, lineNumber, column
+    );
+}
+
+// Copy of `source` (same type, text and position data) placed on `channel`.
+std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token> &source, size_t channel) {
+    const std::string sameText = source->getText();
+    const size_t sameType = source->getType();
+
+    // delegate to the overload taking (source, channel, text, type)
+    return this->cloneToken(source, channel, sameText, sameType);
+}
+
+// Copy of `source` (same type, channel and position data) carrying `text` instead of its own text.
+std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token> &source, const std::string &text) {
+    const size_t sameChannel = source->getChannel();
+    const size_t sameType = source->getType();
+
+    // delegate to the overload taking (source, channel, text, type)
+    return this->cloneToken(source, sameChannel, text, sameType);
+}
+
+// Unmodified copy of `source`: its type, text, channel and position data are all
+// taken over.
+std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token>& source) {
+    const size_t sameType = source->getType();
+    const std::string sameText = source->getText();
+    const size_t sameChannel = source->getChannel();
+
+    // The position data (start/stop index, line, column) is copied by the
+    // four-argument overload, which builds the token through the same factory call.
+    return this->cloneToken(source, sameChannel, sameText, sameType);
+}
+
+
+// Puts every piece of lexer bookkeeping back to its initial value (used by the constructor and reset()).
+void PythonLexerBase::init() {
+    // indentation tracking; checkNextToken() treats an empty stack as "at the first token"
+    this->indentLengthStack = decltype(this->indentLengthStack)();
+    this->wasSpaceIndentation = false;
+    this->wasTabIndentation = false;
+    this->wasIndentationMixedWithSpacesAndTabs = false;
+    // token queue and the current / following token pair
+    this->pendingTokens.clear();
+    this->previousPendingTokenType = 0;
+    this->lastPendingTokenTypeFromDefaultChannel = 0;
+    this->curToken.reset();
+    this->ffgToken.reset();
+    this->opened = 0; // bracket and f-string state from here on
+    this->paren_or_bracket_openedStack.clear();
+    this->braceExpressionStack.clear();
+    this->prevBraceExpression.clear();
+    this->curLexerMode = 0;
+    this->lexerModeStack.clear();
+}
+
+// Fetches the next token (unless EOF has already been queued) and queues it in
+// pendingTokens together with any synthetic tokens it gives rise to.
+void PythonLexerBase::checkNextToken() {
+    if (this->previousPendingTokenType == Token::EOF) {
+        return;
+    }
+
+    const bool atFirstToken = this->indentLengthStack.empty(); // We're at the first token
+    if (atFirstToken) {
+        this->insertENCODINGtoken();
+    }
+    this->setCurrentAndFollowingTokens();
+    if (atFirstToken) {
+        this->handleStartOfInput();
+    }
+
+    switch (this->curToken->getType()) {
+        case PythonLexer::NEWLINE:
+            this->handleNEWLINEtoken();
+            break;
+        case PythonLexer::COLONEQUAL:
+            this->handleCOLONEQUALtokenInFString();
+            break;
+        case PythonLexer::EOF:
+            this->handleEOFtoken();
+            break;
+        default: // every other token is queued itself, after its type-specific bookkeeping
+            switch (this->curToken->getType()) {
+                case PythonLexer::LPAR:
+                case PythonLexer::LSQB:
+                case PythonLexer::LBRACE:
+                    this->opened++;
+                    break;
+                case PythonLexer::RPAR:
+                case PythonLexer::RSQB:
+                case PythonLexer::RBRACE:
+                    this->opened--;
+                    break;
+                case PythonLexer::FSTRING_MIDDLE: // does not affect the opened field
+                    this->handleFSTRING_MIDDLEtokenWithDoubleBrace();
+                    break;
+                case PythonLexer::ERRORTOKEN:
+                    this->reportLexerError(std::string("token recognition error at: '" + this->curToken->getText() + "'"));
+                    break;
+            }
+            this->addPendingToken(this->curToken);
+            break;
+    }
+
+    this->handleFORMAT_SPECIFICATION_MODE();
+}
+
+// Advances the two-token window: curToken becomes the previous look-ahead token (or a
+// freshly lexed token on the first call) and ffgToken the token following it.
+void PythonLexerBase::setCurrentAndFollowingTokens() {
+    this->curToken = this->ffgToken ? this->cloneToken(this->ffgToken) : PythonLexer::nextToken();
+
+    this->checkCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)!
+
+    if (this->curToken->getType() == PythonLexer::EOF) {
+        // Nothing follows EOF, so the look-ahead mirrors curToken. Cloning ffgToken instead
+        // dereferences a null pointer when the very first token is EOF (empty input).
+        this->ffgToken = this->cloneToken(this->curToken);
+    } else {
+        this->ffgToken = PythonLexer::nextToken();
+    }
+}
+
+// Queues a hidden ENCODING token carrying the source encoding declared in the first two
+// lines of the input (PEP 263), or "utf-8" when no declaration is found.
+void PythonLexerBase::insertENCODINGtoken() { // https://peps.python.org/pep-0263/
+    const std::regex ws_commentPattern("^[ \t\f]*(#.*)?$");
+    const std::regex lineBreakPattern("\r|\n"); // hoisted: built once instead of per line
+    std::string lineBuilder;
+    std::string encodingName;
+    size_t lineCount = 0;
+    auto charStream = this->_input;
+    const size_t size = charStream->size();
+    charStream->seek(0);
+
+    for (size_t i = 0; i < size; i++) {
+        // LA() yields a code point, not text; std::to_string() appended its decimal digits, so
+        // the newline test below never fired. Only ASCII matters for the declaration, hence
+        // every non-ASCII code point is replaced by a '?' placeholder.
+        const size_t codePoint = charStream->LA(i + 1);
+        const char c = codePoint < 0x80 ? static_cast<char>(codePoint) : '?';
+        lineBuilder += c;
+
+        if (c == '\n' || i == size - 1) {
+            const auto line = std::regex_replace(lineBuilder, lineBreakPattern, "");
+            if (std::regex_match(line, ws_commentPattern)) { // blank line or comment
+                encodingName = this->getEncodingName(line);
+                if (encodingName != "") {
+                    break; // encoding found
+                }
+            } else {
+                break; // statement or backslash found (line is not empty, not whitespace(s), not comment)
+            }
+
+            lineCount++;
+            if (lineCount >= 2) {
+                break; // check only the first two lines
+            }
+            lineBuilder = "";
+        }
+    }
+
+    if (encodingName == "") {
+        encodingName = "utf-8"; // default Python source code encoding
+    }
+
+    // The token does not come from the input text: zero position and a column of -1
+    // (which wraps to the largest size_t value).
+    std::unique_ptr<Token> encodingToken = this->_factory->create(
+        {this, this->_input}, PythonLexer::ENCODING, encodingName, Token::HIDDEN_CHANNEL,
+        0, 0, 0, static_cast<size_t>(-1)
+    );
+
+    this->addPendingToken(encodingToken);
+}
+
+// Returns the encoding named by a PEP 263 declaration in commentText, or "" when there is none.
+std::string PythonLexerBase::getEncodingName(const std::string &commentText) { // https://peps.python.org/pep-0263/#defining-the-encoding
+    static const std::regex encodingCommentPattern("^[ \t\f]*#.*?coding[:=][ \t]*([-_.a-zA-Z0-9]+)");
+    std::smatch m;
+
+    // The pattern describes only the beginning of the comment, so it has to be searched for:
+    // regex_match() needs the whole line to match and rejects e.g. "# -*- coding: latin-1 -*-".
+    return std::regex_search(commentText, m, encodingCommentPattern) ? m[1].str() : std::string();
+}
+
+// Prepares the indentation stack and processes every token that precedes the first statement:
+//  - leading NEWLINE tokens are hidden
+//  - tokens from other channels are queued unchanged
+//  - at the first statement a leading INDENT token is inserted if necessary
+void PythonLexerBase::handleStartOfInput() {
+    // initialize the stack with a default 0 indentation length
+    this->indentLengthStack.push(0); // this will never be popped off
+
+    for (; this->curToken->getType() != PythonLexer::EOF; this->setCurrentAndFollowingTokens()) {
+        if (this->curToken->getChannel() != Token::DEFAULT_CHANNEL) {
+            this->addPendingToken(this->curToken); // it can be WS, EXPLICIT_LINE_JOINING or COMMENT token
+            continue;
+        }
+
+        if (this->curToken->getType() != PythonLexer::NEWLINE) { // We're at the first statement
+            this->insertLeadingIndentToken();
+            return; // continue the processing of the current token with checkNextToken()
+        }
+
+        // all the NEWLINE tokens must be ignored before the first statement
+        this->hideAndAddPendingToken(this->curToken);
+    } // continue the processing of the EOF token with checkNextToken()
+}
+
+void PythonLexerBase::insertLeadingIndentToken() {
+    if (this->previousPendingTokenType == PythonLexer::WS) {
+        auto prevToken = std::move(*this->pendingTokens.rbegin()); // WS token
+
+        if (this->getIndentationLength(prevToken->getText()) != 0) { // there is an "indentation" before the first statement
+            std::string errMsg = "first statement indented";
+
+            this->reportLexerError(errMsg);
+
+            // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser    
+            this->createAndAddPendingToken(
+                PythonLexer::INDENT, 
+                Token::DEFAULT_CHANNEL, 
+                PythonLexerBase::ERR_TXT + errMsg, 
+                this->curToken
+            );
+        }
+    }
+}
+
+// Classifies a NEWLINE token: queued as is while the lexer mode stack is not empty, hidden inside
+// brackets or before a blank/comment line, otherwise queued and followed by INDENT/DEDENT handling.
+void PythonLexerBase::handleNEWLINEtoken() {
+    if (!this->lexerModeStack.empty()) {
+        this->addPendingToken(this->curToken);
+        return;
+    }
+    if (this->opened > 0) { // We're in an implicit line joining, ignore the current NEWLINE token
+        this->hideAndAddPendingToken(this->curToken);
+        return;
+    }
+    const auto savedNewline = this->cloneToken(this->curToken); // save the current NEWLINE token
+    const bool wsFollows = this->ffgToken->getType() == PythonLexer::WS;
+    if (wsFollows) {
+        this->setCurrentAndFollowingTokens(); // set the next two tokens
+    }
+    const auto nextType = this->ffgToken->getType();
+    if (nextType == PythonLexer::NEWLINE || nextType == PythonLexer::COMMENT) { // before a blank line or a comment
+        this->hideAndAddPendingToken(savedNewline);
+        if (wsFollows) {
+            this->addPendingToken(this->curToken); // WS token
+        }
+        return;
+    }
+
+    this->addPendingToken(savedNewline);
+    if (!wsFollows) { // We're at a newline followed by a statement (there is no whitespace before the statement)
+        this->insertIndentOrDedentToken(0); // may insert DEDENT token(s)
+        return;
+    }
+    // We're on whitespace(s) followed by a statement
+    const auto indentationLength = nextType == PythonLexer::EOF ? 0 : this->getIndentationLength(this->curToken->getText());
+    if (indentationLength != PythonLexerBase::INVALID_LENGTH) {
+        this->addPendingToken(this->curToken); // WS token
+        this->insertIndentOrDedentToken(indentationLength); // may insert INDENT token or DEDENT token(s)
+    } else {
+        this->reportError("inconsistent use of tabs and spaces in indentation");
+    }
+}
+
+// Compares indentLength with the innermost open indentation level and queues one INDENT
+// token, or one DEDENT token per closed level; ffgToken supplies the position data.
+void PythonLexerBase::insertIndentOrDedentToken(size_t indentLength) {
+    if (indentLength > this->indentLengthStack.top()) { // deeper than before: open a new level
+        this->createAndAddPendingToken(PythonLexer::INDENT, Token::DEFAULT_CHANNEL, this->ffgToken);
+        this->indentLengthStack.push(indentLength);
+        return;
+    }
+
+    // more than 1 DEDENT token may be inserted to the token stream
+    while (indentLength < this->indentLengthStack.top()) {
+        this->indentLengthStack.pop();
+        if (indentLength > this->indentLengthStack.top()) { // the length falls between two open levels
+            this->reportError("inconsistent dedent");
+        } else {
+            this->createAndAddPendingToken(PythonLexer::DEDENT, Token::DEFAULT_CHANNEL, this->ffgToken);
+        }
+    }
+}
+
+void PythonLexerBase::checkCurToken() { // f-string bookkeeping for curToken: lexer modes and brace expression text
+    switch (this->curToken->getType()) {
+        case PythonLexer::FSTRING_START:
+            this->setLexerModeByFSTRING_STARTtoken();
+            return;
+        case PythonLexer::FSTRING_MIDDLE:
+            this->handleFSTRING_MIDDLEtokenWithQuoteAndLBrace(); // affect the opened field
+            if (this->curToken->getType() == PythonLexer::FSTRING_MIDDLE) {
+                return; // curToken was not replaced by the call above
+            }
+            break; // curToken was replaced: let the second switch handle the new token
+        case PythonLexer::FSTRING_END:
+            this->popLexerMode();
+            return;
+        default:
+            if (this->lexerModeStack.empty()) {
+                return; // no lexer mode has been pushed, nothing to track
+            }
+    }
+
+    switch (this->curToken->getType()) { // the following tokens can only come from default mode (after an LBRACE in fstring)
+        case PythonLexer::NEWLINE:
+            // append the current brace expression with the current newline
+            this->appendToBraceExpression(this->curToken->getText());
+            this->curToken = this->cloneToken(this->curToken, Token::HIDDEN_CHANNEL); // hide the NEWLINE
+            break;
+        case PythonLexer::LBRACE:
+            // the outermost brace expression cannot be a dictionary comprehension or a set comprehension
+            this->braceExpressionStack.push_back("{"); // start collecting the text of a new brace expression
+            this->paren_or_bracket_openedStack.push_back(0); // with its own counter of open ( and [
+            this->pushLexerMode(Lexer::DEFAULT_MODE);
+            break;
+        case PythonLexer::LPAR:
+        case PythonLexer::LSQB:
+            // append the current brace expression with a "(" or a "["
+            this->appendToBraceExpression(this->curToken->getText());
+            // https://peps.python.org/pep-0498/#lambdas-inside-expressions
+            this->incrementBraceStack();
+            break;
+        case PythonLexer::RPAR:
+        case PythonLexer::RSQB:
+            // append the current brace expression with a ")" or a "]"
+            this->appendToBraceExpression(this->curToken->getText());
+            this->decrementBraceStack();
+            break;
+        case PythonLexer::COLON:
+        case PythonLexer::COLONEQUAL:
+            // append the current brace expression with a ":" or a ":="
+            this->appendToBraceExpression(this->curToken->getText());
+            this->setLexerModeByCOLONorCOLONEQUALtoken(); // may switch to a format specification mode
+            break;
+        case PythonLexer::RBRACE:
+            this->setLexerModeAfterRBRACEtoken(); // leaves the mode(s) pushed for this brace expression
+            break;
+        default:
+            // append the current brace expression with the current token text
+            this->appendToBraceExpression(this->curToken->getText());
+    }
+}
+
+void PythonLexerBase::appendToBraceExpression(const std::string &text) { // appends text to the innermost brace expression, if one is open
+    if (!this->braceExpressionStack.empty()) { this->braceExpressionStack.back() += text; } // dereferencing rbegin() of an empty vector is undefined behaviour
+}
+
+void PythonLexerBase::incrementBraceStack() { // increment the last element (peek() + 1); no-op while the stack is empty
+    if (!this->paren_or_bracket_openedStack.empty()) { this->paren_or_bracket_openedStack.back()++; } // dereferencing rbegin() of an empty vector is undefined behaviour
+}
+
+void PythonLexerBase::decrementBraceStack() { // decrement the last element (peek() - 1); no-op while the stack is empty
+    if (!this->paren_or_bracket_openedStack.empty()) { this->paren_or_bracket_openedStack.back()--; } // dereferencing rbegin() of an empty vector is undefined behaviour
+}
+
+// Leaves the lexer mode(s) pushed for a brace expression when an RBRACE token is met and
+// closes that brace expression; in any other mode the '}' is reported as an error.
+void PythonLexerBase::setLexerModeAfterRBRACEtoken() {
+    const auto mode = this->curLexerMode;
+    const bool inFormatSpecificationMode =
+        mode == PythonLexer::SQ1__FORMAT_SPECIFICATION_MODE || mode == PythonLexer::SQ1R_FORMAT_SPECIFICATION_MODE ||
+        mode == PythonLexer::DQ1__FORMAT_SPECIFICATION_MODE || mode == PythonLexer::DQ1R_FORMAT_SPECIFICATION_MODE ||
+        mode == PythonLexer::SQ3__FORMAT_SPECIFICATION_MODE || mode == PythonLexer::SQ3R_FORMAT_SPECIFICATION_MODE ||
+        mode == PythonLexer::DQ3__FORMAT_SPECIFICATION_MODE || mode == PythonLexer::DQ3R_FORMAT_SPECIFICATION_MODE;
+
+    if (!inFormatSpecificationMode && mode != Lexer::DEFAULT_MODE) {
+        this->reportLexerError("f-string: single '}' is not allowed");
+        return;
+    }
+
+    // a format specification mode is left first, then the mode below it
+    if (inFormatSpecificationMode) {
+        this->popLexerMode();
+    }
+    this->popLexerMode();
+    this->popByBRACE();
+}
+
+// Pushes the f-string lexer mode that matches the text of the FSTRING_START token (compared
+// case-insensitively); a text that is not in the table leaves the lexer mode untouched.
+void PythonLexerBase::setLexerModeByFSTRING_STARTtoken() {
+    static const std::map<std::string, size_t> modeMap = { // built once instead of on every call
+        {"f'", PythonLexer::SQ1__FSTRING_MODE}, {"f\"", PythonLexer::DQ1__FSTRING_MODE},
+        {"rf'", PythonLexer::SQ1R_FSTRING_MODE}, {"fr'", PythonLexer::SQ1R_FSTRING_MODE},
+        {"rf\"", PythonLexer::DQ1R_FSTRING_MODE}, {"fr\"", PythonLexer::DQ1R_FSTRING_MODE},
+        {"f'''", PythonLexer::SQ3__FSTRING_MODE}, {"f\"\"\"", PythonLexer::DQ3__FSTRING_MODE},
+        {"rf'''", PythonLexer::SQ3R_FSTRING_MODE}, {"fr'''", PythonLexer::SQ3R_FSTRING_MODE},
+        {"rf\"\"\"", PythonLexer::DQ3R_FSTRING_MODE}, {"fr\"\"\"", PythonLexer::DQ3R_FSTRING_MODE},
+    };
+
+    std::string text = this->curToken->getText();
+    // tolower() must not be given a negative char value, hence the unsigned char parameter
+    std::transform(text.cbegin(), text.cend(), text.begin(),
+        [](unsigned char ch) { return static_cast<char>(std::tolower(ch)); });
+
+    const auto found = modeMap.find(text); // single lookup instead of find() followed by operator[]
+    if (found != modeMap.end()) {
+        this->pushLexerMode(found->second);
+    }
+}
+
+
+// At a ':' or ":=" with no open parenthesis/bracket in the current brace expression, pushes the
+// format specification mode that corresponds to the mode on top of lexerModeStack.
+void PythonLexerBase::setLexerModeByCOLONorCOLONEQUALtoken() {
+    if (this->paren_or_bracket_openedStack.back() != 0) { // stack peek != 0
+        return;
+    }
+
+    size_t formatSpecMode = 0;
+    switch (this->lexerModeStack.back()) { // check the previous lexer mode (the current is DEFAULT_MODE)
+        case PythonLexer::SQ1__FSTRING_MODE: case PythonLexer::SQ1__FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::SQ1__FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::SQ1R_FSTRING_MODE: case PythonLexer::SQ1R_FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::SQ1R_FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::DQ1__FSTRING_MODE: case PythonLexer::DQ1__FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::DQ1__FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::DQ1R_FSTRING_MODE: case PythonLexer::DQ1R_FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::DQ1R_FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::SQ3__FSTRING_MODE: case PythonLexer::SQ3__FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::SQ3__FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::SQ3R_FSTRING_MODE: case PythonLexer::SQ3R_FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::SQ3R_FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::DQ3__FSTRING_MODE: case PythonLexer::DQ3__FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::DQ3__FORMAT_SPECIFICATION_MODE;
+            break;
+        case PythonLexer::DQ3R_FSTRING_MODE: case PythonLexer::DQ3R_FORMAT_SPECIFICATION_MODE:
+            formatSpecMode = PythonLexer::DQ3R_FORMAT_SPECIFICATION_MODE;
+            break;
+        default:
+            return; // any other previous mode: nothing to do
+    }
+
+    this->pushLexerMode(formatSpecMode); // continue in format spec. mode
+}
+
+// Closes the innermost brace expression: drops its bracket counter, stores its text (with the
+// closing "}") in prevBraceExpression and appends that text to the enclosing expression, if any.
+void PythonLexerBase::popByBRACE() {
+    this->paren_or_bracket_openedStack.pop_back();
+    this->prevBraceExpression = this->braceExpressionStack.back() + "}";
+    this->braceExpressionStack.pop_back();
+    if (!this->braceExpressionStack.empty()) {
+        this->braceExpressionStack.back() += this->prevBraceExpression;
+    }
+}
+
+// Replace the trailing double brace with a single brace and insert a hidden brace token
+void PythonLexerBase::handleFSTRING_MIDDLEtokenWithDoubleBrace() {
+    const std::string ending = this->getLastTwoCharsOfTheCurTokenText();
+    const bool endsWithLeftBraces = ending == "{{";
+
+    if (endsWithLeftBraces || ending == "}}") {
+        const size_t braceType = endsWithLeftBraces ? PythonLexer::LBRACE : PythonLexer::RBRACE;
+        this->trimLastCharAddPendingTokenSetCurToken(braceType, endsWithLeftBraces ? "{" : "}", Token::HIDDEN_CHANNEL);
+    }
+}
+
+// Replace the trailing quote + left_brace with a quote and insert an LBRACE token
+// Replace the trailing backslash + left_brace with a backslash and insert an LBRACE token
+void PythonLexerBase::handleFSTRING_MIDDLEtokenWithQuoteAndLBrace() {
+    const std::string ending = this->getLastTwoCharsOfTheCurTokenText();
+    const bool needsSplit = ending == "\"{" || ending == "'{" || ending == "\\{";
+    if (needsSplit) {
+        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::LBRACE, "{", Token::DEFAULT_CHANNEL);
+    }
+}
+
+// Returns the last two characters of curToken's text (the whole text if it is shorter).
+std::string PythonLexerBase::getLastTwoCharsOfTheCurTokenText() {
+    // substr() takes no negative offsets: substr(-2) wraps to a huge position and throws std::out_of_range
+    const std::string text = this->curToken->getText();
+    return text.size() > 2 ? text.substr(text.size() - 2) : text;
+}
+
+// Queues curToken without its last character and makes a new one-character token the curToken.
+void PythonLexerBase::trimLastCharAddPendingTokenSetCurToken(size_t number, const std::string &text, size_t channel) {
+    // substr(0, -1) keeps the whole text because the count wraps to npos; compute the length instead
+    const std::string curText = this->curToken->getText();
+    const auto tokenTextWithoutLastChar = curText.substr(0, curText.empty() ? 0 : curText.size() - 1);
+    this->addPendingToken(this->cloneToken(this->curToken, tokenTextWithoutLastChar));
+    // `number` is the token type parameter; the name `type` is not declared in this function
+    this->createNewCurToken(number, text, channel); // Set curToken
+}
+
+// Handles ":=": in an f-string brace expression and outside of parentheses it is not a walrus
+// operator but a ':' (format specifier) followed by a format specification starting with '='.
+void PythonLexerBase::handleCOLONEQUALtokenInFString() {
+    if (
+        this->lexerModeStack.size() > 0 &&
+        *this->paren_or_bracket_openedStack.rbegin() == 0 // stack peek == 0
+    ) {
+        // Replace the COLONEQUAL token with a one-character COLON token. cloneToken() takes
+        // (source, channel, text, type) and no `channel` variable is declared in this function,
+        // so every value is taken from the token itself.
+        const size_t colonIndex = this->curToken->getStartIndex();
+        this->curToken = this->_factory->create(
+            {this, this->_input}, PythonLexer::COLON, ":", this->curToken->getChannel(),
+            colonIndex, colonIndex, this->curToken->getLine(), this->curToken->getCharPositionInLine()
+        );
+        if (this->ffgToken->getType() == PythonLexer::FSTRING_MIDDLE) {
+            // insert the equal symbol to the following FSTRING_MIDDLE token: it starts one
+            // character earlier and keeps its own channel and stop index
+            this->ffgToken = this->_factory->create(
+                {this, this->_input}, this->ffgToken->getType(), "=" + this->ffgToken->getText(),
+                this->ffgToken->getChannel(), this->ffgToken->getStartIndex() - 1, this->ffgToken->getStopIndex(),
+                this->ffgToken->getLine(), this->ffgToken->getCharPositionInLine() - 1
+            );
+        } else {
+            this->addPendingToken(this->curToken);
+            this->createNewCurToken(PythonLexer::FSTRING_MIDDLE, "=", Token::DEFAULT_CHANNEL);
+        }
+    }
+
+    this->addPendingToken(this->curToken);
+}
+
+// Replaces curToken by a new token of the given type, text and channel that is located one
+// character after the start of the old curToken (same line).
+void PythonLexerBase::createNewCurToken(
+    size_t type, 
+    const std::string &text, 
+    size_t channel)
+{
+    // The callers in this file pass a one-character text, so the stop index equals the
+    // advanced start index (it used to lie before it) and the column advances together
+    // with the start index.
+    const size_t newStart = this->curToken->getStartIndex() + 1;
+    this->curToken = this->_factory->create(
+        {this, this->_input}, type, text, channel,
+        newStart, newStart, this->curToken->getLine(), this->curToken->getCharPositionInLine() + 1
+    );
+}
+
+void PythonLexerBase::pushLexerMode(size_t mode) { // enters `mode` and remembers the mode being left
+    this->lexerModeStack.push_back(this->curLexerMode); // remember the mode being left
+    this->curLexerMode = mode;
+    this->pushMode(mode); // switch the lexer itself as well
+}
+
+void PythonLexerBase::popLexerMode() { // leaves the current mode and restores the remembered one
+    this->popMode();
+    this->curLexerMode = this->lexerModeStack.back();
+    this->lexerModeStack.pop_back();
+}
+
+// While the lexer mode stack is not empty and the following token is an RBRACE, queues an
+// empty FSTRING_MIDDLE token instead of the missing format specification.
+void PythonLexerBase::handleFORMAT_SPECIFICATION_MODE() {
+    if (this->lexerModeStack.empty() || this->ffgToken->getType() != PythonLexer::RBRACE) {
+        return;
+    }
+
+    const auto curType = this->curToken->getType();
+    // after an RBRACE: only if the previous brace expression is not a dictionary comprehension or set comprehension
+    const bool specificationMissing =
+        curType == PythonLexer::COLON ||
+        (curType == PythonLexer::RBRACE &&
+         !this->isDictionaryComprehensionOrSetComprehension(this->prevBraceExpression));
+
+    if (specificationMissing) {
+        this->createAndAddPendingToken(PythonLexer::FSTRING_MIDDLE, Token::DEFAULT_CHANNEL, "", this->ffgToken);
+    }
+}
+
+bool PythonLexerBase::isDictionaryComprehensionOrSetComprehension(const std::string &code) { // true if `code` parses without syntax errors as dictcomp or setcomp
+    auto inputStream = std::make_unique<ANTLRInputStream>(code); // separate lexer/parser pipeline over `code` only
+    auto lexer = std::make_unique<PythonLexer>(inputStream.get());
+    auto tokenStream = std::make_unique<CommonTokenStream>(lexer.get());
+    auto parser = std::make_unique<PythonParser>(tokenStream.get());
+
+    // Disable error listeners to suppress console output
+    lexer->removeErrorListeners();
+    parser->removeErrorListeners();
+
+    parser->dictcomp(); // Try parsing as dictionary comprehension
+    if (parser->getNumberOfSyntaxErrors() == 0)
+        return true;
+
+    parser = std::make_unique<PythonParser>(tokenStream.get()); // second attempt with a fresh parser on the same token stream
+
+    tokenStream->seek(0); // rewind the token stream to its first token
+    
+    parser->removeErrorListeners();
+    parser->setcomp(); // Try parsing as set comprehension
+    return parser->getNumberOfSyntaxErrors() == 0;
+}
+
+// Queues a trailing NEWLINE token unless the last default-channel token already was a
+// NEWLINE or DEDENT, then closes every open indentation level.
+void PythonLexerBase::insertTrailingTokens() {
+    const auto lastType = this->lastPendingTokenTypeFromDefaultChannel;
+    const bool statementIsOpen = lastType != PythonLexer::NEWLINE && lastType != PythonLexer::DEDENT;
+
+    if (statementIsOpen) {
+        // insert an extra trailing NEWLINE token that serves as the end of the last statement
+        this->createAndAddPendingToken(PythonLexer::NEWLINE, Token::DEFAULT_CHANNEL, this->ffgToken); // ffgToken is EOF
+    }
+    this->insertIndentOrDedentToken(0); // Now insert as much trailing DEDENT tokens as needed
+}
+
+void PythonLexerBase::handleEOFtoken() { // queues the trailing tokens (if any are needed) and then EOF itself
+    const bool sawStatement = this->lastPendingTokenTypeFromDefaultChannel > 0; // there was a statement in the input (leading NEWLINE tokens are hidden)
+    if (sawStatement) {
+        this->insertTrailingTokens();
+    }
+    this->addPendingToken(this->curToken);
+}
+
+void PythonLexerBase::hideAndAddPendingToken(const std::unique_ptr<Token> &tkn) { // queues a copy of tkn that is placed on the hidden channel
+    this->addPendingToken(this->cloneToken(tkn, Token::HIDDEN_CHANNEL));
+}
+
+// Queues a new token of the given type, channel and text that takes its start index, line
+// and column from sampleToken; its stop index is one less than its start index.
+void PythonLexerBase::createAndAddPendingToken(
+    size_t type, 
+    size_t channel, 
+    const std::string &text, 
+    const std::unique_ptr<Token> &sampleToken
+) {
+    const size_t startIndex = sampleToken->getStartIndex();
+    const size_t stopIndex = startIndex - 1;
+    const size_t lineNumber = sampleToken->getLine();
+    const size_t column = sampleToken->getCharPositionInLine();
+
+    std::unique_ptr<Token> created = this->_factory->create(
+        {this, this->_input}, type, text, channel, startIndex, stopIndex, lineNumber, column
+    );
+
+    this->addPendingToken(created);
+}
+
+// Queues a new token whose text is the display name of its type in angle brackets
+// ("<" + name + ">"); position data is taken from sampleToken.
+void PythonLexerBase::createAndAddPendingToken(
+    size_t type, 
+    size_t channel, 
+    const std::unique_ptr<Token> &sampleToken
+) {
+    // No '$' after the '<': it is not part of the placeholder text (it looks like the
+    // leftover of a "${...}" template literal).
+    const std::string placeholder = "<" + std::string(this->getVocabulary().getDisplayName(type)) + ">";
+    this->createAndAddPendingToken(type, channel, placeholder, sampleToken);
+}
+
+void PythonLexerBase::addPendingToken(const std::unique_ptr<Token> &tkn) { // queues a copy of tkn and remembers its type
+    // save the last pending token type because the pendingTokens list can be empty by the nextToken()
+    const auto tokenType = tkn->getType();
+    this->previousPendingTokenType = tokenType;
+    if (tkn->getChannel() == Token::DEFAULT_CHANNEL) {
+        this->lastPendingTokenTypeFromDefaultChannel = tokenType;
+    }
+    this->pendingTokens.push_back(this->cloneToken(tkn));
+}
+
+// Computes the indentation length of indentText: a space counts 1, a tab advances to the next
+// multiple of 8 and a form feed restarts the count. Returns INVALID_LENGTH the first time
+// both spaces and tabs have been seen by this method.
+size_t PythonLexerBase::getIndentationLength(const std::string &indentText) { // the indentText may contain spaces, tabs or form feeds
+    const size_t TAB_LENGTH = 8; // the standard number of spaces to replace a tab to spaces
+    size_t length = 0;
+
+    for (const char ch : indentText) {
+        if (ch == ' ') {
+            this->wasSpaceIndentation = true;
+            length += 1;
+        } else if (ch == '\t') {
+            this->wasTabIndentation = true;
+            length += TAB_LENGTH - (length % TAB_LENGTH);
+        } else if (ch == '\f') { // form feed
+            length = 0;
+        }
+    }
+
+    // the flags are members, so they also reflect earlier calls
+    const bool mixed = this->wasTabIndentation && this->wasSpaceIndentation;
+    if (mixed && !this->wasIndentationMixedWithSpacesAndTabs) {
+        this->wasIndentationMixedWithSpacesAndTabs = true;
+        length = PythonLexerBase::INVALID_LENGTH; // only for the first inconsistent indent
+    }
+
+    return length;
+}
+
+void PythonLexerBase::reportLexerError(const std::string &errMsg) {
+    this->getErrorListenerDispatch().syntaxError(
+        this, 
+        0 /* this->curToken */, 
+        this->curToken->getLine(), 
+        this->curToken->getCharPositionInLine(), 
+        " LEXER" + PythonLexerBase::ERR_TXT + errMsg, 
+        nullptr
+    );
+}
+
+void PythonLexerBase::reportError(const std::string &errMsg) { // reports errMsg and queues an ERRORTOKEN carrying it
+    this->reportLexerError(errMsg);
+    // the ERRORTOKEN will raise an error in the parser
+    const std::string errorText = PythonLexerBase::ERR_TXT + errMsg;
+    this->createAndAddPendingToken(PythonLexer::ERRORTOKEN, Token::DEFAULT_CHANNEL, errorText, this->ffgToken);
+}
diff --git a/python/python3_13/Cpp/PythonLexerBase.h b/python/python3_13/Cpp/PythonLexerBase.h
new file mode 100644
index 0000000000..24b6337242
--- /dev/null
+++ b/python/python3_13/Cpp/PythonLexerBase.h
@@ -0,0 +1,121 @@
+/*
+The MIT License (MIT)
+Copyright (c) 2021 Robert Einhorn
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
+ */
+
+/*
+ *
+ * Project      : Python Indent/Dedent handler for ANTLR4 grammars
+ *
+ * Developed by : Robert Einhorn, robert.einhorn.hu@gmail.com
+ *
+ */
+
+#pragma once 
+
+#include <stack>
+#include <memory>
+#include <map>
+#include <vector>
+#include <regex>
+#include "antlr4-runtime.h"
+#include "PythonLexer.h"
+#include "PythonParser.h"
+
+using namespace antlr4;
+
+class PythonLexerBase : public PythonLexer {
+public: 
+    explicit PythonLexerBase(CharStream *input): PythonLexer(input) {
+        this->init();
+    }
+    virtual std::unique_ptr<Token> nextToken() override;
+    virtual void reset() override;
+private:
+    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source);
+    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source, size_t channel);
+    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source, const std::string &text);
+    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source, size_t channel, const std::string &text, size_t type);
+    void init();
+    void checkNextToken();
+    void setCurrentAndFollowingTokens();
+    void insertENCODINGtoken();
+    std::string getEncodingName(const std::string &commentText);
+    void handleStartOfInput();
+    void insertLeadingIndentToken();
+    void handleNEWLINEtoken();
+    void insertIndentOrDedentToken(size_t indentLength);
+    void checkCurToken();
+    void appendToBraceExpression(const std::string &text);
+    void incrementBraceStack();
+    void decrementBraceStack();
+    void setLexerModeAfterRBRACEtoken();
+    void setLexerModeByFSTRING_STARTtoken();
+    void setLexerModeByCOLONorCOLONEQUALtoken();
+    void popByBRACE();
+    void handleFSTRING_MIDDLEtokenWithDoubleBrace();
+    void handleFSTRING_MIDDLEtokenWithQuoteAndLBrace();
+    std::string getLastTwoCharsOfTheCurTokenText();
+    void trimLastCharAddPendingTokenSetCurToken(size_t type, const std::string &text, size_t channel);
+    void handleCOLONEQUALtokenInFString();
+    void createNewCurToken(size_t type, const std::string &text, size_t channel);
+    void pushLexerMode(size_t mode);
+    void popLexerMode();
+    void handleFORMAT_SPECIFICATION_MODE();
+    bool isDictionaryComprehensionOrSetComprehension(const std::string &code);
+    void insertTrailingTokens();
+    void handleEOFtoken();
+    void hideAndAddPendingToken(const std::unique_ptr<Token> &token);
+    void createAndAddPendingToken(size_t type, size_t channel, const std::string &text, const std::unique_ptr<Token> &sampleToken);
+    void createAndAddPendingToken(size_t type, size_t channel, const std::unique_ptr<Token> &sampleToken);
+    void addPendingToken(const std::unique_ptr<Token> &token);
+    size_t getIndentationLength(const std::string &identText);
+    void reportLexerError(const std::string &errMsg);
+    void reportError(const std::string &errMsg);
+
+    // A stack that keeps track of the indentation lengths
+    std::stack<size_t> indentLengthStack;
+    // A list where tokens are waiting to be loaded into the token stream
+    std::vector<std::unique_ptr<Token>> pendingTokens;
+    // last pending token types
+    size_t previousPendingTokenType;
+    size_t lastPendingTokenTypeFromDefaultChannel;
+
+    // The amount of opened parentheses, square brackets or curly braces
+    size_t opened;
+    //  The amount of opened parentheses and square brackets in the current lexer mode
+    std::vector<size_t> paren_or_bracket_openedStack;
+    // A stack that stores expression(s) between braces in fstring
+    std::vector<std::string> braceExpressionStack;
+    std::string prevBraceExpression;
+
+    // Instead of this._mode      (_mode is not implemented in each ANTLR4 runtime)
+    size_t curLexerMode;
+    // Instead of this._modeStack (_modeStack is not implemented in each ANTLR4 runtime)
+    std::vector<size_t> lexerModeStack;
+    bool wasSpaceIndentation;
+    bool wasTabIndentation;
+    bool wasIndentationMixedWithSpacesAndTabs;
+
+    std::unique_ptr<Token> curToken; // current (under processing) token
+    std::unique_ptr<Token> ffgToken; // following (look ahead) token
+
+    const ssize_t INVALID_LENGTH = -1;
+    const std::string ERR_TXT = " ERROR: ";
+};
\ No newline at end of file

From 65a23a046b43f853dfbc89cfe73b736ee6968f92 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=9A=D0=BE=D1=80?=
 =?UTF-8?q?=D0=BC=D0=B0=D0=BD=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?=
 <kormanowsky@gmail.com>
Date: Fri, 21 Mar 2025 17:25:47 +0300
Subject: [PATCH 2/6] chore: add Cpp to targets

---
 python/python3_13/desc.xml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/python3_13/desc.xml b/python/python3_13/desc.xml
index 8aa6fdea92..78cc3dad3b 100644
--- a/python/python3_13/desc.xml
+++ b/python/python3_13/desc.xml
@@ -1,9 +1,9 @@
 <?xml version="1.0" encoding="UTF-8" ?>
 <desc xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="../_scripts/desc.xsd">
    <antlr-version>^4.13.2</antlr-version>
-   <targets>CSharp;Java;Python3;JavaScript;TypeScript</targets>
+   <targets>Cpp;CSharp;Java;Python3;JavaScript;TypeScript</targets>
    <test>
-      <targets>CSharp;Java;Python3;JavaScript;TypeScript</targets>
+      <targets>Cpp;CSharp;Java;Python3;JavaScript;TypeScript</targets>
       <entry-point>file_input</entry-point>
       <inputs>examples</inputs>
    </test>

From 47590086beda8213ec02275600a7dfdac118fb64 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=9A=D0=BE=D1=80?=
 =?UTF-8?q?=D0=BC=D0=B0=D0=BD=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?=
 <kormanowsky@gmail.com>
Date: Fri, 21 Mar 2025 18:04:46 +0300
Subject: [PATCH 3/6] chore: make PythonLexerBase depend on antlr4::Lexer and
 remove using stmts

---
 python/python3_13/Cpp/PythonLexerBase.cpp |  54 +++----
 python/python3_13/Cpp/PythonLexerBase.h   | 180 +++++++++++-----------
 2 files changed, 116 insertions(+), 118 deletions(-)

diff --git a/python/python3_13/Cpp/PythonLexerBase.cpp b/python/python3_13/Cpp/PythonLexerBase.cpp
index e35d8ee58a..9fce8af49a 100644
--- a/python/python3_13/Cpp/PythonLexerBase.cpp
+++ b/python/python3_13/Cpp/PythonLexerBase.cpp
@@ -1,12 +1,12 @@
 #include "PythonLexerBase.h"
-
-using namespace antlr4;
+#include "PythonLexer.h"
+#include "PythonParser.h"
 
 // reading the input stream until a return EOF
-std::unique_ptr<Token> PythonLexerBase::nextToken() {
+std::unique_ptr<antlr4::Token> PythonLexerBase::nextToken() {
     this->checkNextToken();
     
-    std::unique_ptr<Token> next;
+    std::unique_ptr<antlr4::Token> next;
 
     if (!this->pendingTokens.empty())
     {
@@ -22,8 +22,8 @@ void PythonLexerBase::reset() {
     Lexer::reset();
 }
 
-std::unique_ptr<Token> PythonLexerBase::cloneToken(
-    const std::unique_ptr<Token> &source, 
+std::unique_ptr<antlr4::Token> PythonLexerBase::cloneToken(
+    const std::unique_ptr<antlr4::Token> &source, 
     size_t channel, 
     const std::string &text, 
     size_t type
@@ -40,7 +40,7 @@ std::unique_ptr<Token> PythonLexerBase::cloneToken(
     );
 }
 
-std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token> &source, size_t channel) {
+std::unique_ptr<antlr4::Token> PythonLexerBase::cloneToken(const std::unique_ptr<antlr4::Token> &source, size_t channel) {
     return this->cloneToken(
         source,
         channel,
@@ -49,7 +49,7 @@ std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token>
     );
 }
 
-std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token> &source, const std::string &text) {
+std::unique_ptr<antlr4::Token> PythonLexerBase::cloneToken(const std::unique_ptr<antlr4::Token> &source, const std::string &text) {
     return this->cloneToken(
         source,
         source->getChannel(),
@@ -58,7 +58,7 @@ std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token>
     );
 }
 
-std::unique_ptr<Token> PythonLexerBase::cloneToken(const std::unique_ptr<Token>& source) {
+std::unique_ptr<antlr4::Token> PythonLexerBase::cloneToken(const std::unique_ptr<antlr4::Token>& source) {
     return this->_factory->create(
         { this, this->_input }, 
         source->getType(), 
@@ -93,7 +93,7 @@ void PythonLexerBase::init() {
 }
 
 void PythonLexerBase::checkNextToken() {
-    if (this->previousPendingTokenType == Token::EOF) {
+    if (this->previousPendingTokenType == antlr4::Token::EOF) {
         return;
     }
 
@@ -154,7 +154,7 @@ void PythonLexerBase::setCurrentAndFollowingTokens() {
     this->checkCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)!
 
     if (this->curToken->getType() == PythonLexer::EOF) {
-        this->ffgToken = this->cloneToken(this->ffgToken);
+        this->ffgToken = this->cloneToken(this->curToken);
     } else {
         this->ffgToken = PythonLexer::nextToken();
     }
@@ -199,11 +199,11 @@ void PythonLexerBase::insertENCODINGtoken() { // https://peps.python.org/pep-026
         encodingName = "utf-8"; // default Python source code encoding
     }
 
-    std::unique_ptr<Token> encodingToken = this->_factory->create(
+    std::unique_ptr<antlr4::Token> encodingToken = this->_factory->create(
         {this, this->_input},
         PythonLexer::ENCODING,
         encodingName,
-        Token::HIDDEN_CHANNEL,
+        antlr4::Token::HIDDEN_CHANNEL,
         0,
         0,
         0,
@@ -232,7 +232,7 @@ void PythonLexerBase::handleStartOfInput() {
     this->indentLengthStack.push(0); // this will never be popped off
 
     while (this->curToken->getType() != PythonLexer::EOF) {
-        if (this->curToken->getChannel() == Token::DEFAULT_CHANNEL) {
+        if (this->curToken->getChannel() == antlr4::Token::DEFAULT_CHANNEL) {
             if (this->curToken->getType() == PythonLexer::NEWLINE) {
                 // all the NEWLINE tokens must be ignored before the first statement
                 this->hideAndAddPendingToken(this->curToken);
@@ -514,7 +514,7 @@ void PythonLexerBase::handleFSTRING_MIDDLEtokenWithQuoteAndLBrace() {
     auto lastTwoChars = this->getLastTwoCharsOfTheCurTokenText();
 
     if (lastTwoChars == "\"{" || lastTwoChars == "'{" || lastTwoChars == "\\{") {
-        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::LBRACE, "{", Token::DEFAULT_CHANNEL);
+        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::LBRACE, "{", antlr4::Token::DEFAULT_CHANNEL);
     }
 }
 
@@ -560,7 +560,7 @@ void PythonLexerBase::handleCOLONEQUALtokenInFString() {
             );
         } else {
             this->addPendingToken(this->curToken);
-            this->createNewCurToken(PythonLexer::FSTRING_MIDDLE, "=", Token::DEFAULT_CHANNEL);
+            this->createNewCurToken(PythonLexer::FSTRING_MIDDLE, "=", antlr4::Token::DEFAULT_CHANNEL);
         }
     }
 
@@ -603,12 +603,12 @@ void PythonLexerBase::handleFORMAT_SPECIFICATION_MODE() {
         // insert an empty FSTRING_MIDDLE token instead of the missing format specification
         switch (this->curToken->getType()) {
             case PythonLexer::COLON:
-                this->createAndAddPendingToken(PythonLexer::FSTRING_MIDDLE, Token::DEFAULT_CHANNEL, "", this->ffgToken);
+                this->createAndAddPendingToken(PythonLexer::FSTRING_MIDDLE, antlr4::Token::DEFAULT_CHANNEL, "", this->ffgToken);
                 break;
             case PythonLexer::RBRACE:
                 // only if the previous brace expression is not a dictionary comprehension or set comprehension
                 if (!this->isDictionaryComprehensionOrSetComprehension(this->prevBraceExpression)) {
-                    this->createAndAddPendingToken(PythonLexer::FSTRING_MIDDLE, Token::DEFAULT_CHANNEL, "", this->ffgToken);
+                    this->createAndAddPendingToken(PythonLexer::FSTRING_MIDDLE, antlr4::Token::DEFAULT_CHANNEL, "", this->ffgToken);
                 }
                 break;
         }
@@ -616,7 +616,7 @@ void PythonLexerBase::handleFORMAT_SPECIFICATION_MODE() {
 }
 
 bool PythonLexerBase::isDictionaryComprehensionOrSetComprehension(const std::string &code) {
-    auto inputStream = std::make_unique<ANTLRInputStream>(code);
+    auto inputStream = std::make_unique<antlr4::ANTLRInputStream>(code);
     auto lexer = std::make_unique<PythonLexer>(inputStream.get());
     auto tokenStream = std::make_unique<CommonTokenStream>(lexer.get());
     auto parser = std::make_unique<PythonParser>(tokenStream.get());
@@ -645,7 +645,7 @@ void PythonLexerBase::insertTrailingTokens() {
             break; // no trailing NEWLINE token is needed
         default:
             // insert an extra trailing NEWLINE token that serves as the end of the last statement
-            this->createAndAddPendingToken(PythonLexer::NEWLINE, Token::DEFAULT_CHANNEL, this->ffgToken); // ffgToken is EOF
+            this->createAndAddPendingToken(PythonLexer::NEWLINE, antlr4::Token::DEFAULT_CHANNEL, this->ffgToken); // ffgToken is EOF
             break;
     }
     this->insertIndentOrDedentToken(0); // Now insert as much trailing DEDENT tokens as needed
@@ -659,15 +659,15 @@ void PythonLexerBase::handleEOFtoken() {
     this->addPendingToken(this->curToken);
 }
 
-void PythonLexerBase::hideAndAddPendingToken(const std::unique_ptr<Token> &tkn) {
-    this->addPendingToken(this->cloneToken(tkn, Token::HIDDEN_CHANNEL));
+void PythonLexerBase::hideAndAddPendingToken(const std::unique_ptr<antlr4::Token> &tkn) {
+    this->addPendingToken(this->cloneToken(tkn, antlr4::Token::HIDDEN_CHANNEL));
 }
 
 void PythonLexerBase::createAndAddPendingToken(
     size_t type, 
     size_t channel, 
     const std::string &text, 
-    const std::unique_ptr<Token> &sampleToken
+    const std::unique_ptr<antlr4::Token> &sampleToken
 ) {
     this->addPendingToken(
         this->_factory->create(
@@ -686,7 +686,7 @@ void PythonLexerBase::createAndAddPendingToken(
 void PythonLexerBase::createAndAddPendingToken(
     size_t type, 
     size_t channel, 
-    const std::unique_ptr<Token> &sampleToken
+    const std::unique_ptr<antlr4::Token> &sampleToken
 ) {
     this->createAndAddPendingToken(
         type, 
@@ -696,10 +696,10 @@ void PythonLexerBase::createAndAddPendingToken(
     );
 }
 
-void PythonLexerBase::addPendingToken(const std::unique_ptr<Token> &tkn) {
+void PythonLexerBase::addPendingToken(const std::unique_ptr<antlr4::Token> &tkn) {
     // save the last pending token type because the pendingTokens list can be empty by the nextToken()
     this->previousPendingTokenType = tkn->getType();
-    if (tkn->getChannel() == Token::DEFAULT_CHANNEL) {
+    if (tkn->getChannel() == antlr4::Token::DEFAULT_CHANNEL) {
         this->lastPendingTokenTypeFromDefaultChannel = this->previousPendingTokenType;
     }
 
@@ -750,5 +750,5 @@ void PythonLexerBase::reportError(const std::string &errMsg) {
     this->reportLexerError(errMsg);
 
     // the ERRORTOKEN will raise an error in the parser
-    this->createAndAddPendingToken(PythonLexer::ERRORTOKEN, Token::DEFAULT_CHANNEL, PythonLexerBase::ERR_TXT + errMsg, this->ffgToken);
+    this->createAndAddPendingToken(PythonLexer::ERRORTOKEN, antlr4::Token::DEFAULT_CHANNEL, PythonLexerBase::ERR_TXT + errMsg, this->ffgToken);
 }
diff --git a/python/python3_13/Cpp/PythonLexerBase.h b/python/python3_13/Cpp/PythonLexerBase.h
index 24b6337242..761cf5b400 100644
--- a/python/python3_13/Cpp/PythonLexerBase.h
+++ b/python/python3_13/Cpp/PythonLexerBase.h
@@ -27,95 +27,93 @@ THE SOFTWARE.
  *
  */
 
-#pragma once 
+ #pragma once 
 
-#include <stack>
-#include <memory>
-#include <map>
-#include <vector>
-#include <regex>
-#include "antlr4-runtime.h"
-#include "PythonLexer.h"
-#include "PythonParser.h"
-
-using namespace antlr4;
-
-class PythonLexerBase : public PythonLexer {
-public: 
-    explicit PythonLexerBase(CharStream *input): PythonLexer(input) {
-        this->init();
-    }
-    virtual std::unique_ptr<Token> nextToken() override;
-    virtual void reset() override;
-private:
-    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source);
-    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source, size_t channel);
-    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source, const std::string &text);
-    std::unique_ptr<Token> cloneToken(const std::unique_ptr<Token> &source, size_t channel, const std::string &text, size_t type);
-    void init();
-    void checkNextToken();
-    void setCurrentAndFollowingTokens();
-    void insertENCODINGtoken();
-    std::string getEncodingName(const std::string &commentText);
-    void handleStartOfInput();
-    void insertLeadingIndentToken();
-    void handleNEWLINEtoken();
-    void insertIndentOrDedentToken(size_t indentLength);
-    void checkCurToken();
-    void appendToBraceExpression(const std::string &text);
-    void incrementBraceStack();
-    void decrementBraceStack();
-    void setLexerModeAfterRBRACEtoken();
-    void setLexerModeByFSTRING_STARTtoken();
-    void setLexerModeByCOLONorCOLONEQUALtoken();
-    void popByBRACE();
-    void handleFSTRING_MIDDLEtokenWithDoubleBrace();
-    void handleFSTRING_MIDDLEtokenWithQuoteAndLBrace();
-    std::string getLastTwoCharsOfTheCurTokenText();
-    void trimLastCharAddPendingTokenSetCurToken(size_t type, const std::string &text, size_t channel);
-    void handleCOLONEQUALtokenInFString();
-    void createNewCurToken(size_t type, const std::string &text, size_t channel);
-    void pushLexerMode(size_t mode);
-    void popLexerMode();
-    void handleFORMAT_SPECIFICATION_MODE();
-    bool isDictionaryComprehensionOrSetComprehension(const std::string &code);
-    void insertTrailingTokens();
-    void handleEOFtoken();
-    void hideAndAddPendingToken(const std::unique_ptr<Token> &token);
-    void createAndAddPendingToken(size_t type, size_t channel, const std::string &text, const std::unique_ptr<Token> &sampleToken);
-    void createAndAddPendingToken(size_t type, size_t channel, const std::unique_ptr<Token> &sampleToken);
-    void addPendingToken(const std::unique_ptr<Token> &token);
-    size_t getIndentationLength(const std::string &identText);
-    void reportLexerError(const std::string &errMsg);
-    void reportError(const std::string &errMsg);
-
-    // A stack that keeps track of the indentation lengths
-    std::stack<size_t> indentLengthStack;
-    // A list where tokens are waiting to be loaded into the token stream
-    std::vector<std::unique_ptr<Token>> pendingTokens;
-    // last pending token types
-    size_t previousPendingTokenType;
-    size_t lastPendingTokenTypeFromDefaultChannel;
-
-    // The amount of opened parentheses, square brackets or curly braces
-    size_t opened;
-    //  The amount of opened parentheses and square brackets in the current lexer mode
-    std::vector<size_t> paren_or_bracket_openedStack;
-    // A stack that stores expression(s) between braces in fstring
-    std::vector<std::string> braceExpressionStack;
-    std::string prevBraceExpression;
-
-    // Instead of this._mode      (_mode is not implemented in each ANTLR4 runtime)
-    size_t curLexerMode;
-    // Instead of this._modeStack (_modeStack is not implemented in each ANTLR4 runtime)
-    std::vector<size_t> lexerModeStack;
-    bool wasSpaceIndentation;
-    bool wasTabIndentation;
-    bool wasIndentationMixedWithSpacesAndTabs;
-
-    std::unique_ptr<Token> curToken; // current (under processing) token
-    std::unique_ptr<Token> ffgToken; // following (look ahead) token
-
-    const ssize_t INVALID_LENGTH = -1;
-    const std::string ERR_TXT = " ERROR: ";
-};
\ No newline at end of file
+ #include <stack>
+ #include <memory>
+ #include <map>
+ #include <vector>
+ #include <regex>
+ 
+ #include "antlr4-runtime.h"
+ 
+ class PythonLexerBase : public antlr4::Lexer { // lexer base that queues tokens (INDENT/DEDENT, f-string handling) before handing them out
+ public:
+     explicit PythonLexerBase(antlr4::CharStream *input): antlr4::Lexer(input) {
+         this->init();
+     }
+     virtual std::unique_ptr<antlr4::Token> nextToken() override; // runs checkNextToken(), then returns the first pending token (null when none is queued)
+     virtual void reset() override; // calls init(), then Lexer::reset()
+ private:
+     std::unique_ptr<antlr4::Token> cloneToken(const std::unique_ptr<antlr4::Token> &source);
+     std::unique_ptr<antlr4::Token> cloneToken(const std::unique_ptr<antlr4::Token> &source, size_t channel);
+     std::unique_ptr<antlr4::Token> cloneToken(const std::unique_ptr<antlr4::Token> &source, const std::string &text);
+     std::unique_ptr<antlr4::Token> cloneToken(const std::unique_ptr<antlr4::Token> &source, size_t channel, const std::string &text, size_t type); // new token from _factory carrying source's start/stop index, line and column
+     void init();
+     void checkNextToken();
+     void setCurrentAndFollowingTokens();
+     void insertENCODINGtoken();
+     std::string getEncodingName(const std::string &commentText);
+     void handleStartOfInput();
+     void insertLeadingIndentToken();
+     void handleNEWLINEtoken();
+     void insertIndentOrDedentToken(size_t indentLength); // queues INDENT or DEDENT tokens by comparing indentLength with the top of indentLengthStack
+     void checkCurToken();
+     void appendToBraceExpression(const std::string &text);
+     void incrementBraceStack();
+     void decrementBraceStack();
+     void setLexerModeAfterRBRACEtoken();
+     void setLexerModeByFSTRING_STARTtoken();
+     void setLexerModeByCOLONorCOLONEQUALtoken();
+     void popByBRACE();
+     void handleFSTRING_MIDDLEtokenWithDoubleBrace();
+     void handleFSTRING_MIDDLEtokenWithQuoteAndLBrace();
+     std::string getLastTwoCharsOfTheCurTokenText();
+     void trimLastCharAddPendingTokenSetCurToken(size_t type, const std::string &text, size_t channel);
+     void handleCOLONEQUALtokenInFString();
+     void createNewCurToken(size_t type, const std::string &text, size_t channel);
+     void pushLexerMode(size_t mode);
+     void popLexerMode();
+     void handleFORMAT_SPECIFICATION_MODE();
+     bool isDictionaryComprehensionOrSetComprehension(const std::string &code); // parses code with a separate PythonLexer/PythonParser pair
+     void insertTrailingTokens();
+     void handleEOFtoken();
+     void hideAndAddPendingToken(const std::unique_ptr<antlr4::Token> &token); // queues a HIDDEN_CHANNEL clone of token
+     void createAndAddPendingToken(size_t type, size_t channel, const std::string &text, const std::unique_ptr<antlr4::Token> &sampleToken);
+     void createAndAddPendingToken(size_t type, size_t channel, const std::unique_ptr<antlr4::Token> &sampleToken);
+     void addPendingToken(const std::unique_ptr<antlr4::Token> &token); // remembers the token's type, then queues a clone of it
+     size_t getIndentationLength(const std::string &indentText); // indent width; INVALID_LENGTH on the first space/tab mix
+     void reportLexerError(const std::string &errMsg); // sends the message to the error listener dispatch
+     void reportError(const std::string &errMsg); // reportLexerError() plus a pending ERRORTOKEN
+ 
+     // A stack that keeps track of the indentation lengths
+     std::stack<size_t> indentLengthStack;
+     // A list of tokens waiting to be handed out by nextToken()
+     std::vector<std::unique_ptr<antlr4::Token>> pendingTokens;
+     // Types of the most recently queued pending tokens
+     size_t previousPendingTokenType;
+     size_t lastPendingTokenTypeFromDefaultChannel;
+ 
+     // The number of open parentheses, square brackets or curly braces
+     size_t opened;
+     // The number of open parentheses and square brackets in the current lexer mode
+     std::vector<size_t> paren_or_bracket_openedStack;
+     // A stack that stores the expression(s) between braces in an f-string
+     std::vector<std::string> braceExpressionStack;
+     std::string prevBraceExpression;
+ 
+     // Used instead of the runtime's _mode (not every ANTLR4 runtime implements _mode)
+     size_t curLexerMode;
+     // Used instead of the runtime's _modeStack (not every ANTLR4 runtime implements _modeStack)
+     std::vector<size_t> lexerModeStack;
+     bool wasSpaceIndentation; // set when getIndentationLength() sees a space
+     bool wasTabIndentation; // set when getIndentationLength() sees a tab
+     bool wasIndentationMixedWithSpacesAndTabs; // set when the first space/tab mix is reported
+ 
+     std::unique_ptr<antlr4::Token> curToken; // current (under processing) token
+     std::unique_ptr<antlr4::Token> ffgToken; // following (look ahead) token
+ 
+     const ssize_t INVALID_LENGTH = -1;
+     const std::string ERR_TXT = " ERROR: ";
+ };
+ 
\ No newline at end of file

From 3bd33de17c7ecdc5724467babc84788133f0267a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=9A=D0=BE=D1=80?=
 =?UTF-8?q?=D0=BC=D0=B0=D0=BD=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?=
 <kormanowsky@gmail.com>
Date: Fri, 21 Mar 2025 18:06:34 +0300
Subject: [PATCH 4/6] chore: added placeholder for C++ header

---
 python/python3_13/PythonLexer.g4 | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/python/python3_13/PythonLexer.g4 b/python/python3_13/PythonLexer.g4
index 98b99d4aef..da15a6a9e8 100644
--- a/python/python3_13/PythonLexer.g4
+++ b/python/python3_13/PythonLexer.g4
@@ -32,6 +32,8 @@ lexer grammar PythonLexer;
 
 options { superClass=PythonLexerBase; }
 
+// Insert here @header for C++ lexer.
+
 tokens {
     ENCODING // https://docs.python.org/3.13/reference/lexical_analysis.html#encoding-declarations
   , INDENT, DEDENT // https://docs.python.org/3.13/reference/lexical_analysis.html#indentation

From 8ff7691db33319c872616414dac62820eafa0025 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=9A=D0=BE=D1=80?=
 =?UTF-8?q?=D0=BC=D0=B0=D0=BD=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?=
 <kormanowsky@gmail.com>
Date: Fri, 21 Mar 2025 18:07:43 +0300
Subject: [PATCH 5/6] chore: added transformGrammar.py

---
 python/python3_13/Cpp/transformGrammar.py | 32 +++++++++++++++++++++++
 1 file changed, 32 insertions(+)
 create mode 100644 python/python3_13/Cpp/transformGrammar.py

diff --git a/python/python3_13/Cpp/transformGrammar.py b/python/python3_13/Cpp/transformGrammar.py
new file mode 100644
index 0000000000..7b2f208260
--- /dev/null
+++ b/python/python3_13/Cpp/transformGrammar.py
@@ -0,0 +1,32 @@
+import sys, os, re, shutil
+from glob import glob
+from pathlib import Path
+
+def main(argv):  # argv is accepted but not used
+    for file in glob("./*.g4"):  # every .g4 grammar in the current working directory
+        fix(file)
+
+def fix(file_path):
+    """Rewrite one grammar file in place for the C++ target, keeping a ".bak" copy.
+
+    Replaces the lexer @header placeholder comment with an include of
+    PythonLexerBase.h and turns every "this." into "this->".
+    Exits the process with status 1 when file_path does not exist.
+    """
+    print("Altering " + file_path)
+    if not os.path.exists(file_path):
+        print(f"Could not find file: {file_path}")
+        sys.exit(1)
+    backup_path = file_path + ".bak"
+    shutil.move(file_path, backup_path)
+    # "with" closes both files even if reading or writing raises
+    with open(backup_path, 'r') as input_file, open(file_path, 'w') as output_file:
+        for x in input_file:
+            x = x.replace('// Insert here @header for lexer.', '@header {#include "PythonLexerBase.h"}')
+            x = x.replace('this.', 'this->')
+            output_file.write(x)
+
+    print("Writing ...")
+
+if __name__ == '__main__':  # run only when executed as a script, not on import
+    main(sys.argv)

From d96cdfa5931f8a5b81b665183e711fcc822284b8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9C=D0=B8=D1=85=D0=B0=D0=B8=D0=BB=20=D0=9A=D0=BE=D1=80?=
 =?UTF-8?q?=D0=BC=D0=B0=D0=BD=D0=BE=D0=B2=D1=81=D0=BA=D0=B8=D0=B9?=
 <kormanowsky@gmail.com>
Date: Fri, 21 Mar 2025 19:02:46 +0300
Subject: [PATCH 6/6] fix: fixed errors due to bad copy-paste

---
 python/python3_13/Cpp/PythonLexerBase.cpp | 18 +++++++++---------
 python/python3_13/Cpp/PythonLexerBase.h   |  1 -
 python/python3_13/PythonLexer.g4          |  2 +-
 3 files changed, 10 insertions(+), 11 deletions(-)

diff --git a/python/python3_13/Cpp/PythonLexerBase.cpp b/python/python3_13/Cpp/PythonLexerBase.cpp
index 9fce8af49a..5672ce8401 100644
--- a/python/python3_13/Cpp/PythonLexerBase.cpp
+++ b/python/python3_13/Cpp/PythonLexerBase.cpp
@@ -148,7 +148,7 @@ void PythonLexerBase::setCurrentAndFollowingTokens() {
     if (this->ffgToken) {
         this->curToken = this->cloneToken(this->ffgToken);
     } else {
-        this->curToken = PythonLexer::nextToken();
+        this->curToken = antlr4::Lexer::nextToken();
     }
 
     this->checkCurToken(); // ffgToken cannot be used in this method and its sub methods (ffgToken is not yet set)!
@@ -156,7 +156,7 @@ void PythonLexerBase::setCurrentAndFollowingTokens() {
     if (this->curToken->getType() == PythonLexer::EOF) {
         this->ffgToken = this->cloneToken(this->curToken);
     } else {
-        this->ffgToken = PythonLexer::nextToken();
+        this->ffgToken = antlr4::Lexer::nextToken();
     }
 }
 
@@ -259,7 +259,7 @@ void PythonLexerBase::insertLeadingIndentToken() {
             // insert an INDENT token before the first statement to raise an 'unexpected indent' error later by the parser    
             this->createAndAddPendingToken(
                 PythonLexer::INDENT, 
-                Token::DEFAULT_CHANNEL, 
+                antlr4::Token::DEFAULT_CHANNEL, 
                 PythonLexerBase::ERR_TXT + errMsg, 
                 this->curToken
             );
@@ -311,7 +311,7 @@ void PythonLexerBase::insertIndentOrDedentToken(size_t indentLength) {
     auto prevIndentLength = this->indentLengthStack.top();
 
     if (indentLength > prevIndentLength) {
-        this->createAndAddPendingToken(PythonLexer::INDENT, Token::DEFAULT_CHANNEL, this->ffgToken);
+        this->createAndAddPendingToken(PythonLexer::INDENT, antlr4::Token::DEFAULT_CHANNEL, this->ffgToken);
         this->indentLengthStack.push(indentLength);
     } else {
         while (indentLength < prevIndentLength) { // more than 1 DEDENT token may be inserted to the token stream
@@ -319,7 +319,7 @@ void PythonLexerBase::insertIndentOrDedentToken(size_t indentLength) {
             prevIndentLength = this->indentLengthStack.top();
 
             if (indentLength <= prevIndentLength) {
-                this->createAndAddPendingToken(PythonLexer::DEDENT, Token::DEFAULT_CHANNEL, this->ffgToken);
+                this->createAndAddPendingToken(PythonLexer::DEDENT, antlr4::Token::DEFAULT_CHANNEL, this->ffgToken);
             } else {
                 this->reportError("inconsistent dedent");
             }
@@ -351,7 +351,7 @@ void PythonLexerBase::checkCurToken() {
         case PythonLexer::NEWLINE:
             // append the current brace expression with the current newline
             this->appendToBraceExpression(this->curToken->getText());
-            this->curToken = this->cloneToken(this->curToken, Token::HIDDEN_CHANNEL);
+            this->curToken = this->cloneToken(this->curToken, antlr4::Token::HIDDEN_CHANNEL);
             break;
         case PythonLexer::LBRACE:
             // the outermost brace expression cannot be a dictionary comprehension or a set comprehension
@@ -502,9 +502,9 @@ void PythonLexerBase::handleFSTRING_MIDDLEtokenWithDoubleBrace() {
     auto lastTwoChars = this->getLastTwoCharsOfTheCurTokenText();
 
     if (lastTwoChars == "{{") {
-        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::LBRACE, "{", Token::HIDDEN_CHANNEL);
+        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::LBRACE, "{", antlr4::Token::HIDDEN_CHANNEL);
     } else if (lastTwoChars == "}}") {
-        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::RBRACE, "}", Token::HIDDEN_CHANNEL);
+        this->trimLastCharAddPendingTokenSetCurToken(PythonLexer::RBRACE, "}", antlr4::Token::HIDDEN_CHANNEL);
     }
 }
 
@@ -618,7 +618,7 @@ void PythonLexerBase::handleFORMAT_SPECIFICATION_MODE() {
 bool PythonLexerBase::isDictionaryComprehensionOrSetComprehension(const std::string &code) {
     auto inputStream = std::make_unique<antlr4::ANTLRInputStream>(code);
     auto lexer = std::make_unique<PythonLexer>(inputStream.get());
-    auto tokenStream = std::make_unique<CommonTokenStream>(lexer.get());
+    auto tokenStream = std::make_unique<antlr4::CommonTokenStream>(lexer.get());
     auto parser = std::make_unique<PythonParser>(tokenStream.get());
 
     // Disable error listeners to suppress console output
diff --git a/python/python3_13/Cpp/PythonLexerBase.h b/python/python3_13/Cpp/PythonLexerBase.h
index 761cf5b400..57828582af 100644
--- a/python/python3_13/Cpp/PythonLexerBase.h
+++ b/python/python3_13/Cpp/PythonLexerBase.h
@@ -116,4 +116,3 @@ THE SOFTWARE.
      const ssize_t INVALID_LENGTH = -1;
      const std::string ERR_TXT = " ERROR: ";
  };
- 
\ No newline at end of file
diff --git a/python/python3_13/PythonLexer.g4 b/python/python3_13/PythonLexer.g4
index da15a6a9e8..f2c036b9ae 100644
--- a/python/python3_13/PythonLexer.g4
+++ b/python/python3_13/PythonLexer.g4
@@ -32,7 +32,7 @@ lexer grammar PythonLexer;
 
 options { superClass=PythonLexerBase; }
 
-// Insert here @header for C++ lexer.
+// Insert here @header for lexer.
 
 tokens {
     ENCODING // https://docs.python.org/3.13/reference/lexical_analysis.html#encoding-declarations