Skip to content
Snippets Groups Projects
Select Git revision
  • 76d4d6bcbfdc89b34ecd231c2f14f20c16b11595
  • master default protected
  • gitkeep
  • dev protected
  • Issue/2583-treeBug
  • Hotfix/2504-formGen
  • Issue/2309-docs
  • Issue/2462-removeTraces
  • Hotfix/2459-EncodingPath
  • Hotfix/2452-linkedDeletion
  • Issue/1792-newMetadataStructure
  • Hotfix/2384-guestsAndLinked
  • v2.8.14-Hotfix2365
  • Hotfix/2365-targetClassWorks
  • Hotfix/2371-fixGitLabinRCV
  • Fix/xxxx-activateGitlab
  • Test/xxxx-enablingGitLab
  • Issue/2349-gitlabHttps
  • Issue/2287-guestRole
  • Hotfix/2296-selectedValuesNotReturned
  • Issue/2102-gitLabResTypeRCV
  • v2.11.5
  • v2.11.4
  • v2.11.3
  • v2.11.2
  • v2.11.1
  • v2.11.0
  • v2.10.3
  • v2.10.2
  • v2.10.1
  • v2.10.0
  • v2.9.4
  • v2.9.3
  • v2.9.2
  • v2.9.1
  • v2.9.0
  • v2.8.16
  • v2.8.15
  • v2.8.14
  • v2.8.13
  • v2.8.12
41 results

Tree.sln

Blame
  • Code owners
    Assign users and groups as approvers for specific file changes. Learn more.
    lexer.cpp 7.56 KiB
    /**********************************************************************************
     * Copyright (c) 2023 Process Systems Engineering (AVT.SVT), RWTH Aachen University
     *
     * This program and the accompanying materials are made available under the
     * terms of the Eclipse Public License 2.0 which is available at
     * http://www.eclipse.org/legal/epl-2.0.
     *
     * SPDX-License-Identifier: EPL-2.0
     *
     **********************************************************************************/
    
    #include "lexer.hpp"
    
    #include <algorithm> // for find
    #include <ctype.h>   // for isdigit, isalpha
    #include <map>       // for map
    #include <regex>     // for regex_search, match_results<>::_Unchecked, smatch, _NFA, regex
    #include <sstream>   // for basic_stringbuf<>::int_type, basic_stringbuf<>::pos_type, basic_stringbuf<>::__streambuf_type
    #include <stdio.h>   // for EOF
    
    namespace ale {
    
    /// Create a lexer over \p input; position tracking starts at line 1, column 1.
    lexer::lexer(std::istream &input) : input(input), line(1), col(1) {}
    
    /// Register additional reserved keywords recognized by match_ident().
    void lexer::reserve_keywords(std::initializer_list<std::string> keys) {
        for(const auto &key : keys) {
            keywords.push_back(key);
        }
    }
    
    /// Append regex patterns; identifiers matching any of them produce
    /// FORBIDDEN_EXPRESSION tokens in match_ident().
    void lexer::forbid_expressions(std::vector<std::string> expressions) {
        for(auto &expression : expressions) {
            // `expressions` is a by-value copy, so moving out of it is safe.
            forbidden_expressions.push_back(std::move(expression));
        }
    }
    
    /// Append keywords that match_ident() reports as FORBIDDEN_KEYWORD.
    void lexer::forbid_keywords(std::vector<std::string> keys) {
        for(auto &key : keys) {
            // `keys` is a by-value copy, so moving out of it is safe.
            forbidden_keywords.push_back(std::move(key));
        }
    }
    
    // Look ahead at the n-th character (1-based) without consuming any input.
    // Returns 0 when the requested position is at or beyond end of stream.
    // NOTE(review): relies on the stream being seekable; if tellg() fails
    // (returns -1), the restoring seekg(pos) cannot work -- confirm callers
    // only pass seekable streams (e.g. stringstream/ifstream, not cin pipes).
    char lexer::peek(unsigned n) {
        auto pos = input.tellg();
        input.seekg(n - 1, std::ios_base::cur); // advance to the n-th lookahead char
        int next = input.peek();
        input.seekg(pos); // restore the original read position
        if(next == EOF) {
            return 0;
        }
        return static_cast<char>(next);
    }
    
    /// Discard one character from the input and advance the column counter.
    void lexer::skip() {
        col += 1;
        input.get();
    }
    
    /// Read one character from the input and append it to the current lexeme.
    void lexer::consume() {
        char c = static_cast<char>(input.get());
        lexeme.push_back(c);
    }
    
    /// True iff the n-th lookahead character equals \p expect.
    bool lexer::check(char expect, unsigned n) {
        return expect == peek(n);
    }
    
    /// Consume the next character iff it equals \p expect; report whether it did.
    bool lexer::match(char expect) {
        if(!check(expect)) {
            return false;
        }
        consume();
        return true;
    }
    
    /// Package the accumulated lexeme into a token at the current position,
    /// then advance the column past it and reset the lexeme buffer.
    token lexer::make_token(token::token_type type) {
        token result(type, lexeme, line, col);
        col += lexeme.length();
        lexeme.clear();
        return result;
    }
    
    /// Produce the next token from the input stream.
    ///
    /// Whitespace and '#' line comments are skipped. One- and two-character
    /// operators are matched directly in the switch; digits dispatch to
    /// match_number(), letters to match_ident(), and '"' to match_literal().
    /// Any other character yields an ERROR token; END is returned once the
    /// stream is exhausted.
    ///
    /// Fix: removed the unreachable `continue` that followed
    /// `return match_literal();` in the '"' case (dead code).
    token lexer::next_token() {
        while(char next = peek()) {
            switch(next) {
                case ' ':
                case '\r':
                case '\t':
                case '\n':
                    skip_space();
                    continue;
                case '#':
                    skip();
                    skip_comment();
                    continue;
                case '\"':
                    return match_literal();
                case '+':
                    consume();
                    return make_token(token::PLUS);
                case '-':
                    consume();
                    return make_token(token::MINUS);
                case '*':
                    consume();
                    return make_token(token::STAR);
                case '/':
                    consume();
                    return make_token(token::SLASH);
                case '^':
                    consume();
                    return make_token(token::HAT);
                case '|':
                    consume();
                    return make_token(token::PIPE);
                case '&':
                    consume();
                    return make_token(token::AND);
                case '!':
                    consume();
                    return make_token(token::BANG);
                case '=':
                    consume();
                    return make_token(token::EQUAL);
                case '(':
                    consume();
                    return make_token(token::LPAREN);
                case ')':
                    consume();
                    return make_token(token::RPAREN);
                case '[':
                    consume();
                    return make_token(token::LBRACK);
                case ']':
                    consume();
                    return make_token(token::RBRACK);
                case '{':
                    consume();
                    return make_token(token::LBRACE);
                case '}':
                    consume();
                    return make_token(token::RBRACE);
                case ',':
                    consume();
                    return make_token(token::COMMA);
                case ';':
                    consume();
                    return make_token(token::SEMICOL);
                case '.':
                    // ".." is the range operator DOTS, "." alone is DOT.
                    consume();
                    return make_token(match('.') ? token::DOTS : token::DOT);
                case ':':
                    // ":=" is DEFINE, ":" alone is COLON.
                    consume();
                    return make_token(match('=') ? token::DEFINE : token::COLON);
                case '<':
                    // "<=" is LEQUAL, "<-" is ASSIGN, "<" alone is LESS.
                    consume();
                    if(match('=')) {
                        return make_token(token::LEQUAL);
                    }
                    if(match('-')) {
                        return make_token(token::ASSIGN);
                    }
                    return make_token(token::LESS);
                case '>':
                    consume();
                    return make_token(match('=') ? token::GEQUAL : token::GREATER);
                default:
                    if(!((int)next > 0 && (int)next < 255)) {
                        // isdigit throws meaningless error under windows in debug if special
                        // character is detected.
                        // -> Manually create error token for user-friendly error reporting
                        consume();
                        return make_token(token::ERROR);
                    } else if(isdigit(next)) {
                        return match_number();
                    } else if(isalpha(next)) {
                        return match_ident();
                    }
                    // Printable but unrecognized character.
                    consume();
                    return make_token(token::ERROR);
            }
        }
        return make_token(token::END);
    }
    
    void lexer::skip_space() {
        while(check(' ') || check('\r') || check('\t') || check('\n')) {
            if(check('\n')) {
                line++;
                col = 0;
            }
            skip();
        }
    }
    
    /// Discard characters up to (but not including) the next newline or EOF.
    void lexer::skip_comment() {
        for(char c = peek(); c != 0 && c != '\n'; c = peek()) {
            skip();
        }
    }
    
    // Scan a numeric token starting at a digit.
    // Returns INTEGER for a plain digit run -- including when the run is
    // followed by ".." so the range operator DOTS is left for next_token() --
    // NUMBER for a value with a fractional part and/or exponent, and ERROR
    // for a malformed exponent (e.g. "1e+" with no digits).
    token lexer::match_number() {
        // Integer part.
        while(isdigit(peek())) {
            consume();
        }
        // "12..34": the ".." belongs to the DOTS token, so stop at the integer.
        if(check('.') && check('.', 2)) {
            return make_token(token::INTEGER);
        }
        // Neither fraction nor exponent follows -> plain integer.
        if(!check('.') && !check('e') && !check('E')) {
            return make_token(token::INTEGER);
        }
        // Optional fractional part (digits after the decimal point may be empty).
        if(match('.')) {
            while(isdigit(peek())) {
                consume();
            }
        }
        // Optional exponent: [eE][+-]?digits, with at least one digit required.
        if(match('e') || match('E')) {
            match('-') || match('+');
            if(isdigit(peek())) {
                consume();
            } else {
                // Exponent marker without digits -> malformed number.
                return make_token(token::ERROR);
            }
            while(isdigit(peek())) {
                consume();
            }
        }
        return make_token(token::NUMBER);
    }
    
    /// Scan an identifier ([a-zA-Z][a-zA-Z0-9_]*) and classify it as a
    /// FORBIDDEN_EXPRESSION (regex match), FORBIDDEN_KEYWORD, KEYWORD, or
    /// plain IDENT, in that order of precedence.
    token lexer::match_ident() {
        if(!isalpha(peek())) {
            return make_token(token::ERROR);
        }
        consume();
        for(char c = peek(); isalpha(c) || isdigit(c) || c == '_'; c = peek()) {
            consume();
        }
        // TODO: accept any capitalization

        // Forbidden patterns take precedence over all keyword classes.
        for(const auto &pattern : forbidden_expressions) {
            std::smatch found;
            if(std::regex_search(lexeme, found, std::regex(pattern))) {
                return make_token(token::FORBIDDEN_EXPRESSION);
            }
        }

        // Exact-match membership test against a keyword list.
        auto contains_lexeme = [this](const auto &list) {
            return std::find(list.begin(), list.end(), lexeme) != list.end();
        };
        if(contains_lexeme(forbidden_keywords)) {
            return make_token(token::FORBIDDEN_KEYWORD);
        }
        if(contains_lexeme(keywords)) {
            return make_token(token::KEYWORD);
        }
        return make_token(token::IDENT);
    }
    
    /// Scan a double-quoted string literal; the surrounding quotes are skipped
    /// and do not appear in the token's lexeme. Returns ERROR when the closing
    /// quote is missing before end of input.
    token lexer::match_literal() {
        skip(); // opening quote
        for(char c = peek(); c != '\"' && c != '\0'; c = peek()) {
            consume();
        }
        if(!check('\"')) {
            // Input ended before the closing quote.
            return make_token(token::ERROR);
        }
        token literal = make_token(token::LITERAL);
        skip(); // closing quote
        return literal;
    }
    
    } // namespace ale