import re
from typing import List, Tuple

Token = Tuple[str, str]  # (kind, value) pair, e.g. ("INT", "42")

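# Reserved words of the DSL: type names plus the command and control-flow
# keywords. The lexer emits each one as a token whose kind is the word itself.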
RESERVED = {
    "int", "float", "boolean",
    "ACTIVATE_SENSOR", "ACTIVATE_ALARM", "BREAK", "CALIBRATE", "CHARGE_BATTERY", "CHECK_BATTERY",
    "CLOSE_DOOR", "CONTINUE", "COPY_FILE", "DEACTIVATE_ALARM", "DEACTIVATE_SENSOR",
    "DECREASE_SPEED", "DELETE_FILE", "DOWNLOAD", "ELSE", "FALSE", "FOR", "IF", "INCREASE_SPEED",
    "INIT", "LOCK", "LOG", "LOW_BATTERY", "MOVE_BACKWARD", "MOVE_FORWARD", "MOVE_TO", "NULL",
    "OPEN_DOOR", "PAUSE", "PRINT", "READ_SENSOR", "REBOOT", "RENAME_FILE", "RESET", "RESUME",
    "REVERSE", "ROTATE", "SAVE_FILE", "SCAN", "SET_SPEED", "SHUTDOWN", "SHUT_OFF", "START",
    "STOP", "STOP_IMMEDIATELY", "THEN", "TOGGLE_LIGHT", "TRUE", "TURN_DOWN", "TURN_LEFT",
    "TURN_RIGHT", "TURN_UP", "UNLOCK", "UPLOAD", "UPLOAD_FILE", "WAIT", "WHILE", "SET"
}

# Token patterns, tried left to right: two-character operators ("==", "!=")
# must precede their one-character prefixes ("="), FLOAT must precede INT,
# and COMMENT must precede DIVIDE so "//" is not read as two divisions.
TOKEN_SPEC = [
    ("COMMENT",      r'//[^\n]*'),
    ("STRING",       r'"[^"\n]*"'),
    ("FLOAT",        r'\d+\.\d+'),
    ("INT",          r'\d+'),
    ("EQUAL",        r'=='),
    ("NOT_EQUAL",    r'!='),
    ("ASSIGN",       r'='),
    ("PLUS",         r'\+'),
    ("MINUS",        r'-'),
    ("MULTIPLY",     r'\*'),
    ("DIVIDE",       r'/'),
    ("GREATER",      r'>'),
    ("LESS",         r'<'),
    ("OPEN_PAREN",   r'\('),
    ("CLOSE_PAREN",  r'\)'),
    ("OPEN_BRACE",   r'\{'),
    ("CLOSE_BRACE",  r'\}'),
    ("SEMICOLON",    r';'),
    ("COLON",        r':'),
    ("COMMA",        r','),
    ("NEWLINE",      r'\n'),
    ("SKIP",         r'[ \t\r]+'),
    ("IDENTIFIER",   r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ("MISMATCH",     r'.'),  # anything else is a lexical error
]

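# Compile all patterns into a single regex of named groups; match.lastgroup
# then reports which pattern fired for each match.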
TOK_REGEX = '|'.join(f'(?P<{name}>{pattern})' for name, pattern in TOKEN_SPEC)
token_re = re.compile(TOK_REGEX)

def lexer(code: str) -> List[Token]:
    """Tokenize `code` into a list of (kind, value) pairs.

    Whitespace, newlines, and comments are dropped; reserved words are
    emitted with the word itself as the token kind.
    """
    tokens = []
    for match in token_re.finditer(code):
        kind = match.lastgroup
        value = match.group()
        if kind in ("NEWLINE", "SKIP", "COMMENT"):
            continue  # layout and comments carry no meaning downstream
        elif kind == "IDENTIFIER":
            if value in RESERVED:
                tokens.append((value, value))  # keyword: kind == value
            else:
                tokens.append(("IDENTIFIER", value))
        elif kind == "MISMATCH":
            raise RuntimeError(f"Unrecognized token: {value!r} at position {match.start()}")
        else:
            tokens.append((kind, value))
    return tokens
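
# A minimal usage sketch (illustrative, not part of the original file): the
# sample program below is an assumption about the DSL's surface syntax,
# inferred from the reserved-word list rather than from a real grammar spec.
if __name__ == "__main__":
    sample = (
        'SET speed = 3.5;  // initial speed\n'
        'IF (speed > 2) THEN { INCREASE_SPEED; } ELSE { DECREASE_SPEED; }\n'
    )
    for token in lexer(sample):
        print(token)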