import re
from typing import List, Tuple

# A token is a (kind, lexeme) pair; for reserved words, kind == lexeme.
Token = Tuple[str, str]

# Reserved words of the DSL. An IDENTIFIER whose text is in this set is
# emitted with the word itself as its token kind.
RESERVED = {
    "int", "float", "boolean",
    "ACTIVATE_SENSOR", "ACTIVATE_ALARM", "BREAK", "CALIBRATE",
    "CHARGE_BATTERY", "CHECK_BATTERY", "CLOSE_DOOR", "CONTINUE",
    "COPY_FILE", "DEACTIVATE_ALARM", "DEACTIVATE_SENSOR",
    "DECREASE_SPEED", "DELETE_FILE", "DOWNLOAD", "ELSE", "FALSE", "FOR",
    "IF", "INCREASE_SPEED", "INIT", "LOCK", "LOG", "LOW_BATTERY",
    "MOVE_BACKWARD", "MOVE_FORWARD", "MOVE_TO", "NULL", "OPEN_DOOR",
    "PAUSE", "PRINT", "READ_SENSOR", "REBOOT", "RENAME_FILE", "RESET",
    "RESUME", "REVERSE", "ROTATE", "SAVE_FILE", "SCAN", "SET_SPEED",
    "SHUTDOWN", "SHUT_OFF", "START", "STOP", "STOP_IMMEDIATELY", "THEN",
    "TOGGLE_LIGHT", "TRUE", "TURN_DOWN", "TURN_LEFT", "TURN_RIGHT",
    "TURN_UP", "UNLOCK", "UPLOAD", "UPLOAD_FILE", "WAIT", "WHILE", "SET",
}

# Token specification as (name, regex) pairs, combined into one alternation.
# Python's `re` alternation is leftmost-first, NOT longest-match, so order
# matters: multi-character operators (==, !=) must come BEFORE their
# single-character prefixes (=), and COMMENT (//) before DIVIDE (/).
# BUG FIX: the original listed ASSIGN before EQUAL, which made "==" lex as
# two ASSIGN tokens and left EQUAL unreachable.
TOKEN_SPEC = [
    ("COMMENT", r"//[^\n]*"),
    ("STRING", r'"[^"\n]*"'),
    ("FLOAT", r'\d+\.\d+'),          # must precede INT so "3.14" is one token
    ("INT", r'\d+'),
    ("EQUAL", r'=='),                # before ASSIGN (shared prefix '=')
    ("NOT_EQUAL", r'!='),
    ("ASSIGN", r'='),
    ("PLUS", r'\+'),
    ("MINUS", r'-'),
    ("MULTIPLY", r'\*'),
    ("DIVIDE", r'/'),
    ("GREATER", r'>'),
    ("LESS", r'<'),
    ("OPEN_PAREN", r'\('),
    ("CLOSE_PAREN", r'\)'),
    ("OPEN_BRACE", r'\{'),
    ("CLOSE_BRACE", r'\}'),
    ("SEMICOLON", r';'),
    ("COLON", r':'),
    ("COMMA", r','),
    ("NEWLINE", r'\n'),
    ("SKIP", r'[ \t\r]+'),
    ("IDENTIFIER", r'[a-zA-Z_][a-zA-Z0-9_]*'),
    ("MISMATCH", r'.'),              # catch-all: any other char is an error
]

# Compile the spec once at module load into a single regex of named groups.
TOK_REGEX = '|'.join(f'(?P<{name}>{pattern})' for name, pattern in TOKEN_SPEC)
token_re = re.compile(TOK_REGEX)


def lexer(code: str) -> List[Token]:
    """Tokenize *code* into a list of (kind, lexeme) pairs.

    Whitespace, newlines and // comments are discarded. Identifiers whose
    text is a reserved word are emitted with the word as the token kind.

    Raises:
        RuntimeError: on any character not covered by the token spec.
    """
    tokens: List[Token] = []
    for match in token_re.finditer(code):
        kind = match.lastgroup
        value = match.group()
        if kind in ("NEWLINE", "SKIP", "COMMENT"):
            continue  # layout and comments carry no meaning downstream
        elif kind == "IDENTIFIER":
            if value in RESERVED:
                tokens.append((value, value))
            else:
                tokens.append(("IDENTIFIER", value))
        elif kind == "MISMATCH":
            raise RuntimeError(f"Token no reconocido: {value}")
        else:
            tokens.append((kind, value))
    return tokens