diff --git a/examples/variables.fem b/examples/variables.fem new file mode 100644 index 0000000..00606cd --- /dev/null +++ b/examples/variables.fem @@ -0,0 +1,2 @@ +my_variable is "Hello, again!" +UwU Boy my_variable \ No newline at end of file diff --git a/src/lexer.py b/src/lexer.py index e6aa2a5..645c520 100644 --- a/src/lexer.py +++ b/src/lexer.py @@ -20,11 +20,14 @@ class Lexer: if self.pos > len(self.text) - 1: return Token('EOF', None) - current_char = self.text[self.pos] - - if current_char.isspace(): + # Skip whitespace + while self.pos < len(self.text) and self.text[self.pos].isspace(): self.pos += 1 - return self.get_next_token() + + if self.pos > len(self.text) - 1: + return Token('EOF', None) + + current_char = self.text[self.pos] if current_char == '"': self.pos += 1 @@ -35,9 +38,20 @@ class Lexer: self.pos = string_end + 1 return Token('STRING', string) + # Match keywords if re.match(r'\bUwU Boy\b', self.text[self.pos:]): self.pos += 7 return Token('PRINT', 'UwU Boy') + if re.match(r'\bis\b', self.text[self.pos:]): + self.pos += 2 + return Token('ASSIGN', 'is') + + # Match identifiers + match = re.match(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', self.text[self.pos:]) + if match: + value = match.group(0) + self.pos += len(value) + return Token('ID', value) self.error() diff --git a/src/main.py b/src/main.py index 404c4ed..e205f27 100644 --- a/src/main.py +++ b/src/main.py @@ -3,7 +3,7 @@ from parser import Parser from interpreter import Interpreter def main(): - with open('../examples/hello_world.fem', 'r') as f: + with open('../examples/variables.fem', 'r') as f: text = f.read() lexer = Lexer(text) diff --git a/src/parser.py b/src/parser.py index 8c8a731..b51df69 100644 --- a/src/parser.py +++ b/src/parser.py @@ -5,6 +5,17 @@ class Print(AST): def __init__(self, value): self.value = value +class Assign(AST): + def __init__(self, left, op, right): + self.left = left + self.op = op + self.right = right + +class Variable(AST): + def __init__(self, token): + self.token = token + self.value = token.value + class Parser: def __init__(self, tokens): self.tokens = tokens @@ -15,22 +26,50 @@ class Parser: token = self.tokens[self.pos] self.pos += 1 return token - return None + return Token('EOF', None) + + def peek_next_token(self): + if self.pos < len(self.tokens): + return self.tokens[self.pos] + return Token('EOF', None) def parse(self): statements = [] - while True: - token = self.get_next_token() - if token is None or token.type == 'EOF': - break - - if token.type == 'PRINT': - next_token = self.get_next_token() - if next_token.type == 'STRING': - statements.append(Print(next_token.value)) - else: - raise Exception("Expected a string after 'UwU Boy'") - else: - raise Exception(f"Unexpected token: {token.type}") - + while self.peek_next_token().type != 'EOF': + statements.append(self.parse_statement()) return statements + + def parse_statement(self): + token = self.peek_next_token() + + if token.type == 'PRINT': + return self.parse_print_statement() + + if token.type == 'ID' and self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type == 'ASSIGN': + return self.parse_assignment_statement() + elif token.type == 'ID': + raise Exception(f"Unexpected identifier '{token.value}' without assignment.") + + raise Exception(f"Invalid statement starting with token {token.type}") + + def parse_print_statement(self): + self.get_next_token() # Consume PRINT token + token = self.get_next_token() + if token.type == 'STRING': + return Print(token.value) + elif token.type == 'ID': + return Print(Variable(token)) + else: + raise Exception("Expected a string or variable after 'UwU Boy'") + + def parse_assignment_statement(self): + var_token = self.get_next_token() + var_node = Variable(var_token) + + assign_token = self.get_next_token() + + value_token = self.get_next_token() + if value_token.type == 'STRING': + return Assign(left=var_node, op=assign_token, right=value_token.value) + else: + raise Exception("Expected a string value for assignment")