From 72e0f66d1796d48cf0c299e479396561160b90db Mon Sep 17 00:00:00 2001
From: Alvin <524715@vistacollege.nl>
Date: Tue, 22 Jul 2025 20:48:54 +0200
Subject: [PATCH] feat: Implement compound assignment, increment, and decrement operators

---
Reviewer notes (text in this section is commentary, not commit message):

* STRING tokens: the table-driven lexer returned the whole regex match as the
  token value, i.e. the literal including its quotes, whereas the hand-written
  lexer removed below returned only the text between the quotes. A STRING
  branch now strips the delimiters so string values keep their previous form.
* NOTE(review): the parser also accepts `x++` / `x--` in expression position,
  but visit_Increment / visit_Decrement do not return a value, so such an
  expression evaluates to None. Confirm whether that is intended.
* Line breaks and indentation were restored from a whitespace-collapsed copy
  of this patch. If a hunk does not apply, regenerate the patch from the tree
  rather than editing context lines by hand. The `index` blob ids are those
  of the original commit.

 examples/syntactic_sugar.fem |  18 +++
 src/interpreter.py           |  16 ++
 src/lexer.py                 | 275 ++++++++++++++---------------------
 src/parser.py                |  57 +++++--
 4 files changed, 194 insertions(+), 172 deletions(-)
 create mode 100644 examples/syntactic_sugar.fem

diff --git a/examples/syntactic_sugar.fem b/examples/syntactic_sugar.fem
new file mode 100644
index 0000000..4f9c24e
--- /dev/null
+++ b/examples/syntactic_sugar.fem
@@ -0,0 +1,18 @@
+count is 5
+count += 3 # Equivalent to: count is count + 3
+UwU Boy count # Prints 8
+
+count -= 2 # Equivalent to: count is count - 2
+UwU Boy count # Prints 6
+
+count *= 4 # Equivalent to: count is count * 4
+UwU Boy count # Prints 24
+
+count /= 3 # Equivalent to: count is count / 3
+UwU Boy count # Prints 8.0
+
+value is 10
+value++ # Equivalent to: value is value + 1
+UwU Boy value # Prints 11
+value-- # Equivalent to: value is value - 1
+UwU Boy value # Prints 10
\ No newline at end of file
diff --git a/src/interpreter.py b/src/interpreter.py
index c7f31bd..ed7288d 100644
--- a/src/interpreter.py
+++ b/src/interpreter.py
@@ -83,6 +83,22 @@ class Interpreter:
         value = self.visit(node.right)
         self.current_scope[var_name] = value
 
+    def visit_Increment(self, node):
+        """Store the variable's value plus one in the current scope; raise TypeError if it is not numeric."""
+        var_name = node.var_name.value
+        current_value = self.visit(node.var_name)
+        if not isinstance(current_value, (int, float)):
+            raise TypeError(f"Cannot increment non-numeric type {type(current_value).__name__}")
+        self.current_scope[var_name] = current_value + 1
+
+    def visit_Decrement(self, node):
+        """Store the variable's value minus one in the current scope; raise TypeError if it is not numeric."""
+        var_name = node.var_name.value
+        current_value = self.visit(node.var_name)
+        if not isinstance(current_value, (int, float)):
+            raise TypeError(f"Cannot decrement non-numeric type {type(current_value).__name__}")
+        self.current_scope[var_name] = current_value - 1
+
     def visit_Variable(self, node):
         var_name = node.value
         # Search up the scope stack for the variable
diff --git a/src/lexer.py b/src/lexer.py
index 49ee22c..5e5951f 100644
--- a/src/lexer.py
+++ b/src/lexer.py
@@ -12,182 +12,129 @@ class Lexer:
     def __init__(self, text):
         self.text = text
         self.pos = 0
+        self.token_patterns = [
+            (r'"[^"]*"', 'STRING'),  # Double-quoted string literals
+            (r"'[^']*'", 'STRING'),  # Single-quoted string literals
+            (r'\d+\.\d+', 'FLOAT'),  # Floating-point numbers
+            (r'\d+', 'INTEGER'),  # Integer numbers
+
+            (r'\+\+', 'INCREMENT'),
+            (r'--', 'DECREMENT'),
+            (r'\+=', 'PLUS_ASSIGN'),
+            (r'-=', 'MINUS_ASSIGN'),
+            (r'\*=', 'MUL_ASSIGN'),
+            (r'/=', 'DIV_ASSIGN'),
+            (r'==', 'EQ'),
+            (r'!=', 'NEQ'),
+            (r'>=', 'GTE'),
+            (r'<=', 'LTE'),
+
+            (r'\(', 'LPAREN'),
+            (r'\)', 'RPAREN'),
+            (r'\[', 'LBRACKET'),
+            (r'\]', 'RBRACKET'),
+            (r'{', 'LBRACE'),
+            (r'}', 'RBRACE'),
+            (r':', 'COLON'),
+            (r'\.', 'DOT'),
+            (r'\+', 'PLUS'),
+            (r'-', 'MINUS'),
+            (r'\*', 'MUL'),
+            (r'/', 'DIV'),
+            (r',', 'COMMA'),
+            (r'=', 'ASSIGN'),
+            (r'!', 'NOT'),
+            (r'>', 'GT'),
+            (r'<', 'LT'),
+
+            (r'\bFemboy Feminine\b', 'FEMBOY_FEMININE'),
+            (r'\bUwU Boy\b', 'PRINT'),
+            (r'\bAndrogyny\b', 'ANDROGYNY'),
+            (r'\bOtokonoko\b', 'OTOKONOKO'),
+            (r'\bFemboy\b', 'FUNCTION_DEF'),
+            (r'\bFemme\b', 'RETURN'),
+            (r'\bFemboycore\b', 'FEMBOYCORE'),
+            (r'\bPeriodt\b', 'PERIODT'),
+            (r'\bKawaii\b', 'KAWAII'),
+            (r'\bCringe\b', 'CRINGE'),
+            (r'\bGhosted\b', 'NULL'),
+            (r'\bTomgirl\b', 'FOR'),
+            (r'\bSlay\b', 'PASS'),
+            (r'\bBreak\b', 'BREAK'),
+            (r'\bContinue\b', 'CONTINUE'),
+            (r'\bTwink\b', 'TRY'),
+            (r'\bBimboy\b', 'EXCEPT'),
+            (r'\band\b', 'AND'),
+            (r'\bor\b', 'OR'),
+            (r'\bnot\b', 'NOT'),
+            (r'\bis\b', 'ASSIGN'),  # 'is' is now a keyword for assignment
+            (r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', 'ID'),  # Identifiers
+        ]
 
     def error(self, message="Invalid character"):
         raise Exception(f"{message} at position {self.pos}: '{self.text[self.pos]}'")
 
     def get_next_token(self):
-        if self.pos > len(self.text) - 1:
-            return Token('EOF', None)
+        """Return the longest token matching at the current position, or an EOF token at end of input."""
+        while self.pos < len(self.text):
+            # 1. Consume whitespace and comments
+            self.skip_whitespace_and_comments()
 
-        # Skip whitespace
-        while self.pos < len(self.text) and self.text[self.pos].isspace():
-            self.pos += 1
+            # If we've reached the end after skipping, return EOF
+            if self.pos >= len(self.text):
+                return Token('EOF', None)
 
-        if self.pos > len(self.text) - 1:
-            return Token('EOF', None)
+            longest_match = None
+            matched_type = None
 
-        current_char = self.text[self.pos]
-
-        # Handle comments
-        if current_char == '#':
-            while self.pos < len(self.text) and self.text[self.pos] != '\n':
-                self.pos += 1
-            return self.get_next_token()  # Recursively call to get the next actual token
-
-        if current_char == '"':
-            self.pos += 1
-            string_start = self.pos
-            while self.pos < len(self.text) and self.text[self.pos] != '"':
-                self.pos += 1
-            string_value = self.text[string_start:self.pos]
-            self.pos += 1  # Consume closing quote
-            return Token('STRING', string_value)
-
-        if current_char.isdigit():
-            start_pos = self.pos
-            while self.pos < len(self.text) and self.text[self.pos].isdigit():
-                self.pos += 1
-            return Token('INTEGER', int(self.text[start_pos:self.pos]))
-
-        # Parentheses
-        if current_char == '(':
-            self.pos += 1
-            return Token('LPAREN', '(')
-        if current_char == ')':
-            self.pos += 1
-            return Token('RPAREN', ')')
-        if current_char == '[':
-            self.pos += 1
-            return Token('LBRACKET', '[')
-        if current_char == ']':
-            self.pos += 1
-            return Token('RBRACKET', ']')
-        if current_char == '{':
-            self.pos += 1
-            return Token('LBRACE', '{')
-        if current_char == '}':
-            self.pos += 1
-            return Token('RBRACE', '}')
-        if current_char == ':':
-            self.pos += 1
-            return Token('COLON', ':')
-        if current_char == '.':
-            self.pos += 1
-            return Token('DOT', '.')
-
-        # Operators
-        if current_char == '+':
-            self.pos += 1
-            return Token('PLUS', '+')
-        if current_char == '-':
-            self.pos += 1
-            return Token('MINUS', '-')
-        if current_char == '*':
-            self.pos += 1
-            return Token('MUL', '*')
-        if current_char == '/':
-            self.pos += 1
-            return Token('DIV', '/')
-        if current_char == ',':
-            self.pos += 1
-            return Token('COMMA', ',')
-        if current_char == '=':
-            if self.pos + 1 < len(self.text) and self.text[self.pos + 1] == '=':
-                self.pos += 2
-                return Token('EQ', '==')
-        if current_char == '!':
-            if self.pos + 1 < len(self.text) and self.text[self.pos + 1] == '=':
-                self.pos += 2
-                return Token('NEQ', '!=')
-        if current_char == '>':
-            if self.pos + 1 < len(self.text) and self.text[self.pos + 1] == '=':
-                self.pos += 2
-                return Token('GTE', '>=')
+            # 2. Match tokens
+            for pattern, token_type in self.token_patterns:
+                match = re.match(pattern, self.text[self.pos:], re.IGNORECASE if token_type == 'ID' else 0)
+                if match:
+                    if longest_match is None or len(match.group(0)) > len(longest_match.group(0)):
+                        longest_match = match
+                        matched_type = token_type
+
+            if longest_match:
+                value = longest_match.group(0)
+                self.pos += len(value)
+                if matched_type == 'INTEGER':
+                    return Token(matched_type, int(value))
+                elif matched_type == 'FLOAT':
+                    return Token(matched_type, float(value))
+                elif matched_type == 'STRING':
+                    # Strip the delimiters so the token carries only the text between the quotes
+                    return Token(matched_type, value[1:-1])
+                elif matched_type == 'KAWAII':
+                    return Token(matched_type, True)
+                elif matched_type == 'CRINGE':
+                    return Token(matched_type, False)
+                elif matched_type == 'NULL':
+                    return Token(matched_type, None)
+                else:
+                    return Token(matched_type, value)
             else:
-                self.pos += 1
-                return Token('GT', '>')
-        if current_char == '<':
-            if self.pos + 1 < len(self.text) and self.text[self.pos + 1] == '=':
-                self.pos += 2
-                return Token('LTE', '<=')
-            else:
-                self.pos += 1
-                return Token('LT', '<')
+                self.error()
 
-        # Match keywords (longer ones first)
-        if re.match(r'\bFemboy Feminine\b', self.text[self.pos:]):
-            self.pos += len('Femboy Feminine')
-            return Token('FEMBOY_FEMININE', 'Femboy Feminine')
-        if re.match(r'\bUwU Boy\b', self.text[self.pos:]):
-            self.pos += 7
-            return Token('PRINT', 'UwU Boy')
-        if re.match(r'\bAndrogyny\b', self.text[self.pos:]):
-            self.pos += len('Androgyny')
-            return Token('ANDROGYNY', 'Androgyny')
-        if re.match(r'\bOtokonoko\b', self.text[self.pos:]):
-            self.pos += len('Otokonoko')
-            return Token('OTOKONOKO', 'Otokonoko')
-        if re.match(r'\bFemboy\b', self.text[self.pos:]):
-            self.pos += len('Femboy')
-            return Token('FUNCTION_DEF', 'Femboy')
-        if re.match(r'\bFemme\b', self.text[self.pos:]):
-            self.pos += len('Femme')
-            return Token('RETURN', 'Femme')
-        if re.match(r'\bis\b', self.text[self.pos:]):
-            self.pos += 2
-            return Token('ASSIGN', 'is')
-        if re.match(r'\bFemboycore\b', self.text[self.pos:]):
-            self.pos += len('Femboycore')
-            return Token('FEMBOYCORE', 'Femboycore')
-        if re.match(r'\bPeriodt\b', self.text[self.pos:]):
-            self.pos += len('Periodt')
-            return Token('PERIODT', 'Periodt')
-        if re.match(r'\bKawaii\b', self.text[self.pos:]):
-            self.pos += len('Kawaii')
-            return Token('KAWAII', True)
-        if re.match(r'\bCringe\b', self.text[self.pos:]):
-            self.pos += len('Cringe')
-            return Token('CRINGE', False)
-        if re.match(r'\bGhosted\b', self.text[self.pos:]):
-            self.pos += len('Ghosted')
-            return Token('NULL', None)
-        if re.match(r'\bTomgirl\b', self.text[self.pos:]):
-            self.pos += len('Tomgirl')
-            return Token('FOR', 'Tomgirl')
-        if re.match(r'\bSlay\b', self.text[self.pos:]):
-            self.pos += len('Slay')
-            return Token('PASS', 'Slay')
-        if re.match(r'\bBreak\b', self.text[self.pos:]):
-            self.pos += len('Break')
-            return Token('BREAK', 'Break')
-        if re.match(r'\bContinue\b', self.text[self.pos:]):
-            self.pos += len('Continue')
-            return Token('CONTINUE', 'Continue')
-        if re.match(r'\bTwink\b', self.text[self.pos:]):
-            self.pos += len('Twink')
-            return Token('TRY', 'Twink')
-        if re.match(r'\bBimboy\b', self.text[self.pos:]):
-            self.pos += len('Bimboy')
-            return Token('EXCEPT', 'Bimboy')
-        if re.match(r'\band\b', self.text[self.pos:]):
-            self.pos += len('and')
-            return Token('AND', 'and')
-        if re.match(r'\bor\b', self.text[self.pos:]):
-            self.pos += len('or')
-            return Token('OR', 'or')
-        if re.match(r'\bnot\b', self.text[self.pos:]):
-            self.pos += len('not')
-            return Token('NOT', 'not')
+        return Token('EOF', None)
 
-        # Match identifiers
-        match = re.match(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', self.text[self.pos:])
-        if match:
-            value = match.group(0)
-            self.pos += len(value)
-            return Token('ID', value)
+    def skip_whitespace_and_comments(self):
+        """Advance self.pos past any run of whitespace and '#' comments."""
+        while self.pos < len(self.text):
+            # Try to match whitespace
+            whitespace_match = re.match(r'\s+', self.text[self.pos:])
+            if whitespace_match:
+                self.pos += len(whitespace_match.group(0))
+                continue
 
-        self.error()
+            # Try to match comments
+            comment_match = re.match(r'#.*(?:\n|$)', self.text[self.pos:])
+            if comment_match:
+                self.pos += len(comment_match.group(0))
+                continue
+
+            # If neither whitespace nor comment, break the loop
+            break
 
     def tokenize(self):
         tokens = []
diff --git a/src/parser.py b/src/parser.py
index c643c03..0cb4f4b 100644
--- a/src/parser.py
+++ b/src/parser.py
@@ -1,3 +1,5 @@
+from lexer import Token
+
 class AST:
     pass
 
@@ -86,6 +88,16 @@ class TryExceptStatement(AST):
         self.try_block = try_block
         self.except_block = except_block
 
+class Increment(AST):
+    """AST node for `name++`; var_name is the Variable node being incremented."""
+    def __init__(self, var_name):
+        self.var_name = var_name
+
+class Decrement(AST):
+    """AST node for `name--`; var_name is the Variable node being decremented."""
+    def __init__(self, var_name):
+        self.var_name = var_name
+
 class FunctionDefinition(AST):
     def __init__(self, name, parameters, body):
         self.name = name
@@ -149,8 +161,18 @@ class Parser:
 
         if token.type == 'ID':
             # Check for assignment
-            if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type == 'ASSIGN':
+            if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type in ('ASSIGN', 'PLUS_ASSIGN', 'MINUS_ASSIGN', 'MUL_ASSIGN', 'DIV_ASSIGN'):
                 return self.parse_assignment_statement()
+            # Check for increment/decrement as a statement
+            if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type in ('INCREMENT', 'DECREMENT'):
+                var_token = self.get_current_token()
+                self.consume('ID')
+                op_token = self.get_current_token()
+                self.consume(op_token.type)
+                if op_token.type == 'INCREMENT':
+                    return Increment(Variable(var_token))
+                else:
+                    return Decrement(Variable(var_token))
             # Check for function call as a statement
             if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type == 'LPAREN':
                 # Consume the ID token first, then parse the function call
@@ -200,11 +222,23 @@ class Parser:
         self.consume('ID')
         var_node = Variable(var_token)
 
-        self.consume('ASSIGN')
+        assign_op_token = self.get_current_token()
+        if assign_op_token.type in ('ASSIGN', 'PLUS_ASSIGN', 'MINUS_ASSIGN', 'MUL_ASSIGN', 'DIV_ASSIGN'):
+            self.consume(assign_op_token.type)
+        else:
+            raise Exception(f"Expected assignment operator, got {assign_op_token.type}")
 
         right_expr = self.expression()
-        assign_token = self.tokens[self.pos - 1]  # Get the consumed ASSIGN token
-        return Assign(left=var_node, op=assign_token, right=right_expr)
+
+        if assign_op_token.type == 'ASSIGN':
+            return Assign(left=var_node, op=assign_op_token, right=right_expr)
+        else:
+            # For compound assignments, create a BinOp as the right-hand side of the Assign
+            # The operation is derived from the compound assignment token type
+            op_type = assign_op_token.type.replace('_ASSIGN', '')  # e.g., PLUS_ASSIGN -> PLUS
+            op_token = Token(op_type, assign_op_token.value[0])  # e.g., Token(PLUS, '+')
+            bin_op_node = BinOp(left=var_node, op=op_token, right=right_expr)
+            return Assign(left=var_node, op=Token('ASSIGN', 'is'), right=bin_op_node)
 
     def parse_if_statement(self):
         self.consume('FEMBOY_FEMININE')
@@ -326,21 +360,28 @@ class Parser:
             return Null()
         elif token.type == 'ID':
             # Consume the ID token first
+            id_token = token
             self.consume('ID')
             # Now check what follows the ID
             next_token = self.get_current_token()
             if next_token.type == 'LPAREN':
                 # It's a function call
-                return self.parse_function_call(token)
+                return self.parse_function_call(id_token)
             elif next_token.type == 'DOT':
                 # It's a property access
-                return self.parse_property_access(Variable(token))  # Pass Variable node as target
+                return self.parse_property_access(Variable(id_token))  # Pass Variable node as target
             elif next_token.type == 'LBRACKET':
                 # It's an index access
-                return self.parse_index_access(Variable(token))
+                return self.parse_index_access(Variable(id_token))
+            elif next_token.type == 'INCREMENT':
+                self.consume('INCREMENT')
+                return Increment(Variable(id_token))
+            elif next_token.type == 'DECREMENT':
+                self.consume('DECREMENT')
+                return Decrement(Variable(id_token))
             else:
                 # It's a simple variable
-                return Variable(token)
+                return Variable(id_token)
         elif token.type == 'LPAREN':  # Handle parenthesized expressions
             self.consume('LPAREN')
             node = self.expression()