feat: Implement compound assignment, increment, and decrement operators

This commit is contained in:
Alvin
2025-07-22 20:48:54 +02:00
parent 03f6ae691b
commit 72e0f66d17
4 changed files with 185 additions and 172 deletions

View File

@@ -0,0 +1,18 @@
count is 5
count += 3 # Equivalent to: count is count + 3
UwU Boy count # Prints 8
count -= 2 # Equivalent to: count is count - 2
UwU Boy count # Prints 6
count *= 4 # Equivalent to: count is count * 4
UwU Boy count # Prints 24
count /= 3 # Equivalent to: count is count / 3
UwU Boy count # Prints 8.0
value is 10
value++ # Equivalent to: value is value + 1
UwU Boy value # Prints 11
value-- # Equivalent to: value is value - 1
UwU Boy value # Prints 10

View File

@@ -83,6 +83,20 @@ class Interpreter:
value = self.visit(node.right) value = self.visit(node.right)
self.current_scope[var_name] = value self.current_scope[var_name] = value
def visit_Increment(self, node):
    """Evaluate a postfix ``name++`` node.

    The variable is read through ``self.visit`` and ``value + 1`` is stored
    under the same name in ``self.current_scope``.

    Args:
        node: Increment node. ``node.var_name`` is the Variable node being
            incremented and ``node.var_name.value`` is the variable's name.

    Returns:
        The value the variable held before the increment. The parser accepts
        ``name++`` inside expressions, so the node has to produce a value
        there instead of None.

    Raises:
        TypeError: if the current value is not an int or float. Booleans are
            rejected as well.
    """
    var_name = node.var_name.value
    current_value = self.visit(node.var_name)
    # bool is a subclass of int in Python; without the explicit bool check a
    # boolean variable would silently become the integer 2 (or 1).
    if isinstance(current_value, bool) or not isinstance(current_value, (int, float)):
        raise TypeError(f"Cannot increment non-numeric type {type(current_value).__name__}")
    self.current_scope[var_name] = current_value + 1
    return current_value
def visit_Decrement(self, node):
    """Evaluate a postfix ``name--`` node.

    The variable is read through ``self.visit`` and ``value - 1`` is stored
    under the same name in ``self.current_scope``.

    Args:
        node: Decrement node. ``node.var_name`` is the Variable node being
            decremented and ``node.var_name.value`` is the variable's name.

    Returns:
        The value the variable held before the decrement. The parser accepts
        ``name--`` inside expressions, so the node has to produce a value
        there instead of None.

    Raises:
        TypeError: if the current value is not an int or float. Booleans are
            rejected as well.
    """
    var_name = node.var_name.value
    current_value = self.visit(node.var_name)
    # bool is a subclass of int in Python; without the explicit bool check a
    # boolean variable would silently become the integer 0 (or -1).
    if isinstance(current_value, bool) or not isinstance(current_value, (int, float)):
        raise TypeError(f"Cannot decrement non-numeric type {type(current_value).__name__}")
    self.current_scope[var_name] = current_value - 1
    return current_value
def visit_Variable(self, node): def visit_Variable(self, node):
var_name = node.value var_name = node.value
# Search up the scope stack for the variable # Search up the scope stack for the variable

View File

@@ -12,182 +12,124 @@ class Lexer:
def __init__(self, text): def __init__(self, text):
self.text = text self.text = text
self.pos = 0 self.pos = 0
# Table of (regex, token type) pairs that get_next_token tries at the current
# position. The longest match wins; when two patterns match the same length
# the entry listed first is kept, so the keyword patterns must stay above the
# catch-all ID pattern at the bottom.
self.token_patterns = [
# Literals. Strings have no escape sequences: a string ends at the next
# occurrence of its opening quote character.
(r'"[^"]*"', 'STRING'), # Double-quoted string literals
(r"'[^']*'", 'STRING'), # Single-quoted string literals
(r'\d+\.\d+', 'FLOAT'), # Floating-point numbers
(r'\d+', 'INTEGER'), # Integer numbers
# Two-character operators.
(r'\+\+', 'INCREMENT'),
(r'--', 'DECREMENT'),
(r'\+=', 'PLUS_ASSIGN'),
(r'-=', 'MINUS_ASSIGN'),
(r'\*=', 'MUL_ASSIGN'),
(r'/=', 'DIV_ASSIGN'),
(r'==', 'EQ'),
(r'!=', 'NEQ'),
(r'>=', 'GTE'),
(r'<=', 'LTE'),
# Delimiters and single-character operators.
(r'\(', 'LPAREN'),
(r'\)', 'RPAREN'),
(r'\[', 'LBRACKET'),
(r'\]', 'RBRACKET'),
(r'{', 'LBRACE'),
(r'}', 'RBRACE'),
(r':', 'COLON'),
(r'\.', 'DOT'),
(r'\+', 'PLUS'),
(r'-', 'MINUS'),
(r'\*', 'MUL'),
(r'/', 'DIV'),
(r',', 'COMMA'),
(r'=', 'ASSIGN'),
(r'!', 'NOT'),
(r'>', 'GT'),
(r'<', 'LT'),
# Keywords. Two of them contain a space ('Femboy Feminine', 'UwU Boy') and
# are matched as a single token.
(r'\bFemboy Feminine\b', 'FEMBOY_FEMININE'),
(r'\bUwU Boy\b', 'PRINT'),
(r'\bAndrogyny\b', 'ANDROGYNY'),
(r'\bOtokonoko\b', 'OTOKONOKO'),
(r'\bFemboy\b', 'FUNCTION_DEF'),
(r'\bFemme\b', 'RETURN'),
(r'\bFemboycore\b', 'FEMBOYCORE'),
(r'\bPeriodt\b', 'PERIODT'),
(r'\bKawaii\b', 'KAWAII'),
(r'\bCringe\b', 'CRINGE'),
(r'\bGhosted\b', 'NULL'),
(r'\bTomgirl\b', 'FOR'),
(r'\bSlay\b', 'PASS'),
(r'\bBreak\b', 'BREAK'),
(r'\bContinue\b', 'CONTINUE'),
(r'\bTwink\b', 'TRY'),
(r'\bBimboy\b', 'EXCEPT'),
(r'\band\b', 'AND'),
(r'\bor\b', 'OR'),
# 'not' yields the same NOT token type as '!' above, and 'is' the same
# ASSIGN token type as '='.
(r'\bnot\b', 'NOT'),
(r'\bis\b', 'ASSIGN'), # 'is' is now a keyword for assignment
(r'\b[a-zA-Z_][a-zA-Z0-9_]*\b', 'ID'), # Identifiers
]
def error(self, message="Invalid character"):
    """Raise an Exception describing a lexing failure at the current position.

    Args:
        message: text placed at the front of the exception message.

    Raises:
        Exception: always. The message names ``self.pos`` and, when that
            position is inside the input, the character found there.
    """
    # At or past the end of the input there is no character to quote; say so
    # instead of letting the index lookup fail with an unrelated IndexError.
    if self.pos >= len(self.text):
        raise Exception(f"{message} at position {self.pos}: end of input")
    raise Exception(f"{message} at position {self.pos}: '{self.text[self.pos]}'")
def get_next_token(self):
    """Return the next Token from ``self.text`` and advance ``self.pos`` past it.

    Leading whitespace and ``#`` comments are skipped first. Every entry of
    ``self.token_patterns`` is then tried at the current position; the
    longest match wins, and on equal length the earliest entry is kept.

    Returns:
        A Token. Its value is converted for some types: INTEGER -> int,
        FLOAT -> float, KAWAII -> True, CRINGE -> False, NULL -> None.
        STRING tokens carry the text between the quotes; the quotes are
        delimiters and are not part of the value. All other tokens carry
        the matched text. At the end of the input the token is
        ``Token('EOF', None)``.

    Raises:
        Exception: via ``self.error()`` when no pattern matches.
    """
    self.skip_whitespace_and_comments()
    if self.pos >= len(self.text):
        return Token('EOF', None)

    # Slice once per token instead of once per pattern.
    remaining = self.text[self.pos:]
    lexeme = None
    token_type = None
    for pattern, candidate_type in self.token_patterns:
        match = re.match(pattern, remaining, re.IGNORECASE if candidate_type == 'ID' else 0)
        # Strict '>' keeps the earliest pattern on a tie, which is what lets
        # a keyword win over the generic identifier pattern.
        if match and (lexeme is None or len(match.group(0)) > len(lexeme)):
            lexeme = match.group(0)
            token_type = candidate_type

    if not lexeme:
        # error() raises; an empty match is treated the same way so the
        # lexer can never stall without consuming input.
        return self.error()

    self.pos += len(lexeme)
    if token_type == 'STRING':
        # Drop the opening and closing quote characters.
        return Token(token_type, lexeme[1:-1])
    if token_type == 'INTEGER':
        return Token(token_type, int(lexeme))
    if token_type == 'FLOAT':
        return Token(token_type, float(lexeme))
    if token_type == 'KAWAII':
        return Token(token_type, True)
    if token_type == 'CRINGE':
        return Token(token_type, False)
    if token_type == 'NULL':
        return Token(token_type, None)
    return Token(token_type, lexeme)

def skip_whitespace_and_comments(self):
    """Advance ``self.pos`` past any run of whitespace and ``#`` comments.

    A comment runs from ``#`` to the end of its line (the newline is
    consumed too) or to the end of the input.
    """
    while self.pos < len(self.text):
        remaining = self.text[self.pos:]
        skipped = re.match(r'\s+', remaining) or re.match(r'#.*(?:\n|$)', remaining)
        if not skipped:
            # Neither whitespace nor a comment: a real token starts here.
            break
        self.pos += len(skipped.group(0))
def tokenize(self): def tokenize(self):
tokens = [] tokens = []

View File

@@ -1,3 +1,5 @@
from lexer import Token
class AST:
    """Base class that the syntax-tree node classes in this module inherit from."""
@@ -86,6 +88,14 @@ class TryExceptStatement(AST):
self.try_block = try_block self.try_block = try_block
self.except_block = except_block self.except_block = except_block
class Increment(AST):
    """Syntax-tree node for the postfix ``++`` operator applied to a variable."""

    def __init__(self, var_name):
        # The parser passes a Variable node here, not a bare name string.
        self.var_name = var_name
class Decrement(AST):
    """Syntax-tree node for the postfix ``--`` operator applied to a variable."""

    def __init__(self, var_name):
        # The parser passes a Variable node here, not a bare name string.
        self.var_name = var_name
class FunctionDefinition(AST): class FunctionDefinition(AST):
def __init__(self, name, parameters, body): def __init__(self, name, parameters, body):
self.name = name self.name = name
@@ -149,8 +159,18 @@ class Parser:
if token.type == 'ID': if token.type == 'ID':
# Check for assignment # Check for assignment
if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type == 'ASSIGN': if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type in ('ASSIGN', 'PLUS_ASSIGN', 'MINUS_ASSIGN', 'MUL_ASSIGN', 'DIV_ASSIGN'):
return self.parse_assignment_statement() return self.parse_assignment_statement()
# Check for increment/decrement as a statement
if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type in ('INCREMENT', 'DECREMENT'):
var_token = self.get_current_token()
self.consume('ID')
op_token = self.get_current_token()
self.consume(op_token.type)
if op_token.type == 'INCREMENT':
return Increment(Variable(var_token))
else:
return Decrement(Variable(var_token))
# Check for function call as a statement # Check for function call as a statement
if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type == 'LPAREN': if self.pos + 1 < len(self.tokens) and self.tokens[self.pos + 1].type == 'LPAREN':
# Consume the ID token first, then parse the function call # Consume the ID token first, then parse the function call
@@ -200,11 +220,23 @@ class Parser:
self.consume('ID') self.consume('ID')
var_node = Variable(var_token) var_node = Variable(var_token)
self.consume('ASSIGN') assign_op_token = self.get_current_token()
if assign_op_token.type in ('ASSIGN', 'PLUS_ASSIGN', 'MINUS_ASSIGN', 'MUL_ASSIGN', 'DIV_ASSIGN'):
self.consume(assign_op_token.type)
else:
raise Exception(f"Expected assignment operator, got {assign_op_token.type}")
right_expr = self.expression() right_expr = self.expression()
assign_token = self.tokens[self.pos - 1] # Get the consumed ASSIGN token
return Assign(left=var_node, op=assign_token, right=right_expr) if assign_op_token.type == 'ASSIGN':
return Assign(left=var_node, op=assign_op_token, right=right_expr)
else:
# For compound assignments, create a BinOp as the right-hand side of the Assign
# The operation is derived from the compound assignment token type
op_type = assign_op_token.type.replace('_ASSIGN', '') # e.g., PLUS_ASSIGN -> PLUS
op_token = Token(op_type, assign_op_token.value[0]) # e.g., Token(PLUS, '+')
bin_op_node = BinOp(left=var_node, op=op_token, right=right_expr)
return Assign(left=var_node, op=Token('ASSIGN', 'is'), right=bin_op_node)
def parse_if_statement(self): def parse_if_statement(self):
self.consume('FEMBOY_FEMININE') self.consume('FEMBOY_FEMININE')
@@ -326,21 +358,28 @@ class Parser:
return Null() return Null()
elif token.type == 'ID': elif token.type == 'ID':
# Consume the ID token first # Consume the ID token first
id_token = token
self.consume('ID') self.consume('ID')
# Now check what follows the ID # Now check what follows the ID
next_token = self.get_current_token() next_token = self.get_current_token()
if next_token.type == 'LPAREN': if next_token.type == 'LPAREN':
# It's a function call # It's a function call
return self.parse_function_call(token) return self.parse_function_call(id_token)
elif next_token.type == 'DOT': elif next_token.type == 'DOT':
# It's a property access # It's a property access
return self.parse_property_access(Variable(token)) # Pass Variable node as target return self.parse_property_access(Variable(id_token)) # Pass Variable node as target
elif next_token.type == 'LBRACKET': elif next_token.type == 'LBRACKET':
# It's an index access # It's an index access
return self.parse_index_access(Variable(token)) return self.parse_index_access(Variable(id_token))
elif next_token.type == 'INCREMENT':
self.consume('INCREMENT')
return Increment(Variable(id_token))
elif next_token.type == 'DECREMENT':
self.consume('DECREMENT')
return Decrement(Variable(id_token))
else: else:
# It's a simple variable # It's a simple variable
return Variable(token) return Variable(id_token)
elif token.type == 'LPAREN': # Handle parenthesized expressions elif token.type == 'LPAREN': # Handle parenthesized expressions
self.consume('LPAREN') self.consume('LPAREN')
node = self.expression() node = self.expression()