Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implement Parsing, resolves #6 #70

Merged
merged 14 commits into from
Jul 1, 2024
Merged
6 changes: 6 additions & 0 deletions include/ast.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#ifndef REGEX_AST_H

Check notice on line 1 in include/ast.h

View workflow job for this annotation

GitHub Actions / linter

Run clang-format on include/ast.h

File include/ast.h does not conform to Custom style guidelines. (lines 2, 8, 9, 10, 11, 12, 13, 15, 28, 29, 30, 31, 33, 34, 35, 36, 38, 39, 40, 41, 42, 43, 45, 46, 48, 53, 59, 70, 78, 85)
#define REGEX_AST_H


Expand Down Expand Up @@ -77,4 +77,10 @@
*/
void ast_node_free(ASTNode* node);

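/**
* Get a human-readable name for the type of the given AST node.
*
* @param node The node to convert to string
*
* @return A string naming the node's type (e.g. "CharNode"). The string is
*         not heap allocated and must not be freed by the caller.
*/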
char* str_ast_node(ASTNode* node);
#endif // REGEX_AST_H
14 changes: 14 additions & 0 deletions src/ast.c
Original file line number Diff line number Diff line change
@@ -1,7 +1,17 @@
#include <stdlib.h>

Check notice on line 1 in src/ast.c

View workflow job for this annotation

GitHub Actions / linter

Run clang-format on src/ast.c

File src/ast.c does not conform to Custom style guidelines. (lines 3, 6, 7, 8, 9, 10, 11, 16, 17, 19, 20, 21, 22, 23, 26, 30, 31, 32, 33, 35, 36, 37, 38, 39, 42, 46, 47, 48, 49, 51, 52, 53, 54, 56, 58, 59, 60, 65, 66)

#include "ast.h"

Check failure on line 3 in src/ast.c

View workflow job for this annotation

GitHub Actions / linter

src/ast.c:3:10 [clang-diagnostic-error]

'ast.h' file not found


// Display names for AST node types; str_ast_node() indexes this table with
// node->type.
// NOTE(review): the entry order presumably mirrors the ASTNodeType enum
// declared in ast.h -- confirm whenever a node type is added or reordered.
char* ast_str[] = {

Check warning on line 6 in src/ast.c

View workflow job for this annotation

GitHub Actions / linter

src/ast.c:6:7 [cppcoreguidelines-avoid-non-const-global-variables]

variable 'ast_str' is non-const and globally accessible, consider making it const
"CharNode",
"StarNode",
"PlusNode",
"QuestionNode",
"OrNode",
"ConcatNode",
};

// Allocate a new AST Node, and initialize it with the given type
ASTNode* ast_node_create(ASTNodeType type) {
ASTNode* node = malloc(sizeof(ASTNode));
Expand Down Expand Up @@ -51,3 +61,7 @@

free(node);
}

/**
 * Look up the human-readable name of an AST node's type.
 *
 * @param node The node to describe; may be NULL
 *
 * @return The `ast_str` entry for the node's type, or NULL if `node` is NULL
 *         or its type does not index into `ast_str`. The returned string is
 *         not heap allocated and must not be freed.
 */
char* str_ast_node(ASTNode* node) {
    if (node == NULL) {
        return NULL;
    }

    // Converting to size_t also rejects negative enum values, which become
    // very large indices.
    size_t index = (size_t) node->type;
    if (index >= sizeof(ast_str) / sizeof(ast_str[0])) {
        return NULL;
    }

    return ast_str[index];
}
167 changes: 153 additions & 14 deletions src/parser.c
Original file line number Diff line number Diff line change
@@ -1,8 +1,14 @@
#include "ast.h"

Check notice on line 1 in src/parser.c

View workflow job for this annotation

GitHub Actions / linter

Run clang-format on src/parser.c

File src/parser.c does not conform to Custom style guidelines. (lines 1, 7, 8, 9, 10, 19, 20, 21, 22, 24, 25, 26, 29, 39, 40, 41, 42, 44, 45, 46, 49, 59, 60, 61, 62, 64, 65, 66, 68, 69, 70, 71, 74, 90, 91, 92, 93, 95, 96, 98, 99, 100, 101, 102, 104, 105, 106, 107, 108, 110, 112, 113, 114, 115, 117, 119, 120, 121, 122, 125, 141, 142, 143, 144, 146, 148, 150, 151, 153, 154, 155, 156, 158, 159, 160, 162, 163, 164, 166, 167, 168, 170, 171, 172, 173, 176, 192, 193, 194, 195, 197, 199, 201, 202, 203, 204, 206, 207, 208, 209, 210, 212, 213, 214, 215, 216, 217, 220, 221, 223, 224, 227, 243, 244, 245, 246, 249, 251, 253, 254, 255, 257, 258, 259, 260, 261, 263, 264, 265, 266, 267, 268, 271, 272, 274, 275, 281, 282, 284, 285, 286, 287, 293, 294, 295, 296, 298, 300, 301, 302, 308, 309, 310, 311, 317, 318)

Check failure on line 1 in src/parser.c

View workflow job for this annotation

GitHub Actions / linter

src/parser.c:1:10 [clang-diagnostic-error]

'ast.h' file not found
#include "lexer.h"
#include "parser.h"
#include "token.h"

// Forward declaration of parsing functions
ASTNode* parse_base(Parser* parser);
ASTNode* parse_factor(Parser* parser);
ASTNode* parse_term(Parser* parser);
ASTNode* parse_expr(Parser* parser);

/**
* Returns the next token with advancing the parser's position
*
Expand Down Expand Up @@ -59,12 +65,16 @@
return -1;
}

return parser->tokens[parser->position].type == type ? 0 : -1;
Token* token = next(parser);
if (token == NULL) {
return type == EOF_ ? 0 : -1;
}

return token->type == type ? 0 : -1;
}


/**
* TODO: WIP
* Parse the `base` non-terminal.
*
* See the regex CFG on
Expand All @@ -77,32 +87,96 @@
*
* @return The root of the AST created by parsing the `base` non-terminal
*/
ASTNode* parse_base(Parser* parser){
return parser != NULL ? NULL : NULL;
// Parse a `base`: either a single CHAR token or a parenthesised expression.
// Consumes the tokens it recognises. Returns the root of the parsed subtree,
// or NULL on any error (NULL parser, no tokens left, unexpected token,
// empty group, missing RPAREN, or allocation failure).
ASTNode* parse_base(Parser* parser) {
    if (parser == NULL) {
        return NULL;
    }

    Token* token = next(parser);
    if (token == NULL) {
        // Ran out of tokens where a base was required
        return NULL;
    }

    switch (token->type) {
    case CHAR: {
        ASTNode* leaf = ast_node_create(CHAR_NODE);
        if (leaf == NULL) {
            return NULL;
        }

        leaf->extra.character = token->value;
        return leaf;
    }

    case LPAREN: {
        // An empty group `()` is rejected before descending. The local is
        // deliberately not called `next` so it cannot shadow next().
        Token* lookahead = peek(parser);
        if (lookahead != NULL && lookahead->type == RPAREN) {
            return NULL;
        }

        ASTNode* group = parse_expr(parser);
        if (group == NULL) {
            // The inner expression failed; there is nothing to free
            return NULL;
        }

        if (expect(parser, RPAREN) < 0) {
            ast_node_free(group);
            return NULL;
        }

        return group;
    }

    default:
        // Any other token cannot start a base
        return NULL;
    }
}


/**
* TODO: WIP
* Parse the `factor` non-terminal.
*
* See the regex CFG on
* https://github.com/mkpro118/Regex-Engine/issues/6#issue-2337160940
*
* The relevant production is reproduced below
* non terminal :: factor -> base op | factor op
* non terminal :: factor -> base op | epsilon
*
* @param parser The parser to operate on
*
* @return The root of the AST created by parsing the `factor` non-terminal
*/
ASTNode* parse_factor(Parser* parser){
return parser != NULL ? NULL : NULL;
// Parse a `factor`: a base followed by zero or more postfix operators
// (STAR, PLUS, QUESTION). Each operator wraps the tree built so far as
// `child1` of a new node. Returns the root of the subtree, or NULL if the
// parser is NULL, the base fails to parse, or a node cannot be allocated.
ASTNode* parse_factor(Parser* parser) {
    if (parser == NULL) {
        return NULL;
    }

    ASTNode* node = parse_base(parser);
    if (node == NULL) {
        // Never wrap a failed base in a quantifier node
        return NULL;
    }

    Token* token;

    while ((token = peek(parser)) != NULL) {
        ASTNodeType wrapper_type;

        switch (token->type) {
        case STAR:
            wrapper_type = STAR_NODE;
            break;

        case PLUS:
            wrapper_type = PLUS_NODE;
            break;

        case QUESTION:
            wrapper_type = QUESTION_NODE;
            break;

        default:
            // Not a postfix operator: leave the token for the caller
            return node;
        }

        ASTNode* parent = ast_node_create(wrapper_type);
        if (parent == NULL) {
            ast_node_free(node);
            return NULL;
        }

        parent->child1 = node;
        node = parent;

        // Consume the operator token that was only peeked at above
        next(parser);
    }

    return node;
}


/**
* TODO: WIP
* Parse the `term` non-terminal.
*
* See the regex CFG on
Expand All @@ -115,13 +189,45 @@
*
* @return The root of the AST created by parsing the `term` non-terminal
*/
ASTNode* parse_term(Parser* parser){
return parser != NULL ? NULL : NULL;
// Parse a `term`: one or more factors in sequence, combined left to right
// into CONCAT nodes (`child1` = left operand, `extra.child2` = right).
// Stops, without consuming, at an OR or RPAREN token or when peek() yields
// no token. Returns the root of the subtree, or NULL if the parser is NULL,
// any factor fails to parse, or a node cannot be allocated.
ASTNode* parse_term(Parser* parser) {
    if (parser == NULL) {
        return NULL;
    }

    ASTNode* left = parse_factor(parser);
    if (left == NULL) {
        // Propagate the failure instead of building a CONCAT with a NULL child
        return NULL;
    }

    Token* token;

    while ((token = peek(parser)) != NULL) {
        // OR and RPAREN end the term; they belong to the enclosing rule
        if (token->type == OR || token->type == RPAREN) {
            break;
        }

        ASTNode* right = parse_factor(parser);
        if (right == NULL) {
            ast_node_free(left);
            return NULL;
        }

        ASTNode* concat = ast_node_create(CONCAT_NODE);
        if (concat == NULL) {
            ast_node_free(left);
            ast_node_free(right);
            return NULL;
        }

        concat->child1 = left;
        concat->extra.child2 = right;

        left = concat;
    }

    return left;
}


/**
* TODO: WIP
* Parse the `expr` non-terminal.
*
* See the regex CFG on
Expand All @@ -135,7 +241,40 @@
* @return The root of the AST created by parsing the `expr` non-terminal
*/
// Parse an `expr`: one or more terms separated by OR tokens, combined left
// to right into OR nodes (`child1` = left operand, `extra.child2` = right).
// Returns the root of the subtree, or NULL if the parser is NULL, any term
// fails to parse, or a node cannot be allocated.
ASTNode* parse_expr(Parser* parser) {
    if (parser == NULL) {
        return NULL;
    }

    ASTNode* left = parse_term(parser);
    if (left == NULL) {
        // Propagate the failure instead of building an OR with a NULL child
        return NULL;
    }

    Token* token;

    while ((token = peek(parser)) != NULL && token->type == OR) {
        // Consume the OR token that was only peeked at
        next(parser);

        ASTNode* right = parse_term(parser);
        if (right == NULL) {
            ast_node_free(left);
            return NULL;
        }

        // Not named `or`: that identifier is a macro in <iso646.h>
        ASTNode* or_node = ast_node_create(OR_NODE);
        if (or_node == NULL) {
            ast_node_free(left);
            ast_node_free(right);
            return NULL;
        }

        or_node->child1 = left;
        or_node->extra.child2 = right;

        left = or_node;
    }

    return left;
}

// Create a heap allocated parser from the given Lexer
Expand Down Expand Up @@ -176,5 +315,5 @@

// Create an AST by parsing the tokens in the given parser
// Entry point of the parser: parses the token stream as an `expr` and
// returns whatever parse_expr() returns (NULL for a NULL parser or on a
// parse error).
ASTNode* parse(Parser* parser) {
    return parse_expr(parser);
}
Loading
Loading