diff --git a/.vscode/c_cpp_properties.json b/.vscode/c_cpp_properties.json new file mode 100644 index 0000000..834113c --- /dev/null +++ b/.vscode/c_cpp_properties.json @@ -0,0 +1,19 @@ +{ + "configurations": [ + { + "name": "Mac", + "includePath": [ + "${workspaceFolder}/**" + ], + "defines": [], + "macFrameworkPath": [ + "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/System/Library/Frameworks" + ], + "compilerPath": "/usr/bin/clang", + "cStandard": "c17", + "cppStandard": "c++17", + "intelliSenseMode": "macos-clang-arm64" + } + ], + "version": 4 +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index d8cb326..7ae686b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,5 +1,7 @@ { "files.associations": { - "string": "cpp" + "string": "cpp", + "token.h": "c", + "token_type.h": "c" } } \ No newline at end of file diff --git a/Makefile b/Makefile index 3cdc525..b080298 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ all: output execute output: - gcc src/main.c -o build/main + gcc src/main.c src/utils/ints.c src/utils/strings.c src/lexer/lexer.c -o build/main execute: ./build/main \ No newline at end of file diff --git a/build/main b/build/main index 013ca82..5a947f3 100755 Binary files a/build/main and b/build/main differ diff --git a/src/common/types.h b/src/common/types.h new file mode 100644 index 0000000..fb8b356 --- /dev/null +++ b/src/common/types.h @@ -0,0 +1,6 @@ +#ifndef COMMON_TYPES_H +#define COMMON_TYPES_H + +typedef char* string; + +#endif // COMMON_TYPES_H \ No newline at end of file diff --git a/src/lexer/lexer.c b/src/lexer/lexer.c index 64b11d3..bdae384 100644 --- a/src/lexer/lexer.c +++ b/src/lexer/lexer.c @@ -1,8 +1,134 @@ #ifndef LEXER_C #define LEXER_C -#include "token.h" -#include "token_type.h" -#include "types.h" +#include "token/token.h" +#include "token/token_type.h" + +#include "../common/types.h" +#include "../utils/strings.h" 
+#include "../utils/ints.h" + +#include +#include + +/** + * @brief Pushes a token to the back of a token array. + * + * @param tokens The token array. + * @param token The token to push. + */ +void push_back(Token tokens[], Token token) +{ + tokens[sizeof(tokens) + 1] = token; +} + +/** + * @brief Pops a token from the front of a token array. + * + * @param tokens The token array. + * @return Token The popped token. + */ +string pop(string *tokens) +{ + string res = tokens[0]; + for (int i = 1; i < sizeof(tokens); i++) + { + tokens[i - 1] = tokens[i]; + } + return res; +} + +/** + * @brief Tokenizes a string. + * + * @param src The string to tokenize. + * @return const Token* The tokens. + */ +Token *tokenize(string src) +{ + // The list of tokens + Token tokens[] = {}; + + // Split the src + string *split_src = split(src, " "); + + // Build each token + while (sizeof(split_src) > 0) + { + // Pop the token + string token = pop(split_src); + + // Check the token type + if (token == ' ' || token == '\n' || token == '\t') + { + continue; + } + else if (token == '(') + { + push_back(tokens, (Token){TOKEN_TYPE_LEFT_PAREN, "("}); + } + else if (token == ')') + { + push_back(tokens, (Token){TOKEN_TYPE_RIGHT_PAREN, ")"}); + } + else if (token == '=') + { + push_back(tokens, (Token){TOKEN_TYPE_EQUAL, "="}); + } + else if (token == '+') + { + push_back(tokens, (Token){TOKEN_TYPE_PLUS, "+"}); + } + else if (token == '-') + { + push_back(tokens, (Token){TOKEN_TYPE_MINUS, "-"}); + } + else if (token == '*') + { + push_back(tokens, (Token){TOKEN_TYPE_MULTIPLY, "*"}); + } + else if (token == '/') + { + Token token = {TOKEN_TYPE_DIVIDE, "/"}; + push_back(tokens, token); + } + else + { + if (is_int(token)) + { + string num = ""; + while (sizeof(split_src) > 0 && is_int(split_src[0])) + { + string n = pop(split_src); + num = strcat(num, n); + } + push_back(tokens, (Token){TOKEN_TYPE_NUMBER, num}); + } + else if (is_alpha(token)) + { + string str = ""; + while (sizeof(split_src) > 0 
&& is_alpha(split_src[0])) + { + string n = pop(split_src); + str = strcat(str, n); + } + if (token == "let") + { + push_back(tokens, (Token){TOKEN_TYPE_LET, "let"}); + } + else + { + push_back(tokens, (Token){TOKEN_TYPE_IDENTIFIER, str}); + } + } + else + { + return tokens; + } + } + } + + return tokens; +} #endif // LEXER_C \ No newline at end of file diff --git a/src/lexer/lexer.h b/src/lexer/lexer.h index 9bc37ce..cf4cefe 100644 --- a/src/lexer/lexer.h +++ b/src/lexer/lexer.h @@ -1,5 +1,15 @@ -#ifndef LEXER_h -#define LEXER_h +#ifndef LEXER_H +#define LEXER_H +#include "token/token.h" +#include "../common/types.h" -#endif // LEXER_h \ No newline at end of file +/** + * @brief Tokenizes a string. + * + * @param src The string to tokenize. + * @return const Token* The tokens. + */ +Token *tokenize(string src); + +#endif // LEXER_H \ No newline at end of file diff --git a/src/lexer/token.h b/src/lexer/token/token.h similarity index 90% rename from src/lexer/token.h rename to src/lexer/token/token.h index ff791a9..5b3df39 100644 --- a/src/lexer/token.h +++ b/src/lexer/token/token.h @@ -2,7 +2,7 @@ #define TOKEN_H #include "token_type.h" -#include "types.h" +#include "../../common/types.h" /** * @brief A token is a pair of a token type and a value. 
diff --git a/src/lexer/token_type.h b/src/lexer/token/token_type.h similarity index 98% rename from src/lexer/token_type.h rename to src/lexer/token/token_type.h index 66737b8..30003cf 100644 --- a/src/lexer/token_type.h +++ b/src/lexer/token/token_type.h @@ -14,6 +14,7 @@ typedef enum TokenType { TOKEN_TYPE_EOF, + TOKEN_TYPE_LET, TOKEN_TYPE_NUMBER, TOKEN_TYPE_PLUS, TOKEN_TYPE_MINUS, diff --git a/src/lexer/types.h b/src/lexer/types.h deleted file mode 100644 index f160bac..0000000 --- a/src/lexer/types.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef TYPES_H -#define TYPES_H - -typedef char* string; - -#endif // TYPES_H \ No newline at end of file diff --git a/src/main.c b/src/main.c index 72b9b43..ee80b0b 100644 --- a/src/main.c +++ b/src/main.c @@ -1,7 +1,14 @@ #include +#include "lexer/lexer.h" +#include "common/types.h" + int main(void) { - printf("Hello, world!\n"); - return 0; + string src = "let x = 5"; + Token *tokens = tokenize(src); + for (int i = 0; i < sizeof(tokens); i++) + { + printf("Token: %s\n", tokens[i].value); + } } \ No newline at end of file diff --git a/src/main.simpl b/src/main.simpl new file mode 100644 index 0000000..edacaa6 --- /dev/null +++ b/src/main.simpl @@ -0,0 +1 @@ +let x = 45 \ No newline at end of file diff --git a/src/utils/ints.c b/src/utils/ints.c new file mode 100644 index 0000000..78aa8ab --- /dev/null +++ b/src/utils/ints.c @@ -0,0 +1,18 @@ +#ifndef UTILS_INTS_C +#define UTILS_INTS_C + +#include + +/** + * @brief Checks if a character is an integer. + * + * @param c The character to check. + * @return true If the character is an integer. + * @return false If the character is not an integer. 
/* ---- src/utils/ints.c ---- */
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

/**
 * @brief Checks if a character is a decimal digit.
 *
 * @param c The character to check.
 * @return true If the character is in the range '0'..'9'.
 * @return false Otherwise.
 */
bool is_int(char c)
{
    return c >= '0' && c <= '9';
}

/* ---- src/utils/strings.c ---- */

/**
 * @brief Counts the non-empty pieces of a string without modifying it.
 *
 * @param text The string to scan.
 * @param delim The set of delimiter characters.
 * @return size_t The number of pieces split() will produce.
 */
static size_t count_pieces(const char *text, const char *delim)
{
    size_t pieces = 0;
    for (;;)
    {
        text += strspn(text, delim); // skip a run of delimiters
        if (*text == '\0')
        {
            return pieces;
        }
        text += strcspn(text, delim); // skip the piece itself
        pieces++;
    }
}

/**
 * @brief Splits a string by a set of delimiter characters.
 *
 * Works like a strtok() loop: any character in delim separates pieces and
 * runs of delimiters never yield empty pieces. src is cut in place (the
 * delimiter after each piece is overwritten with '\0'), so it must be
 * writable and must outlive the returned array.
 *
 * The parameter and return types are spelled out as char pointers; they are
 * the same types as the project's `string` alias (typedef char* string).
 *
 * @param src The string to split. It is modified in place.
 * @param delim The delimiter characters to split by.
 * @return A heap-allocated, NULL-terminated array of pointers into src, or
 *         NULL if src or delim is NULL or the allocation fails. The caller
 *         frees the array only, not the pieces.
 */
char **split(char *src, char *delim)
{
    if (src == NULL || delim == NULL)
    {
        return NULL;
    }

    // Count first so the array is allocated once with the exact size.
    size_t pieces = count_pieces(src, delim);
    char **parts = calloc(pieces + 1, sizeof *parts);
    if (parts == NULL)
    {
        return NULL;
    }

    char *cursor = src;
    for (size_t i = 0; i < pieces; i++)
    {
        cursor += strspn(cursor, delim);
        parts[i] = cursor;
        cursor += strcspn(cursor, delim);
        if (*cursor != '\0')
        {
            *cursor++ = '\0'; // terminate this piece and step past the delimiter
        }
    }

    // parts[pieces] is already NULL because calloc zero-fills the array.
    return parts;
}

/**
 * @brief Checks if a character is an ASCII letter.
 *
 * @param c The character to check.
 * @return true If the character is in 'a'..'z' or 'A'..'Z'.
 * @return false Otherwise.
 */
bool is_alpha(char c)
{
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}