From f2d4efe17ac41dae23561e10c40b304f5a1ccb9d Mon Sep 17 00:00:00 2001 From: Jared O'Connell <46976761+jaredoconnell@users.noreply.github.com> Date: Fri, 9 Feb 2024 12:53:49 -0500 Subject: [PATCH] Raw Strings and Schema Fix (#32) * Use correct parser for 64 bit integers * Added step to remove non-applicable schema restrictions * Added raw string * Added test for schema cleanup --- expression_data_test.go | 15 +++++++++- expression_dependencies.go | 37 ++++++++++++++++++++---- expression_type_test.go | 12 ++++++++ internal/ast/recursive_descent_parser.go | 8 +++-- internal/ast/tokenizer.go | 6 ++-- internal/ast/tokenizer_test.go | 7 ++++- 6 files changed, 73 insertions(+), 12 deletions(-) diff --git a/expression_data_test.go b/expression_data_test.go index 2e3accf..57041af 100644 --- a/expression_data_test.go +++ b/expression_data_test.go @@ -1,6 +1,9 @@ package expressions_test -import "go.flow.arcalot.io/pluginsdk/schema" +import ( + "go.flow.arcalot.io/pluginsdk/schema" + "regexp" +) var testScope = schema.NewScopeSchema( schema.NewObjectSchema( @@ -57,6 +60,16 @@ var testScope = schema.NewScopeSchema( nil, nil, ), + "restrictive_str": schema.NewPropertySchema( + schema.NewStringSchema(nil, nil, regexp.MustCompile(`^a$`)), + nil, + true, + nil, + nil, + nil, + nil, + nil, + ), "simple_int": schema.NewPropertySchema( schema.NewIntSchema(nil, nil, nil), nil, diff --git a/expression_dependencies.go b/expression_dependencies.go index 36f15ed..58aef66 100644 --- a/expression_dependencies.go +++ b/expression_dependencies.go @@ -144,7 +144,10 @@ func (c *dependencyContext) binaryOperationDependencies( rightResult.resolvedType.TypeID(), []schema.TypeID{schema.TypeIDInt, schema.TypeIDFloat, schema.TypeIDString}, ) - resultType = leftResult.resolvedType + if err != nil { + return nil, err + } + resultType = cleanType(leftResult.resolvedType.TypeID()) case ast.Subtract, ast.Multiply, ast.Divide, ast.Modulus, ast.Power: // Math. Same as type going in. 
Plus validate that it's numeric. err = validateValidBinaryOpTypes( @@ -153,7 +156,10 @@ func (c *dependencyContext) binaryOperationDependencies( rightResult.resolvedType.TypeID(), []schema.TypeID{schema.TypeIDInt, schema.TypeIDFloat}, ) - resultType = leftResult.resolvedType + if err != nil { + return nil, err + } + resultType = cleanType(leftResult.resolvedType.TypeID()) case ast.And, ast.Or: // Boolean operations. Bool in and out. err = validateValidBinaryOpTypes( @@ -162,6 +168,9 @@ func (c *dependencyContext) binaryOperationDependencies( rightResult.resolvedType.TypeID(), []schema.TypeID{schema.TypeIDBool}, ) + if err != nil { + return nil, err + } resultType = schema.NewBoolSchema() case ast.GreaterThan, ast.LessThan, ast.GreaterThanEqualTo, ast.LessThanEqualTo: // Inequality. Int, float, or string in; bool out. @@ -171,6 +180,9 @@ func (c *dependencyContext) binaryOperationDependencies( rightResult.resolvedType.TypeID(), []schema.TypeID{schema.TypeIDInt, schema.TypeIDString, schema.TypeIDFloat}, ) + if err != nil { + return nil, err + } resultType = schema.NewBoolSchema() case ast.EqualTo, ast.NotEqualTo: // Equality comparison. Any supported type in. Bool out. @@ -180,15 +192,15 @@ func (c *dependencyContext) binaryOperationDependencies( rightResult.resolvedType.TypeID(), []schema.TypeID{schema.TypeIDInt, schema.TypeIDString, schema.TypeIDFloat, schema.TypeIDBool}, ) + if err != nil { + return nil, err + } resultType = schema.NewBoolSchema() case ast.Invalid: panic(fmt.Errorf("attempted to perform invalid operation (binary operation type invalid)")) default: panic(fmt.Errorf("bug: binary operation %s missing from dependency evaluation code", node.Operation)) } - if err != nil { - return nil, err - } // Combine the left and right dependencies. finalDependencies := append(leftResult.completedPaths, rightResult.completedPaths...) 
return &dependencyResult{ @@ -198,6 +210,21 @@ func (c *dependencyContext) binaryOperationDependencies( }, nil } +// Returns a version of the schema without limiting details. +// Used for when an expression is modifying the type, invalidating the restrictions. +func cleanType(inputType schema.TypeID) schema.Type { + switch inputType { + case schema.TypeIDInt: + return schema.NewIntSchema(nil, nil, nil) + case schema.TypeIDFloat: + return schema.NewFloatSchema(nil, nil, nil) + case schema.TypeIDString: + return schema.NewStringSchema(nil, nil, nil) + default: + panic(fmt.Errorf("bug: case missing from cleanType: %s", inputType)) + } +} + func validateValidBinaryOpTypes( node *ast.BinaryOperation, leftType schema.TypeID, diff --git a/expression_type_test.go b/expression_type_test.go index 55b9a83..1f60229 100644 --- a/expression_type_test.go +++ b/expression_type_test.go @@ -235,6 +235,18 @@ func TestTypeResolution_BinaryConcatenateStrings(t *testing.T) { assert.Equals[schema.Type](t, typeResult, schema.NewStringSchema(nil, nil, nil)) } +func TestTypeResolution_WithStrictSchemas(t *testing.T) { + // In this example, we're going to reference schemas that have regular expressions that + // no longer apply when appended together. + // Use the strict schema for both left and right sides to ensure neither the left nor the right's + // strict schema is retained. + expr, err := expressions.New(`$.restrictive_str + $.restrictive_str`) + assert.NoError(t, err) + typeResult, err := expr.Type(testScope, nil, nil) + assert.NoError(t, err) + assert.Equals[schema.Type](t, typeResult, schema.NewStringSchema(nil, nil, nil)) +} + func TestTypeResolution_BinaryMathHomogeneousIntReference(t *testing.T) { // Two ints added should give an int. One int is a reference. 
expr, err := expressions.New("5 + $.simple_int") diff --git a/internal/ast/recursive_descent_parser.go b/internal/ast/recursive_descent_parser.go index c97d0a9..7596d84 100644 --- a/internal/ast/recursive_descent_parser.go +++ b/internal/ast/recursive_descent_parser.go @@ -164,7 +164,9 @@ func (p *Parser) parseStringLiteral() (*StringLiteral, error) { // The literal token includes the "", so trim the ends off. parsedString := p.currentToken.Value[1 : len(p.currentToken.Value)-1] // Replace escaped characters - parsedString = escapeReplacer.Replace(parsedString) + if p.currentToken.TokenID != RawStringLiteralToken { + parsedString = escapeReplacer.Replace(parsedString) + } // Now create the literal itself and advance the token. literal := &StringLiteral{StrValue: parsedString} err := p.advanceToken() @@ -495,7 +497,7 @@ func (p *Parser) parseNegationOperation() (Node, error) { return p.parseLeftUnaryExpression([]TokenID{NegationToken}, p.parseValueOrAccessExpression) } -var literalTokens = []TokenID{StringLiteralToken, IntLiteralToken, BooleanLiteralToken, FloatLiteralToken} +var literalTokens = []TokenID{StringLiteralToken, RawStringLiteralToken, IntLiteralToken, BooleanLiteralToken, FloatLiteralToken} var identifierTokens = []TokenID{IdentifierToken, RootAccessToken} var validRootValueOrAccessStartTokens = append(literalTokens, identifierTokens...) var validValueOrAccessStartTokens = append(validRootValueOrAccessStartTokens, CurrentObjectAccessToken) @@ -515,7 +517,7 @@ func (p *Parser) parseValueOrAccessExpression() (Node, error) { // A value or access expression can start with a literal, or an identifier. // If an identifier, it can lead to a chain or a function. 
switch p.currentToken.TokenID { - case StringLiteralToken: + case StringLiteralToken, RawStringLiteralToken: literalNode, err = p.parseStringLiteral() case IntLiteralToken: literalNode, err = p.parseIntLiteral() diff --git a/internal/ast/tokenizer.go b/internal/ast/tokenizer.go index 9c89099..1152545 100644 --- a/internal/ast/tokenizer.go +++ b/internal/ast/tokenizer.go @@ -16,7 +16,8 @@ const ( // Supports the string format used in golang, and will include // the " before and after the contents of the string. // Characters can be escaped the common way with a backslash. - StringLiteralToken TokenID = "string" + StringLiteralToken TokenID = "string" + RawStringLiteralToken TokenID = "raw-string" // IntLiteralToken represents an integer token. Must not start with 0. IntLiteralToken TokenID = "int" // FloatLiteralToken represents a float token. @@ -108,7 +109,8 @@ var tokenPatterns = []tokenPattern{ {FloatLiteralToken, regexp.MustCompile(`^\d+\.\d*(?:[eE][+-]?\d+)?$`)}, // Like an integer, but with a period and digits after. {IntLiteralToken, regexp.MustCompile(`^(?:0|[1-9]\d*)$`)}, // Note: numbers that start with 0 are identifiers. 
{IdentifierToken, regexp.MustCompile(`^\w+$`)}, // Any valid object name - {StringLiteralToken, regexp.MustCompile(`^(?:".*"|'.*')$`)}, // "string example" + {StringLiteralToken, regexp.MustCompile(`^(?:".*"|'.*')$`)}, // "string example" 'alternative' + {RawStringLiteralToken, regexp.MustCompile("^`.*`$")}, // `raw string` {BracketAccessDelimiterStartToken, regexp.MustCompile(`^\[$`)}, // the [ in map["key"] {BracketAccessDelimiterEndToken, regexp.MustCompile(`^]$`)}, // the ] in map["key"] {ParenthesesStartToken, regexp.MustCompile(`^\($`)}, // ( diff --git a/internal/ast/tokenizer_test.go b/internal/ast/tokenizer_test.go index 22c127b..e36b2d6 100644 --- a/internal/ast/tokenizer_test.go +++ b/internal/ast/tokenizer_test.go @@ -216,7 +216,7 @@ func TestTokenizer_BooleanLiterals(t *testing.T) { } func TestTokenizer_StringLiteral(t *testing.T) { - input := `"" "a" "a\"b"` + input := `"" "a" "a\"b"` + " `raw_str/\\`" tokenizer := initTokenizer(input, filename) assert.Equals(t, tokenizer.hasNextToken(), true) tokenVal, err := tokenizer.getNext() @@ -233,6 +233,11 @@ func TestTokenizer_StringLiteral(t *testing.T) { assert.NoError(t, err) assert.Equals(t, tokenVal.TokenID, StringLiteralToken) assert.Equals(t, tokenVal.Value, `"a\"b"`) + assert.Equals(t, tokenizer.hasNextToken(), true) + tokenVal, err = tokenizer.getNext() + assert.NoError(t, err) + assert.Equals(t, tokenVal.TokenID, RawStringLiteralToken) + assert.Equals(t, tokenVal.Value, "`raw_str/\\`") assert.Equals(t, tokenizer.hasNextToken(), false) }