From f2d4efe17ac41dae23561e10c40b304f5a1ccb9d Mon Sep 17 00:00:00 2001
From: Jared O'Connell <46976761+jaredoconnell@users.noreply.github.com>
Date: Fri, 9 Feb 2024 12:53:49 -0500
Subject: [PATCH] Raw Strings and Schema Fix (#32)

* Use correct parser for 64-bit integers

* Added step to remove non-applicable schema restrictions

* Added raw string literals

* Added test for schema cleanup
---
 expression_data_test.go                  | 15 +++++++++-
 expression_dependencies.go               | 37 ++++++++++++++++++++----
 expression_type_test.go                  | 12 ++++++++
 internal/ast/recursive_descent_parser.go |  8 +++--
 internal/ast/tokenizer.go                |  6 ++--
 internal/ast/tokenizer_test.go           |  7 ++++-
 6 files changed, 73 insertions(+), 12 deletions(-)

diff --git a/expression_data_test.go b/expression_data_test.go
index 2e3accf..57041af 100644
--- a/expression_data_test.go
+++ b/expression_data_test.go
@@ -1,6 +1,9 @@
 package expressions_test
 
-import "go.flow.arcalot.io/pluginsdk/schema"
+import (
+	"go.flow.arcalot.io/pluginsdk/schema"
+	"regexp"
+)
 
 var testScope = schema.NewScopeSchema(
 	schema.NewObjectSchema(
@@ -57,6 +60,16 @@ var testScope = schema.NewScopeSchema(
 				nil,
 				nil,
 			),
+			"restrictive_str": schema.NewPropertySchema(
+				schema.NewStringSchema(nil, nil, regexp.MustCompile(`^a$`)),
+				nil,
+				true,
+				nil,
+				nil,
+				nil,
+				nil,
+				nil,
+			),
 			"simple_int": schema.NewPropertySchema(
 				schema.NewIntSchema(nil, nil, nil),
 				nil,
diff --git a/expression_dependencies.go b/expression_dependencies.go
index 36f15ed..58aef66 100644
--- a/expression_dependencies.go
+++ b/expression_dependencies.go
@@ -144,7 +144,10 @@ func (c *dependencyContext) binaryOperationDependencies(
 			rightResult.resolvedType.TypeID(),
 			[]schema.TypeID{schema.TypeIDInt, schema.TypeIDFloat, schema.TypeIDString},
 		)
-		resultType = leftResult.resolvedType
+		if err != nil {
+			return nil, err
+		}
+		resultType = cleanType(leftResult.resolvedType.TypeID())
 	case ast.Subtract, ast.Multiply, ast.Divide, ast.Modulus, ast.Power:
 		// Math. Same as type going in. Plus validate that it's numeric.
 		err = validateValidBinaryOpTypes(
@@ -153,7 +156,10 @@ func (c *dependencyContext) binaryOperationDependencies(
 			rightResult.resolvedType.TypeID(),
 			[]schema.TypeID{schema.TypeIDInt, schema.TypeIDFloat},
 		)
-		resultType = leftResult.resolvedType
+		if err != nil {
+			return nil, err
+		}
+		resultType = cleanType(leftResult.resolvedType.TypeID())
 	case ast.And, ast.Or:
 		// Boolean operations. Bool in and out.
 		err = validateValidBinaryOpTypes(
@@ -162,6 +168,9 @@ func (c *dependencyContext) binaryOperationDependencies(
 			rightResult.resolvedType.TypeID(),
 			[]schema.TypeID{schema.TypeIDBool},
 		)
+		if err != nil {
+			return nil, err
+		}
 		resultType = schema.NewBoolSchema()
 	case ast.GreaterThan, ast.LessThan, ast.GreaterThanEqualTo, ast.LessThanEqualTo:
 		// Inequality. Int, float, or string in; bool out.
@@ -171,6 +180,9 @@ func (c *dependencyContext) binaryOperationDependencies(
 			rightResult.resolvedType.TypeID(),
 			[]schema.TypeID{schema.TypeIDInt, schema.TypeIDString, schema.TypeIDFloat},
 		)
+		if err != nil {
+			return nil, err
+		}
 		resultType = schema.NewBoolSchema()
 	case ast.EqualTo, ast.NotEqualTo:
 		// Equality comparison. Any supported type in. Bool out.
@@ -180,15 +192,15 @@ func (c *dependencyContext) binaryOperationDependencies(
 			rightResult.resolvedType.TypeID(),
 			[]schema.TypeID{schema.TypeIDInt, schema.TypeIDString, schema.TypeIDFloat, schema.TypeIDBool},
 		)
+		if err != nil {
+			return nil, err
+		}
 		resultType = schema.NewBoolSchema()
 	case ast.Invalid:
 		panic(fmt.Errorf("attempted to perform invalid operation (binary operation type invalid)"))
 	default:
 		panic(fmt.Errorf("bug: binary operation %s missing from dependency evaluation code", node.Operation))
 	}
-	if err != nil {
-		return nil, err
-	}
 	// Combine the left and right dependencies.
 	finalDependencies := append(leftResult.completedPaths, rightResult.completedPaths...)
 	return &dependencyResult{
@@ -198,6 +210,21 @@ func (c *dependencyContext) binaryOperationDependencies(
 	}, nil
 }
 
+// cleanType returns an unrestricted int, float, or string schema for the given type ID, and panics
+// for any other ID. Used when an expression changes the value, invalidating the input's restrictions.
+func cleanType(inputType schema.TypeID) schema.Type {
+	switch inputType {
+	case schema.TypeIDInt:
+		return schema.NewIntSchema(nil, nil, nil)
+	case schema.TypeIDFloat:
+		return schema.NewFloatSchema(nil, nil, nil)
+	case schema.TypeIDString:
+		return schema.NewStringSchema(nil, nil, nil)
+	default:
+		panic(fmt.Errorf("bug: case missing from cleanType: %s", inputType))
+	}
+}
+
 func validateValidBinaryOpTypes(
 	node *ast.BinaryOperation,
 	leftType schema.TypeID,
diff --git a/expression_type_test.go b/expression_type_test.go
index 55b9a83..1f60229 100644
--- a/expression_type_test.go
+++ b/expression_type_test.go
@@ -235,6 +235,18 @@ func TestTypeResolution_BinaryConcatenateStrings(t *testing.T) {
 	assert.Equals[schema.Type](t, typeResult, schema.NewStringSchema(nil, nil, nil))
 }
 
+func TestTypeResolution_WithStrictSchemas(t *testing.T) {
+	// restrictive_str only accepts values matching ^a$, a pattern that the concatenation of two
+	// such values does not match, so the resolved type must not carry the pattern over.
+	// Use the strict schema for both left and right sides to ensure neither the left nor the right's
+	// strict schema is retained.
+	expr, err := expressions.New(`$.restrictive_str + $.restrictive_str`)
+	assert.NoError(t, err)
+	typeResult, err := expr.Type(testScope, nil, nil)
+	assert.NoError(t, err)
+	assert.Equals[schema.Type](t, typeResult, schema.NewStringSchema(nil, nil, nil))
+}
+
 func TestTypeResolution_BinaryMathHomogeneousIntReference(t *testing.T) {
 	// Two ints added should give an int. One int is a reference.
 	expr, err := expressions.New("5 + $.simple_int")
diff --git a/internal/ast/recursive_descent_parser.go b/internal/ast/recursive_descent_parser.go
index c97d0a9..7596d84 100644
--- a/internal/ast/recursive_descent_parser.go
+++ b/internal/ast/recursive_descent_parser.go
@@ -164,7 +164,9 @@ func (p *Parser) parseStringLiteral() (*StringLiteral, error) {
 	// The literal token includes the "", so trim the ends off.
 	parsedString := p.currentToken.Value[1 : len(p.currentToken.Value)-1]
 	// Replace escaped characters
-	parsedString = escapeReplacer.Replace(parsedString)
+	if p.currentToken.TokenID != RawStringLiteralToken {
+		parsedString = escapeReplacer.Replace(parsedString)
+	}
 	// Now create the literal itself and advance the token.
 	literal := &StringLiteral{StrValue: parsedString}
 	err := p.advanceToken()
@@ -495,7 +497,7 @@ func (p *Parser) parseNegationOperation() (Node, error) {
 	return p.parseLeftUnaryExpression([]TokenID{NegationToken}, p.parseValueOrAccessExpression)
 }
 
-var literalTokens = []TokenID{StringLiteralToken, IntLiteralToken, BooleanLiteralToken, FloatLiteralToken}
+var literalTokens = []TokenID{StringLiteralToken, RawStringLiteralToken, IntLiteralToken, BooleanLiteralToken, FloatLiteralToken}
 var identifierTokens = []TokenID{IdentifierToken, RootAccessToken}
 var validRootValueOrAccessStartTokens = append(literalTokens, identifierTokens...)
 var validValueOrAccessStartTokens = append(validRootValueOrAccessStartTokens, CurrentObjectAccessToken)
@@ -515,7 +517,7 @@ func (p *Parser) parseValueOrAccessExpression() (Node, error) {
 	// A value or access expression can start with a literal, or an identifier.
 	// If an identifier, it can lead to a chain or a function.
 	switch p.currentToken.TokenID {
-	case StringLiteralToken:
+	case StringLiteralToken, RawStringLiteralToken:
 		literalNode, err = p.parseStringLiteral()
 	case IntLiteralToken:
 		literalNode, err = p.parseIntLiteral()
diff --git a/internal/ast/tokenizer.go b/internal/ast/tokenizer.go
index 9c89099..1152545 100644
--- a/internal/ast/tokenizer.go
+++ b/internal/ast/tokenizer.go
@@ -16,7 +16,8 @@ const (
 	// Supports the string format used in golang, and will include
 	// the " before and after the contents of the string.
 	// Characters can be escaped the common way with a backslash.
-	StringLiteralToken TokenID = "string"
+	StringLiteralToken    TokenID = "string"
+	RawStringLiteralToken TokenID = "raw-string"
 	// IntLiteralToken represents an integer token. Must not start with 0.
 	IntLiteralToken TokenID = "int"
 	// FloatLiteralToken represents a float token.
@@ -108,7 +109,8 @@ var tokenPatterns = []tokenPattern{
 	{FloatLiteralToken, regexp.MustCompile(`^\d+\.\d*(?:[eE][+-]?\d+)?$`)}, // Like an integer, but with a period and digits after.
 	{IntLiteralToken, regexp.MustCompile(`^(?:0|[1-9]\d*)$`)},              // Note: numbers that start with 0 are identifiers.
 	{IdentifierToken, regexp.MustCompile(`^\w+$`)},                         // Any valid object name
-	{StringLiteralToken, regexp.MustCompile(`^(?:".*"|'.*')$`)},            // "string example"
+	{StringLiteralToken, regexp.MustCompile(`^(?:".*"|'.*')$`)},            // "string example" 'alternative'
+	{RawStringLiteralToken, regexp.MustCompile("^`.*`$")},                  // `raw string`
 	{BracketAccessDelimiterStartToken, regexp.MustCompile(`^\[$`)},         // the [ in map["key"]
 	{BracketAccessDelimiterEndToken, regexp.MustCompile(`^]$`)},            // the ] in map["key"]
 	{ParenthesesStartToken, regexp.MustCompile(`^\($`)},                    // (
diff --git a/internal/ast/tokenizer_test.go b/internal/ast/tokenizer_test.go
index 22c127b..e36b2d6 100644
--- a/internal/ast/tokenizer_test.go
+++ b/internal/ast/tokenizer_test.go
@@ -216,7 +216,7 @@ func TestTokenizer_BooleanLiterals(t *testing.T) {
 }
 
 func TestTokenizer_StringLiteral(t *testing.T) {
-	input := `"" "a" "a\"b"`
+	input := `"" "a" "a\"b"` + " `raw_str/\\`"
 	tokenizer := initTokenizer(input, filename)
 	assert.Equals(t, tokenizer.hasNextToken(), true)
 	tokenVal, err := tokenizer.getNext()
@@ -233,6 +233,11 @@ func TestTokenizer_StringLiteral(t *testing.T) {
 	assert.NoError(t, err)
 	assert.Equals(t, tokenVal.TokenID, StringLiteralToken)
 	assert.Equals(t, tokenVal.Value, `"a\"b"`)
+	assert.Equals(t, tokenizer.hasNextToken(), true)
+	tokenVal, err = tokenizer.getNext()
+	assert.NoError(t, err)
+	assert.Equals(t, tokenVal.TokenID, RawStringLiteralToken)
+	assert.Equals(t, tokenVal.Value, "`raw_str/\\`")
 	assert.Equals(t, tokenizer.hasNextToken(), false)
 }