Skip to content

Commit

Permalink
Merge pull request #9 from awslabs/update-dec-2024
Browse files Browse the repository at this point in the history
Update master branch with latest commits
  • Loading branch information
rdsharma26 authored Dec 23, 2024
2 parents 64d7036 + 97bd885 commit c6c7515
Show file tree
Hide file tree
Showing 56 changed files with 4,619 additions and 337 deletions.
12 changes: 11 additions & 1 deletion configuration/dqdl/CommonLexerRules.g4
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ LPAREN: '(';
RPAREN: ')';
AND: 'and' | 'AND';
OR: 'or' | 'OR';
OF: 'of' | 'OF';
NULL: 'null' | 'NULL';
EMPTY: 'empty' | 'EMPTY';
WHITESPACES_ONLY: 'whitespaces_only' | 'WHITESPACES_ONLY';

BETWEEN: 'between';
EQUAL_TO: '=';
Expand All @@ -20,18 +24,24 @@ GREATER_THAN_EQUAL_TO: '>=';
LESS_THAN: '<';
LESS_THAN_EQUAL_TO: '<=';
IN: 'in';
NOT: 'not';
NEGATION: '!';

// Literal tokens. The ANTLR lexer prefers the longest match and, on a tie,
// the rule that appears first, so the order of the rules below is significant.

// A single decimal digit. Declared before INT, so a one-character number is
// tokenized as DIGIT; parser rules therefore accept (DIGIT | INT).
DIGIT: [0-9];
// Quoted date shaped like "dddd-dd-dd" (four, two and two digits).
// Only the shape is checked here; no range validation happens in the lexer.
DATE:
QUOTE DIGIT DIGIT DIGIT DIGIT '-' DIGIT DIGIT '-' DIGIT DIGIT QUOTE;
// Quoted 12-hour time: one or two hour digits, ':', two minute digits,
// then a single space and AM or PM (upper case only).
TIME:
QUOTE (DIGIT | DIGIT DIGIT) ':' DIGIT DIGIT (' AM' | ' PM') QUOTE;
// Quoted time without an AM/PM suffix: exactly two digits, ':', two digits.
MIL_TIME:
QUOTE DIGIT DIGIT ':' DIGIT DIGIT QUOTE;
// One or more digits (unsigned).
INT: DIGIT+;
// Unsigned decimal; digits are required on both sides of the '.'.
DECIMAL: INT '.' INT;
// Any quoted text, matched non-greedily up to the next QUOTE.
// QUOTE and ESC are defined outside the visible part of this grammar.
QUOTED_STRING: QUOTE (ESC | .)*? QUOTE;
// Minus sign as its own token; the sign is attached to numbers by parser rules.
NEGATIVE: '-';

// Match "#" and everything up to the end of the line, then discard it.
// The terminating '\n' is optional so that a comment on the very last line of
// the input (no trailing newline) is still skipped instead of failing to lex.
// ~[\n]* also consumes a '\r' that precedes the newline, so CRLF input behaves
// exactly as before.
LINE_COMMENT: '#' ~[\n]* '\n'? -> skip;

IDENTIFIER: [a-zA-Z0-9]+;
IDENTIFIER: [a-zA-Z0-9_.]+;

// Discard insignificant whitespace. '\r' is included so that input with CRLF
// line endings does not produce unrecognised-character errors.
WS: [ \t\r\n]+ -> skip;

Expand Down
125 changes: 100 additions & 25 deletions configuration/dqdl/DataQualityDefinitionLanguage.g4
Original file line number Diff line number Diff line change
@@ -1,98 +1,172 @@
grammar DataQualityDefinitionLanguage; // "parser grammars for DQDL"
grammar DataQualityDefinitionLanguage; // "parser grammars for DQDL"
import CommonLexerRules;

// Sections
// Each rule below matches the literal keyword that opens one top-level
// section of a document. The keywords are case-sensitive literals.
metadataSectionStart: 'Metadata';
dataSourcesSectionStart: 'DataSources';
rulesSectionStart: 'Rules';
analyzersSectionStart: 'Analyzers';

// Expressions
dateNow: 'now()';

durationUnit: 'days' | 'hours';
durationUnit: 'days' | 'hours' | 'minutes';

durationExpression: (DIGIT | INT) durationUnit;

// Unit suffix of a size literal. Only these upper-case spellings are accepted.
// NOTE(review): these literals become implicit lexer tokens; confirm they do
// not shadow IDENTIFIER for unquoted names such as B or KB.
sizeUnit:
'B'
| 'KB'
| 'MB'
| 'GB'
| 'TB';

// An unsigned whole number followed by a size unit.
// (DIGIT | INT) is needed because a single digit lexes as DIGIT rather than INT.
sizeExpression: (DIGIT | INT) sizeUnit;

timeExpression: TIME | MIL_TIME;

dateExpressionOp: ('-' | '+');
dateExpression:
DATE
| dateNow
| LPAREN dateNow dateExpressionOp durationExpression RPAREN;
| LPAREN dateNow dateExpressionOp durationExpression RPAREN
| timeExpression
| NULL;

number:
atomicNumber:
DIGIT
| NEGATIVE DIGIT
| INT
| NEGATIVE INT
| DECIMAL
| NEGATIVE DECIMAL;

// Comma-separated argument list of a function call: one or more `number`
// expressions. A single alternative is enough, since zero repetitions of the
// tail covers the one-argument case; a separate `number` alternative would
// make the two alternatives ambiguous for single-argument input.
functionParameters: number (COMMA number)*;

// A function invocation: a name followed by a parenthesised argument list,
// which may be empty.
functionCall: IDENTIFIER LPAREN functionParameters? RPAREN;

numberOp: '+' | '-' | '/' | '*';

number:
number numberOp number
| functionCall
| LPAREN number RPAREN
| atomicNumber
| NULL;

quotedString: QUOTED_STRING;

matchesRegexCondition: 'matches' quotedString;

numberArray: LBRAC number (COMMA number)* RBRAC;
numberBasedCondition:
BETWEEN number AND number
NOT? BETWEEN number AND number
| GREATER_THAN number
| GREATER_THAN_EQUAL_TO number
| LESS_THAN number
| LESS_THAN_EQUAL_TO number
| EQUAL_TO number
| IN numberArray;
| NEGATION? EQUAL_TO number
| NOT? IN numberArray;

// Reference to a variable: '$' followed by an identifier.
variableDereference: '$' IDENTIFIER;

// Anything usable where a string operand is expected: a quoted literal,
// a variable reference, or one of the keywords NULL, EMPTY, WHITESPACES_ONLY.
stringValues:
quotedString
| variableDereference
| NULL
| EMPTY
| WHITESPACES_ONLY;

quotedStringArray: LBRAC quotedString (COMMA quotedString)* RBRAC;
stringValuesArray: LBRAC stringValues (COMMA stringValues)* RBRAC;
stringBasedCondition:
EQUAL_TO quotedString
| IN quotedStringArray
| matchesRegexCondition;
NEGATION? EQUAL_TO stringValues
| NOT? IN stringValuesArray
| NOT? IN variableDereference
| NOT? matchesRegexCondition;
tagValues: IDENTIFIER;

dateExpressionArray: LBRAC dateExpression (COMMA dateExpression)* RBRAC;
dateBasedCondition:
BETWEEN dateExpression AND dateExpression
NOT? BETWEEN dateExpression AND dateExpression
| GREATER_THAN dateExpression
| GREATER_THAN_EQUAL_TO dateExpression
| LESS_THAN dateExpression
| LESS_THAN_EQUAL_TO dateExpression
| EQUAL_TO dateExpression
| IN dateExpressionArray;
| NEGATION? EQUAL_TO dateExpression
| NOT? IN dateExpressionArray;

durationExpressionArray: LBRAC durationExpression (COMMA durationExpression)* RBRAC;
durationBasedCondition:
BETWEEN durationExpression AND durationExpression
NOT? BETWEEN durationExpression AND durationExpression
| GREATER_THAN durationExpression
| GREATER_THAN_EQUAL_TO durationExpression
| LESS_THAN durationExpression
| LESS_THAN_EQUAL_TO durationExpression
| EQUAL_TO durationExpression
| IN durationExpressionArray;
| NEGATION? EQUAL_TO durationExpression
| NOT? IN durationExpressionArray;

// Bracketed, comma-separated list of one or more size literals.
sizeExpressionArray: LBRAC sizeExpression (COMMA sizeExpression)* RBRAC;
// Comparison of a value against size literals.
// Negation is spelled with the NOT keyword for BETWEEN / IN and with the
// NEGATION token ('!') in front of '=' for equality.
sizeBasedCondition:
NOT? BETWEEN sizeExpression AND sizeExpression
| GREATER_THAN sizeExpression
| GREATER_THAN_EQUAL_TO sizeExpression
| LESS_THAN sizeExpression
| LESS_THAN_EQUAL_TO sizeExpression
| NEGATION? EQUAL_TO sizeExpression
| NOT? IN sizeExpressionArray;

ruleType: IDENTIFIER;
parameter: (QUOTED_STRING | INT | DIGIT);
analyzerType: IDENTIFIER;
parameter: QUOTED_STRING
| IDENTIFIER;
connectorWord: OF | AND;
parameterWithConnectorWord: connectorWord? parameter;
tagWithCondition: 'with' tagValues (stringBasedCondition | numberBasedCondition);

condition:
numberBasedCondition
| stringBasedCondition
| dateBasedCondition
| durationBasedCondition;
| durationBasedCondition
| sizeBasedCondition;

withThresholdCondition: 'with' 'threshold' numberBasedCondition;
whereClause: 'where' quotedString;

dqRule: ruleType parameter* condition? withThresholdCondition?;
dqRule: ruleType parameterWithConnectorWord* condition? whereClause? tagWithCondition*;
dqAnalyzer: analyzerType parameterWithConnectorWord*;

// Variable Declarations
// The right-hand side of a declaration: a single string value or an array of them.
expression:
stringValues
| stringValuesArray;

// One declaration of the form `name = value`.
variableDeclaration:
IDENTIFIER EQUAL_TO expression;
// Zero or more declarations in sequence; this rule can match empty input.
variableDeclarations: variableDeclaration*;

topLevelRule:
dqRule
| '(' dqRule ')' (AND '(' dqRule ')')*
| '(' dqRule ')' (OR '(' dqRule ')')*;
LPAREN topLevelRule RPAREN
| topLevelRule AND topLevelRule
| topLevelRule OR topLevelRule
| dqRule;

// Rules Definition
dqRules: topLevelRule (COMMA topLevelRule)*;
dqAnalyzers: dqAnalyzer (COMMA dqAnalyzer)*;

// Top Level Document
// The Rules section: `Rules = [ ... ]`. The list of rules is optional, so an
// empty array is accepted.
rules: rulesSectionStart EQUAL_TO LBRAC dqRules? RBRAC;

// The Analyzers section: `Analyzers = [ ... ]`. The list of analyzers is
// optional, so an empty array is accepted.
analyzers: analyzersSectionStart EQUAL_TO LBRAC dqAnalyzers? RBRAC;

// This dictionary does not support nested dictionaries. Just strings and arrays.
dictionary: LCURL pair (COMMA pair)* RCURL;
pair: QUOTED_STRING COLON pairValue;
Expand All @@ -101,5 +175,6 @@ array: LBRAC QUOTED_STRING (COMMA QUOTED_STRING)* RBRAC;

metadata: metadataSectionStart EQUAL_TO dictionary;
dataSources: dataSourcesSectionStart EQUAL_TO dictionary;
// Either section alone, or Rules followed by Analyzers (in that order).
rulesOrAnalyzers:
    rules analyzers?
    | analyzers;

document: metadata? dataSources? rules;
document: metadata? dataSources? variableDeclarations? rulesOrAnalyzers;
Loading

0 comments on commit c6c7515

Please sign in to comment.