-
Notifications
You must be signed in to change notification settings - Fork 25
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Improve parsing of SPL2 modules for statement names to handle strings, fields, functions, comments. (#131)
- Loading branch information
Showing
9 changed files
with
189 additions
and
13 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
File renamed without changes.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,97 @@ | ||
/**
 * Pattern for a module-level assignment such as `$statement_1 = ...` that
 * begins a line. Capture group 1 is the statement name without the `$`.
 */
const STATEMENT_NAME_PATTERN = /^\s*\$([a-zA-Z0-9_]+)\s*=/gm;

/**
 * Retrieves the names of all module-level search statements.
 *
 * The module text is first "cleaned" by dropping everything inside comments,
 * field literals (' .. '), string literals (" .. ") and braces ({ .. }), so
 * that function/lambda parameters like `$it -> { $p = 1 }` and commented-out
 * statements like `/* $out = from [{}] *\/` are never mistaken for statements.
 * The remaining text is then scanned for `$name =` at the start of a line.
 *
 * @param spl2Module module contents
 * @returns statement names (without the leading `$`) in order of appearance
 */
export function getModuleStatements(spl2Module: string): string[] {
    const cleaned = stripNestedContent(spl2Module);
    const names: string[] = [];
    // matchAll works on a copy of the regex, so the shared /g pattern is not mutated.
    for (const match of cleaned.matchAll(STATEMENT_NAME_PATTERN)) {
        const name = match[1];
        if (name !== undefined) {
            names.push(name);
        }
    }
    return names;
}

/**
 * Returns `text` with comments, field literals, string literals and brace
 * blocks removed. Line breaks outside of those constructs are preserved so
 * that statements which start a line in the input still start a line in the
 * output, and a newline is inserted wherever an outermost brace block closes.
 */
function stripNestedContent(text: string): string {
    let cleaned = '';
    let braceLevel = 0; // nesting depth of { .. }
    let i = 0;
    while (i < text.length) {
        const current = text.charAt(i);
        const following = peek(text, i + 1);

        // Comments and literals are recognised at every brace depth so that a
        // `{` or `}` inside them never affects the nesting level.
        if (current === '/' && following === '/') {
            // Stops AT the line break, which is then handled like any other character.
            i = skipLineComment(text, i + 2);
        } else if (current === '/' && following === '*') {
            i = skipBlockComment(text, i + 2);
        } else if (current === '\'') {
            i = skipQuoted(text, i + 1, '\'', false); // fields may span lines
        } else if (current === '"') {
            i = skipQuoted(text, i + 1, '"', true); // strings end at a line break
        } else if (current === '{') {
            braceLevel++;
            i++;
        } else if (current === '}' && braceLevel > 0) {
            braceLevel--;
            if (braceLevel === 0) {
                // insert newlines after blocks like function and dataset declarations
                // to start new statements/declarations on new lines when possible
                cleaned += '\n';
            }
            i++;
        } else {
            if (braceLevel === 0) {
                cleaned += current;
            }
            i++;
        }
    }
    return cleaned;
}

/** True when a `\n` or a `\r\n` pair starts at index `i`. */
function isLineBreakAt(text: string, i: number): boolean {
    const current = text.charAt(i);
    return current === '\n' || (current === '\r' && peek(text, i + 1) === '\n');
}

/** Returns the index of the line break ending a `//` comment, or the text length. */
function skipLineComment(text: string, start: number): number {
    let i = start;
    while (i < text.length && !isLineBreakAt(text, i)) {
        i++;
    }
    return i;
}

/** Returns the index just past the closing `*\/`, or the text length if unterminated. */
function skipBlockComment(text: string, start: number): number {
    const close = text.indexOf('*/', start);
    return close === -1 ? text.length : close + 2;
}

/**
 * Returns the index at which scanning resumes after a quoted literal whose
 * body begins at `start`: just past the closing quote, or, when
 * `endsAtLineBreak` is set and the literal is unterminated on its line, at the
 * line break itself. A backslash escapes the character after it, so `\\`
 * followed by the quote closes the literal while `\` + quote does not.
 */
function skipQuoted(text: string, start: number, quote: string, endsAtLineBreak: boolean): number {
    let i = start;
    while (i < text.length) {
        if (endsAtLineBreak && isLineBreakAt(text, i)) {
            return i;
        }
        const current = text.charAt(i);
        if (current === '\\') {
            // Skip the escaped character, but never hop over a terminating line break.
            i += (endsAtLineBreak && isLineBreakAt(text, i + 1)) ? 1 : 2;
        } else if (current === quote) {
            return i + 1;
        } else {
            i++;
        }
    }
    return text.length;
}

/** Returns the character at index `i`, or "" when `i` is out of range. */
function peek(str: string, i: number): string {
    return str.charAt(i);
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,79 @@ | ||
const { assert } = require('chai'); | ||
const { getModuleStatements } = require("../out/notebooks/utils/parsing"); | ||
|
||
// describe/it test suite for getModuleStatements(); assertions come from chai's `assert`.
// Each case feeds an SPL2 module string and checks the returned statement names.
describe('splunk', () => { | ||
describe('getModuleStatements()', () => { | ||
// One `$out = ...` statement yields exactly one name, 'out'.
it('should find a single statement', () => { | ||
const module = ` | ||
$out = from a; | ||
`; | ||
const statements = getModuleStatements(module); | ||
assert.equal(statements.length, 1); | ||
assert.equal(statements[0], 'out'); | ||
}); | ||
// Three statements on separate lines are returned in source order.
it('should find each statement when several specified', () => { | ||
const module = ` | ||
$out1 = from a; | ||
$out2 = from b; | ||
$out3 = from c; | ||
`; | ||
const statements = getModuleStatements(module); | ||
assert.equal(statements.length, 3); | ||
assert.equal(statements[0], 'out1'); | ||
assert.equal(statements[1], 'out2'); | ||
assert.equal(statements[2], 'out3'); | ||
}); | ||
// Assignments that appear after `//` (whole-line or trailing) must not be reported.
it('should ignore single line comments', () => { | ||
const module = ` | ||
//$out1 = from a; | ||
$out2 = from b; // $out3 = from c; | ||
// $out4 = from c; | ||
`; | ||
const statements = getModuleStatements(module); | ||
assert.equal(statements.length, 1); | ||
assert.equal(statements[0], 'out2'); | ||
}); | ||
// Assignments inside block comments are ignored, including a comment placed
// between the statement name and its `=`.
it('should ignore block comments', () => { | ||
const module = ` | ||
/*$out1 = from a; | ||
*/$out2 /* * */= from b; | ||
/* $out3 = from c;*/ | ||
`; | ||
const statements = getModuleStatements(module); | ||
assert.equal(statements.length, 1); | ||
assert.equal(statements[0], 'out2'); | ||
}); | ||
// Mixed fixture: multi-line field literals, escaped quotes, lambda/function/dataset
// bodies in braces, and a block comment splitting a statement. Only the four
// module-level names out1..out4 are expected.
it('should handle complex comment, field, and function scenarios', () => { | ||
const module = ` | ||
$out1 = from [{s:1}] | eval ' | ||
$fieldtemp1 = ' = value1 | eval ' \\' | ||
$fieldtemp2 = ' = value2 | eval field1 = | ||
" \\" $stringtemp1 = value3" | ||
| eval foo = map([1,2], $it -> { | ||
$lp1 = 1; | ||
return $f; | ||
}); | ||
function func1() | ||
dataset ds1 { | ||
' | ||
$dsfield = ': "value" | ||
} | ||
function func2() { | ||
$p1 = 1; | ||
$p2 = $p1 + 1; | ||
return $p2 | ||
} $out2 = from [{s:2}] | where '$foo=bar'=2; | ||
$out3 /* $f1 = 1; | ||
$f2 = 2 | ||
*/ = from [{s:3}]; | ||
$out4 = from [{' | ||
$fieldval = ': "error"}];`; | ||
const statements = getModuleStatements(module); | ||
assert.equal(statements.length, 4); | ||
assert.equal(statements[0], 'out1'); | ||
assert.equal(statements[1], 'out2'); | ||
assert.equal(statements[2], 'out3'); | ||
assert.equal(statements[3], 'out4'); | ||
}); | ||
}); | ||
}); |