-
Notifications
You must be signed in to change notification settings - Fork 64
/
Copy pathparser.ts
2224 lines (2018 loc) · 81.2 KB
/
parser.ts
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// Copyright (c) Microsoft Corporation.
// Licensed under the MIT license.
// TODO: support also the scenario of parsing a build log,
// to overcome some of --dry-run limitations
// (like some exceptions to the 'do not execute' rule
// or dependencies on a real build)
import * as configuration from "./configuration";
import * as cpp from "vscode-cpptools";
import * as ext from "./extension";
import * as logger from "./logger";
import * as make from "./make";
import * as path from "path";
import * as util from "./util";
import * as vscode from "vscode";
import * as nls from "vscode-nls";
// Configure vscode-nls to use the bundle message format with a standalone bundle,
// then load the message bundle that provides the localize function used by this module.
nls.config({
messageFormat: nls.MessageFormat.bundle,
bundleFormat: nls.BundleFormat.standalone,
})();
const localize: nls.LocalizeFunc = nls.loadMessageBundle();
// List of compiler tools plus the most common aliases cc and c++.
// Each entry is a regular expression fragment, which is why
// ++ needs to be escaped for the regular expression in parseLineAsTool.
// Versioning and cross compilers naming variations don't need to be included in this list,
// they will be considered in the regular expression.
// If one compiler name is a substring of another, include it after in this list.
// todo: any other scenarios of aliases and symlinks
// that would make parseLineAsTool to not match the regular expression,
// therefore wrongly skipping over compilation lines?
const compilers: string[] = [
"ccache",
"clang\\+\\+",
"clang-cl",
"clang-cpp",
"clang",
"gcc",
"gpp",
"cpp",
"icc",
"cc",
"icl",
"cl",
"g\\+\\+",
"c\\+\\+",
];
// List of linker tool names, written as regular expression fragments
// in the same way as the compilers list above (hence the escaped ++).
// NOTE(review): presumably passed to parseLineAsTool when looking for link commands;
// the consumer is not visible in this part of the file.
const linkers: string[] = [
"ccache",
"ilink",
"link",
"ld",
"ccld",
"gcc",
"clang\\+\\+",
"clang",
"cc",
"g\\+\\+",
"c\\+\\+",
];
// NOTE(review): presumably the file extensions recognized as source files
// when parsing compiler command lines; the consumer is not visible in this part of the file.
const sourceFileExtensions: string[] = ["cpp", "cc", "cxx", "c"];
// Controls how many regular expression matches parseTargets processes
// within one scheduled task before it checks for cancellation again.
const chunkSize: number = 100;
// Parses the build targets out of the given make verbose log.
// - cancel: checked before starting, before every chunk of work and before returning.
// - verboseLog: the text to parse; only the sections between the "# Files"
//   and "# Finished Make data base" lines are analyzed.
// - statusCallback: invoked with a progress message for every target pattern match.
// - foundTargetCallback: invoked once for every distinct target name found.
// Returns make.ConfigureBuildReturnCodeTypes.cancelled if cancellation was requested,
// make.ConfigureBuildReturnCodeTypes.success otherwise.
export async function parseTargets(
  cancel: vscode.CancellationToken,
  verboseLog: string,
  statusCallback: (message: string) => void,
  foundTargetCallback: (target: string) => void
): Promise<number> {
  if (cancel.isCancellationRequested) {
    return make.ConfigureBuildReturnCodeTypes.cancelled;
  }

  // Extract the text between "# Files" and "# Finished Make data base" lines
  // There can be more than one matching section.
  const regexpExtract: RegExp =
    /(# Files\n*)([\s\S]*?)(\n# Finished Make data base)/gm;

  // Targets reported so far, across all the sections of the log.
  // A Set keeps the duplicate check constant time regardless of how many targets were found.
  const reportedTargets: Set<string> = new Set<string>();

  let section: RegExpExecArray | null = await util.scheduleTask(() =>
    regexpExtract.exec(verboseLog)
  );
  while (section) {
    const extractedLog: string = section[2];

    // Skip lines starting with {#,.} or preceded by "# Not a target" and extract the target.
    // Additionally, if makefile.phonyOnlyTargets is true, include only targets
    // succeeded by "# Phony target (prerequisite of .PHONY).".
    let regexpTargetStr: string =
      "^(?!\\n?[#\\.])(?<!^\\n?# Not a target:\\s*)\\s*(\\S*[^:]):\\s+";
    if (configuration.getPhonyOnlyTargets()) {
      regexpTargetStr +=
        ".*\\s+(?=# Phony target \\(prerequisite of \\.PHONY\\)\\.)";
    }

    const regexpTarget: RegExp = RegExp(regexpTargetStr, "mg");
    let match: RegExpExecArray | null = regexpTarget.exec(extractedLog);

    // Processes at most chunkSize matches, leaving 'match' pointing to the next
    // unprocessed match (or null when the current section is exhausted).
    const doParsingChunk: () => void = () => {
      let processed: number = 0;
      while (match && processed < chunkSize) {
        // Make sure we don't report duplicates.
        // They can be caused by the makefile syntax of defining variables for a target.
        // That creates multiple lines with the same target name followed by :,
        // which is the pattern parsed here.
        const target: string = match[1];
        if (!reportedTargets.has(target)) {
          reportedTargets.add(target);
          foundTargetCallback(target);
        }

        statusCallback("Parsing build targets...");
        match = regexpTarget.exec(extractedLog);
        processed++;
      }
    };

    // Parse the section in chunks, so that cancellation can be honored in between.
    while (match) {
      if (cancel.isCancellationRequested) {
        return make.ConfigureBuildReturnCodeTypes.cancelled;
      }

      await util.scheduleTask(doParsingChunk);
    }

    section = await util.scheduleTask(() => regexpExtract.exec(verboseLog));
  }

  return cancel.isCancellationRequested
    ? make.ConfigureBuildReturnCodeTypes.cancelled
    : make.ConfigureBuildReturnCodeTypes.success;
}
// The result returned by preprocessDryRunOutput.
export interface PreprocessDryRunOutputReturnType {
// one of the make.ConfigureBuildReturnCodeTypes values
// (preprocessDryRunOutput reports either cancelled or success)
retc: number;
// the time spent preprocessing, as computed by util.elapsedTimeSince
// (0 when the operation was cancelled before any work started)
elapsed: number;
// the preprocessed text, not set when the operation was cancelled
result?: string;
}
// Make various preprocessing transformations on the dry-run output.
// - cancel: checked before starting and after each preprocessing task.
// - dryRunOutputStr: the dry-run (or build log) text to preprocess.
// - statusCallback: invoked once, with a progress message, before the work starts.
// Returns the return code and the elapsed time, plus the preprocessed text
// in 'result' when the operation was not cancelled.
// TODO: "cmd -c", "start cmd", "exit"
// TODO: Insert preprocessed files content
// TODO: Wrappers (example: cl.cmd)
export async function preprocessDryRunOutput(
  cancel: vscode.CancellationToken,
  dryRunOutputStr: string,
  statusCallback: (message: string) => void
): Promise<PreprocessDryRunOutputReturnType> {
  let preprocessedDryRunOutputStr: string = dryRunOutputStr;

  if (cancel.isCancellationRequested) {
    return {
      retc: make.ConfigureBuildReturnCodeTypes.cancelled,
      elapsed: 0,
    };
  }

  const startTime: number = Date.now();
  statusCallback("Preprocessing the dry-run output");

  // Array of tasks required to be executed during the preprocess configure phase.
  // Every task owns its regular expression, so that no task depends on state
  // modified by another one.
  const preprocessTasks: (() => void)[] = [];

  // Expand {REPO:VSCODE-MAKEFILE-TOOLS} to the full path of the root of the extension
  // This is used for the pre-created dry-run logs consumed by the tests,
  // in order to be able to have source files and includes for the test repro
  // within the test subfolder of the extension repo, while still exercising full paths for parsing
  // and not generating a different output with every new location where Makefile Tools is enlisted.
  // A real user scenario wouldn't need this construct.
  preprocessTasks.push((): void => {
    if (process.env["MAKEFILE_TOOLS_TESTING"] === "1") {
      const extensionRootPath: string = path.resolve(__dirname, "../");
      // Use a replacer function so that a '$' in the path is inserted literally
      // instead of being interpreted as a special replacement pattern.
      preprocessedDryRunOutputStr = preprocessedDryRunOutputStr.replace(
        /{REPO:VSCODE-MAKEFILE-TOOLS}/gm,
        () => extensionRootPath
      );
    }
  });

  // Some compiler/linker commands are split on multiple lines.
  // At the end of every intermediate line is at least a space, then a \ and end of line.
  // Concatenate all these lines to see clearly each command on one line.
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = preprocessedDryRunOutputStr.replace(
      /(\\$\n)|(\\\\$\n)/gm,
      " "
    );
  });

  // In case we parse a build log (as opposed to a dryrun log) for a project using libtool,
  // capture the compiler commands reported by the libtool output.
  // They may be redundant with the corresponding line from the dryrun (which is present in the build log as well)
  // but in case of $ variables and commands invoked on the fly, the second time all are resolved/expanded
  // and we can actually send good IntelliSense information for a good source code URL.
  // For such a case, look at MONO (git clone https://github.com/mono/mono.git), for source code cordxtra.c
  // Line with the original command, containing a 'test' command to determine on the fly the source code path.
  // This line is present in the dryrun and also in the build log. Can't easily parse the correct source code path.
  // /bin/bash ./libtool --tag=CC --mode=compile gcc -DHAVE_CONFIG_H -I./include -I./include -DGC_PTHREAD_START_STANDALONE -fexceptions -Wall -Wextra -Wpedantic -Wno-long-long -g -O2 -fno-strict-aliasing -MT cord/libcord_la-cordxtra.lo -MD -MP -MF cord/.deps/libcord_la-cordxtra.Tpo -c -o cord/libcord_la-cordxtra.lo `test -f 'cord/cordxtra.c' || echo './'`cord/cordxtra.c
  // Line with the resolved command, from which the extension can parse a valid source code path.
  // This line is present only in the build log, immediately following the above line.
  // libtool: compile: gcc -DHAVE_CONFIG_H -I./include -I./include -DGC_PTHREAD_START_STANDALONE -fexceptions -Wall -Wextra -Wpedantic -Wno-long-long -g -O2 -fno-strict-aliasing -MT cord/libcord_la-cordxtra.lo -MD -MP -MF cord/.deps/libcord_la-cordxtra.Tpo -c cord/cordxtra.c -fPIC -DPIC -o cord/.libs/libcord_la-cordxtra.o
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = preprocessedDryRunOutputStr.replace(
      /libtool: compile:|libtool: link:/gm,
      "\nLIBTOOL_PATTERN\n"
    );
  });

  // Process some more makefile output weirdness
  // When --mode=compile or --mode=link are present in a line, we can ignore anything that is before
  // and all that is after is a normal complete compiler or link command.
  // Replace these patterns with end of line so that the parser will see only the right half.
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = preprocessedDryRunOutputStr.replace(
      /--mode=compile|--mode=link/gm,
      "\nLIBTOOL_PATTERN\n"
    );
  });

  // Remove lines with $() since they come from unexpanded yet variables. The extension can't do anything yet
  // about them anyway and also there will be a correspondent line in the dryrun with these variables expanded.
  // Don't remove lines with $ without parenthesis, there are valid compilation lines that would be ignored otherwise.
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = preprocessedDryRunOutputStr
      .split("\n")
      .map((line) => (line.includes("$(") ? "" : line))
      .join("\n");
  });

  // Extract the link command
  // Keep the /link switch to the cl command because otherwise we will see compiling without /c
  // and we will deduce some other output binary based on its /Fe or /Fo or first source given,
  // instead of the output binary defined via the link operation (which will be parsed on the next line).
  // TODO: address more accurately the overriding scenarios between output files defined via cl.exe
  // and output files defined via cl.exe /link.
  // For example, "cl.exe source.cpp /Fetest.exe /link /debug" still produces test.exe
  // but cl.exe source.cpp /Fetest.exe /link /out:test2.exe produces only test2.exe.
  // For now, ignore any output binary rules of cl while having the /link switch.
  preprocessTasks.push((): void => {
    if (process.platform === "win32") {
      preprocessedDryRunOutputStr = preprocessedDryRunOutputStr.replace(
        / \/link /g,
        "/link \n link.exe "
      );
    }
  });

  // The splitting of multiple commands is better to be done at the end.
  // Otherwise, this scenario interferes with the line ending '\' in some cases
  // (see MAKE repo, ar.c compiler command, for example).
  // Split multiple commands concatenated by '&&'
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = util.replaceStringNotInQuotes(
      preprocessedDryRunOutputStr,
      " && ",
      "\n"
    );
  });

  // Split multiple commands concatenated by ";"
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = util.replaceStringNotInQuotes(
      preprocessedDryRunOutputStr,
      ";",
      "\n"
    );
  });

  // Replace multiple "-" sequence because it hangs the regular expression engine.
  // Strings with this pattern do not contain useful information to parse, they are safe to replace
  // in our internal representation of the dryrun or build log.
  // Replace with "- " instead of remove since this pattern does not cause hang or slow processing
  // and so that we have a similar view of the preprocessed text.
  preprocessTasks.push((): void => {
    preprocessedDryRunOutputStr = preprocessedDryRunOutputStr.replace(
      /------/gm,
      "- - - - - - "
    );
  });

  // Loop through all the configure preprocess tasks, checking for cancel.
  for (const task of preprocessTasks) {
    await util.scheduleTask(task);

    if (cancel.isCancellationRequested) {
      return {
        retc: make.ConfigureBuildReturnCodeTypes.cancelled,
        elapsed: util.elapsedTimeSince(startTime),
      };
    }
  }

  return {
    retc: make.ConfigureBuildReturnCodeTypes.success,
    elapsed: util.elapsedTimeSince(startTime),
    result: preprocessedDryRunOutputStr,
  };
}
// Describes a tool invocation identified in a command line, as returned by parseLineAsTool.
interface ToolInvocation {
// how the makefile invokes the tool:
// relative -to the makefile location- path, full path, explicit current directory or no path
// also including the file name, with or without extension
pathInMakefile: string;
// a full path formed from the given current path and the path in makefile
// plus the file name, with the extension appended (for windows)
fullPath: string;
// if found at the full path resolved above
found: boolean;
// the arguments passed to the tool invocation
// define as string so that we deal with the separator properly later, via RegExp
arguments: string;
// the original arguments passed to the tool invocation
// before any parsing or backtick command replacement
originalArguments: string;
}
// Helper that parses the given line as a tool invocation.
// The full path that is returned is calculated with the following logic:
// - make a full path out of the one given in the makefile
// and the current path that is calculated as of now
// - if the tool is not found at the full path above and if requested,
// it will be searched in all the paths of the PATH environment variable
// and the first one found will be returned
// TODO: handle the following corner cases:
// - quotes only around directory (file name outside quotes)
// - path containing "toolName(no extension) " in the middle
// Parameters:
// - line: the command line to analyze.
// - toolNames: regular expression fragments of the tool names to look for.
// - currentPath: the directory used to resolve the tool path written in the makefile.
// - replaceCommands: a flag that tells the function to replace any backtick commands,
// we default it to true, but we can set it to false when we want to reparse the line
// after the backtick commands have been replaced.
// - isCompilerOrLinker: when true, versions/prefixes/suffixes are allowed around the tool names
// and the additional compiler names defined by the user are searched as well.
// Returns undefined when the line doesn't invoke any of the tools, when there is no tool left
// to look for or when the tool is invoked via a path that doesn't exist on disk.
async function parseLineAsTool(
  line: string,
  toolNames: string[],
  currentPath: string,
  replaceCommands: boolean = true,
  isCompilerOrLinker: boolean = true
): Promise<ToolInvocation | undefined> {
  // To avoid hard-coding (and ever maintaining) in the tools list
  // the various compilers/linkers that can have versions, prefixes or suffixes
  // in their names, include a crafted regex around each tool name.
  // Any number of prefix or suffix text, separated by '-'.
  const versionedToolNames: string[] = [];
  const prefixRegex: string = isCompilerOrLinker ? "(([a-zA-Z0-9-_.]*-)*" : "";
  const suffixRegex: string = isCompilerOrLinker ? "(-[a-zA-Z0-9-_.]*)*)" : "";

  // The exclusion list doesn't depend on the tool, query it only when there are tools to filter.
  const excludedToolNames =
    toolNames.length > 0 ? configuration.getExcludeCompilerNames() : undefined;
  for (const tool of toolNames) {
    // Check if the user defined this tool as to be excluded
    if (!excludedToolNames?.includes(tool)) {
      versionedToolNames.push(`${prefixRegex}${tool}${suffixRegex}`);
    }
  }

  // Add any additional tools specified by the user
  // when we are looking at compilers or linkers,
  // not when we parse for binary targets.
  if (isCompilerOrLinker) {
    configuration.getAdditionalCompilerNames()?.forEach((compiler) => {
      if (!toolNames.includes(compiler)) {
        versionedToolNames.push(`${prefixRegex}${compiler}${suffixRegex}`);
      }
    });
  }

  // Without any tool name, the alternation in the regular expression below would be empty
  // and it would match almost any line containing a space, reporting a tool with an empty name.
  if (versionedToolNames.length === 0) {
    return undefined;
  }

  // - any spaces/tabs before the tool invocation
  // - with or without path (relative -to the makefile location- or full)
  // - with or without extension (windows only)
  // - with or without quotes
  // - must have at least one space or tab after the tool invocation
  let regexpStr: string = '^[\\s\\"]*(.*?)(';
  if (process.platform === "win32") {
    // join doesn't append the extension for the last tool in the array, add it explicitly.
    regexpStr += versionedToolNames.join("\\.exe|") + "\\.exe";
    regexpStr += "|";
  }

  regexpStr += versionedToolNames.join("|") + ')(\\s|\\"\\s)(.*)$';

  const regexp: RegExp = RegExp(regexpStr, "mg");
  const match: RegExpExecArray | null = regexp.exec(line);
  if (!match) {
    return undefined;
  }

  let toolPathInMakefile: string = match[1];
  let toolNameInMakefile: string = match[2];
  if (process.platform === "win32" && !path.extname(toolNameInMakefile)) {
    toolNameInMakefile += ".exe";
  }

  // Quotes are not needed either for the compiler path or the current path.
  // checkFileExists works just fine without quotes,
  // but makeFullPath gets confused sometimes for some quotes scenarios.
  currentPath = util.removeQuotes(currentPath);
  toolPathInMakefile = toolPathInMakefile.trimLeft();
  toolPathInMakefile = util.removeQuotes(toolPathInMakefile);

  const toolFullPath: string = await util.makeFullPath(
    toolPathInMakefile + toolNameInMakefile,
    currentPath
  );
  const toolFound: boolean = util.checkFileExistsSync(toolFullPath);

  // Reject a regexp match that doesn't have a real path before the tool invocation,
  // like for example link.exe /out:cl.exe being mistakenly parsed as a compiler command.
  // Basically, only spaces and/or tabs and/or a valid path are allowed before the compiler name.
  // There is no other easy way to eliminate that case via the regexp
  // (it must accept a string before the tool).
  // For now, we consider a path as valid if it can be found on disk.
  // TODO: be able to recognize a string as a valid path even if it doesn't exist on disk,
  // in case the project has a setup phase that is copying/installing stuff (like the toolset)
  // and it does not have yet a build in place, therefore a path or file is not yet found on disk,
  // even if it is valid.
  // In other words, we allow the tool to not be found only if the makefile invokes it without any path,
  // which opens the possibility of searching the tool through all the paths in the PATH environment variable.
  // Note: when searching for execution targets in the makefile, if a binary was not previously built,
  // the extension will not detect it for a launch configuration because of this following return.
  if (toolPathInMakefile !== "" && !toolFound) {
    return undefined;
  }

  // The arguments are always captured by the last group of the regular expression,
  // regardless of how many groups the tool names alternation introduces.
  const originalArguments = match[match.length - 1];

  return {
    // don't use join and neither paths/filenames processed above if we want to keep the exact text in the makefile
    pathInMakefile: match[1] + match[2],
    fullPath: toolFullPath,
    arguments: replaceCommands
      ? await util.replaceCommands(
          originalArguments,
          configuration.getSafeCommands(),
          { cwd: util.getWorkspaceRoot(), shell: true }
        )
      : originalArguments,
    found: toolFound,
    originalArguments,
  };
}
// Helper to identify anything that looks like a compiler switch in the given command string.
// The result is passed to IntelliSense custom configuration provider as compilerArgs.
// excludeArgs helps with narrowing down the search, when we know for sure that we're not
// interested in some switches. For example, -D, -I, -FI, -include, -std are treated separately.
// Once we identified what looks to be the switches in the given command line, for each region
// between two consecutive switches we let the shell parse it into arguments via a script invocation
// (instead of us using other parser helpers in this module) to be in sync with how CppTools
// expects the compiler arguments to be passed in.
// Parameters:
// - args: the arguments string of the tool invocation.
// - excludeArgs: switch names (without prefix); any switch whose name starts with one of them is skipped.
// Returns the non empty, trimmed lines written by the args parsing script to its standard output.
// Failures of the script are logged and result in returning whatever was collected until then.
async function parseAnySwitchFromToolArguments(
  args: string,
  excludeArgs: string[]
): Promise<string[]> {
  // Identify the non value part of the switch: prefix, switch name
  // and what may separate this from an eventual switch value
  const switches: string[] = [];
  const regExpStr: string =
    "(^|\\s+)(--|-" +
    // On Win32 allow '/' as switch prefix as well,
    // otherwise it conflicts with path character
    (process.platform === "win32" ? "|\\/" : "") +
    ")([a-zA-Z0-9_]+)";
  const regexp: RegExp = RegExp(regExpStr, "mg");

  // This contains all the compilation command fragments in between two different consecutive switches
  // (except the ones we plan to ignore, specified by excludeArgs).
  // Once this function is done concatenating into compilerArgRegions,
  // we call the compiler args parsing script once for the whole list of regions
  // (as opposed to invoking it for each fragment separately).
  let compilerArgRegions: string = "";

  // With every loop iteration we need 2 switch matches so that we analyze the text
  // that is between them. If the current match is the last one, then we will analyze
  // everything until the end of line.
  let currentSwitch: RegExpExecArray | null = regexp.exec(args);

  // Even if we don't find any arguments that have a switch syntax,
  // consider the whole command line to parse into arguments
  // (this case is encountered when we call this helper while we parse launch targets).
  if (!currentSwitch) {
    compilerArgRegions = args;
  }

  while (currentSwitch) {
    // Marks the beginning of the current switch (leading spaces + prefix + name).
    // The exact switch prefix is needed when we call other parser helpers later
    // and also CppTools expects the compiler arguments to be prefixed
    // when received from the custom providers.
    const regionStart: number = currentSwitch.index;

    // The beginning of the next switch (or the end of the arguments) ends the current region.
    // It doesn't help to look beyond the next switch match.
    const nextSwitch: RegExpExecArray | null = regexp.exec(args);
    const regionEnd: number = nextSwitch ? nextSwitch.index : args.length;

    // Skip over any switches that we know we don't need
    const switchName: string = currentSwitch[3].trim();
    const excluded: boolean = excludeArgs.some((arg) =>
      switchName.startsWith(arg)
    );
    if (!excluded) {
      compilerArgRegions += args.substring(regionStart, regionEnd);
    }

    currentSwitch = nextSwitch;
  }

  const parseCompilerArgsScriptFile: string = util.parseCompilerArgsScriptFile();
  if (process.platform === "win32") {
    // There is a potential problem with the windows version of the script:
    // A fragment like "-sw1,-sw2,-sw3" gets split by comma and a fragment like
    // "-SwDef=Val" is split by equal. Opened GitHub issue
    // https://github.com/microsoft/vscode-makefile-tools/issues/149.
    // These scenarios don't happen on pure windows but can be encountered in classic linux
    // scenarios run under MSYS/MINGW.
    // Until a better fix is implemented for 149, use a temporary marker that we replace from and into.
    compilerArgRegions = compilerArgRegions.replace(
      /\,/gm,
      "DONT_USE_COMMA_AS_SEPARATOR"
    );
    compilerArgRegions = compilerArgRegions.replace(
      /\=/gm,
      "DONT_USE_EQUAL_AS_SEPARATOR"
    );
  }

  const scriptArgs: string[] = [];
  let runCommand: string;
  if (process.platform === "win32") {
    runCommand = "cmd";
    scriptArgs.push("/c");
    scriptArgs.push(`""${parseCompilerArgsScriptFile}" ${compilerArgRegions}"`);
  } else {
    runCommand = "/bin/bash";
    scriptArgs.push("-c");
    scriptArgs.push(
      `"source '${parseCompilerArgsScriptFile}' ${compilerArgRegions}"`
    );
  }

  try {
    // Collects the arguments printed by the script, one per line.
    let stdout: any = (output: string): void => {
      if (process.platform === "win32") {
        // Restore the commas and equals that were hidden from the script invocation.
        output = output.replace(/DONT_USE_COMMA_AS_SEPARATOR/gm, ",");
        output = output.replace(/DONT_USE_EQUAL_AS_SEPARATOR/gm, "=");
      }

      // In case of concatenated separators, the shell sees different empty arguments
      // which we can remove (most common is more spaces not being seen as a single space).
      for (const line of output.replace(/\r\n/gm, "\n").split("\n")) {
        if (line !== "") {
          switches.push(line.trim());
        }
      }
    };

    // Logs anything the script writes to its standard error.
    let stderr: any = (output: string): void => {
      logger.message(
        localize(
          "error.running.args.parser.script",
          "Error running the compiler args parser script {0} for regions ({1}): {2}",
          parseCompilerArgsScriptFile,
          compilerArgRegions,
          output
        ),
        "Normal"
      );
    };

    // Running the compiler arguments parsing script can use the system locale.
    const opts: util.ProcOptions = {
      workingDirectory: util.getWorkspaceRoot(),
      forceEnglish: false,
      ensureQuoted: false,
      stdoutCallback: stdout,
      stderrCallback: stderr,
    };
    const result: util.SpawnProcessResult = await util.spawnChildProcess(
      runCommand,
      scriptArgs,
      opts
    );
    if (result.returnCode !== 0) {
      // The message has three placeholders: script file {0}, error code {1} and regions {2}.
      logger.message(
        localize(
          "compiler.args.parser.failed",
          "The compiler args parser script '{0}' failed with error code {1} for regions ({2})",
          parseCompilerArgsScriptFile,
          result.returnCode,
          compilerArgRegions
        ),
        "Normal"
      );
    }
  } catch (error) {
    // Best effort: log the failure and return the switches collected so far.
    logger.message(error);
  }

  return switches;
}
// Helper that parses for a particular switch that can occur one or more times
// in the tool command line (example -I or -D for compiler)
// and returns an array of the values passed via that switch
// todo: refactor common parts in parseMultipleSwitchFromToolArguments and parseSingleSwitchFromToolArguments
// Parameters:
// - args: the arguments string of the tool invocation.
// - sw: the switch name, without prefix. It is inserted as is in the regular expression.
// - removeSurroundingQuotes: when true, every value is passed through util.removeSurroundingQuotes.
// It needs to be false when called from parseAnySwitchFromToolArguments,
// and true otherwise. We need to analyze more scenarios before setting in stone a particular algorithm
// regarding the decision to remove or not to remove them.
// Returns the non empty values found, in the order of their occurrence in args.
function parseMultipleSwitchFromToolArguments(
args: string,
sw: string,
removeSurroundingQuotes: boolean = true
): string[] {
// - '-' or '/' or '--' as switch prefix
// - before each switch, we allow only for one or more spaces/tabs OR beginning of line,
// to reject a case where a part of a path looks like a switch with its value
// (example: "drive:/dir/Ifolder" taking /Ifolder as include switch).
// - can be wrapped by a pair of ', before the switch prefix and after the switch value
// (example: '-DMY_DEFINE=SOMETHING' or '/I drive/folder/subfolder').
// - one or none or more spaces/tabs or ':' or '=' between the switch and the value
// (examples): -Ipath, -I path, -I path, -std=gnu89
// - the value can be wrapped by a pair of ", ' or `, even symmetrical combinations ('"..."')
// and should be able to not stop at space when inside the quote characters.
// (examples): -D'MY_DEFINE', -D "MY_DEFINE=SOME_VALUE", -I`drive:/folder with space/subfolder`
// - when the switch value contains a '=', the right half can be also quoted by ', ", ` or '"..."'
// and should be able to not stop at space when inside the quote characters.
// (example): -DMY_DEFINE='"SOME_VALUE"'
// Builds the regular expression fragment matching a quoted text.
// fullyQuoted is true when the whole switch is already wrapped by a pair of ',
// in which case the single quote is not accepted as a quoting character for the value.
function anythingBetweenQuotes(fullyQuoted: boolean): string {
// The basic pattern for anything between quotes accepts equally single quote, double quote or back tick.
// One pattern that is accepted is to wrap between escaped quotes and allow inside anything (including non-escaped quotes) except escaped quotes.
// Another pattern that is accepted is to wrap between non-escaped quotes and allow inside anything (including escaped quotes) except non-escaped quotes.
// One problem with the "..." pattern is that a simple "\" (or anything ending with \") will not know if the backslash is part of the inside of quote-quote
// or together with the following quote represents a \" and needs to look forward for another ending quote.
// If there is another quote somewhere else later in the command line (another -D or a file name wrapped in quotes) everything until that first upcoming quote
// will be included.
// Example that doesn't work: -DSLASH_DEFINE="\" -DSOME_OTHER_SWITCH "drive:\folder\file.extension"
// SLASH_DEFINE is equal to '\" -DSOME_OTHER_SWITCH '
// Example that works: -DGIT_VERSION=" \" 1.2.3 \" "
// GIT_VERSION is equal to ' \" 1.2.3 \" '
// Unfortunately, we also can't identify this to log in the output channel for later analysis of more makefile switch and quoting user scenarios.
// Fortunately, we didn't encounter the last scenario, only the first.
// Note: every basic pattern contributes exactly two capturing groups to the final regular expression
// (one in each alternative), which matters for the group indexes used when reading the matches below.
function anythingBetweenQuotesBasicPattern(quoteChar: string): string {
return (
"\\\\\\" +
quoteChar +
"((?!\\\\\\" +
quoteChar +
").)*\\\\\\" +
quoteChar +
"|" + // \" anything(except \") \"
"\\" +
quoteChar +
"(\\\\\\" +
quoteChar +
"|[^\\" +
quoteChar +
"])*?[^\\\\\\" +
quoteChar +
"]?\\" +
quoteChar
); // " anything (except ") "
}
// If the switch is fully quoted with ', like ('-DMY_DEFINE="MyValue"'), don't allow single quotes
// inside the switch value.
// One example of what can be broken if we don't do this: gcc '-DDEF1=' '-DDef2=val2'
// in which case DEF1 would be seen as DEF1=' ' instead of empty =
let str: string =
anythingBetweenQuotesBasicPattern("`") +
"|" +
anythingBetweenQuotesBasicPattern('"') +
(fullyQuoted ? "" : "|" + anythingBetweenQuotesBasicPattern("'"));
return str;
}
// Builds the regular expression fragment matching the switch (prefix, name and separator)
// followed by its value. The first group captures the switch with its separator
// and the group following the three separator groups captures the switch value.
function mainPattern(fullyQuoted: boolean): string {
let pattern: string =
// prefix and switch name
"(" +
"\\/" +
sw +
"(:|=|\\s*)|-" +
sw +
"(:|=|\\s*)|--" +
sw +
"(:|=|\\s*)" +
")" +
// switch value
"(" +
anythingBetweenQuotes(fullyQuoted) +
"|" +
// not fully quoted switch value scenarios
"(" +
// the left side (or whole value if no '=' is following)
"(" +
"[^\\s=]+" + // not quoted switch value component
")" +
"(" +
"=" + // separator between switch value left side and right side
"(" +
anythingBetweenQuotes(fullyQuoted) +
"|" +
"[^\\s]+" + // not quoted right side of switch value
// equal is actually allowed (example gcc switch: -fmacro-prefix-map=./= )
")?" + // right side of '=' is optional, meaning we can define as nothing, like: -DMyDefine=
")?" + // = is also optional (simple define)
")" +
")";
return pattern;
}
let regexpStr: string =
"(" +
"^|\\s+" +
")" + // start of line or any amount of space character
"(" +
"(" +
"\\'" +
mainPattern(true) +
"\\'" +
")" +
"|" + // switch if fully quoted
"(" +
mainPattern(false) +
")" + // switch if not fully quoted
")";
let regexp: RegExp = RegExp(regexpStr, "mg");
let match: RegExpExecArray | null;
let results: string[] = [];
match = regexp.exec(args);
while (match) {
// Group 2 is the whole switch, with the surrounding single quotes when fully quoted.
// The switch value is captured by group 8 for the fully quoted alternative
// and by group 26 for the not fully quoted one.
// These indexes depend on the exact number of capturing groups created by mainPattern
// and anythingBetweenQuotes: any group added or removed there requires an update here.
// NOTE(review): this also assumes that sw doesn't contain capturing groups - confirm against callers.
let matchIndex: number =
match[2].startsWith("'") && match[2].endsWith("'") ? 8 : 26;
let result: string = match[matchIndex];
// Skip empty or not captured values.
if (result) {
if (removeSurroundingQuotes) {
result = util.removeSurroundingQuotes(result);
}
results.push(result);
}
match = regexp.exec(args);
}
return results;
}
// Helper that parses for any switch from a set that can occur one or more times
// in the tool command line and returns an array of the values passed via all of the identified switches.
// It is based on parseMultipleSwitchFromToolArguments (extends the regex for more switches
// and also accepts a switch without a following value, like -m32 or -m64 are different from -arch:arm).
// This is useful especially when we need the order of these different switches in the command line:
// for example, when we want to know which switch wins (for cancelling pairs or for overriding switches).
// Parsing the switches separately wouldn't give us the order information.
// Also, we don't have yet a function to parse the whole string of arguments into individual arguments,
// so that we analyze each switch one by one, thus knowing the order.
// TODO: review the regexp for parseMultipleSwitchFromToolArguments to make sure all new capabilities
// are reflected in the regexp here (especially around quoting scenarios and '=').
// For now it's not critical because parseMultipleSwitchesFromToolArguments is called for target
// architecture switches which don't have such complex scenarios.
function parseMultipleSwitchesFromToolArguments(
  args: string,
  simpleSwitches: string[],
  valueSwitches: string[]
): string[] {
  // Returns, in command line order, the value of every matched value switch
  // and the name of every matched simple switch.
  //
  // Value switches:
  // - '-' or '/' or '--' as switch prefix
  // - before each switch, we allow only for one or more spaces/tabs OR beginning of line,
  //   to reject a case where a part of a path looks like a switch with its value
  // - can be wrapped by a pair of ', before the switch prefix and after the switch value
  // - the value can be wrapped by a pair of "
  // - ':' or '=' or one or none or more spaces/tabs between the switch and the value
  // Simple switches:
  // - '-' or '/' or '--' as switch prefix, optionally followed by '
  // - NOTE: this alternative is a separate top-level branch of the regular expression,
  //   so it is not anchored to a preceding space or beginning of line.
  //
  // The switch names are inserted verbatim in the regular expression. The capture group
  // indexes computed below assume that the names do not contain capturing groups themselves.
  const alternatives: string[] = [];
  let groupCount: number = 0;
  let valueGroupIndex: number = 0; // 0 means that the value switch alternative is absent
  let simpleGroupIndex: number = 0; // 0 means that the simple switch alternative is absent

  if (valueSwitches.length > 0) {
    const separator: string = "(:|=|\\s*)";
    const switchAlternation: string = valueSwitches
      .map(
        (sw) =>
          "\\/" + sw + separator + "|-" + sw + separator + "|--" + sw + separator
      )
      .join("|");
    alternatives.push(
      "(^|\\s+)\\'?(" + switchAlternation + ')(\\".*?\\"|[^\\\'\\s]+)'
    );
    // Groups so far: 1 = leading space/beginning of line, 2 = prefix + switch + separator,
    // then 3 separator groups for each value switch, then the group holding the value.
    groupCount = 2 + 3 * valueSwitches.length + 1;
    valueGroupIndex = groupCount;
  }

  if (simpleSwitches.length > 0) {
    alternatives.push("((\\/|-|--)(" + simpleSwitches.join("|") + "))\\'?");
    // 3 more groups: prefix + switch, prefix alone, switch name alone (the one we return).
    groupCount += 3;
    simpleGroupIndex = groupCount;
  }

  // Nothing to look for.
  if (alternatives.length === 0) {
    return [];
  }

  const regexp: RegExp = RegExp(alternatives.join("|"), "mg");
  const results: string[] = [];
  let match: RegExpExecArray | null = regexp.exec(args);
  while (match) {
    // In each match only one of the two groups has a value while the other is undefined.
    const valueResult: string | undefined =
      valueGroupIndex > 0 ? match[valueGroupIndex] : undefined;
    const simpleResult: string | undefined =
      simpleGroupIndex > 0 ? match[simpleGroupIndex] : undefined;
    const result: string | undefined = valueResult || simpleResult;
    if (result) {
      results.push(result.trim());
    }
    match = regexp.exec(args);
  }

  return results;
}
// Helper that parses for a particular switch that can occur once in the tool command line,
// or if it is allowed to be specified more than once, the latter would override the former.
// The switch is an array of strings (as opposed to a simple string)
// representing all the alternative switches in distinct toolsets (cl, versus gcc, versus clang, etc)
// of the same conceptual argument of the given tool.
// The helper returns the value passed via the given switch
// Examples for compiler: -std:c++17, -Fotest.obj, -Fe test.exe
// Example for linker: -out:test.exe versus -o a.out
// TODO: review the regexp for parseMultipleSwitchFromToolArguments to make sure all new capabilities
// are reflected in the regexp here (especially around quoting scenarios and '=').
// For now it's not critical because parseSingleSwitchFromToolArguments is called for switches
// that have simple value scenarios.
function parseSingleSwitchFromToolArguments(
  args: string,
  sw: string[]
): string | undefined {
  // Pieces of the regular expression, in the order in which they must appear:
  // - beginning of line or one or more spaces/tabs (to reject a part of a path
  //   that looks like a switch with its value), then an optional wrapping '
  const leading = "(^|\\s+)\\'?";
  // - '-' or '/' or '--' as switch prefix
  const prefix = "(\\/|-|--)";
  // - any of the alternative switch names
  const names = "(" + sw.join("|") + ")";
  // - ':' or '=' or one/none/more spaces/tabs between the switch and the value
  const separator = "(:|=|\\s*)";
  // - the value, which can be wrapped by a pair of ", then an optional wrapping '
  const value = "(\\\".*?\\\"|[^\\'\\s]+)\\'?";

  const pattern: RegExp = new RegExp(
    leading + prefix + names + separator + value,
    "mg"
  );

  // Walk over all the occurrences and remember only the most recent value,
  // since a later occurrence of the switch overrides an earlier one.
  let lastValue: string | undefined;
  for (
    let found: RegExpExecArray | null = pattern.exec(args);
    found !== null;
    found = pattern.exec(args)
  ) {
    const candidate: string = found[5];
    if (candidate) {
      lastValue = candidate.trim();
    }
  }

  return lastValue;
}
// Helper that answers whether a particular switch is passed to the tool.
// When calling this helper, we are not interested in obtaining the
// (or there is no) value passed in via the switch.
// There must be at least one space/tab before the switch,
// so that we don't match a path by mistake.
// Same after the switch, in case the given name is a substring
// of another switch name. Or have the switch be the last in the command line.
// Examples: we call this helper for /c compiler switch or /dll linker switch.
// TODO: detect sets of switches that cancel each other to return a more
// accurate result in case of override (example: /TC and /TP)
function isSwitchPassedInArguments(args: string, sw: string[]): boolean {
  // - one or more spaces/tabs or beginning of line before the switch
  // - '-' or '/' or '--' as switch prefix
  // - any of the given switch names
  // - one or more spaces/tabs or end of line after the switch
  const switchNames: string = sw.join("|");
  const detector: RegExp = new RegExp(
    `((\\s+)|^)(\\/|-|--)(${switchNames})((\\s+)|$)`,
    "mg"
  );

  return detector.test(args);
}
// Helper that parses for files (of given extensions) that are given as arguments to a tool
// TODO: consider non standard extensions (or no extension at all) in the presence of TC/TP.
// Attention to obj, pdb or exe files tied to /Fo, /Fd and /Fe
// TODO: consider also ' besides "
function parseFilesFromToolArguments(args: string, exts: string[]): string[] {
  // Without any extension the regular expression below would degenerate into one
  // that matches an empty string at the end of the input, and the exec loop would never end.
  if (exts.length === 0) {
    return [];
  }

  // no switch prefix and no association yet with a preceding switch
  // one or more spaces/tabs before (or beginning of line) and after (or end of line)
  // with or without quotes surrounding the argument
  //    - if surrounding quotes, don't allow another quote in between
  // (todo: handle the scenario when quotes enclose just the directory path, without the file name)
  const fileAlternation: string = exts
    .map((ext) => '\\".[^\\"]*?\\.' + ext + '\\"|' + "\\S+\\." + ext)
    .join("|");
  const regexp: RegExp = RegExp("(" + fileAlternation + ")(\\s+|$)", "mg");

  const echo: string = "' || echo ";
  const files: string[] = [];
  let match: RegExpExecArray | null = regexp.exec(args);
  while (match) {
    let result: string = match[1];

    // It is quite common to encounter the following pattern:
    //  `test -f 'sourceFile.c' || echo './'`sourceFile.c
    // or `test -f 'sourceFile.c' || echo '../../../libwally-core/src/'`sourceFile.c
    // Until we implement the correct approach (to query live the test command)
    // we can just ignore it and consider the second option of the OR
    // (by removing the quotes while preserving the relative path).
    // This is a short term workaround.
    // The first capture group opens the regular expression, so match.index is where
    // this particular occurrence starts. Searching the text of the match in args instead
    // could land on a different occurrence of the same text.
    const idx: number = match.index;
    const str: string = args.substring(idx - echo.length, idx);
    if (str === echo) {
      // not to use util.removeQuotes because that also removes double quotes "
      result = result.replace(/\'/gm, "");
      result = result.replace(/\`/gm, "");
    }

    if (result) {
      result = util.removeSurroundingQuotes(result);

      // Debug message to identify easier the scenarios where source files have inner quotes.
      if (result.includes('"')) {
        logger.message(
          localize(
            "file.argument.has.quotes",
            "File argument that contains quotes: `{0}`",
            result
          ),
          "Debug"
        );
      }

      files.push(result);
    }

    match = regexp.exec(args);
  }

  return files;
}
// Helper that identifies system commands (cd, cd -, pushd, popd) and make.exe change directory switch (-C)
// to calculate the effect on the current path, also remembering the transition in the history stack.
// The current path is always the last one into the history.
async function currentPathAfterCommand(
line: string,
currentPathHistory: string[]
): Promise<string[]> {
line = line.trimLeft();
line = line.trimRight();
let lastCurrentPath: string =
currentPathHistory.length > 0
? currentPathHistory[currentPathHistory.length - 1]
: "";
let newCurrentPath: string = "";
const analyzeLine = localize("analyze.line", "Analyzing line: {0}", line);