-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathcheck_copyright.py
executable file
·728 lines (613 loc) · 29.1 KB
/
check_copyright.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
#!/usr/bin/env python
# SPDX-FileCopyrightText: 2021-2022 Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
"""
Check files for copyright headers:
- file not on ignore list:
    - old Espressif copyright -> replace with SPDX
    - SPDX with invalid year or old company name -> replace with valid SPDX
    - other SPDX copyright -> PASS
    - non-SPDX copyright -> FAIL
    - no copyright -> insert Espressif copyright
- file on ignore list:
    - old Espressif copyright -> replace with SPDX, remove from ignore list
    - SPDX with invalid year or company format -> replace with valid SPDX and remove from ignore list
    - else -> keep on ignore list
"""
import argparse
import ast
import glob
import configparser
import datetime
import os
import re
import subprocess
import sys
import textwrap
from typing import List, Optional, Tuple, Dict
import pathspec
import yaml
# importing the whole comment_parser causes a crash when running inside of gitbash environment on Windows.
from comment_parser.parsers import c_parser, python_parser
from comment_parser.parsers.common import Comment
from thefuzz import fuzz
# Explanation printed by main() before the list of files which failed the copyright check
CHECK_FAIL_MESSAGE = textwrap.dedent('''\
To make a file pass the test, it needs to contain both:
an SPDX-FileCopyrightText and an SPDX-License-Identifier with an allowed license for the section.
More information about SPDX license identifiers can be found here:
https://spdx.github.io/spdx-spec/appendix-V-using-SPDX-short-identifiers-in-source-files/
To have this hook automatically insert the standard Espressif copyright notice,
ensure the word "copyright" is not in any comment up to line 30 and the file is not on the ignore list.
Below is a list of files, which failed the copyright check.
''')
# Explanation printed by main() after the list of files which were modified on disk
CHECK_MODIFY_MESSAGE = textwrap.dedent('''\
Above is a list of files, which were modified. Please check their contents, stage them and run the commit again!
Files prefixed with "(ignore)" were on the ignore list at the time of invoking this script.
They may have been removed if noted above.
''')
# Pointer to further documentation; main() prints it at the end of every run
CHECK_FOOTER_MESSAGE = textwrap.dedent('''\
Additional information about this hook and copyright headers may be found here:
https://docs.espressif.com/projects/esp-idf/en/latest/esp32/contribute/copyright-guide.html
''')
# This is an old header style, which this script
# attempts to detect and replace with a new SPDX license identifier
# (detect_old_header_style() fuzzy-matches the comments against this text and
# replace_copyright() uses its number of lines to decide how many lines to remove)
OLD_APACHE_HEADER = textwrap.dedent('''\
Copyright 2015-2019 Espressif Systems (Shanghai) PTE LTD
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
''')
# New headers to be used
# ({years} is filled in by replace_copyright(); this variant is used for the python and bsasm mime types)
NEW_APACHE_HEADER_PYTHON = textwrap.dedent('''\
# SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD
# SPDX-License-Identifier: Apache-2.0
''')
# Block-comment variant of the new header, used by replace_copyright() for every other mime type
NEW_APACHE_HEADER = textwrap.dedent('''\
/*
* SPDX-FileCopyrightText: {years} Espressif Systems (Shanghai) CO LTD
*
* SPDX-License-Identifier: Apache-2.0
*/
''')
# filetype -> mime
MIME = {
'python': 'text/x-python',
'c': 'text/x-c',
'cpp': 'text/x-c++',
'bsasm': 'text/x-bsasm',
}
# mime -> parser
# (bsasm files are handed to the python comment parser as well)
MIME_PARSER = {
'text/x-c': c_parser,
'text/x-c++': c_parser,
'text/x-python': python_parser,
'text/x-bsasm': python_parser,
}
# terminal color output
# (escape sequences that are embedded directly into the printed messages)
TERMINAL_RESET = '\33[0m'
TERMINAL_BOLD = '\33[1m'
TERMINAL_YELLOW = '\33[93m'
TERMINAL_GREEN = '\33[92m'
TERMINAL_RED = '\33[91m'
TERMINAL_GRAY = '\33[90m'
class UnsupportedFileType(Exception):
    """Exception raised for unsupported file types.

    Attributes:
        file_name -- input file which caused the error
        message -- explanation of the error
    """

    def __init__(self, file_name: str, message: str = 'this file type is not supported') -> None:
        self.file_name = file_name
        # Historical misspelling of ``file_name``; kept as an alias so that code
        # which still reads ``fine_name`` keeps working.
        self.fine_name = file_name
        self.message = message
        super().__init__(self.message)

    def __str__(self) -> str:
        return f'{self.file_name}: {self.message}'
class NeedsToBeUpdated(Exception):
    """Exception raised in check (dry-run) mode for files whose license header needs to be updated.

    Attributes:
        file_name -- input file which caused the error
        message -- explanation of the error
    """

    def __init__(self, file_name: str, message: str = 'this file needs to be updated') -> None:
        self.file_name = file_name
        # Historical misspelling of ``file_name``; kept as an alias so that code
        # which still reads ``fine_name`` keeps working.
        self.fine_name = file_name
        self.message = message
        super().__init__(self.message)

    def __str__(self) -> str:
        return f'{self.file_name}: {self.message}'
class NotFound(Exception):
    """Signals that a looked-up item is missing.

    Attributes:
        thing -- description of the item which could not be found
    """

    def __init__(self, thing: str = 'something') -> None:
        super().__init__(thing)
        self.thing = thing

    def __str__(self) -> str:
        return '{} was not found'.format(self.thing)
class CustomFile:
    """
    Pairs a file name with its ignore-list membership so that the file
    can be printed with a prefix telling the two cases apart
    """

    def __init__(self, file_name: str, is_on_ignore_list: bool) -> None:
        self.is_on_ignore_list = is_on_ignore_list
        self.file_name = file_name

    def __str__(self) -> str:
        # files from the ignore list are marked, the others only get the plain prefix
        prefix = '(ignore) ' if self.is_on_ignore_list else ' '
        return f'{prefix}{self.file_name}'
class CommentHolder(Comment):
    """
    Hold the comment, its line number and when it is multiline,
    also store if it's the first in a comment block
    """

    def __init__(self, text: str, line_number: int, multiline: bool = False, first_in_multiline: bool = False) -> None:
        """
        Args:
            text: String text of comment.
            line_number: Line number (int) comment was found on.
            multiline: bool is it multiline
            first_in_multiline: bool if multiline, is it first in that comment block
        """
        # Zero-argument super() instead of super(self.__class__, self): the latter
        # resolves to this very class again when called through a subclass
        # instance and then recurses without end.
        super().__init__(text, line_number, multiline)
        # the flag is only meaningful for multiline comments
        self._first_in_multiline = first_in_multiline and multiline

    def is_first_in_multiline(self) -> bool:
        """
        Returns whether this comment was a first in a multiline comment.
        """
        return self._first_in_multiline
def get_file_mime(fn: str) -> str:
    """
    Map a file name to its mime type by looking at the file extension.

    Raises UnsupportedFileType when the extension is not one of the known ones.
    """
    # groups are tried in this order and the first matching one wins
    known_extensions = (
        (('.py',), 'python'),
        (('.cpp', '.hpp', '.cc', '.hh'), 'cpp'),
        (('.c', '.h', '.ld', '.s', '.S'), 'c'),
        (('.bsasm',), 'bsasm'),
    )
    for extensions, file_type in known_extensions:
        if fn.endswith(extensions):
            return MIME[file_type]
    raise UnsupportedFileType(fn)
def get_comments(code: str, mime: str) -> list:
    """
    Collect every comment of the source code as a CommentHolder.
    A multiline comment is broken up into one CommentHolder per line.
    """
    holders = []
    for found in MIME_PARSER[mime].extract_comments(code):
        if not found.is_multiline():
            holders.append(CommentHolder(found.text(), found.line_number()))
            continue
        first_line = found.line_number()
        for offset, line_text in enumerate(found.text().splitlines()):
            # every line of the block gets its own line number;
            # only the very first one (offset 0) is flagged as the start of the block
            holders.append(CommentHolder(line_text, first_line + offset, True, offset == 0))
    return holders
def has_valid_copyright(file_name: str, mime: str, is_on_ignore: bool, is_new_file: bool, config_section: configparser.SectionProxy,
                        args: argparse.Namespace) -> Tuple[bool, bool]:
    """
    Detects if a file has a valid SPDX copyright notice and, where possible, fixes it in place.

    Steps (only comments up to line args.max_lines are considered):
    - with args.replace, an old-style Apache header is replaced by the SPDX one
    - Espressif SPDX-FileCopyrightText / SPDX-FileContributor lines get their years refreshed
    - a file without any copyright which is not on the ignore list gets a new notice inserted
    - every detected SPDX-License-Identifier has to be allowed by the config section

    Args:
        file_name: path of the file to check
        mime: mime type as returned by get_file_mime()
        is_on_ignore: True when the file is on the ignore list (no notice is inserted then)
        is_new_file: True when the file is newly added; Espressif notices then get only the current year
        config_section: config section which applies to this file
        args: parsed command line arguments, extended with the "numstat" dictionary

    Raises:
        NeedsToBeUpdated: in dry-run mode, when a header would have to be replaced or inserted

    returns: Tuple[valid, modified]
    """
    detected_licenses = []
    detected_notices = []
    valid, modified = False, False

    with open(file_name, 'r') as f:
        code = f.read()
    comments = get_comments(code, mime)
    code_lines = code.splitlines()
    lines_changed = sum(args.numstat[file_name]) if file_name in args.numstat else 0

    if not code_lines:  # file is empty
        print(f'{TERMINAL_YELLOW}"{file_name}" is empty!{TERMINAL_RESET}')
        return True, modified

    def refresh_espressif_line(tag: str, notice: str, comment: 'CommentHolder', report_foreign: bool) -> None:
        """
        Rewrite the "SPDX-<tag>" line of an Espressif notice with up-to-date years.
        Notices of other copyright holders are left untouched.
        """
        try:
            if is_new_file:
                # A new file gets only the current year, but the notice still has to be
                # an Espressif one - third-party notices must never be overwritten.
                if not re.search(r'Espressif Systems', notice, re.IGNORECASE):
                    raise NotFound('Espressif copyright notice')
                years = (0, None)
            else:
                years = extract_years_from_espressif_notice(notice)
        except NotFound as e:
            if report_foreign:
                print(f'{TERMINAL_GRAY}Not an {e.thing} {file_name}:{comment.line_number()}{TERMINAL_RESET}')
            return

        # the prefix has to match the kind of comment the notice was found in
        prefix = '// '
        if comment.is_multiline():
            prefix = '/* ' if comment.is_first_in_multiline() else ' * '
        if mime in (MIME['python'], MIME['bsasm']):
            prefix = '# '
        template = f'{prefix}SPDX-{tag}: ' + config_section['espressif_copyright']

        candidate_line = template.format(years=format_years(years[0], file_name))
        no_time_update = template.format(years=format_years(years[0], file_name, years[1] or years[0]))
        index = comment.line_number() - 1
        if code_lines[index] != no_time_update or lines_changed >= args.lines_changed:
            # update the line only in cases when not only the dates are changing or
            # if number of changed lines is greater or equal to limit specified by
            # args.lines_changed
            code_lines[index] = candidate_line

    if args.replace:
        try:
            year, line = detect_old_header_style(file_name, comments, args)
        except NotFound as e:
            if args.debug:
                print(f'{TERMINAL_GRAY}{e} in {file_name}{TERMINAL_RESET}')
        else:
            if not args.dry_run:
                code_lines = replace_copyright(code_lines, year, line, mime, file_name)
            else:
                raise NeedsToBeUpdated(file_name)
            valid = True

    for comment in comments:
        if comment.line_number() > args.max_lines:
            break
        text = comment.text()

        matches = re.search(r'SPDX-FileCopyrightText: ?(.*)', text, re.IGNORECASE)
        if matches:
            detected_notices.append((matches.group(1), comment.line_number()))
            refresh_espressif_line('FileCopyrightText', matches.group(1), comment, args.verbose)

        matches = re.search(r'SPDX-FileContributor: ?(.*)', text, re.IGNORECASE)
        if matches:
            refresh_espressif_line('FileContributor', matches.group(1), comment, args.debug)

        matches = re.search(r'SPDX-License-Identifier: ?(.*)', text, re.IGNORECASE)
        if matches:
            detected_licenses.append((matches.group(1), comment.line_number()))

    if not is_on_ignore and not contains_any_copyright(comments, args):
        if not args.dry_run:
            code_lines = insert_copyright(code_lines, file_name, mime, config_section)
            print(f'"{file_name}": inserted copyright notice - please check the content and run commit again!')
        else:
            raise NeedsToBeUpdated(file_name)
        valid = True

    new_code = '\n'.join(code_lines) + '\n'
    if code != new_code:
        with open(file_name, 'w') as f:
            f.write(new_code)
        modified = True

    if detected_licenses and detected_notices:
        valid = True
        if args.debug:
            print(f'{file_name} notices: {detected_notices}')
            print(f'{file_name} licenses: {detected_licenses}')

    if detected_licenses:
        # configparser stores all values as strings; the list is the same for every detected license
        allowed_licenses = ast.literal_eval(config_section['allowed_licenses'])
        for detected_license, line_number in detected_licenses:
            if not allowed_license_combination(detected_license, allowed_licenses):
                valid = False
                print(f'{TERMINAL_RED}{file_name}:{line_number} License "{detected_license}" is not allowed! '
                      f'Allowed licenses: {allowed_licenses}.{TERMINAL_RESET}')
    return valid, modified
def contains_any_copyright(comments: list, args: argparse.Namespace) -> bool:
    """
    Tell whether the word "copyright" (in any letter case) shows up
    in a comment located no further than line args.max_lines
    """
    for candidate in comments:
        if candidate.line_number() > args.max_lines:
            # too far down in the file to count
            continue
        if re.search(r'copyright', candidate.text(), re.IGNORECASE):
            return True
    return False
def insert_copyright(code_lines: list, file_name: str, mime: str, config_section: configparser.SectionProxy) -> list:
    """
    Insert a copyright notice in the beginning of a file, respecting a potential shebang

    The notice template is taken from the config section ("new_notice_python", "new_notice_bsasm"
    or "new_notice_c" depending on the mime type) and filled with the license for new files and
    the current year.

    The passed list is left untouched; a new list of lines is returned.
    """
    new_code_lines = []
    remaining_lines = code_lines
    # if first line contains a shebang, keep it first
    # (the emptiness check keeps an empty list from raising IndexError)
    if code_lines and code_lines[0].startswith('#!'):
        new_code_lines.append(code_lines[0])
        # slice instead of deleting in place so that the caller's list is not modified
        remaining_lines = code_lines[1:]

    if mime == MIME['python']:
        template = config_section['new_notice_python']
    elif mime == MIME['bsasm']:
        template = config_section['new_notice_bsasm']
    else:
        template = config_section['new_notice_c']

    notice = template.format(license=config_section['license_for_new_files'], years=format_years(0, file_name))
    new_code_lines.extend(notice.splitlines())
    new_code_lines.extend(remaining_lines)
    return new_code_lines
def extract_years_from_espressif_notice(notice: str) -> Tuple[int, Optional[int]]:
    """
    Pull the copyright years out of an Espressif copyright notice.

    The result is a tuple (first, second). The second item is None when the notice
    holds a single year only. NotFound is raised when the text is not an Espressif notice.
    """
    found = re.search(r'(\d{4})(-(\d{4}))? Espressif Systems', notice, re.IGNORECASE)
    if found is None:
        raise NotFound('Espressif copyright notice')
    first_year = found.group(1)
    second_year = found.group(3)  # stays None for the single-year form
    if second_year:
        return (int(first_year), int(second_year))
    return (int(first_year), None)
def replace_copyright(code_lines: list, year: int, line: int, mime: str, file_name: str) -> list:
    """
    Swap the old-style header for the new SPDX one.

    The list of lines is changed in place and returned as well.
    """
    # The old header starts on line number (line) and takes as many lines as
    # OLD_APACHE_HEADER has; these are dropped first.
    start = line - 1
    end = line + OLD_APACHE_HEADER.count('\n')
    del code_lines[start:end - 1]

    # the new header depends on the file type
    if mime in (MIME['python'], MIME['bsasm']):
        template = NEW_APACHE_HEADER_PYTHON
    else:
        template = NEW_APACHE_HEADER
    new_header = template.format(years=format_years(year, file_name))

    # put the new header where the old one used to start
    code_lines[start:start] = new_header.splitlines()
    print(f'{TERMINAL_BOLD}"{file_name}": replacing old Apache-2.0 header (lines: {line}-{end}) with the new SPDX header.{TERMINAL_RESET}')
    return code_lines
def detect_old_header_style(file_name: str, comments: list, args: argparse.Namespace) -> Tuple[int, int]:
    """
    Look for the old (Apache-2.0) header style among the leading comments.

    Raises NotFound when the comments do not resemble the old header closely enough
    or when no Espressif notice is among them.

    returns: Tuple[year, comment line number]
    """
    # only the comments up to line number args.max_lines take part
    leading_comments = []
    for comment in comments:
        if comment.line_number() > args.max_lines:
            break
        leading_comments.append(comment)

    comments_text = ''.join(f'\n{comment.text().strip()}' for comment in leading_comments)
    ratio = fuzz.partial_ratio(comments_text, OLD_APACHE_HEADER)
    if args.debug:
        print(f'{TERMINAL_GRAY}ratio for {file_name}: {ratio}{TERMINAL_RESET}')

    if not ratio > args.fuzzy_ratio:
        raise NotFound('Old Espressif header')

    # the first comment holding an Espressif notice marks the beginning of the header
    for comment in leading_comments:
        try:
            year = extract_years_from_espressif_notice(comment.text())[0]
        except NotFound:
            continue
        return (year, comment.line_number())
    raise NotFound('Old Espressif header')
def format_years(past: int, file_name: str, today: Optional[int]=None) -> str:
    """
    Build the year part of a copyright notice:
    - first year is the current year (or 0) -> output: [current year]
    - first year lies in the past -> output: [past year]-[current year]

    "today" replaces the current year when given. A first year which lies after
    the current one or before 1972 raises ValueError.
    """
    current_year = today if today else datetime.datetime.now().year
    # 0 stands for "use the current year"
    first_year = current_year if past == 0 else past
    if first_year == current_year:
        return str(first_year)
    if not 1972 <= first_year <= current_year:
        raise ValueError(f'{file_name}: invalid year in the copyright header detected. '
                         'Check your system clock and the copyright header.')
    return f'{first_year}-{current_year}'
def check_copyrights(args: argparse.Namespace, config: configparser.ConfigParser) -> Tuple[List, List, List]:
    """
    Main logic and for loop

    Every file from args.filenames is matched to a config section (the last section whose
    "include" patterns match wins, otherwise DEFAULT) and checked with has_valid_copyright().
    Files from the ignore list which became valid are removed from the list, unless
    args.dont_update_ignore_list is set.

    returns:
        list of files with wrong headers
        list of files which were modified
        list of files which need an update (dry-run mode)
    """
    wrong_header_files = []
    modified_files = []
    must_be_updated = []
    pathspecs = {}
    ignore_list = []
    updated_ignore_list = []

    if os.path.isfile(args.ignore):
        with open(args.ignore, 'r') as f:
            ignore_list = [item.strip() for item in f.readlines()]
        updated_ignore_list = ignore_list.copy()
    # set for the membership test done for every file
    ignored = set(ignore_list)

    # compile the file patterns
    for section in config.sections():
        # configparser stores all values as strings
        patterns = ast.literal_eval(config[section]['include'])
        try:
            pathspecs[section] = pathspec.PathSpec.from_lines('gitwildmatch', patterns)
        except TypeError:
            print(f'Error while compiling file patterns. Section {section} has invalid include option. Must be a list of file patterns.')
            sys.exit(1)

    for file_name in args.filenames:
        try:
            mime = get_file_mime(file_name)
        except UnsupportedFileType:
            print(f'{TERMINAL_GRAY}"{file_name}" is not of a supported type! Skipping.{TERMINAL_RESET}')
            continue

        matched_section = 'DEFAULT'
        for section in config.sections():
            if pathspecs[section].match_file(file_name):
                if args.debug:
                    print(f'{TERMINAL_GRAY}{file_name} matched {section}{TERMINAL_RESET}')
                matched_section = section

        if config[matched_section]['perform_check'] == 'False':  # configparser stores all values as strings
            print(f'{TERMINAL_GRAY}"{file_name}" is using config section "{matched_section}" which does not perform the check! Skipping.{TERMINAL_RESET}')
            continue

        # Is this file a new file
        is_new_file = args.is_new_file.get(file_name)

        is_on_ignore = file_name in ignored
        if is_on_ignore and args.verbose:
            print(f'{TERMINAL_GRAY}"{file_name}" is on the ignore list.{TERMINAL_RESET}')

        try:
            valid, modified = has_valid_copyright(file_name, mime, is_on_ignore, is_new_file, config[matched_section], args)
        except NeedsToBeUpdated:
            # Raised in dry-run mode. It can happen for files on the ignore list as well
            # (old header found with --replace), so it is handled for both kinds of files.
            must_be_updated.append(file_name)
            continue

        if modified:
            modified_files.append(CustomFile(file_name, is_on_ignore))
        if not valid:
            wrong_header_files.append(CustomFile(file_name, is_on_ignore))
        elif is_on_ignore:
            if args.dont_update_ignore_list:
                print(f'{TERMINAL_YELLOW}"{file_name}" now has a correct copyright header - remove it from the ignore list '
                      f'or run this script without the --dont-update-ignore-list option to do this automatically!{TERMINAL_RESET}')
            else:
                updated_ignore_list.remove(file_name)

    if updated_ignore_list != ignore_list:
        with open(args.ignore, 'w') as f:
            for item in updated_ignore_list:
                f.write(f'{item}\n')
        modified_files.append(CustomFile(args.ignore, False))
        print(f'\n{TERMINAL_GREEN}Files removed from ignore list:{TERMINAL_RESET}')
        for file in ignore_list:
            if file not in updated_ignore_list:
                print(f' {file}')
    return wrong_header_files, modified_files, must_be_updated
def build_parser() -> argparse.ArgumentParser:
    """
    Create the command line parser of this script
    """
    # (option strings, settings) in the order they are registered
    option_table = [
        (('-v', '--verbose'), dict(action='store_true',
                                   help='print more information (useful for debugging)')),
        (('-r', '--replace'), dict(action='store_true',
                                   help='tries to update copyright notices')),
        (('-m', '--max-lines'), dict(type=int, default=30,
                                     help='how far to check for copyright notice in a file (default 30)')),
        (('-f', '--fuzzy-ratio'), dict(type=int, default=95,
                                       help='minimum %% ratio to be considered as equal to the old header style (default 95)')),
        (('-d', '--debug'), dict(action='store_true',
                                 help='print debug info')),
        (('-du', '--dont-update-ignore-list'), dict(action='store_true')),
        (('-dr', '--dry-run'), dict(action='store_true', help='check without adding new headers')),
        (('-i', '--ignore'), dict(default='check_copyright_ignore', help='set path to the ignore list')),
        (('-l', '--lines-changed'), dict(type=int, default=5,
                                         help='minimum number of changed lines that will enforce copyright date update (default 5)')),
        (('-c', '--config'), dict(default='check_copyright_config.yaml',
                                  help='set path to the config yaml file')),
        (('filenames',), dict(nargs='+', help='file(s) to check', metavar='file')),
    ]
    cli = argparse.ArgumentParser(description='Check copyright headers')
    for option_strings, settings in option_table:
        cli.add_argument(*option_strings, **settings)
    return cli
def debug_output(args: argparse.Namespace, config: configparser.ConfigParser) -> None:
    """
    Print the parsed arguments, the paths of the config file and of the ignore list
    and every config section together with its options
    """
    print(f'{TERMINAL_GRAY}Running with args: {args}')
    print(f'Config file: {args.config}')
    print(f'Ignore list: {args.ignore}{TERMINAL_RESET}')
    print(f'Sections: {config.sections()}')
    # iterating over the parser itself also yields the DEFAULT section
    for section_name, options in config.items():
        print(f'section: "{section_name}"')
        for key, value in options.items():
            print(f' {key}: "{value}"')
def allowed_license_combination(license_to_match: str, all_licenses: List[str]) -> bool:
    """
    Licenses can be combined together with the OR keyword. Therefore, a simple "in" lookup in a list is not enough.
    For example, if "A" and "B" are supported then "A OR B" and "B OR A" should be supported as well.

    An identifier which contains no license at all (empty or whitespace only) is never allowed.
    """
    if license_to_match in all_licenses:
        # This is the simple case, for example, when "A" is used from the list ["A", "B"]
        return True

    # for example, if license_to_match is "A OR B" then the following split will be ["A", "B"]
    split_list = [sp for sp in map(str.strip, license_to_match.split(' OR ')) if sp]

    # all() is True for an empty sequence, therefore an identifier without
    # any license has to be rejected explicitly
    if not split_list:
        return False

    # for example, "A" and "B" needs to be in the supported list in order to match "A OR B".
    return all(i in all_licenses for i in split_list)
def verify_config(config: configparser.ConfigParser) -> None:
    """
    Validate the configuration and exit with status 1 when it is not usable.

    Every section (DEFAULT included) has to use an allowed license for new files
    and every regular section has to have the "include" option.
    """
    config_ok = True

    # iterating over the parser itself also yields the DEFAULT section
    for name in config:
        options = config[name]
        new_file_license = options['license_for_new_files']
        # configparser stores all values as strings
        permitted = ast.literal_eval(options['allowed_licenses'])
        if allowed_license_combination(new_file_license, permitted):
            continue
        print(f'Invalid config, section "{name}":\nDefault license for new files '
              f'({new_file_license}) is not on the allowed licenses list {permitted}.')
        config_ok = False

    for name in config.sections():
        if 'include' in config[name]:
            continue
        print(f'Invalid config, section "{name}":\nSection does not have the "include" option set.')
        config_ok = False

    if not config_ok:
        sys.exit(1)
def git_diff_numstat() -> Dict[str, Tuple[int, int]]:
    """
    Collect the number of added and deleted lines of every staged file.

    The statistics come from "git diff --cached --numstat" run against HEAD, or against
    the empty tree when there is no commit yet. This is best effort only: an empty
    dictionary is returned when git cannot be run or the directory is not a repository.

    returns: dictionary file name -> (added lines, deleted lines)
    """
    def call_git(args: List, die: bool = True) -> subprocess.CompletedProcess:
        return subprocess.run(['git'] + args, stdout=subprocess.PIPE,
                              stderr=subprocess.STDOUT, check=die, text=True)

    numstat: Dict[str, Tuple[int, int]] = {}
    try:
        call_git(['rev-parse', '--git-dir'])
        p = call_git(['rev-parse', '--verify', 'HEAD'], die=False)
        if not p.returncode:
            against = 'HEAD'
        else:
            # no commit yet - compare with the empty tree
            p = call_git(['hash-object', '-t', 'tree', os.devnull])
            against = p.stdout.strip()
        p = call_git(['diff', '--cached', '--numstat', against])
    except (OSError, subprocess.SubprocessError, UnicodeError):
        # git is missing, this is not a repository or the output could not be decoded
        return numstat

    for entry in p.stdout.splitlines():
        # The fields are separated by tabs: <added> TAB <deleted> TAB <path>.
        # Splitting on tabs keeps paths with spaces in one piece.
        fields = entry.split('\t', 2)
        if len(fields) != 3:
            continue
        added, deleted, file = fields
        if not (added.isdigit() and deleted.isdigit()):
            # binary files are reported as "-" and have no line counts; a single
            # such entry must not throw away the statistics of all the other files
            continue
        numstat[file] = (int(added), int(deleted))
    return numstat
def git_status_is_new_file() -> Dict[str, bool]:
    """
    Find out which files are newly added, based on "git status --short --renames".

    This is best effort only: an empty dictionary is returned when git cannot be run
    or when it reports a failure.

    returns: dictionary file name -> True if the status of the file is exactly "A", else False
    """
    file_status: Dict[str, bool] = {}
    try:
        p = subprocess.run(['git', 'status', '--short', '--renames'], stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
    except (OSError, subprocess.SubprocessError, UnicodeError):
        return file_status
    if p.returncode:
        # stderr is merged into stdout, so the output holds an error message and not a file list
        return file_status

    for entry in p.stdout.splitlines():
        # short format: two status characters, a space and the path
        if len(entry) < 4:
            continue
        status, path = entry[:2].strip(), entry[3:]
        if ' -> ' in path:
            # renamed entries are listed as "old -> new"; the file exists under the new name
            path = path.split(' -> ', 1)[1]
        if len(path) >= 2 and path[0] == '"' and path[-1] == '"':
            # paths with whitespace or special characters are quoted by git
            path = path[1:-1]
        # If the file status shows that it was newly added, i.e, 'A', then store True, else False
        file_status[path] = status == 'A'
    return file_status
def main() -> None:
    """
    Entry point: parse the arguments, load the config, check the files and print a summary.

    Exits with status 1 when any file was modified, needs an update or has an invalid
    copyright (files on the ignore list with an invalid copyright do not count).
    """
    args = build_parser().parse_args()
    args.numstat = git_diff_numstat()
    args.is_new_file = git_status_is_new_file()

    # Expand the given paths into files: whatever is not a file is treated as a directory
    # and its content is queued. A separate queue is used so that args.filenames is not
    # extended while being iterated; the dictionary serves as an ordered set which keeps
    # the order of processing the same between runs.
    files = {}
    queue = list(args.filenames)
    position = 0
    while position < len(queue):
        path = queue[position]
        position += 1
        if os.path.isfile(path):
            files[path] = None
        else:
            queue.extend(sorted(glob.glob(path + '/*')))
    args.filenames = list(files)

    config = configparser.ConfigParser()
    with open(args.config, 'r') as f:
        yaml_dict = yaml.safe_load(f)
    config.read_dict(yaml_dict)

    if args.debug:
        debug_output(args, config)

    verify_config(config)
    wrong_header_files, modified_files, must_be_updated = check_copyrights(args, config)

    # modified files always abort the commit, they have to be staged again
    abort_commit = bool(modified_files)
    num_files_wrong = 0

    if wrong_header_files:
        print(f'{TERMINAL_YELLOW}Information about this test{TERMINAL_RESET}')
        print(CHECK_FAIL_MESSAGE)
        print(f'{TERMINAL_YELLOW}Files which failed the copyright check:{TERMINAL_RESET}')
        for wrong_file in wrong_header_files:
            # files on the ignore list are listed, but they neither count nor abort the commit
            if not wrong_file.is_on_ignore_list:
                abort_commit = True
                num_files_wrong += 1
            print(wrong_file)

    if must_be_updated:
        print(f'{TERMINAL_RED}Some files are without a copyright note and a license header needs to be added:{TERMINAL_RESET}')
        for file in must_be_updated:
            print(file)
        abort_commit = True

    if modified_files:
        print(f'\n{TERMINAL_YELLOW}Modified files:{TERMINAL_RESET}')
        for file in modified_files:
            print(file)
        print(CHECK_MODIFY_MESSAGE)

    num_files_processed = len(args.filenames)
    plural = '' if num_files_processed == 1 else 's'
    print(CHECK_FOOTER_MESSAGE)

    if abort_commit:
        num_files_modified = len(modified_files)
        print(f'{TERMINAL_RED}Processed {num_files_processed} source file{plural},', end=' ')
        print(f'{num_files_modified} were modified and {num_files_wrong} have an invalid copyright (excluding ones on the ignore list).{TERMINAL_RESET}')
        sys.exit(1)  # sys.exit(1) to abort the commit

    # pre-commit also automatically aborts a commit if files are modified on disk
    print(f'{TERMINAL_GREEN}Successfully processed {num_files_processed} file{plural}.{TERMINAL_RESET}')


if __name__ == '__main__':
    main()