# scripts/correct_error_ids.py

import random
import re
import os
from os import path

# Text encoding used both when reading source files and when writing them back.
ENCODING = "utf-8"
# Matches an error-ID literal: a run of digits immediately followed by
# "_error", delimited by word boundaries (for example 1234_error).
PATTERN = r"\b\d+_error\b"


def read_file(file_name):
    """Return the whole content of file_name, decoded with ENCODING.

    If opening, reading or decoding fails, a message naming the file is
    printed and the original exception is re-raised to the caller.
    """
    try:
        with open(file_name, "r", encoding=ENCODING) as f:
            return f.read()
    except Exception:
        # Name the offending file before the traceback, then propagate.
        print(f"Error reading: {file_name}")
        raise


def write_file(file_name, content):
    """Overwrite file_name with content, encoded with ENCODING."""
    with open(file_name, mode="w", encoding=ENCODING) as out:
        out.write(content)


def in_comment(source, pos):
    """Heuristically tell whether offset pos of source lies inside a comment.

    The position counts as commented out when, looking only at the text
    before pos, either the last "//" comes after the last line feed, or
    the last "/*" comes after the last "*/". String literals are not
    taken into account.
    """

    def last(token):
        # Index of the final occurrence of token before pos, or -1 if none.
        return source.rfind(token, 0, pos)

    return last("//") > last("\n") or last("/*") > last("*/")


def find_ids_in_file(file_name, ids):
    """Count the error IDs used in file_name, accumulating into ids.

    ids maps the digit part of each PATTERN match (as a string) to its
    number of occurrences and is updated in place. Matches that
    in_comment() classifies as commented out are not counted.
    """
    text = read_file(file_name)
    for match in re.finditer(PATTERN, text):
        if not in_comment(text, match.start()):
            # The digits are everything before the first underscore.
            number = match.group(0).partition("_")[0]
            ids[number] = ids.get(number, 0) + 1


def get_used_ids(file_names):
    """Return a dict mapping each error ID found in file_names to its count.

    The counting itself is delegated to find_ids_in_file(), which is run
    once per file against a single shared dict.
    """
    counts = {}
    for source_file in file_names:
        find_ids_in_file(source_file, counts)
    return counts


def get_id(available_ids, used_ids):
    """Remove and return a pseudo-randomly chosen ID that is not in use.

    available_ids is a dict whose keys form the pool of candidates; every
    candidate that is examined is deleted from it, whether or not it is
    returned. used_ids is only consulted for membership.

    Raises:
        AssertionError: when the pool is exhausted without finding an
            unused ID.
    """
    while available_ids:
        # The seed depends only on the pool size, so the same pool always
        # yields the same pick and repeated runs are reproducible.
        random.seed(len(available_ids))
        candidate = list(available_ids)[random.randrange(len(available_ids))]
        del available_ids[candidate]
        if candidate not in used_ids:
            return candidate
    # Raised explicitly: an `assert False` statement is removed under
    # `python -O`, which would let the function fall through and return None.
    raise AssertionError("Out of IDs")


def fix_ids_in_file(file_name, available_ids, used_ids):
    """Rewrite file_name so that every non-comment error ID is valid and unique.

    An ID outside a comment is replaced by one obtained from get_id() when
    it is not exactly four characters long, starts with "0", or is still
    recorded in used_ids as occurring more than once. Each replacement
    decrements the old ID's count in used_ids. IDs that in_comment()
    classifies as commented out are copied unchanged.

    The file is written back, and a message printed, only if the text
    actually changed.
    """
    source = read_file(file_name)

    pieces = []
    copied_up_to = 0
    for m in re.finditer(PATTERN, source):
        # Append the untouched text as one slice; extend() on a string
        # would split it into single-character list elements.
        pieces.append(source[copied_up_to:m.start()])

        old_id = m.group(0).partition("_")[0]
        new_id = old_id

        if not in_comment(source, m.start()):
            # Checked before the count is read so that a missing entry is
            # reported as a broken invariant rather than a bare KeyError.
            assert old_id in used_ids, f"ID {old_id} was not counted"
            # incorrect id or id has a duplicate somewhere
            if len(old_id) != 4 or old_id[0] == "0" or used_ids[old_id] > 1:
                new_id = get_id(available_ids, used_ids)
                used_ids[old_id] -= 1

        pieces.append(new_id + "_error")
        copied_up_to = m.end()

    pieces.append(source[copied_up_to:])

    destination = "".join(pieces)
    if source != destination:
        write_file(file_name, destination)
        print(f"Fixed file: {file_name}")


def fix_ids(used_ids, file_names):
    """Fix the error IDs of every file in file_names, one file at a time.

    A single pool holding the strings "1000" through "9999" is shared by
    all files, so a replacement ID handed out for one file is no longer
    available to the next.
    """
    available_ids = dict.fromkeys(map(str, range(1000, 10000)))
    for source_file in file_names:
        fix_ids_in_file(source_file, available_ids, used_ids)


def find_source_files(top_dir):
    """Builds the list of .h and .cpp files below the known source
    subdirectories of top_dir.

    Only the subdirectories listed in source_dirs are walked (recursively);
    files elsewhere under top_dir are ignored.

    Raises:
        SystemExit: with a "Walk error: ..." message if a directory cannot
            be listed, including when one of the subdirectories is missing.
    """

    def abort_walk(error):
        # Raise SystemExit directly rather than calling the exit() helper,
        # which the site module provides for interactive sessions only.
        raise SystemExit(f"Walk error: {error}")

    source_dirs = ['libevmasm', 'liblangutil', 'libsolc', 'libsolidity', 'libsolutil', 'libyul', 'solc']

    source_file_names = []
    for source_dir in source_dirs:
        for root, _, file_names in os.walk(path.join(top_dir, source_dir), onerror=abort_walk):
            source_file_names.extend(
                path.join(root, file_name)
                for file_name in file_names
                if path.splitext(file_name)[1] in (".h", ".cpp")
            )

    return source_file_names


def main():
    """Interactively check, and if needed fix, error IDs under the current directory.

    Asks for confirmation first and returns without doing anything unless
    the answer is "y" or "Y". Then every ID found by get_used_ids() is
    checked for being four characters long, not starting with "0" and
    occurring only once; each violation is printed. If any violation was
    found, fix_ids() is run over the same files.
    """
    cwd = os.getcwd()
    answer = input(
        "This script checks and corrects *_error literals in .h and .cpp files\n"
        f"in {cwd}, recursively.\n\n"
        "Please commit current changes first, and review the results when the script finishes.\n\n"
        "Do you want to start [Y/N]? "
    )
    # Exact membership: a substring test against "YNyn" would also accept
    # multi-character inputs such as "YN" or "yn" as valid answers.
    while answer not in ("Y", "N", "y", "n"):
        answer = input("[Y/N]? ")
    if answer not in ("y", "Y"):
        return

    source_file_names = find_source_files(cwd)

    used_ids = get_used_ids(source_file_names)

    ok = True
    for error_id in sorted(used_ids):
        if len(error_id) != 4:
            print(f"ID {error_id} length != 4")
            ok = False
        if error_id[0] == "0":
            print(f"ID {error_id} starts with zero")
            ok = False
        if used_ids[error_id] > 1:
            print(f"ID {error_id} appears {used_ids[error_id]} times")
            ok = False

    if ok:
        print("No incorrect IDs found")
    else:
        fix_ids(used_ids, source_file_names)
        print("Fixing completed")


# Run the interactive fixer only when executed as a script, not on import.
if __name__ == "__main__":
    main()