Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

infra: add script to capture replayable commands #12608

Draft
wants to merge 6 commits into
base: master
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions infra/base-images/base-builder/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -165,6 +165,7 @@ COPY bazel_build_fuzz_tests \
install_rust.sh \
install_swift.sh \
python_coverage_helper.py \
bash_parser.py \
srcmap \
write_labels.py \
/usr/local/bin/
Expand Down
194 changes: 194 additions & 0 deletions infra/base-images/base-builder/bash_parser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
import os
import sys
import bashlex

from glob import glob


def find_all_bash_scripts_in_src():
all_scripts = [
y for x in os.walk('/src/') for y in glob(os.path.join(x[0], '*.sh'))
]
scripts_we_care_about = []
to_ignore = {'aflplusplus', 'honggfuzz', '/fuzztest', '/centipede'}
for s in all_scripts:
if any([x for x in to_ignore if x in s]):
continue
scripts_we_care_about.append(s)

print(scripts_we_care_about)
return scripts_we_care_about


def should_discard_command(ast_tree) -> bool:
"""Returns True if the command shuold be avoided, otherwise False"""
try:
first_word = ast_tree.parts[0].word
except: # pylint: disable=bare-except
return False
if 'configure' in first_word:
return True
if 'autoheader' in first_word:
return True
if 'autoconf' in first_word:
return True
if 'cmake' in first_word:
return True
if 'autogen.sh' in first_word:
return True

try:
second_word = ast_tree.parts[1].word
except: # pylint: disable=bare-except
return False

if 'make' in first_word and 'clean' in second_word:
return True

return False


def is_local_redirection(ast_node, all_scripts):
"""Return the list of scripts corresponding to the command, in case
the command is an execution of a local script."""
# print("Checking")

# Capture local script called with ./random/path/build.sh

if len(ast_node.parts) >= 2:
try:
ast_node.parts[0].word
except:
return []
if ast_node.parts[0].word == '.':
suffixes_matching = []
#print(ast_node.parts[1].word)
for bash_script in all_scripts:
#print("- %s"%(bash_script))
cmd_to_exec = ast_node.parts[1].word.replace('$SRC', 'src')
if bash_script.endswith(cmd_to_exec):
suffixes_matching.append(bash_script)
#print(suffixes_matching)
return suffixes_matching
# Capture a local script called with $SRC/random/path/build.sh
if len(ast_node.parts) >= 1:
if '$SRC' in ast_node.parts[0].word:
suffixes_matching = []
print(ast_node.parts[0].word)
for bash_script in all_scripts:
print("- %s" % (bash_script))
cmd_to_exec = ast_node.parts[0].word.replace('$SRC', 'src')
if bash_script.endswith(cmd_to_exec):
suffixes_matching.append(bash_script)
print(suffixes_matching)
return suffixes_matching

return []


def handle_ast_command(ast_node, all_scripts_in_fs, raw_script):
"""Generate bash script string for command node"""
new_script = ''
if should_discard_command(ast_node):
return ''

matches = is_local_redirection(ast_node, all_scripts_in_fs)
if len(matches) == 1:
new_script += parse_script(matches[0], all_scripts_in_fs) + '\n'
return ''

# Extract the command from the script string
idx_start = ast_node.pos[0]
idx_end = ast_node.pos[1]
new_script += raw_script[idx_start:idx_end]
#new_script += '\n'

# If mkdir is used, then ensure that '-p' is provided, as
# otherwise we will run into failures. We don't have to worry
# about multiple uses of -p as `mkdir -p -p -p`` is valid.
new_script = new_script.replace('mkdir', 'mkdir -p')
return new_script


def handle_ast_list(ast_node, all_scripts_in_fs, raw_script):
new_script = ''
try_hard = 1

if not try_hard:
list_start = ast_node.pos[0]
list_end = ast_node.pos[1]
new_script += raw_script[list_start:list_end] # + '\n'
else:
# This is more refined logic. Ideally, this should work, but it's a bit
# more intricate to get right due to e.g. white-space between positions
# and more extensive parsing needed. We don't neccesarily need this
# level of success rate for what we're trying to achieve, so am disabling
# this for now.
for part in ast_node.parts:
if part.kind == 'list':
new_script += handle_ast_list(part, all_scripts_in_fs, raw_script)
elif part.kind == 'command':
new_script += handle_ast_command(part, all_scripts_in_fs, raw_script)
else:
idx_start = part.pos[0]
idx_end = part.pos[1]
new_script += raw_script[idx_start:idx_end]
new_script += ' '

# Make sure what was created is valid syntax, and otherwise return empty
try:
bashlex.parse(new_script)
except: # pylint: disable=bare-except
# Maybe return the original here instead of skipping?
return ''
return new_script


def handle_ast_compound(ast_node, all_scripts_in_fs, raw_script):
new_script = ''
try_hard = 1
list_start = ast_node.pos[0]
list_end = ast_node.pos[1]
new_script += raw_script[list_start:list_end] + '\n'
return new_script


def handle_node(ast_node, all_scripts_in_fs, build_script):
"""Generates a bash script string for a given node"""
if ast_node.kind == 'command':
return handle_ast_command(ast_node, all_scripts_in_fs, build_script)
elif ast_node.kind == 'list':
return handle_ast_list(ast_node, all_scripts_in_fs, build_script)
elif ast_node.kind == 'compound':
print('todo: handle compound')
return handle_ast_compound(ast_node, all_scripts_in_fs, build_script)
else:
raise Exception(f'Missing node handling: {ast_node.kind}')


def parse_script(bash_script, all_scripts) -> str:
"""Top-level bash script parser"""
new_script = ''
with open(bash_script, 'r', encoding='utf-8') as f:
build_script = f.read()
parts = bashlex.parse(build_script)
for part in parts:
new_script += handle_node(part, all_scripts, build_script)
new_script += '\n'
print("-" * 45)
print(part.kind)
print(part.dump())

return new_script


if __name__ == "__main__":
all_scripts = find_all_bash_scripts_in_src()
replay_bash_script = parse_script(sys.argv[1], all_scripts)

print("REPLAYABLE BASH SCRIPT")
print("#" * 60)
print(replay_bash_script)
print("#" * 60)
with open('/out/replay-build-script.sh', 'w') as f:
f.write(replay_bash_script)
6 changes: 6 additions & 0 deletions infra/base-images/base-builder/compile
Original file line number Diff line number Diff line change
Expand Up @@ -231,6 +231,10 @@ if [ "${OSS_FUZZ_ON_DEMAND}" != "0" ]; then
exit 0
fi

#echo 'export SHELLOPTS' | cat - $SRC/build.sh > temp && mv temp $SRC/build.sh
python3 -m pip install bashlex
python3 /usr/local/bin/bash_parser.py $SRC/build.sh

BUILD_CMD="bash -eux $SRC/build.sh"

# Set +u temporarily to continue even if GOPATH and OSSFUZZ_RUSTPATH are undefined.
Expand All @@ -248,6 +252,7 @@ if [ "$FUZZING_LANGUAGE" = "rust" ]; then
cp -r /rust/rustup/toolchains/$rustdef/lib/rustlib/src/rust/library/ /rustc/$rustch/
fi

#export SHELLOPTS
if [ "${BUILD_UID-0}" -ne "0" ]; then
adduser -u $BUILD_UID --disabled-password --gecos '' builder
chown -R builder $SRC $OUT $WORK
Expand All @@ -263,6 +268,7 @@ else
$COPY_SOURCES_CMD 2>/dev/null || true
fi
fi
#unset SHELLOPTS

if [ "$SANITIZER" = "introspector" ]; then
unset CXXFLAGS
Expand Down
Loading