Skip to content

Commit

Permalink
[4.2.8] Multiple APK Analysis improvements, general Code QA & bug fix…
Browse files Browse the repository at this point in the history
…es (#2470)

* Dockerfile QA
* Add sdk-build-tools to Docker image
* Replace biplist with plistlib std lib
* Fixed a bug in iOS pbxproj parsing
* Added support for APK parsing with aapt2/aapt
* Use aapt/aapt2 as a fallback for APK parsing, files listing and string extraction
* Added "started at" to Scan task queue model #2463
* Tasks List API to return string status #2464
* Replaced all minidom calls with defusedxml.minidom
* Code QA on android manifest data extraction and parsing
* Improved android file analysis
* Improved android manifest data extraction
* Improved android icon file extraction
* Improved android app name extraction
* Improved android appstore package details extraction
* Android string extraction to fallback on aapt2 strings
* APK analysis arguments refactor
* Handle packed APKs, refactor unzip to handle malformed APK files
* Handle reserved filename conflict during ZIP extraction
* Explicit Zipslip handling during ZIP extraction
* Graceful files extraction on unzip failure
* Removed bail out and continue analysis
* Moved androguard parsing to the start of static analysis
* AndroidManifest.xml fallback from apktool to androguard during extraction and parsing
* Updated Tasks UI to show started at
  • Loading branch information
ajinabraham authored Nov 29, 2024
1 parent a015df5 commit 5ce7e23
Show file tree
Hide file tree
Showing 40 changed files with 1,029 additions and 598 deletions.
5 changes: 1 addition & 4 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,6 @@ ENV DEBIAN_FRONTEND=noninteractive \
USER_ID=9901 \
MOBSF_PLATFORM=docker \
MOBSF_ADB_BINARY=/usr/bin/adb \
JDK_FILE=openjdk-22.0.2_linux-x64_bin.tar.gz \
JDK_FILE_ARM=openjdk-22.0.2_linux-aarch64_bin.tar.gz \
WKH_FILE=wkhtmltox_0.12.6.1-3.bookworm_amd64.deb \
WKH_FILE_ARM=wkhtmltox_0.12.6.1-3.bookworm_arm64.deb \
JAVA_HOME=/jdk-22.0.2 \
PATH=/jdk-22.0.2/bin:/root/.local/bin:$PATH \
DJANGO_SUPERUSER_USERNAME=mobsf \
Expand All @@ -32,6 +28,7 @@ ENV DEBIAN_FRONTEND=noninteractive \
# See https://docs.docker.com/develop/develop-images/dockerfile_best-practices/#run
RUN apt update -y && \
apt install -y --no-install-recommends \
android-sdk-build-tools \
android-tools-adb \
build-essential \
curl \
Expand Down
8 changes: 4 additions & 4 deletions mobsf/DynamicAnalyzer/views/common/device.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,11 +19,11 @@
read_sqlite,
)

from biplist import (
writePlistToString,
from plistlib import (
FMT_XML,
dumps,
)


logger = logging.getLogger(__name__)


Expand Down Expand Up @@ -57,7 +57,7 @@ def view_file(request, api=False):
return print_n_send_error_response(request, err, api)
dat = sfile.read_text('ISO-8859-1')
if fil.endswith('.plist') and dat.startswith('bplist0'):
dat = writePlistToString(dat).decode('utf-8', 'ignore')
dat = dumps(dat, fmt=FMT_XML).decode('utf-8', 'ignore')
if fil.endswith(('.xml', '.plist')) and typ in ['xml', 'plist']:
rtyp = 'xml'
elif typ == 'db':
Expand Down
2 changes: 1 addition & 1 deletion mobsf/MobSF/init.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

logger = logging.getLogger(__name__)

VERSION = '4.2.7'
VERSION = '4.2.8'
BANNER = r"""
__ __ _ ____ _____ _ _ ____
| \/ | ___ | |__/ ___|| ___|_ _| || | |___ \
Expand Down
29 changes: 28 additions & 1 deletion mobsf/MobSF/security.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@
import sys
from shutil import which
from pathlib import Path
from platform import system
from concurrent.futures import ThreadPoolExecutor


from mobsf.MobSF.utils import (
find_aapt,
find_java_binary,
gen_sha256_hash,
get_adb,
Expand Down Expand Up @@ -72,9 +73,21 @@ def get_executable_hashes():
downloaded_tools,
manage_py,
]
aapt = 'aapt'
aapt2 = 'aapt2'
if system() == 'Windows':
aapt = 'aapt.exe'
aapt2 = 'aapt2.exe'
aapts = [find_aapt(aapt), find_aapt(aapt2)]
exec_loc.extend(Path(a) for a in aapts if a)
# External binaries used directly by MobSF
system_bins = [
'aapt',
'aapt.exe',
'aapt2',
'aapt2.exe',
'adb',
'adb.exe',
'which',
'wkhtmltopdf',
'httptools',
Expand Down Expand Up @@ -110,6 +123,8 @@ def get_executable_hashes():
settings.CLASSDUMP_BINARY,
settings.CLASSDUMP_SWIFT_BINARY,
getattr(settings, 'BUNDLE_TOOL', ''),
getattr(settings, 'AAPT2_BINARY', ''),
getattr(settings, 'AAPT_BINARY', ''),
]
for ubin in user_defined_bins:
if ubin:
Expand Down Expand Up @@ -222,3 +237,15 @@ def sanitize_filename(filename):
# Remove leading and trailing underscores
safe_filename = safe_filename.strip('_')
return safe_filename


def sanitize_for_logging(filename: str, max_length: int = 255) -> str:
    """Return a version of ``filename`` that is safe to write to a log.

    Args:
        filename: The untrusted name to be logged.
        max_length: Maximum number of characters to keep.

    Returns:
        The name with every character outside ``a-z``, ``A-Z``, ``0-9``,
        ``.``, ``_`` and ``-`` replaced by an underscore, cut to at most
        ``max_length`` characters.
    """
    cleaned = filename
    # Neutralise the characters that could forge or break a log line.
    for control_char in ('\n', '\r', '\t'):
        cleaned = cleaned.replace(control_char, '_')

    # Everything that is not on the allow-list becomes an underscore.
    cleaned = re.sub(r'[^a-zA-Z0-9._-]', '_', cleaned)

    # Cap the length so one entry cannot flood the log.
    return cleaned[:max_length]
2 changes: 2 additions & 0 deletions mobsf/MobSF/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -462,6 +462,8 @@
VD2SVG_BINARY = os.getenv('MOBSF_VD2SVG_BINARY', '')
APKTOOL_BINARY = os.getenv('MOBSF_APKTOOL_BINARY', '')
ADB_BINARY = os.getenv('MOBSF_ADB_BINARY', '')
AAPT2_BINARY = os.getenv('MOBSF_AAPT2_BINARY', '')
AAPT_BINARY = os.getenv('MOBSF_AAPT_BINARY', '')

# iOS 3P Tools
JTOOL_BINARY = os.getenv('MOBSF_JTOOL_BINARY', '')
Expand Down
29 changes: 29 additions & 0 deletions mobsf/MobSF/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,6 +184,32 @@ def find_java_binary():
return 'java'


def find_aapt(tool_name):
    """Find the specified tool (aapt or aapt2).

    The system PATH is searched first. If the tool is not there, the
    ``build-tools`` folder of the default per-user Android SDK locations
    is searched, trying the highest build-tools version first.

    Args:
        tool_name: Executable file name to look for, for example
            ``aapt2`` or ``aapt2.exe``.

    Returns:
        The path of the tool as a string, or None when it is not found.
    """
    import re  # Local import: only needed to order version folders.

    def version_key(version_dir):
        # Order folders by their numeric version so that '34.0.0' ranks
        # above '9.0.0'; plain string sorting would get this wrong.
        name = version_dir.name
        matched = re.match(r'[0-9]+(?:\.[0-9]+)*', name)
        prefix = matched.group(0) if matched else ''
        numbers = tuple(int(p) for p in prefix.split('.')) if prefix else ()
        # On equal numbers a plain release ('35.0.0') ranks above a
        # suffixed folder ('35.0.0-rc1'); the name keeps ordering stable.
        return numbers, name == prefix, name

    # Check system PATH for the tool
    tool_path = shutil.which(tool_name)
    if tool_path:
        return tool_path

    # Check common Android SDK locations
    home_dir = Path.home()
    sdk_paths = [
        home_dir / 'Library' / 'Android' / 'sdk',  # macOS
        home_dir / 'Android' / 'Sdk',  # Linux
        home_dir / 'AppData' / 'Local' / 'Android' / 'Sdk',  # Windows
    ]

    for sdk_path in sdk_paths:
        build_tools_path = sdk_path / 'build-tools'
        # is_dir() also guards iterdir() against a stray file of this name.
        if not build_tools_path.is_dir():
            continue
        versions = sorted(
            build_tools_path.iterdir(), key=version_key, reverse=True)
        for version in versions:
            tool_path = version / tool_name
            if tool_path.exists():
                return str(tool_path)

    return None


def print_n_send_error_response(request,
msg,
api=False,
Expand Down Expand Up @@ -667,6 +693,8 @@ def common_check(instance_id):

def is_path_traversal(user_input):
"""Check for path traversal."""
if not user_input:
return False
if (('../' in user_input)
or ('%2e%2e' in user_input)
or ('..' in user_input)
Expand Down Expand Up @@ -836,6 +864,7 @@ def get_android_dm_exception_msg():

def get_android_src_dir(app_dir, typ):
"""Get Android source code location."""
src = None
if typ == 'apk':
src = app_dir / 'java_source'
elif typ == 'studio':
Expand Down
1 change: 1 addition & 0 deletions mobsf/StaticAnalyzer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ class EnqueuedTask(models.Model):
file_name = models.CharField(max_length=255)
created_at = models.DateTimeField(default=timezone.now)
status = models.CharField(max_length=255, default='Enqueued')
started_at = models.DateTimeField(null=True)
completed_at = models.DateTimeField(null=True)
app_name = models.CharField(max_length=255, default='')

Expand Down
5 changes: 3 additions & 2 deletions mobsf/StaticAnalyzer/tools/androguard4/resources/public.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# -*- coding: utf_8 -*-
# flake8: noqa
import os
from xml.dom import minidom

from defusedxml.minidom import parseString

_public_res = None
# copy the newest sdk/platforms/android-?/data/res/values/public.xml here
Expand All @@ -11,7 +12,7 @@
xmlfile = os.path.join(root, "public.xml")
if os.path.isfile(xmlfile):
with open(xmlfile, "r") as fp:
_xml = minidom.parseString(fp.read())
_xml = parseString(fp.read())
for element in _xml.getElementsByTagName("public"):
_type = element.getAttribute('type')
_name = element.getAttribute('name')
Expand Down
151 changes: 151 additions & 0 deletions mobsf/StaticAnalyzer/views/android/aapt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
# -*- coding: utf_8 -*-
"""Use aapt2 to extract APK features."""
import re
import logging
import subprocess
from platform import system
from pathlib import Path

from django.conf import settings

from mobsf.MobSF.utils import (
find_aapt,
)

logger = logging.getLogger(__name__)


class AndroidAAPT:
    """Run the aapt and aapt2 command line tools against one APK.

    aapt is used to list the files in the APK; aapt2 is used to dump its
    strings and its badging information. Parsed badging values are kept
    in ``self.data``.
    """

    def __init__(self, apk_path):
        """Locate the aapt and aapt2 binaries to use for ``apk_path``.

        Args:
            apk_path: Path of the APK passed to every aapt/aapt2 command.

        Raises:
            FileNotFoundError: If aapt or aapt2 cannot be located.
        """
        self.aapt2_path = None
        self.aapt_path = None
        self.apk_path = apk_path
        self.data = {
            'permissions': [],
            'uses_features': {},
            'package': None,
            'application_label': None,
            'application_icon': None,
            'launchable_activity': None,
            'min_sdk_version': None,
            'target_sdk_version': None,
        }

        suffix = '.exe' if system() == 'Windows' else ''
        self.aapt2_path = self._locate_binary('AAPT2_BINARY', 'aapt2' + suffix)
        self.aapt_path = self._locate_binary('AAPT_BINARY', 'aapt' + suffix)

        # Both tools are required: aapt lists files, aapt2 dumps data.
        if not (self.aapt2_path and self.aapt_path):
            raise FileNotFoundError('aapt and aapt2 not found')

    @staticmethod
    def _locate_binary(setting_name, default_name):
        """Return the binary configured in settings, else search for it.

        The path in ``settings.<setting_name>`` wins when it is set and
        exists on disk; otherwise ``find_aapt(default_name)`` decides.
        """
        configured = getattr(settings, setting_name, '')
        if configured and Path(configured).exists():
            return configured
        return find_aapt(default_name)

    def _execute_command(self, args):
        """Run ``args`` and return its decoded output, or None on failure.

        stderr is merged into stdout. A non-zero exit status is logged as
        a warning and reported to the caller as None.
        """
        try:
            out = subprocess.check_output(
                args,
                stderr=subprocess.STDOUT)
            return out.decode('utf-8', errors='ignore')
        except subprocess.CalledProcessError as e:
            logger.warning(e.output)
            return None

    def _get_strings(self, output):
        """Return the string values in ``output`` that contain no slash.

        Each entry is expected on its own line as ``String #<n> : <value>``.
        """
        # Anchor at end of line so that a value containing a slash is
        # skipped as a whole, instead of yielding the text before the
        # first slash (e.g. 'res' from 'res/layout/main.xml').
        pattern = r'String #[\d]+ : ([^\/\n]+)$'
        matches = re.findall(pattern, output, re.MULTILINE)
        # Strip whitespace and return the extracted strings
        return [match.strip() for match in matches]

    def _parse_badging(self, output):
        """Parse badging ``output`` into ``self.data`` and return it.

        Scalar fields and permissions are only overwritten when found in
        ``output``; ``uses_features`` is always replaced.
        """
        # Match the package information
        package_match = re.search(r'package: name=\'([\w\.]+)\'', output)
        if package_match:
            self.data['package'] = package_match.group(1)

        # Match permissions
        permissions = re.findall(
            r'uses-permission: name=\'([\w\.]+)\'', output)
        if permissions:
            self.data['permissions'] = permissions

        # Match the minimum SDK version. Badging output is expected to
        # report it as sdkVersion:'N'; minSdkVersion:'N' is accepted too.
        # The lowercase 's' keeps this from matching targetSdkVersion.
        min_sdk_match = re.search(
            r'\b(?:minSdkVersion|sdkVersion):\'(\d+)\'', output)
        if min_sdk_match:
            self.data['min_sdk_version'] = min_sdk_match.group(1)

        # Match targetSdkVersion
        target_sdk_match = re.search(r'targetSdkVersion:\'(\d+)\'', output)
        if target_sdk_match:
            self.data['target_sdk_version'] = target_sdk_match.group(1)

        # Match application label: prefer the default label and only fall
        # back to the first localized application-label-<locale> entry.
        label_match = (
            re.search(r'application-label:\'([^\']+)\'', output)
            or re.search(
                r'application-label(?:-[\w\-]+)?:\'([^\']+)\'', output))
        if label_match:
            self.data['application_label'] = label_match.group(1)

        # Match application icon
        icon_match = re.search(r'application:.*icon=\'([^\']+)\'', output)
        if icon_match:
            self.data['application_icon'] = icon_match.group(1)

        # Match launchable activity
        activity_match = re.search(
            r'launchable-activity: name=\'([\w\.]+)\'', output)
        if activity_match:
            self.data['launchable_activity'] = activity_match.group(1)

        # Match used features
        feature_matches = re.findall(
            (r'(uses-feature(?:-not-required)?|uses-implied-feature): '
             r'name=\'([\w\.]+)\'(?: reason=\'([^\']+)\')?'),
            output,
        )
        # 'type' is one of 'uses-feature', 'uses-feature-not-required'
        # or 'uses-implied-feature'.
        self.data['uses_features'] = {
            feature_name: {
                'type': feature_type,
                'reason': reason if reason else 'No reason provided',
            }
            for feature_type, feature_name, reason in feature_matches
        }

        return self.data

    def get_apk_files(self):
        """List all files in the APK."""
        output = self._execute_command(
            [self.aapt_path, 'list', self.apk_path])
        if output:
            return output.splitlines()
        return []

    def get_apk_strings(self):
        """Extract strings from the APK."""
        output = self._execute_command(
            [self.aapt2_path, 'dump', 'strings', self.apk_path])
        if output:
            return self._get_strings(output)
        return []

    def get_apk_features(self):
        """Extract features from the APK."""
        output = self._execute_command(
            [self.aapt2_path, 'dump', 'badging', self.apk_path])
        if output:
            return self._parse_badging(output)
        return self.data
Loading

0 comments on commit 5ce7e23

Please sign in to comment.