From 418d74968092bfc806d864535f64d220d13e206e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= Date: Thu, 20 Oct 2016 12:56:51 +0200 Subject: [PATCH] backup: use 'scrypt' tool for backup encryption and integrity protection `openssl dgst` and `openssl enc` used previously poorly handle key stretching - in case of `openssl enc` encryption key is derived using single MD5 iteration, without even any salt. This hardly prevent brute force or even rainbow tables attacks. To make things worse, the same key is used for encryption and integrity protection which ease brute force even further. All this is still about brute force attacks, so when using long, high entropy passphrase, it should be still relatively safe. But lets do better. According to discussion in QubesOS/qubes-issues#971, scrypt algorithm is a good choice for key stretching (it isn't the best of all existing, but a good one and widely adopted). At the same time, lets switch away from `openssl` tool, as it is very limited and apparently not designed for production use. Use `scrypt` tool, which is very simple and does exactly what we need - encrypt the data and integrity protect it. Its archive format have own (simple) header with data required by the `scrypt` algorithm, including salt. Internally data is encrypted with AES256-CTR and integrity protected with HMAC-SHA256. For details see: https://github.com/tarsnap/scrypt/blob/master/FORMAT This means change of backup format. Mainly: 1. HMAC is stored in scrypt header, so don't use separate file for it. Instead have data in files with `.enc` extension. 2. For compatibility leave `backup-header` and `backup-header.hmac`. But `backup-header.hmac` is really scrypt-encrypted version of `backup-header`. 3. For each file, prepend its identifier to the passphrase, to authenticate filename itself too. Having this we can guard against reordering archive files within a single backup and across backups. This identifier is built as: backup ID (from backup-header)!filename! For backup-header itself, there is no backup ID (just 'backup-header!'). Fixes QubesOS/qubes-issues#971 --- qubes/backup.py | 406 +++++++++++++++++++++++++--------------- rpm_spec/core-dom0.spec | 1 + 2 files changed, 260 insertions(+), 147 deletions(-) diff --git a/qubes/backup.py b/qubes/backup.py index 637f24c52..361d3c2ff 100644 --- a/qubes/backup.py +++ b/qubes/backup.py @@ -24,6 +24,8 @@ from __future__ import unicode_literals import itertools import logging +import termios + from qubes.utils import size_to_human import sys import stat @@ -50,7 +52,9 @@ HEADER_FILENAME = 'backup-header' DEFAULT_CRYPTO_ALGORITHM = 'aes-256-cbc' -DEFAULT_HMAC_ALGORITHM = 'SHA512' +# 'scrypt' is not exactly HMAC algorithm, but a tool we use to +# integrity-protect the data +DEFAULT_HMAC_ALGORITHM = 'scrypt' DEFAULT_COMPRESSION_FILTER = 'gzip' CURRENT_BACKUP_FORMAT_VERSION = '4' # Maximum size of error message get from process stderr (including VM process) @@ -213,6 +217,63 @@ def run(self): self.log.debug("Finished sending thread") +def launch_proc_with_pty(args, stdin=None, stdout=None, stderr=None, echo=True): + """Similar to pty.fork, but handle stdin/stdout according to parameters + instead of connecting to the pty + + :return tuple (subprocess.Popen, pty_master) + """ + + def set_ctty(ctty_fd, master_fd): + os.setsid() + os.close(master_fd) + fcntl.ioctl(ctty_fd, termios.TIOCSCTTY, 0) + if not echo: + termios_p = termios.tcgetattr(ctty_fd) + # termios_p.c_lflags + termios_p[3] &= ~termios.ECHO + termios.tcsetattr(ctty_fd, termios.TCSANOW, termios_p) + (pty_master, pty_slave) = os.openpty() + p = subprocess.Popen(args, stdin=stdin, stdout=stdout, stderr=stderr, + preexec_fn=lambda: set_ctty(pty_slave, pty_master)) + os.close(pty_slave) + return p, os.fdopen(pty_master, 'w+') + + +def launch_scrypt(action, input_name, output_name, passphrase): + ''' + Launch 'scrypt' process, pass passphrase to it and return + subprocess.Popen object. + + :param action: 'enc' or 'dec' + :param input_name: input path or '-' for stdin + :param output_name: output path or '-' for stdout + :param passphrase: passphrase + :return: subprocess.Popen object + ''' + command_line = ['scrypt', action, input_name, output_name] + (p, pty) = launch_proc_with_pty(command_line, + stdin=subprocess.PIPE if input_name == '-' else None, + stdout=subprocess.PIPE if output_name == '-' else None, + stderr=subprocess.PIPE, + echo=False) + if action == 'enc': + prompts = ('Please enter passphrase: ', 'Please confirm passphrase: ') + else: + prompts = ('Please enter passphrase: ',) + for prompt in prompts: + actual_prompt = p.stderr.read(len(prompt)) + if actual_prompt != prompt: + raise qubes.exc.QubesException( + 'Unexpected prompt from scrypt: {}'.format(actual_prompt)) + pty.write(passphrase + '\n') + pty.flush() + # save it here, so garbage collector would not close it (which would kill + # the child) + p.pty = pty + return p + + class Backup(object): class FileToBackup(object): def __init__(self, file_path, subdir=None, name=None): @@ -476,15 +537,18 @@ def prepare_backup_header(self): compression_filter=self.compression_filter, ) backup_header.save(header_file_path) - - hmac = subprocess.Popen( - ["openssl", "dgst", "-" + self.hmac_algorithm, - "-hmac", self.passphrase], - stdin=open(header_file_path, "r"), - stdout=open(header_file_path + ".hmac", "w")) - if hmac.wait() != 0: + # Start encrypt, scrypt will also handle integrity + # protection + scrypt_passphrase = HEADER_FILENAME + '!' + self.passphrase.encode( + 'utf-8') + scrypt = launch_scrypt( + 'enc', header_file_path, header_file_path + '.hmac', + scrypt_passphrase) + + if scrypt.wait() != 0: raise qubes.exc.QubesException( - "Failed to compute hmac of header file") + "Failed to compute hmac of header file: " + + scrypt.stderr.read()) return HEADER_FILENAME, HEADER_FILENAME + ".hmac" @@ -640,60 +704,39 @@ def backup_do(self): self.log.debug(" ".join(tar_cmdline)) - # Tips: Popen(bufsize=0) - # Pipe: tar-sparse | encryptor [| hmac] | tar | backup_target - # Pipe: tar-sparse [| hmac] | tar | backup_target + # Pipe: tar-sparse | scrypt | tar | backup_target # TODO: log handle stderr tar_sparse = subprocess.Popen( - tar_cmdline, stdin=subprocess.PIPE) + tar_cmdline) self.processes_to_kill_on_cancel.append(tar_sparse) # Wait for compressor (tar) process to finish or for any # error of other subprocesses i = 0 + pipe = open(backup_pipe, 'rb') run_error = "paused" - encryptor = None - if self.encrypted: - # Start encrypt - # If no cipher is provided, - # the data is forwarded unencrypted !!! - encryptor = subprocess.Popen([ - "openssl", "enc", - "-e", "-" + self.crypto_algorithm, - "-pass", "pass:" + passphrase], - stdin=open(backup_pipe, 'rb'), - stdout=subprocess.PIPE) - pipe = encryptor.stdout - else: - pipe = open(backup_pipe, 'rb') while run_error == "paused": - - # Start HMAC - hmac = subprocess.Popen([ - "openssl", "dgst", "-" + self.hmac_algorithm, - "-hmac", passphrase], - stdin=subprocess.PIPE, - stdout=subprocess.PIPE) - # Prepare a first chunk - chunkfile = backup_tempfile + "." + "%03d" % i + chunkfile = backup_tempfile + ".%03d.enc" % i i += 1 - chunkfile_p = open(chunkfile, 'wb') + + # Start encrypt, scrypt will also handle integrity + # protection + scrypt_passphrase = os.path.relpath(chunkfile[:-4], + self.tmpdir) + '!' + passphrase + scrypt = launch_scrypt( + "enc", "-", chunkfile, scrypt_passphrase) run_error = handle_streams( pipe, - {'hmac_data': hmac.stdin, - 'backup_target': chunkfile_p, - }, - {'hmac': hmac, - 'vmproc': vmproc, + {'backup_target': scrypt.stdin}, + {'vmproc': vmproc, 'addproc': tar_sparse, - 'streamproc': encryptor, + 'scrypt': scrypt, }, self.chunk_size, self._add_vm_progress ) - chunkfile_p.close() self.log.debug( "12 returned: {}".format(run_error)) @@ -703,12 +746,7 @@ def backup_do(self): tar_sparse.terminate() except OSError: pass - try: - hmac.terminate() - except OSError: - pass tar_sparse.wait() - hmac.wait() to_send.put(QUEUE_ERROR) send_proc.join() shutil.rmtree(self.tmpdir) @@ -724,29 +762,16 @@ def backup_do(self): "Failed to perform backup: error in " + run_error) + scrypt.stdin.close() + scrypt.wait() + self.log.debug("scrypt return code: {}".format( + scrypt.poll())) + # Send the chunk to the backup target self._queue_put_with_check( send_proc, vmproc, to_send, os.path.relpath(chunkfile, self.tmpdir)) - # Close HMAC - hmac.stdin.close() - hmac.wait() - self.log.debug("HMAC proc return code: {}".format( - hmac.poll())) - - # Write HMAC data next to the chunk file - hmac_data = hmac.stdout.read() - self.log.debug( - "Writing hmac to {}.hmac".format(chunkfile)) - with open(chunkfile + ".hmac", 'w') as hmac_file: - hmac_file.write(hmac_data) - - # Send the HMAC to the backup target - self._queue_put_with_check( - send_proc, vmproc, to_send, - os.path.relpath(chunkfile, self.tmpdir) + ".hmac") - if tar_sparse.poll() is None or run_error == "size_limit": run_error = "paused" else: @@ -1310,6 +1335,8 @@ def get_supported_hmac_algo(hmac_algorithm=None): # Start with provided default if hmac_algorithm: yield hmac_algorithm + if hmac_algorithm != 'scrypt': + yield 'scrypt' proc = subprocess.Popen(['openssl', 'list-message-digest-algorithms'], stdout=subprocess.PIPE) for algo in proc.stdout.readlines(): @@ -1529,6 +1556,10 @@ def _start_retrieval_process(self, filelist, limit_count, limit_bytes): def _verify_hmac(self, filename, hmacfile, algorithm=None): def load_hmac(hmac_text): + if filter(lambda x: ord(x) not in range(128), + hmac_text): + raise qubes.exc.QubesException( + "Invalid content of {}".format(hmacfile)) hmac_text = hmac_text.strip().split("=") if len(hmac_text) > 1: hmac_text = hmac_text[1].strip() @@ -1547,6 +1578,17 @@ def load_hmac(hmac_text): "ERROR: expected hmac for {}, but got {}". format(filename, hmacfile)) + if algorithm == 'scrypt': + # in case of 'scrypt' _verify_hmac is only used for backup header + assert filename == HEADER_FILENAME + self._verify_and_decrypt(hmacfile, HEADER_FILENAME + '.dec') + if open(os.path.join(self.tmpdir, filename)).read() != \ + open(os.path.join(self.tmpdir, filename + '.dec')).read(): + raise qubes.exc.QubesException( + 'Invalid hmac on {}'.format(filename)) + else: + return True + hmac_proc = subprocess.Popen( ["openssl", "dgst", "-" + algorithm, "-hmac", passphrase], stdin=open(os.path.join(self.tmpdir, filename), 'rb'), @@ -1572,6 +1614,72 @@ def load_hmac(hmac_text): "Is the passphrase correct?". format(filename, load_hmac(hmac_stdout))) + def _verify_and_decrypt(self, filename, output=None): + assert filename.endswith('.enc') or filename.endswith('.hmac') + fullname = os.path.join(self.tmpdir, filename) + (origname, _) = os.path.splitext(filename) + if output: + fulloutput = os.path.join(self.tmpdir, output) + else: + fulloutput = os.path.join(self.tmpdir, origname) + passphrase = origname + '!' + self.passphrase.encode('utf-8') + p = launch_scrypt('dec', fullname, fulloutput, passphrase) + (_, stderr) = p.communicate() + if p.returncode != 0: + os.unlink(fulloutput) + raise qubes.exc.QubesException('failed to decrypt {}: {}'.format( + fullname, stderr)) + # encrypted file is no longer needed + os.unlink(fullname) + return origname + + def _retrieve_backup_header_files(self, files, allow_none=False): + (retrieve_proc, filelist_pipe, error_pipe) = \ + self._start_retrieval_process( + files, len(files), 1024 * 1024) + filelist = filelist_pipe.read() + retrieve_proc_returncode = retrieve_proc.wait() + if retrieve_proc in self.processes_to_kill_on_cancel: + self.processes_to_kill_on_cancel.remove(retrieve_proc) + extract_stderr = error_pipe.read(MAX_STDERR_BYTES) + + # wait for other processes (if any) + for proc in self.processes_to_kill_on_cancel: + if proc.wait() != 0: + raise qubes.exc.QubesException( + "Backup header retrieval failed (exit code {})".format( + proc.wait()) + ) + + if retrieve_proc_returncode != 0: + if not filelist and 'Not found in archive' in extract_stderr: + if allow_none: + return None + else: + raise qubes.exc.QubesException( + "unable to read the qubes backup file {0} ({1}): {2}".format( + self.backup_location, + retrieve_proc.wait(), + extract_stderr + )) + actual_files = filelist.splitlines() + if sorted(actual_files) != sorted(files): + raise qubes.exc.QubesException( + 'unexpected files in archive: got {!r}, expeced {!r}'.format( + actual_files, files + )) + for f in files: + if not os.path.exists(os.path.join(self.tmpdir, f)): + if allow_none: + return None + else: + raise qubes.exc.QubesException( + 'Unable to retrieve file {} from backup {}: {}'.format( + f, self.backup_location, extract_stderr + ) + ) + return files + def _retrieve_backup_header(self): """Retrieve backup header and qubes.xml. Only backup header is analyzed, qubes.xml is left as-is @@ -1588,82 +1696,47 @@ def _retrieve_backup_header(self): header_data.version = 1 return header_data - (retrieve_proc, filelist_pipe, error_pipe) = \ - self._start_retrieval_process( - ['backup-header', 'backup-header.hmac', - 'qubes.xml.000', 'qubes.xml.000.hmac'], 4, 1024 * 1024) - - expect_tar_error = False - - filename = filelist_pipe.readline().strip() - hmacfile = filelist_pipe.readline().strip() - # tar output filename before actually extracting it, so wait for the - # next one before trying to access it - if not self.backup_vm: - filelist_pipe.readline().strip() + header_files = self._retrieve_backup_header_files( + ['backup-header', 'backup-header.hmac'], allow_none=True) - self.log.debug("Got backup header and hmac: {}, {}".format( - filename, hmacfile)) - - if not filename or filename == "EOF" or \ - not hmacfile or hmacfile == "EOF": - retrieve_proc.wait() - proc_error_msg = error_pipe.read(MAX_STDERR_BYTES) - raise qubes.exc.QubesException( - "Premature end of archive while receiving " - "backup header. Process output:\n" + proc_error_msg) - file_ok = False - hmac_algorithm = DEFAULT_HMAC_ALGORITHM - for hmac_algo in get_supported_hmac_algo(hmac_algorithm): - try: - if self._verify_hmac(filename, hmacfile, hmac_algo): - file_ok = True - hmac_algorithm = hmac_algo - break - except qubes.exc.QubesException: - # Ignore exception here, try the next algo - pass - if not file_ok: - raise qubes.exc.QubesException( - "Corrupted backup header (hmac verification " - "failed). Is the password correct?") - if os.path.basename(filename) == HEADER_FILENAME: - filename = os.path.join(self.tmpdir, filename) - header_data = BackupHeader(open(filename, 'r').read()) - os.unlink(filename) - else: - # if no header found, create one with guessed HMAC algo + if not header_files: + # R2-Beta3 didn't have backup header, so if none is found, + # assume it's version=2 and use values present at that time header_data = BackupHeader( version=2, - hmac_algorithm=hmac_algorithm, # place explicitly this value, because it is what format_version # 2 have + hmac_algorithm='SHA1', crypto_algorithm='aes-256-cbc', # TODO: set encrypted to something... ) - # when tar do not find expected file in archive, it exit with - # code 2. This will happen because we've requested backup-header - # file, but the archive do not contain it. Ignore this particular - # error. - if not self.backup_vm: - expect_tar_error = True - - if retrieve_proc.wait() != 0 and not expect_tar_error: - raise qubes.exc.QubesException( - "unable to read the qubes backup file {0} ({1}): {2}".format( - self.backup_location, - retrieve_proc.wait(), - error_pipe.read(MAX_STDERR_BYTES) - )) - if retrieve_proc in self.processes_to_kill_on_cancel: - self.processes_to_kill_on_cancel.remove(retrieve_proc) - # wait for other processes (if any) - for proc in self.processes_to_kill_on_cancel: - if proc.wait() != 0: + else: + filename = HEADER_FILENAME + hmacfile = HEADER_FILENAME + '.hmac' + self.log.debug("Got backup header and hmac: {}, {}".format( + filename, hmacfile)) + + file_ok = False + hmac_algorithm = DEFAULT_HMAC_ALGORITHM + for hmac_algo in get_supported_hmac_algo(hmac_algorithm): + try: + if self._verify_hmac(filename, hmacfile, hmac_algo): + file_ok = True + break + except qubes.exc.QubesException as e: + self.log.debug( + 'Failed to verify {} using {}: {}'.format( + hmacfile, hmac_algo, str(e))) + # Ignore exception here, try the next algo + pass + if not file_ok: raise qubes.exc.QubesException( - "Backup header retrieval failed (exit code {})".format( - proc.wait()) - ) + "Corrupted backup header (hmac verification " + "failed). Is the password correct?") + filename = os.path.join(self.tmpdir, filename) + header_data = BackupHeader(open(filename, 'r').read()) + os.unlink(filename) + return header_data def _start_inner_extraction_worker(self, queue, relocate): @@ -1696,6 +1769,9 @@ def _start_inner_extraction_worker(self, queue, relocate): elif format_version in [3, 4]: extractor_params['compression_filter'] = \ self.header_data.compression_filter + if format_version == 4: + # encryption already handled + extractor_params['encrypted'] = False extract_proc = ExtractWorker3(**extractor_params) else: raise NotImplementedError( @@ -1714,7 +1790,14 @@ def _process_qubes_xml(self): offline_mode=True) return backup_app else: - self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac") + if self.header_data.version in [2, 3]: + self._retrieve_backup_header_files( + ['qubes.xml.000', 'qubes.xml.000.hmac']) + self._verify_hmac("qubes.xml.000", "qubes.xml.000.hmac") + else: + self._retrieve_backup_header_files(['qubes.xml.000.enc']) + self._verify_and_decrypt('qubes.xml.000.enc') + queue = Queue() queue.put("qubes.xml.000") queue.put(QUEUE_FINISHED) @@ -1762,6 +1845,7 @@ def _restore_vm_dirs(self, vms_dirs, vms_size, relocate): try: filename = None + hmacfile = None nextfile = None while True: if self.canceled: @@ -1785,30 +1869,58 @@ def _restore_vm_dirs(self, vms_dirs, vms_size, relocate): if not filename or filename == "EOF": break - hmacfile = filelist_pipe.readline().strip() - - if self.canceled: - break # if reading archive directly with tar, wait for next filename - # tar prints filename before processing it, so wait for # the next one to be sure that whole file was extracted if not self.backup_vm: nextfile = filelist_pipe.readline().strip() - self.log.debug("Getting hmac:" + hmacfile) - if not hmacfile or hmacfile == "EOF": - # Premature end of archive, either of tar1_command or - # vmproc exited with error - break + if self.header_data.version in [2, 3]: + if not self.backup_vm: + hmacfile = nextfile + nextfile = filelist_pipe.readline().strip() + else: + hmacfile = filelist_pipe.readline().strip() + + if self.canceled: + break + + self.log.debug("Getting hmac:" + hmacfile) + if not hmacfile or hmacfile == "EOF": + # Premature end of archive, either of tar1_command or + # vmproc exited with error + break + else: # self.header_data.version == 4 + if not filename.endswith('.enc'): + raise qubes.exc.QubesException( + 'Invalid file extension found in archive: {}'. + format(filename)) if not any(map(lambda x: filename.startswith(x), vms_dirs)): self.log.debug("Ignoring VM not selected for restore") os.unlink(os.path.join(self.tmpdir, filename)) - os.unlink(os.path.join(self.tmpdir, hmacfile)) + if hmacfile: + os.unlink(os.path.join(self.tmpdir, hmacfile)) continue - if self._verify_hmac(filename, hmacfile): - to_extract.put(os.path.join(self.tmpdir, filename)) + if self.header_data.version in [2, 3]: + self._verify_hmac(filename, hmacfile) + else: + # _verify_and_decrypt will write output to a file with + # '.enc' extension cut off. This is safe because: + # - `scrypt` tool will override output, so if the file was + # already there (received from the VM), it will be removed + # - incoming archive extraction will refuse to override + # existing file, so if `scrypt` already created one, + # it can not be manipulated by the VM + # - when the file is retrieved from the VM, it appears at + # the final form - if it's visible, VM have no longer + # influence over its content + # + # This all means that if the file was correctly verified + # + decrypted, we will surely access the right file + filename = self._verify_and_decrypt(filename) + to_extract.put(os.path.join(self.tmpdir, filename)) if self.canceled: raise BackupCanceledError("Restore canceled", diff --git a/rpm_spec/core-dom0.spec b/rpm_spec/core-dom0.spec index ecfd43919..bce75a54a 100644 --- a/rpm_spec/core-dom0.spec +++ b/rpm_spec/core-dom0.spec @@ -83,6 +83,7 @@ Requires: createrepo Requires: gnome-packagekit Requires: cronie Requires: bsdtar +Requires: scrypt # for qubes-hcl-report Requires: dmidecode Requires: PyQt4