Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sanitize sandbox output #491

Merged
merged 9 commits into from
Aug 1, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@ since 0.4.1, and this project adheres to [Semantic Versioning](https://semver.or

- Continuously scan our Python dependencies and container image for
vulnerabilities ([issue #222](https://github.com/freedomofpress/dangerzone/issues/222))
- Sanitize potentially unsafe characters from strings that are shown in a user's
terminal.

## Dangerzone 0.4.1

Expand Down
6 changes: 5 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,11 @@ lint-apply: lint-black-apply lint-isort-apply ## apply all the linter's suggesti

.PHONY: test
test:
pytest -v --cov --ignore dev_scripts
# Make each GUI test run as a separate process, to avoid segfaults due to
# shared state.
# See more in https://github.com/freedomofpress/dangerzone/issues/493
pytest --co -q tests/gui | grep -v ' collected' | xargs -n 1 pytest -v
pytest -v --cov --ignore dev_scripts --ignore tests/gui


# Makefile self-help borrowed from the securedrop-client project
Expand Down
5 changes: 3 additions & 2 deletions dangerzone/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import appdirs

from . import errors
from . import errors, util

SAFE_EXTENSION = "-safe.pdf"
ARCHIVE_SUBDIR = "unsafe"
Expand Down Expand Up @@ -156,7 +156,8 @@ def default_output_filename(self) -> str:
return f"{os.path.splitext(self.input_filename)[0]}{self.suffix}"

def announce_id(self) -> None:
log.info(f"Assigning ID '{self.id}' to doc '{self.input_filename}'")
sanitized_filename = util.replace_control_chars(self.input_filename)
log.info(f"Assigning ID '{self.id}' to doc '{sanitized_filename}'")

def set_output_dir(self, path: str) -> None:
# keep the same name
Expand Down
6 changes: 3 additions & 3 deletions dangerzone/gui/logic.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from ..isolation_provider.base import IsolationProvider
from ..logic import DangerzoneCore
from ..settings import Settings
from ..util import get_resource_path
from ..util import get_resource_path, replace_control_chars

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -67,7 +67,7 @@ def open_pdf_viewer(self, filename: str) -> None:
args = ["open", "-a", "Preview.app", filename]

# Run
args_str = " ".join(shlex.quote(s) for s in args)
args_str = replace_control_chars(" ".join(shlex.quote(s) for s in args))
log.info(Fore.YELLOW + "> " + Fore.CYAN + args_str)
subprocess.run(args)

Expand All @@ -88,7 +88,7 @@ def open_pdf_viewer(self, filename: str) -> None:
args[i] = filename

# Open as a background process
args_str = " ".join(shlex.quote(s) for s in args)
args_str = replace_control_chars(" ".join(shlex.quote(s) for s in args))
log.info(Fore.YELLOW + "> " + Fore.CYAN + args_str)
subprocess.Popen(args)

Expand Down
1 change: 1 addition & 0 deletions dangerzone/gui/main_window.py
Original file line number Diff line number Diff line change
Expand Up @@ -988,6 +988,7 @@ def __init__(
self.error_label = QtWidgets.QLabel()
self.error_label.setAlignment(QtCore.Qt.AlignVCenter | QtCore.Qt.AlignLeft)
self.error_label.setWordWrap(True)
self.error_label.setTextFormat(QtCore.Qt.PlainText)
self.error_label.hide() # only show on error

# Progress bar
Expand Down
22 changes: 18 additions & 4 deletions dangerzone/isolation_provider/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from colorama import Fore, Style

from ..document import Document
from ..util import replace_control_chars

log = logging.getLogger(__name__)

Expand Down Expand Up @@ -49,21 +50,34 @@ def _convert(
) -> bool:
pass

def print_progress(
def _print_progress(
self, document: Document, error: bool, text: str, percentage: float
) -> None:
s = Style.BRIGHT + Fore.YELLOW + f"[doc {document.id}] "
s += Fore.CYAN + f"{percentage}% "
s += Fore.CYAN + f"{percentage}% " + Style.RESET_ALL
if error:
s += Style.RESET_ALL + Fore.RED + text
s += Fore.RED + text + Style.RESET_ALL
log.error(s)
else:
s += Style.RESET_ALL + text
s += text
log.info(s)

if self.progress_callback:
self.progress_callback(error, text, percentage)

def print_progress_trusted(
self, document: Document, error: bool, text: str, percentage: float
) -> None:
return self._print_progress(document, error, text, percentage)

def print_progress(
self, document: Document, error: bool, untrusted_text: str, percentage: float
) -> None:
text = replace_control_chars(untrusted_text)
return self.print_progress_trusted(
document, error, "UNTRUSTED> " + text, percentage
)

@abstractmethod
def get_max_parallel_conversions(self) -> int:
pass
Expand Down
47 changes: 35 additions & 12 deletions dangerzone/isolation_provider/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,15 @@
import shutil
import subprocess
import tempfile
from typing import Callable, List, Optional, Tuple
from typing import Any, Callable, List, Optional, Tuple

from ..document import Document
from ..util import get_resource_path, get_subprocess_startupinfo, get_tmp_dir
from ..util import (
get_resource_path,
get_subprocess_startupinfo,
get_tmp_dir,
replace_control_chars,
)
from .base import IsolationProvider

# Define startupinfo for subprocesses
Expand Down Expand Up @@ -133,19 +138,37 @@ def is_container_installed() -> bool:

return installed

def parse_progress(self, document: Document, line: str) -> None:
def assert_field_type(self, val: Any, _type: object) -> None:
# XXX: Use a stricter check than isinstance because `bool` is a subclass of
# `int`.
#
# See https://stackoverflow.com/a/37888668
if not type(val) == _type:
raise ValueError("Status field has incorrect type")

def parse_progress(self, document: Document, untrusted_line: str) -> None:
"""
Parses a line returned by the container.
"""
try:
status = json.loads(line)
self.print_progress(
document, status["error"], status["text"], status["percentage"]
untrusted_status = json.loads(untrusted_line)

text = untrusted_status["text"]
self.assert_field_type(text, str)

error = untrusted_status["error"]
self.assert_field_type(error, bool)

percentage = untrusted_status["percentage"]
self.assert_field_type(percentage, int)

self.print_progress(document, error, text, percentage)
except Exception:
line = replace_control_chars(untrusted_line)
error_message = (
f"Invalid JSON returned from container:\n\n\tUNTRUSTED> {line}"
)
except:
error_message = f"Invalid JSON returned from container:\n\n\t {line}"
log.error(error_message)
self.print_progress(document, True, error_message, -1)
self.print_progress_trusted(document, True, error_message, -1)

def exec(
self,
Expand All @@ -165,8 +188,8 @@ def exec(
startupinfo=startupinfo,
) as p:
if p.stdout is not None:
for line in p.stdout:
self.parse_progress(document, line)
for untrusted_line in p.stdout:
self.parse_progress(document, untrusted_line)

p.communicate()
return p.returncode
Expand Down
8 changes: 4 additions & 4 deletions dangerzone/isolation_provider/qubes.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,11 +120,11 @@ def _convert(
percentage += percentage_per_page

text = f"Converting page {page}/{n_pages} to pixels"
self.print_progress(document, False, text, percentage)
self.print_progress_trusted(document, False, text, percentage)

# TODO handle leftover code input
text = "Converted document to pixels"
self.print_progress(document, False, text, percentage)
self.print_progress_trusted(document, False, text, percentage)

# FIXME pass OCR stuff properly (see #455)
old_environ = dict(os.environ)
Expand All @@ -133,13 +133,13 @@ def _convert(
os.environ["OCR_LANGUAGE"] = ocr_lang

def print_progress_wrapper(error: bool, text: str, percentage: float) -> None:
self.print_progress(document, error, text, percentage)
self.print_progress_trusted(document, error, text, percentage)

asyncio.run(PixelsToPDF(progress_callback=print_progress_wrapper).convert())

percentage = 100.0
text = "Safe PDF created"
self.print_progress(document, False, text, percentage)
self.print_progress_trusted(document, False, text, percentage)

# FIXME remove once the OCR args are no longer passed with env vars
os.environ.clear()
Expand Down
10 changes: 10 additions & 0 deletions dangerzone/util.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pathlib
import platform
import string
import subprocess
import sys
from typing import Optional
Expand Down Expand Up @@ -62,3 +63,12 @@ def get_subprocess_startupinfo(): # type: ignore [no-untyped-def]
return startupinfo
else:
return None


def replace_control_chars(untrusted_str: str) -> str:
"""Remove control characters from string. Protects a terminal emulator
from obcure control characters"""
sanitized_str = ""
for char in untrusted_str:
sanitized_str += char if char in string.printable else "_"
return sanitized_str
3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,9 @@ pytest-qt = "^4.2.0"
pytest-cov = "^3.0.0"
strip-ansi = "*"

[tool.isort]
profile = "black"

[build-system]
requires = ["poetry>=1.1.4"]
build-backend = "poetry.masonry.api"
21 changes: 21 additions & 0 deletions tests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,24 @@ def unreadable_pdf(tmp_path: Path) -> str:
file_path = tmp_path / "document.pdf"
file_path.touch(mode=0o000)
return str(file_path)


@pytest.fixture
def uncommon_text() -> str:
"""Craft a string with Unicode characters that are considered not common.

Create a string that contains the following uncommon characters:

* ANSI escape sequences: \033[31;1;4m and \033[0m
* A Unicode character that resembles an English character: greek "X" (U+03A7)
* A Unicode control character that is not part of ASCII: zero-width joiner
(U+200D)
* An emoji: Cross Mark (U+274C)
"""
return "\033[31;1;4m BaD TeΧt \u200d ❌ \033[0m"


@pytest.fixture
def sanitized_text() -> str:
"""Return a sanitized version of the uncommon_text."""
return "_[31;1;4m BaD Te_t _ _ _[0m"
Empty file.
56 changes: 56 additions & 0 deletions tests/isolation_provider/test_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import pytest
from colorama import Style
from pytest_mock import MockerFixture

from dangerzone.document import Document
from dangerzone.isolation_provider import base, container, qubes

from .. import sanitized_text, uncommon_text


@pytest.mark.parametrize(
"provider",
[
container.Container(enable_timeouts=False),
qubes.Qubes(),
],
ids=["Container", "Qubes"],
)
def test_print_progress(
provider: base.IsolationProvider,
uncommon_text: str,
sanitized_text: str,
mocker: MockerFixture,
) -> None:
"""Test that the print_progress() method of our isolation providers sanitizes text.

Iterate our isolation providers and make sure that their print_progress() methods
sanitizes the provided text, before passing it to the logging functions and other
callbacks.
"""
d = Document()
provider.progress_callback = mocker.MagicMock()
log_info_spy = mocker.spy(base.log, "info")
log_error_spy = mocker.spy(base.log, "error")
_print_progress_spy = mocker.spy(provider, "_print_progress")

for error, untrusted_text, sanitized_text in [
(True, "normal text", "UNTRUSTED> normal text"),
(False, "normal text", "UNTRUSTED> normal text"),
(True, uncommon_text, "UNTRUSTED> " + sanitized_text),
(False, uncommon_text, "UNTRUSTED> " + sanitized_text),
]:
log_info_spy.reset_mock()
log_error_spy.reset_mock()

provider.print_progress(d, error, untrusted_text, 0)
provider.progress_callback.assert_called_with(error, sanitized_text, 0) # type: ignore [union-attr]
_print_progress_spy.assert_called_with(d, error, sanitized_text, 0)
if error:
assert log_error_spy.call_args[0][0].endswith(
sanitized_text + Style.RESET_ALL
)
log_info_spy.assert_not_called()
else:
assert log_info_spy.call_args[0][0].endswith(sanitized_text)
log_error_spy.assert_not_called()
Loading