diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 078a9d10e0290d95b1109b44fbd57cd0fbeee3c3..03aa46802f8f52520397de46f54df10abf0bb77f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -93,4 +93,4 @@ repos: args: ["--python-version=3.10"] additional_dependencies: - types-requests - - behave-utils ~=0.4.1 + - behave-utils ~=0.6.1 diff --git a/plugins/docker-integration.php b/plugins/docker-integration.php index a30e2fa599e1a9e99f5ea310fc6cd42a0fb1649f..63f3b5a829c732d6846ee0f1130e74104a4f69f8 100644 --- a/plugins/docker-integration.php +++ b/plugins/docker-integration.php @@ -65,8 +65,7 @@ add_filter( function ( $params ) { $params['endpoint'] = S3_MEDIA_ENDPOINT; - $params['bucket_endpoint'] = true; - $params['disable_host_prefix_injection'] = true; + $params['bucket_endpoint'] = S3_MEDIA_DISABLE_INJECTION; $params['use_path_style_endpoint'] = true; $params['debug'] = S3_DEBUG; return $params; diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh index 853bc1d7c12471f25fa720f1abdcab9b349e61c8..7b64e69e787f274d979d50ed526d1e9ab3de4fec 100755 --- a/scripts/entrypoint.sh +++ b/scripts/entrypoint.sh @@ -52,6 +52,11 @@ timestamp() echo "[$(date --utc +'%Y-%m-%dT%H:%M:%S%z')] $*" } +fatal() +{ + timestamp >&2 "FATAL:" "$@" +} + create_config() { [[ -f wp-config.php ]] && unlink wp-config.php @@ -115,21 +120,37 @@ setup_s3() { [[ -v S3_MEDIA_SECRET ]] || return 0 - wp config set S3_UPLOADS_BUCKET_URL "${S3_MEDIA_REWRITE_URL-$S3_MEDIA_ENDPOINT}" + local path_prefix=${S3_MEDIA_PREFIX+/${S3_MEDIA_PREFIX#/}} + local rewrite_url=${S3_MEDIA_ENDPOINT%/}${path_prefix} + + if [[ $S3_MEDIA_ENDPOINT =~ ^(https?://[^/]+)/([^/]+)/?(.*) ]]; then + [[ ${#BASH_REMATCH[3]} -gt 0 ]] && fatal \ + "S3_MEDIA_ENDPOINT may only contain a bucket name as a path," \ + "provide S3 path prefixes with S3_MEDIA_PREFIX" + wp config set S3_MEDIA_ENDPOINT "${BASH_REMATCH[1]}" + wp config set S3_UPLOADS_BUCKET "${BASH_REMATCH[2]}${path_prefix}" + wp config set S3_MEDIA_DISABLE_INJECTION false --raw + else + wp config set S3_MEDIA_ENDPOINT "${S3_MEDIA_ENDPOINT}" + # The plugin needs a value, but it is not injected into URLS + wp config set S3_UPLOADS_BUCKET "media-bucket" + wp config set S3_MEDIA_DISABLE_INJECTION true --raw + fi + + # Workaround for hardcoded amazonaws.com URL in plugin + wp config set S3_UPLOADS_BUCKET_URL "${S3_MEDIA_REWRITE_URL-$rewrite_url}" - wp config set S3_MEDIA_ENDPOINT "${S3_MEDIA_ENDPOINT}" wp config set S3_UPLOADS_KEY "${S3_MEDIA_KEY}" wp config set S3_UPLOADS_SECRET "${S3_MEDIA_SECRET}" --quiet # Plugin requires something here, it's not used wp config set S3_UPLOADS_REGION 'eu-west-1' - wp config set S3_UPLOADS_BUCKET "media-bucket" - - # If there is anything in ./media, upload it - local contents=( media/* ) + # If there is anything in the media dir, upload it + get_writable_dirs + local contents=( "$MEDIA"/* ) [[ ${#contents[*]} -gt 0 ]] && - wp s3-uploads upload-directory media + wp s3-uploads upload-directory "$MEDIA" # Clear potentialy sensitive information from environment lest it leaks unset ${!S3_MEDIA_*} diff --git a/tests/environment.py b/tests/environment.py index 5f2f04b915fb77adbb866600a2d7b27a7b139b5d..8e6abe3a7249773faf0cd18563fd05496368a7e3 100644 --- a/tests/environment.py +++ b/tests/environment.py @@ -1,4 +1,4 @@ -# Copyright 2021-2023 Dominik Sekotill +# Copyright 2021-2024 Dominik Sekotill # # This Source Code Form is subject to the terms of the Mozilla Public # License, v. 2.0. If a copy of the MPL was not distributed with this @@ -15,12 +15,14 @@ https://behave.readthedocs.io/en/stable/tutorial.html#environmental-controls from __future__ import annotations import sys +from pathlib import Path from typing import TYPE_CHECKING from behave import use_fixture from behave.model import Feature from behave.model import Scenario from behave.runner import Context +from behave_utils.behave import register_pattern from behave_utils.mysql import snapshot_rollback from wp import running_site_fixture @@ -29,6 +31,9 @@ if TYPE_CHECKING: from behave.runner import ScenarioContext +register_pattern("\S+", Path) + + def before_all(context: Context) -> None: """ Prepare fixtures for all tests diff --git a/tests/minio.py b/tests/minio.py new file mode 100644 index 0000000000000000000000000000000000000000..5ee957981b0fc92e78feff24c9a6cf03ec5a4c4a --- /dev/null +++ b/tests/minio.py @@ -0,0 +1,254 @@ +# Copyright 2024 Dominik Sekotill +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Fixtures for S3 services +""" + +from __future__ import annotations + +import atexit +import json +from collections.abc import Iterator +from gzip import GzipFile +from pathlib import Path +from typing import ClassVar +from typing import TypeAlias +from uuid import NAMESPACE_URL +from uuid import uuid3 + +from behave import fixture +from behave.runner import Context +from behave_utils import URL +from behave_utils.binaries import DownloadableExecutable +from behave_utils.docker import Cli +from behave_utils.docker import Container +from behave_utils.docker import Image +from behave_utils.docker import Network +from behave_utils.secret import make_secret +from behave_utils.utils import wait +from requests import Session +from typing_extensions import Self +from wp import Site + +BucketsKey: TypeAlias = tuple[str, bool] + +CURRENT_BUCKET_KEY: BucketsKey = "current://", True + +MINIO_IMAGE = Image.pull(f"quay.io/minio/minio:latest") + +__all__ = [ + "Bucket", + "Minio", + "bucket_fixture", + "current_bucket_fixture", +] + + +class DownloadableMC(DownloadableExecutable, name="minio-client"): + + def get_latest(self, session: Session) -> str: + return "latest" + + def get_stream(self, session: Session, version: str) -> GzipFile: + binary = "mc.exe" if self.kernel == "windows" else "mc" + url = f"https://dl.min.io/client/mc/release/{self.kernel}-{self.goarch}/{binary}" + resp = session.get(url, allow_redirects=True, stream=True) + assert resp.raw is not None + return GzipFile(fileobj=resp.raw) + + +class Minio(Container): + """ + A `Container` subclass to run and manage a Minio S3 service + """ + + _inst: ClassVar[Minio|None] = None + + domain = "s3.test.local" + + @classmethod + def get_running(cls) -> Minio: + """ + Return a running instance of the Minio server + """ + if not (self := cls._inst): + self = cls._inst = Minio() + self.start() + return self + + def __init__(self) -> None: + self.key = make_secret(8) + self.secret = make_secret(20) + + mc_bin = DownloadableMC("latest").get_binary() + Container.__init__( + self, MINIO_IMAGE, + ["server", "/tmp", "--address=:80", "--console-address=:9001"], + volumes=[(mc_bin, Path("/bin/mc"))], + env=dict( + MINIO_ROOT_USER=self.key, + MINIO_ROOT_PASSWORD=self.secret, + MINIO_DOMAIN=self.domain, + ), + ) + self.mc = Cli(self, "/bin/mc") + + def start(self) -> None: + """ + Idempotently start the service, and register an alias to it with the *mc* tool + """ + if self.is_running(): + return + super().start() + atexit.register(self.stop, rm=True) + wait(lambda: self.is_running()) + # Add "local" alias + self.mc("config", "host", "add", "local", "http://localhost", self.key, self.secret) + + def bucket_domain(self, bucket: str, use_subdomain: bool) -> str: + """ + Return the domain name to use for a given bucket + + If 'use_subdomain' is `True` the name will consist of the bucket name as a subdomain + of the server domain, otherwise it be the same as the server' domain. + """ + return f"{bucket}.{self.domain}" if use_subdomain else f"{self.domain}" + + def bucket_url(self, bucket: str, use_subdomain: bool) -> URL: + """ + Return the HTTP endpoint URL to use for the given bucket + + The URL will have the domain as returned by `Minio.bucket_domain`, with the bucket + name as the only path component if 'use_subdomain' is `False`. + """ + domain = self.bucket_domain(bucket, use_subdomain) + base = URL(f"http://{domain}") + return base if use_subdomain else (base / bucket) + + def add_bucket_user(self, bucket: str, key: str, secret: str) -> None: + """ + Create an ephemeral bucket and account with access to that bucket + """ + policy = f"{bucket}-write" + stmt = json.dumps( + dict( + Version="2012-10-17", + Statement=[ + dict( + Effect="Allow", + Action=["s3:ListBucket"], + Resource=[f"arn:aws:s3:::{bucket}"], + ), + dict( + Effect="Allow", + Action=["s3:GetObject", "s3:PutObject", "s3:DeleteObject"], + Resource=[f"arn:aws:s3:::{bucket}/*"], + ), + ], + ), + ) + self.mc("mb", f"local/{bucket}") + self.mc("admin", "user", "add", "local", key, secret) + self.mc("admin", "policy", "create", "local", policy, "/dev/stdin", input=stmt) + self.mc("admin", "policy", "attach", "local", policy, "--user", key) + + def rm_bucket_user(self, bucket: str, key: str) -> None: + """ + Remove an ephemeral bucket and user + """ + self.mc("admin", "user", "rm", "local", key) + self.mc("rb", f"local/{bucket}", "--force") + + def has_path(self, bucket: str, path: Path) -> bool: + """ + Return whether the given path (key) exists in the named bucket + """ + res = self.mc("stat", f"local/{bucket}/{path}", query=True) + # import time + # time.sleep(3600) + return res == 0 + + +class Bucket: + """ + An ephemeral bucket fixture within an S3 service + + To create the bucket on the server and update name records, an instance must be used as + a context manager. Once the context ends, the bucket is removed. + """ + + def __init__( + self, + ident: str, + use_subdomain: bool, + network: Network, + server: Minio|None = None, + ): + self.network = network + self.server = server or Minio.get_running() + + self.name = str(uuid3(NAMESPACE_URL, ident)) + self.domain = self.server.bucket_domain(self.name, use_subdomain) + self.url = self.server.bucket_url(self.name, use_subdomain) + self.key = make_secret(8) + self.secret = make_secret(20) + + def __enter__(self) -> Self: + self.server.start() # Ensure server is started, method is idempotent + self.server.add_bucket_user(self.name, self.key, self.secret) + self.server.connect(self.network, self.domain) + return self + + def __exit__(self, *_: object) -> None: + self.server.rm_bucket_user(self.name, self.key) + self.server.disconnect(self.network, self.domain) + + def has_path(self, path: Path) -> bool: + """ + Return whether the given path (key) exists in this bucket + """ + return self.server.has_path(self.name, path) + + +@fixture +def bucket_fixture(context: Context, /, site: Site, use_subdomain: bool) -> Iterator[Bucket]: + """ + When used with `use_fixture`, creates and returns a `Bucket` fixture + + The 'use_subdomain' parameter selects which type of bucket addressing is used, out of + subdomain or path-based buckets. + """ + buckets = _buckets_from_context(context) + key: BucketsKey = site.url, use_subdomain + if (bucket := buckets.get(key)): + yield bucket + return + prev = buckets.get(CURRENT_BUCKET_KEY, None) + with Bucket(site.url, use_subdomain, site.network) as bucket: + buckets[key] = buckets[CURRENT_BUCKET_KEY] = bucket + yield bucket + del buckets[key] + if prev: + buckets[CURRENT_BUCKET_KEY] = prev + else: + del buckets[CURRENT_BUCKET_KEY] + + +@fixture +def current_bucket_fixture(context: Context) -> Iterator[Bucket]: + """ + When used with `use_fixture`, returns the most recently created `Bucket` fixture + """ + buckets = _buckets_from_context(context) + yield buckets[CURRENT_BUCKET_KEY] + + +def _buckets_from_context(context: Context) -> dict[BucketsKey, Bucket]: + if not hasattr(context, "buckets"): + context.buckets = dict[BucketsKey, Bucket]() + assert isinstance(context.buckets, dict) + return context.buckets diff --git a/tests/requirements.txt b/tests/requirements.txt index b0423a81089369cc07bf8439d6bdbef261ea2207..6555343b0650971b23306b897862fdabd64d36c0 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -1,6 +1,6 @@ Python ~=3.10; python_version < '3.10' behave -behave-utils ~=0.5.0 +behave-utils ~=0.6.1 requests ~=2.26 typing-extensions ~=4.0 diff --git a/tests/s3-media.feature b/tests/s3-media.feature new file mode 100644 index 0000000000000000000000000000000000000000..4e97e37e2a745653e35725ffa3e0afe1e6a780b9 --- /dev/null +++ b/tests/s3-media.feature @@ -0,0 +1,19 @@ +Feature: S3 Media Uploads + When an S3 bucket is configured for user-uploaded media, media should be + pushed to the bucket during an upload; requests for the media either + proxied or provided with a URL at the bucket; and pre-existing media + uploaded during container startup to facilitate migrations. + + Scenario: Pre-existing media in the uploads directory is uploaded to a path bucket + Given the site is not running + And the site is configured to use S3 with path buckets + And /app/media/some-content.txt exists in the backend + When the site is started + Then the S3 bucket has some-content.txt + + Scenario: Pre-existing media in the uploads directory is uploaded to a subdomain bucket + Given the site is not running + And the site is configured to use S3 with subdomain buckets + And /app/media/some-content.txt exists in the backend + When the site is started + Then the S3 bucket has some-content.txt diff --git a/tests/steps/s3.py b/tests/steps/s3.py new file mode 100644 index 0000000000000000000000000000000000000000..6e267c95c3b1d8ddfa15d157c7220ebbd5cb46d2 --- /dev/null +++ b/tests/steps/s3.py @@ -0,0 +1,56 @@ +# Copyright 2024 Dominik Sekotill +# +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. + +""" +Step implementations involving S3 integrations with a site +""" + +from __future__ import annotations + +import enum +from pathlib import Path + +from behave import given +from behave import then +from behave import use_fixture +from behave.runner import Context +from behave_utils.behave import PatternEnum +from minio import bucket_fixture +from minio import current_bucket_fixture +from wp import site_fixture + + +class BucketStyle(PatternEnum): + """ + An enum of the two types of S3 bucket: subdomain or path-based + """ + + path = enum.auto() + subdomain = enum.auto() + + +@given("the site is configured to use S3") +@given("the site is configured to use S3 with {style:BucketStyle} buckets") +def configure_site(context: Context, style: BucketStyle = BucketStyle.path) -> None: + """ + Create a Minio fixture and configure the current (unstarted) site to use it + """ + site = use_fixture(site_fixture, context) + bucket = use_fixture(bucket_fixture, context, site, style is BucketStyle.subdomain) + site.backend.env.update( + S3_MEDIA_ENDPOINT=bucket.url, + S3_MEDIA_KEY=bucket.key, + S3_MEDIA_SECRET=bucket.secret, + ) + + +@then("the S3 bucket has {path:Path}") +def bucket_has(context: Context, path: Path) -> None: + """ + Check to see that a configured Minio bucket has the given path in it + """ + bucket = use_fixture(current_bucket_fixture, context) + assert bucket.has_path(path) diff --git a/tests/steps/site.py b/tests/steps/site.py index 3a14a8119708ef61bfb2541ebbdfc10006546b2c..7db8193cc7950eb5e7d9ea5e81345e8cdeaa7ec7 100644 --- a/tests/steps/site.py +++ b/tests/steps/site.py @@ -14,7 +14,6 @@ from base64 import b32encode as b32 from collections.abc import Iterator from contextlib import contextmanager from pathlib import Path -from tempfile import NamedTemporaryFile from tempfile import TemporaryDirectory from behave import fixture @@ -24,7 +23,6 @@ from behave import use_fixture from behave import when from behave.runner import Context from behave_utils.behave import PatternEnum -from behave_utils.behave import register_pattern from behave_utils.docker import Cli from behave_utils.docker import Container from behave_utils.url import URL @@ -37,9 +35,6 @@ CONFIG_DIR = Path(__file__).parent.parent / "configs" DELAYED_SITE = URL("http://delayed.example.com") -register_pattern("\S+", Path) - - class Addon(PatternEnum): """ Addon types for WP; i.e. themes or plugins @@ -93,10 +88,10 @@ def container_file( return # For unstarted containers, write to a temporary file and add it to the volumes mapping - with NamedTemporaryFile("wb") as temp: - temp.write(contents) - temp.flush() - container.volumes.append((Path(temp.name), path)) + with TemporaryDirectory() as tdir: + temp = Path(tdir) / "temp.dat" + temp.write_bytes(contents) + container.volumes.append((temp, path)) yield