From 17ff4260d4948b1cb52be9e510674a496b4b1310 Mon Sep 17 00:00:00 2001
From: Dom Sekotill
Date: Sat, 29 Jun 2024 01:53:52 +0100
Subject: [PATCH 1/4] Add S3_MEDIA settings tests

---
 .pre-commit-config.yaml |   2 +-
 tests/environment.py    |   7 +-
 tests/minio.py          | 254 ++++++++++++++++++++++++++++++++++++++++
 tests/requirements.txt  |   2 +-
 tests/s3-media.feature  |  19 +++
 tests/steps/s3.py       |  56 +++++++++
 tests/steps/site.py     |   4 -
 7 files changed, 337 insertions(+), 7 deletions(-)
 create mode 100644 tests/minio.py
 create mode 100644 tests/s3-media.feature
 create mode 100644 tests/steps/s3.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 078a9d1..03aa468 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -93,4 +93,4 @@ repos:
         args: ["--python-version=3.10"]
         additional_dependencies:
         - types-requests
-        - behave-utils ~=0.4.1
+        - behave-utils ~=0.6.1
diff --git a/tests/environment.py b/tests/environment.py
index 5f2f04b..8e6abe3 100644
--- a/tests/environment.py
+++ b/tests/environment.py
@@ -1,4 +1,4 @@
-# Copyright 2021-2023 Dominik Sekotill
+# Copyright 2021-2024 Dominik Sekotill
 #
 # This Source Code Form is subject to the terms of the Mozilla Public
 # License, v. 2.0. If a copy of the MPL was not distributed with this
@@ -15,12 +15,14 @@ https://behave.readthedocs.io/en/stable/tutorial.html#environmental-controls
 from __future__ import annotations
 
 import sys
+from pathlib import Path
 from typing import TYPE_CHECKING
 
 from behave import use_fixture
 from behave.model import Feature
 from behave.model import Scenario
 from behave.runner import Context
+from behave_utils.behave import register_pattern
 from behave_utils.mysql import snapshot_rollback
 from wp import running_site_fixture
 
@@ -29,6 +31,9 @@ if TYPE_CHECKING:
 	from behave.runner import ScenarioContext
 
 
+register_pattern(r"\S+", Path)
+
+
 def before_all(context: Context) -> None:
 	"""
 	Prepare fixtures for all tests
diff --git a/tests/minio.py b/tests/minio.py
new file mode 100644
index 0000000..5ee9579
--- /dev/null
+++ b/tests/minio.py
@@ -0,0 +1,254 @@
+# Copyright 2024 Dominik Sekotill
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Fixtures for S3 services
+"""
+
+from __future__ import annotations
+
+import atexit
+import json
+from collections.abc import Iterator
+from gzip import GzipFile
+from pathlib import Path
+from typing import ClassVar
+from typing import TypeAlias
+from uuid import NAMESPACE_URL
+from uuid import uuid3
+
+from behave import fixture
+from behave.runner import Context
+from behave_utils import URL
+from behave_utils.binaries import DownloadableExecutable
+from behave_utils.docker import Cli
+from behave_utils.docker import Container
+from behave_utils.docker import Image
+from behave_utils.docker import Network
+from behave_utils.secret import make_secret
+from behave_utils.utils import wait
+from requests import Session
+from typing_extensions import Self
+from wp import Site
+
+BucketsKey: TypeAlias = tuple[str, bool]
+
+CURRENT_BUCKET_KEY: BucketsKey = "current://", True
+
+MINIO_IMAGE = Image.pull("quay.io/minio/minio:latest")
+
+__all__ = [
+	"Bucket",
+	"Minio",
+	"bucket_fixture",
+	"current_bucket_fixture",
+]
+
+
+class DownloadableMC(DownloadableExecutable, name="minio-client"):
+
+	def get_latest(self, session: Session) -> str:
+		return "latest"
+
+	def get_stream(self, session: Session, version: str) -> GzipFile:
+		binary = "mc.exe" if self.kernel == "windows" else "mc"
+		url = f"https://dl.min.io/client/mc/release/{self.kernel}-{self.goarch}/{binary}"
+		resp = session.get(url, allow_redirects=True, stream=True)
+		assert resp.raw is not None
+		return GzipFile(fileobj=resp.raw)
+
+
+class Minio(Container):
+	"""
+	A `Container` subclass to run and manage a Minio S3 service
+	"""
+
+	_inst: ClassVar[Minio|None] = None
+
+	domain = "s3.test.local"
+
+	@classmethod
+	def get_running(cls) -> Minio:
+		"""
+		Return a running instance of the Minio server
+		"""
+		if not (self := cls._inst):
+			self = cls._inst = Minio()
+			self.start()
+		return self
+
+	def __init__(self) -> None:
+		self.key = make_secret(8)
+		self.secret = make_secret(20)
+
+		mc_bin = DownloadableMC("latest").get_binary()
+		Container.__init__(
+			self, MINIO_IMAGE,
+			["server", "/tmp", "--address=:80", "--console-address=:9001"],
+			volumes=[(mc_bin, Path("/bin/mc"))],
+			env=dict(
+				MINIO_ROOT_USER=self.key,
+				MINIO_ROOT_PASSWORD=self.secret,
+				MINIO_DOMAIN=self.domain,
+			),
+		)
+		self.mc = Cli(self, "/bin/mc")
+
+	def start(self) -> None:
+		"""
+		Idempotently start the service, and register an alias to it with the *mc* tool
+		"""
+		if self.is_running():
+			return
+		super().start()
+		atexit.register(self.stop, rm=True)
+		wait(lambda: self.is_running())
+		# Add "local" alias
+		self.mc("config", "host", "add", "local", "http://localhost", self.key, self.secret)
+
+	def bucket_domain(self, bucket: str, use_subdomain: bool) -> str:
+		"""
+		Return the domain name to use for a given bucket
+
+		If 'use_subdomain' is `True` the name will consist of the bucket name as a subdomain
+		of the server domain, otherwise it will be the same as the server's domain.
+		"""
+		return f"{bucket}.{self.domain}" if use_subdomain else self.domain
+
+	def bucket_url(self, bucket: str, use_subdomain: bool) -> URL:
+		"""
+		Return the HTTP endpoint URL to use for the given bucket
+
+		The URL will have the domain as returned by `Minio.bucket_domain`, with the bucket
+		name as the only path component if 'use_subdomain' is `False`.
+		"""
+		domain = self.bucket_domain(bucket, use_subdomain)
+		base = URL(f"http://{domain}")
+		return base if use_subdomain else (base / bucket)
+
+	def add_bucket_user(self, bucket: str, key: str, secret: str) -> None:
+		"""
+		Create an ephemeral bucket and account with access to that bucket
+		"""
+		policy = f"{bucket}-write"
+		stmt = json.dumps(
+			dict(
+				Version="2012-10-17",
+				Statement=[
+					dict(
+						Effect="Allow",
+						Action=["s3:ListBucket"],
+						Resource=[f"arn:aws:s3:::{bucket}"],
+					),
+					dict(
+						Effect="Allow",
+						Action=["s3:GetObject", "s3:PutObject", "s3:DeleteObject"],
+						Resource=[f"arn:aws:s3:::{bucket}/*"],
+					),
+				],
+			),
+		)
+		self.mc("mb", f"local/{bucket}")
+		self.mc("admin", "user", "add", "local", key, secret)
+		self.mc("admin", "policy", "create", "local", policy, "/dev/stdin", input=stmt)
+		self.mc("admin", "policy", "attach", "local", policy, "--user", key)
+
+	def rm_bucket_user(self, bucket: str, key: str) -> None:
+		"""
+		Remove an ephemeral bucket and user
+		"""
+		self.mc("admin", "user", "rm", "local", key)
+		self.mc("rb", f"local/{bucket}", "--force")
+
+	def has_path(self, bucket: str, path: Path) -> bool:
+		"""
+		Return whether the given path (key) exists in the named bucket
+		"""
+		res = self.mc("stat", f"local/{bucket}/{path}", query=True)
+		# import time
+		# time.sleep(3600)
+		return res == 0
+
+
+class Bucket:
+	"""
+	An ephemeral bucket fixture within an S3 service
+
+	To create the bucket on the server and update name records, an instance must be used as
+	a context manager. Once the context ends, the bucket is removed.
+	"""
+
+	def __init__(
+		self,
+		ident: str,
+		use_subdomain: bool,
+		network: Network,
+		server: Minio|None = None,
+	):
+		self.network = network
+		self.server = server or Minio.get_running()
+
+		self.name = str(uuid3(NAMESPACE_URL, ident))
+		self.domain = self.server.bucket_domain(self.name, use_subdomain)
+		self.url = self.server.bucket_url(self.name, use_subdomain)
+		self.key = make_secret(8)
+		self.secret = make_secret(20)
+
+	def __enter__(self) -> Self:
+		self.server.start()  # Ensure server is started, method is idempotent
+		self.server.add_bucket_user(self.name, self.key, self.secret)
+		self.server.connect(self.network, self.domain)
+		return self
+
+	def __exit__(self, *_: object) -> None:
+		self.server.rm_bucket_user(self.name, self.key)
+		self.server.disconnect(self.network, self.domain)
+
+	def has_path(self, path: Path) -> bool:
+		"""
+		Return whether the given path (key) exists in this bucket
+		"""
+		return self.server.has_path(self.name, path)
+
+
+@fixture
+def bucket_fixture(context: Context, /, site: Site, use_subdomain: bool) -> Iterator[Bucket]:
+	"""
+	When used with `use_fixture`, creates and returns a `Bucket` fixture
+
+	The 'use_subdomain' parameter selects which type of bucket addressing is used, out of
+	subdomain or path-based buckets.
+	"""
+	buckets = _buckets_from_context(context)
+	key: BucketsKey = site.url, use_subdomain
+	if (bucket := buckets.get(key)):
+		yield bucket
+		return
+	prev = buckets.get(CURRENT_BUCKET_KEY, None)
+	with Bucket(site.url, use_subdomain, site.network) as bucket:
+		buckets[key] = buckets[CURRENT_BUCKET_KEY] = bucket
+		yield bucket
+		del buckets[key]
+	if prev:
+		buckets[CURRENT_BUCKET_KEY] = prev
+	else:
+		del buckets[CURRENT_BUCKET_KEY]
+
+
+@fixture
+def current_bucket_fixture(context: Context) -> Iterator[Bucket]:
+	"""
+	When used with `use_fixture`, returns the most recently created `Bucket` fixture
+	"""
+	buckets = _buckets_from_context(context)
+	yield buckets[CURRENT_BUCKET_KEY]
+
+
+def _buckets_from_context(context: Context) -> dict[BucketsKey, Bucket]:
+	if not hasattr(context, "buckets"):
+		context.buckets = dict[BucketsKey, Bucket]()
+	assert isinstance(context.buckets, dict)
+	return context.buckets
diff --git a/tests/requirements.txt b/tests/requirements.txt
index b0423a8..6555343 100644
--- a/tests/requirements.txt
+++ b/tests/requirements.txt
@@ -1,6 +1,6 @@
 Python ~=3.10; python_version < '3.10'
 
 behave
-behave-utils ~=0.5.0
+behave-utils ~=0.6.1
 requests ~=2.26
 typing-extensions ~=4.0
diff --git a/tests/s3-media.feature b/tests/s3-media.feature
new file mode 100644
index 0000000..4e97e37
--- /dev/null
+++ b/tests/s3-media.feature
@@ -0,0 +1,19 @@
+Feature: S3 Media Uploads
+  When an S3 bucket is configured for user-uploaded media, media should be
+  pushed to the bucket during an upload; requests for the media should be
+  either proxied or served from a URL at the bucket; and pre-existing media
+  should be uploaded during container startup to facilitate migrations.
+
+  Scenario: Pre-existing media in the uploads directory is uploaded to a path bucket
+    Given the site is not running
+    And the site is configured to use S3 with path buckets
+    And /app/media/some-content.txt exists in the backend
+    When the site is started
+    Then the S3 bucket has some-content.txt
+
+  Scenario: Pre-existing media in the uploads directory is uploaded to a subdomain bucket
+    Given the site is not running
+    And the site is configured to use S3 with subdomain buckets
+    And /app/media/some-content.txt exists in the backend
+    When the site is started
+    Then the S3 bucket has some-content.txt
diff --git a/tests/steps/s3.py b/tests/steps/s3.py
new file mode 100644
index 0000000..6e267c9
--- /dev/null
+++ b/tests/steps/s3.py
@@ -0,0 +1,56 @@
+# Copyright 2024 Dominik Sekotill
+#
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+"""
+Step implementations involving S3 integrations with a site
+"""
+
+from __future__ import annotations
+
+import enum
+from pathlib import Path
+
+from behave import given
+from behave import then
+from behave import use_fixture
+from behave.runner import Context
+from behave_utils.behave import PatternEnum
+from minio import bucket_fixture
+from minio import current_bucket_fixture
+from wp import site_fixture
+
+
+class BucketStyle(PatternEnum):
+	"""
+	An enum of the two types of S3 bucket: subdomain or path-based
+	"""
+
+	path = enum.auto()
+	subdomain = enum.auto()
+
+
+@given("the site is configured to use S3")
+@given("the site is configured to use S3 with {style:BucketStyle} buckets")
+def configure_site(context: Context, style: BucketStyle = BucketStyle.path) -> None:
+	"""
+	Create a Minio fixture and configure the current (unstarted) site to use it
+	"""
+	site = use_fixture(site_fixture, context)
+	bucket = use_fixture(bucket_fixture, context, site, style is BucketStyle.subdomain)
+	site.backend.env.update(
+		S3_MEDIA_ENDPOINT=bucket.url,
+		S3_MEDIA_KEY=bucket.key,
+		S3_MEDIA_SECRET=bucket.secret,
+	)
+
+
+@then("the S3 bucket has {path:Path}")
+def bucket_has(context: Context, path: Path) -> None:
+	"""
+	Check to see that a configured Minio bucket has the given path in it
+	"""
+	bucket = use_fixture(current_bucket_fixture, context)
+	assert bucket.has_path(path)
diff --git a/tests/steps/site.py b/tests/steps/site.py
index 3a14a81..c066572 100644
--- a/tests/steps/site.py
+++ b/tests/steps/site.py
@@ -24,7 +24,6 @@ from behave import use_fixture
 from behave import when
 from behave.runner import Context
 from behave_utils.behave import PatternEnum
-from behave_utils.behave import register_pattern
 from behave_utils.docker import Cli
 from behave_utils.docker import Container
 from behave_utils.url import URL
@@ -37,9 +36,6 @@ CONFIG_DIR = Path(__file__).parent.parent / "configs"
 
 DELAYED_SITE = URL("http://delayed.example.com")
 
-register_pattern("\S+", Path)
-
-
 class Addon(PatternEnum):
 	"""
 	Addon types for WP; i.e. themes or plugins
-- 
GitLab


From 5b3d9810c2f4d87bf0d0620cddd8d9591711e111 Mon Sep 17 00:00:00 2001
From: Dom Sekotill
Date: Sat, 29 Jun 2024 02:20:57 +0100
Subject: [PATCH 2/4] Fix temp file fixture re. owner changes

When temporary files were bind-mounted into containers and their owner UID
was subsequently changed, the files could not be deleted during cleanup.
The solution is to create them inside temporary directories and remove
those directories instead.
---
 tests/steps/site.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/tests/steps/site.py b/tests/steps/site.py
index c066572..7db8193 100644
--- a/tests/steps/site.py
+++ b/tests/steps/site.py
@@ -14,7 +14,6 @@ from base64 import b32encode as b32
 from collections.abc import Iterator
 from contextlib import contextmanager
 from pathlib import Path
-from tempfile import NamedTemporaryFile
 from tempfile import TemporaryDirectory
 
 from behave import fixture
@@ -89,10 +88,10 @@ def container_file(
 		return
 
 	# For unstarted containers, write to a temporary file and add it to the volumes mapping
-	with NamedTemporaryFile("wb") as temp:
-		temp.write(contents)
-		temp.flush()
-		container.volumes.append((Path(temp.name), path))
+	with TemporaryDirectory() as tdir:
+		temp = Path(tdir) / "temp.dat"
+		temp.write_bytes(contents)
+		container.volumes.append((temp, path))
 		yield
 
 
-- 
GitLab


From a40857f7ecb84603ef9eeba19642d9fb3778666f Mon Sep 17 00:00:00 2001
From: Dom Sekotill
Date: Sat, 29 Jun 2024 02:24:43 +0100
Subject: [PATCH 3/4] Fix S3_MEDIA settings for subdomain style buckets

---
 plugins/docker-integration.php |  3 +--
 scripts/entrypoint.sh          | 28 ++++++++++++++++++++++++----
 2 files changed, 25 insertions(+), 6 deletions(-)

diff --git a/plugins/docker-integration.php b/plugins/docker-integration.php
index a30e2fa..63f3b5a 100644
--- a/plugins/docker-integration.php
+++ b/plugins/docker-integration.php
@@ -65,8 +65,7 @@ add_filter(
 
 	function ( $params ) {
 		$params['endpoint'] = S3_MEDIA_ENDPOINT;
-		$params['bucket_endpoint'] = true;
-		$params['disable_host_prefix_injection'] = true;
+		$params['bucket_endpoint'] = S3_MEDIA_DISABLE_INJECTION;
 		$params['use_path_style_endpoint'] = true;
 		$params['debug'] = S3_DEBUG;
 		return $params;
diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index 853bc1d..ac0ee00 100755
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -52,6 +52,11 @@ timestamp()
 	echo "[$(date --utc +'%Y-%m-%dT%H:%M:%S%z')] $*"
 }
 
+fatal()
+{
+	timestamp >&2 "FATAL:" "$@"
+}
+
 create_config()
 {
 	[[ -f wp-config.php ]] && unlink wp-config.php
@@ -115,17 +120,32 @@ setup_s3()
 {
 	[[ -v S3_MEDIA_SECRET ]] || return 0
 
-	wp config set S3_UPLOADS_BUCKET_URL "${S3_MEDIA_REWRITE_URL-$S3_MEDIA_ENDPOINT}"
+	local path_prefix=${S3_MEDIA_PREFIX+/${S3_MEDIA_PREFIX#/}}
+	local rewrite_url=${S3_MEDIA_ENDPOINT%/}${path_prefix}
+
+	if [[ $S3_MEDIA_ENDPOINT =~ ^(https?://[^/]+)/([^/]+)/?(.*) ]]; then
+		[[ ${#BASH_REMATCH[3]} -gt 0 ]] && fatal \
+			"S3_MEDIA_ENDPOINT may only contain a bucket name as a path," \
+			"provide S3 path prefixes with S3_MEDIA_PREFIX"
+		wp config set S3_MEDIA_ENDPOINT "${BASH_REMATCH[1]}"
+		wp config set S3_UPLOADS_BUCKET "${BASH_REMATCH[2]}${path_prefix}"
+		wp config set S3_MEDIA_DISABLE_INJECTION false --raw
+	else
+		wp config set S3_MEDIA_ENDPOINT "${S3_MEDIA_ENDPOINT}"
+		# The plugin needs a value, but it is not injected into URLs
+		wp config set S3_UPLOADS_BUCKET "media-bucket"
+		wp config set S3_MEDIA_DISABLE_INJECTION true --raw
+	fi
+
+	# Workaround for hardcoded amazonaws.com URL in plugin
+	wp config set S3_UPLOADS_BUCKET_URL "${S3_MEDIA_REWRITE_URL-$rewrite_url}"
 
-	wp config set S3_MEDIA_ENDPOINT "${S3_MEDIA_ENDPOINT}"
 	wp config set S3_UPLOADS_KEY "${S3_MEDIA_KEY}"
 	wp config set S3_UPLOADS_SECRET "${S3_MEDIA_SECRET}" --quiet
 
 	# Plugin requires something here, it's not used
 	wp config set S3_UPLOADS_REGION 'eu-west-1'
-	wp config set S3_UPLOADS_BUCKET "media-bucket"
-
 
 	# If there is anything in ./media, upload it
 	local contents=( media/* )
 	[[ ${#contents[*]} -gt 0 ]] &&
-- 
GitLab


From cffb22ff062ff288c9fb0bf6dcc8009c763b83f5 Mon Sep 17 00:00:00 2001
From: Dom Sekotill
Date: Sat, 29 Jun 2024 13:05:01 +0100
Subject: [PATCH 4/4] Upload to S3 from dynamically configured media uploads directory

---
 scripts/entrypoint.sh | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/scripts/entrypoint.sh b/scripts/entrypoint.sh
index ac0ee00..7b64e69 100755
--- a/scripts/entrypoint.sh
+++ b/scripts/entrypoint.sh
@@ -146,10 +146,11 @@ setup_s3()
 	# Plugin requires something here, it's not used
 	wp config set S3_UPLOADS_REGION 'eu-west-1'
 
-	# If there is anything in ./media, upload it
-	local contents=( media/* )
+	# If there is anything in the media dir, upload it
+	get_writable_dirs
+	local contents=( "$MEDIA"/* )
 	[[ ${#contents[*]} -gt 0 ]] &&
-		wp s3-uploads upload-directory media
+		wp s3-uploads upload-directory "$MEDIA"
 
 	# Clear potentialy sensitive information from environment lest it leaks
 	unset ${!S3_MEDIA_*}
-- 
GitLab
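
The entrypoint changes in PATCH 3/4 select the bucket addressing style from
the shape of S3_MEDIA_ENDPOINT: an endpoint whose only path component is a
bucket name is handled as a path-style bucket, while an endpoint with no
path is assumed to already address the bucket (for example via a
subdomain). As a minimal sketch of the resulting environment, assuming a
deployment that sets these variables directly (the hostnames, bucket names,
credentials and prefix below are invented; only the variable names come
from the patches above):

    # Path-style bucket: the bucket name is the single path component of
    # the endpoint; any further key prefix belongs in S3_MEDIA_PREFIX.
    S3_MEDIA_ENDPOINT=http://minio.internal:9000/site-media
    S3_MEDIA_PREFIX=uploads
    S3_MEDIA_KEY=media-access-key
    S3_MEDIA_SECRET=media-secret-key

    # Subdomain-style bucket: the endpoint already names the bucket in its
    # host part and carries no path component.
    #S3_MEDIA_ENDPOINT=http://site-media.s3.example.com

    # Optional override for the URL that media links are rewritten to.
    #S3_MEDIA_REWRITE_URL=https://media.example.com/uploads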