"""Tests for the S3/MinIO storage backend in storage.py.

Covers issue #1660:
- S3StorageBackend read, write, exists, path_for
- Error handling: NoSuchKey, generic S3 errors, bucket auto-creation
- get_storage_backend() factory function
- LocalStorageBackend (basic sanity checks)
"""

from unittest.mock import MagicMock, patch

import pytest

from SPARC.storage import LocalStorageBackend, S3StorageBackend, get_storage_backend


def _fake_boto3():
    """Build a stand-in ``boto3`` module together with the client it returns.

    Returns:
        A ``(module, client)`` pair of ``MagicMock`` objects, wired so that
        ``module.client(...)`` returns ``client``.
    """
    module = MagicMock()
    client = MagicMock()
    module.client.return_value = client
    return module, client


def _arm_nosuchkey(backend):
    """Install a ``NoSuchKey`` exception class on the backend's mocked client.

    The backend's ``s3`` attribute is also pointed at the mocked client so
    that ``read()`` resolves ``exceptions.NoSuchKey`` on the same object the
    test configures.

    Returns:
        The freshly created ``NoSuchKey`` exception class.
    """
    client = backend._mock_s3
    missing_key_error = type("NoSuchKey", (Exception,), {})
    client.exceptions.NoSuchKey = missing_key_error
    backend.s3 = client
    return missing_key_error


# ---------- S3StorageBackend ----------


class TestS3StorageBackend:
    """Exercise S3StorageBackend against a mocked boto3 client."""

    @pytest.fixture
    def s3_backend(self):
        """Yield an S3StorageBackend whose boto3 client is a MagicMock.

        The mocked client is exposed as ``backend._mock_s3`` so tests can
        configure return values and make call assertions on it.
        """
        fake_module, fake_client = _fake_boto3()
        # head_bucket succeeding means the constructor sees an existing bucket.
        fake_client.head_bucket.return_value = {}
        with patch.dict("sys.modules", {"boto3": fake_module}):
            backend = S3StorageBackend(
                bucket="test-bucket",
                endpoint_url="http://minio:9000",
                access_key="minioadmin",
                secret_key="minioadmin",
            )
            backend._mock_s3 = fake_client
            yield backend

    def test_write_puts_object(self, s3_backend):
        """write() forwards bucket, key, body and content type to put_object."""
        payload = b"PDF content here"

        s3_backend.write("US-12345678-B2.pdf", payload)

        s3_backend._mock_s3.put_object.assert_called_once_with(
            Bucket="test-bucket",
            Key="US-12345678-B2.pdf",
            Body=payload,
            ContentType="application/pdf",
        )

    def test_read_returns_body(self, s3_backend):
        """read() returns whatever the get_object Body stream yields."""
        client = s3_backend._mock_s3
        body_stream = MagicMock()
        body_stream.read.return_value = b"PDF data"
        client.get_object.return_value = {"Body": body_stream}

        assert s3_backend.read("US-12345678-B2.pdf") == b"PDF data"
        client.get_object.assert_called_once_with(
            Bucket="test-bucket",
            Key="US-12345678-B2.pdf",
        )

    def test_read_nosuchkey_raises_file_not_found(self, s3_backend):
        """read() converts the client's NoSuchKey into FileNotFoundError."""
        missing_key_error = _arm_nosuchkey(s3_backend)
        s3_backend._mock_s3.get_object.side_effect = missing_key_error("not found")

        with pytest.raises(FileNotFoundError, match="S3 object not found"):
            s3_backend.read("missing.pdf")

    def test_read_generic_404_raises_file_not_found(self, s3_backend):
        """read() treats a generic exception mentioning 404 as a missing object."""
        _arm_nosuchkey(s3_backend)
        s3_backend.s3.get_object.side_effect = Exception("An error occurred (404)")

        with pytest.raises(FileNotFoundError, match="S3 object not found"):
            s3_backend.read("missing.pdf")

    def test_read_other_error_re_raises(self, s3_backend):
        """read() lets errors that are not 404-like propagate unchanged."""
        _arm_nosuchkey(s3_backend)
        s3_backend.s3.get_object.side_effect = Exception("Internal server error")

        with pytest.raises(Exception, match="Internal server error"):
            s3_backend.read("some-file.pdf")

    def test_exists_returns_true_for_existing_object(self, s3_backend):
        """exists() is True when head_object reports a non-zero length."""
        s3_backend._mock_s3.head_object.return_value = {"ContentLength": 1024}

        found = s3_backend.exists("US-12345678-B2.pdf")

        assert found is True

    def test_exists_returns_false_for_missing_object(self, s3_backend):
        """exists() is False when head_object raises."""
        s3_backend._mock_s3.head_object.side_effect = Exception("Not Found")

        found = s3_backend.exists("missing.pdf")

        assert found is False

    def test_exists_returns_false_for_zero_length(self, s3_backend):
        """exists() is False when head_object reports a length of zero."""
        s3_backend._mock_s3.head_object.return_value = {"ContentLength": 0}

        found = s3_backend.exists("empty.pdf")

        assert found is False

    def test_path_for_returns_s3_uri(self, s3_backend):
        """path_for() builds an s3:// URI from the bucket and the key."""
        assert (
            s3_backend.path_for("US-12345678-B2.pdf")
            == "s3://test-bucket/US-12345678-B2.pdf"
        )

    def test_constructor_creates_bucket_if_missing(self):
        """The constructor calls create_bucket when head_bucket raises."""
        fake_module, fake_client = _fake_boto3()
        fake_client.head_bucket.side_effect = Exception("Bucket not found")

        with patch.dict("sys.modules", {"boto3": fake_module}):
            S3StorageBackend(
                bucket="new-bucket",
                endpoint_url="http://minio:9000",
                access_key="admin",
                secret_key="admin",
            )

        fake_client.create_bucket.assert_called_once_with(Bucket="new-bucket")

    def test_constructor_handles_bucket_creation_failure(self):
        """The constructor does not raise when create_bucket itself fails."""
        fake_module, fake_client = _fake_boto3()
        fake_client.head_bucket.side_effect = Exception("Bucket not found")
        fake_client.create_bucket.side_effect = Exception("Permission denied")

        with patch.dict("sys.modules", {"boto3": fake_module}):
            # Construction must complete despite both client calls failing.
            backend = S3StorageBackend(
                bucket="locked-bucket",
                endpoint_url="http://minio:9000",
                access_key="admin",
                secret_key="admin",
            )

        assert backend.bucket == "locked-bucket"

    def test_constructor_passes_endpoint_and_credentials(self):
        """The constructor hands endpoint_url and credentials to boto3.client."""
        fake_module, _ = _fake_boto3()

        with patch.dict("sys.modules", {"boto3": fake_module}):
            S3StorageBackend(
                bucket="test",
                endpoint_url="http://minio:9000",
                access_key="mykey",
                secret_key="mysecret",
            )

        fake_module.client.assert_called_with(
            "s3",
            endpoint_url="http://minio:9000",
            aws_access_key_id="mykey",
            aws_secret_access_key="mysecret",
        )


# ---------- LocalStorageBackend ----------


class TestLocalStorageBackend:
    """Sanity checks for the filesystem-backed storage backend."""

    def test_write_and_read(self, tmp_path):
        """Bytes written under a name are returned unchanged by read()."""
        store = LocalStorageBackend(base_dir=str(tmp_path))
        store.write("test.pdf", b"hello world")

        assert store.read("test.pdf") == b"hello world"

    def test_read_missing_file_raises(self, tmp_path):
        """read() raises FileNotFoundError for a name that was never written."""
        store = LocalStorageBackend(base_dir=str(tmp_path))

        with pytest.raises(FileNotFoundError):
            store.read("nonexistent.pdf")

    def test_exists_true_for_written_file(self, tmp_path):
        """exists() is True once non-empty content has been written."""
        store = LocalStorageBackend(base_dir=str(tmp_path))
        store.write("test.pdf", b"data")

        assert store.exists("test.pdf") is True

    def test_exists_false_for_missing_file(self, tmp_path):
        """exists() is False for a name that was never written."""
        store = LocalStorageBackend(base_dir=str(tmp_path))

        assert store.exists("missing.pdf") is False

    def test_exists_false_for_empty_file(self, tmp_path):
        """exists() is False for a file written with zero bytes."""
        store = LocalStorageBackend(base_dir=str(tmp_path))
        store.write("empty.pdf", b"")

        assert store.exists("empty.pdf") is False

    def test_path_for_returns_full_path(self, tmp_path):
        """path_for() joins the base directory and the file name."""
        store = LocalStorageBackend(base_dir=str(tmp_path))
        expected = str(tmp_path / "test.pdf")

        assert store.path_for("test.pdf") == expected


# ---------- get_storage_backend() factory ----------


class TestGetStorageBackend:
    """Check which backend get_storage_backend() builds for a given config."""

    @patch("SPARC.storage.config")
    def test_returns_local_backend_by_default(self, mock_config):
        """storage_backend="local" produces a LocalStorageBackend."""
        mock_config.storage_backend = "local"

        assert isinstance(get_storage_backend(), LocalStorageBackend)

    @patch("SPARC.storage.config")
    def test_returns_s3_backend_when_configured(self, mock_config):
        """storage_backend="s3" produces an S3StorageBackend."""
        mock_config.storage_backend = "s3"
        mock_config.s3_bucket = "test-bucket"
        mock_config.s3_endpoint_url = "http://minio:9000"
        mock_config.s3_access_key = "key"
        mock_config.s3_secret_key = "secret"

        # boto3 is stubbed in sys.modules while the backend is constructed.
        with patch.dict("sys.modules", {"boto3": MagicMock()}):
            backend = get_storage_backend()

        assert isinstance(backend, S3StorageBackend)

    @patch("SPARC.storage.config")
    def test_case_insensitive_backend_selection(self, mock_config):
        """An upper-case storage_backend value still selects the local backend."""
        mock_config.storage_backend = "LOCAL"

        assert isinstance(get_storage_backend(), LocalStorageBackend)