# tests/test_storage.py
"""Tests for S3/MinIO storage backend in storage.py.

Covers issue #1660:
- S3StorageBackend read, write, exists, path_for
- Error handling: NoSuchKey, generic S3 errors, bucket auto-creation
- get_storage_backend() factory function
- LocalStorageBackend (basic sanity checks)
"""

from contextlib import contextmanager
from unittest.mock import MagicMock, patch

import pytest

from SPARC.storage import LocalStorageBackend, S3StorageBackend, get_storage_backend


# ---------- shared helpers ----------

@contextmanager
def _mocked_boto3():
    """Swap ``boto3`` in ``sys.modules`` for a mock while the context is active.

    Yields:
        A ``(mock_boto, mock_s3)`` tuple, where ``mock_boto.client(...)``
        returns ``mock_s3``. Any ``import boto3`` executed inside the context
        resolves to ``mock_boto``; the previous ``sys.modules`` state is
        restored on exit.
    """
    mock_boto = MagicMock()
    mock_s3 = MagicMock()
    mock_boto.client.return_value = mock_s3
    with patch.dict("sys.modules", {"boto3": mock_boto}):
        yield mock_boto, mock_s3


def _install_no_such_key(backend):
    """Attach a real ``NoSuchKey`` exception class to the backend's mocked client.

    An ``except`` clause can only name genuine exception classes, and a bare
    ``MagicMock`` attribute is not one, so tests exercising error paths install
    a real class first (presumably ``read()`` catches
    ``s3.exceptions.NoSuchKey`` -- confirm against storage.py).

    Args:
        backend: A backend produced by the ``s3_backend`` fixture, i.e. one
            that exposes its mocked client as ``_mock_s3``.

    Returns:
        The newly created ``NoSuchKey`` exception class.
    """
    no_such_key = type("NoSuchKey", (Exception,), {})
    backend._mock_s3.exceptions.NoSuchKey = no_such_key
    # Point ``backend.s3`` at the mock explicitly so the call under test is
    # guaranteed to use the client whose side effects the test configures.
    backend.s3 = backend._mock_s3
    return no_such_key


# ---------- S3StorageBackend ----------

class TestS3StorageBackend:
    """Tests for the S3-compatible storage backend."""

    @pytest.fixture
    def s3_backend(self):
        """Yield an S3StorageBackend built against a fully mocked boto3 client.

        The mocked client is exposed on the backend as ``_mock_s3`` so tests
        can configure return values and make call assertions. The boto3 mock
        stays installed for the whole duration of the test.
        """
        with _mocked_boto3() as (_, mock_s3):
            # A successful head_bucket response for the constructor's check.
            mock_s3.head_bucket.return_value = {}

            backend = S3StorageBackend(
                bucket="test-bucket",
                endpoint_url="http://minio:9000",
                access_key="minioadmin",
                secret_key="minioadmin",
            )
            # Expose mock for assertions
            backend._mock_s3 = mock_s3
            yield backend

    def test_write_puts_object(self, s3_backend):
        """write() calls put_object with correct bucket, key, body and type."""
        s3_backend.write("US-12345678-B2.pdf", b"PDF content here")

        s3_backend._mock_s3.put_object.assert_called_once_with(
            Bucket="test-bucket",
            Key="US-12345678-B2.pdf",
            Body=b"PDF content here",
            ContentType="application/pdf",
        )

    def test_read_returns_body(self, s3_backend):
        """read() returns the Body content from get_object."""
        mock_body = MagicMock()
        mock_body.read.return_value = b"PDF data"
        s3_backend._mock_s3.get_object.return_value = {"Body": mock_body}

        result = s3_backend.read("US-12345678-B2.pdf")

        assert result == b"PDF data"
        s3_backend._mock_s3.get_object.assert_called_once_with(
            Bucket="test-bucket",
            Key="US-12345678-B2.pdf",
        )

    def test_read_nosuchkey_raises_file_not_found(self, s3_backend):
        """read() raises FileNotFoundError when object does not exist."""
        no_such_key = _install_no_such_key(s3_backend)
        s3_backend.s3.get_object.side_effect = no_such_key("not found")

        with pytest.raises(FileNotFoundError, match="S3 object not found"):
            s3_backend.read("missing.pdf")

    def test_read_generic_404_raises_file_not_found(self, s3_backend):
        """read() handles generic 404 errors from S3-compatible APIs."""
        _install_no_such_key(s3_backend)
        s3_backend.s3.get_object.side_effect = Exception("An error occurred (404)")

        with pytest.raises(FileNotFoundError, match="S3 object not found"):
            s3_backend.read("missing.pdf")

    def test_read_other_error_re_raises(self, s3_backend):
        """read() re-raises non-404 errors instead of reporting a missing file."""
        _install_no_such_key(s3_backend)
        s3_backend.s3.get_object.side_effect = Exception("Internal server error")

        with pytest.raises(Exception, match="Internal server error") as excinfo:
            s3_backend.read("some-file.pdf")

        # FileNotFoundError is itself an Exception subclass, so the check above
        # alone would also accept a misclassified "not found" error that merely
        # kept the original message. Rule that out explicitly.
        assert not isinstance(excinfo.value, FileNotFoundError)

    def test_exists_returns_true_for_existing_object(self, s3_backend):
        """exists() returns True when head_object succeeds with content."""
        s3_backend._mock_s3.head_object.return_value = {"ContentLength": 1024}

        assert s3_backend.exists("US-12345678-B2.pdf") is True

    def test_exists_returns_false_for_missing_object(self, s3_backend):
        """exists() returns False when head_object raises an exception."""
        s3_backend._mock_s3.head_object.side_effect = Exception("Not Found")

        assert s3_backend.exists("missing.pdf") is False

    def test_exists_returns_false_for_zero_length(self, s3_backend):
        """exists() returns False when object has zero content length."""
        s3_backend._mock_s3.head_object.return_value = {"ContentLength": 0}

        assert s3_backend.exists("empty.pdf") is False

    def test_path_for_returns_s3_uri(self, s3_backend):
        """path_for() returns an s3:// URI."""
        path = s3_backend.path_for("US-12345678-B2.pdf")

        assert path == "s3://test-bucket/US-12345678-B2.pdf"

    def test_constructor_creates_bucket_if_missing(self):
        """Constructor creates the bucket if head_bucket fails."""
        with _mocked_boto3() as (_, mock_s3):
            mock_s3.head_bucket.side_effect = Exception("Bucket not found")

            S3StorageBackend(
                bucket="new-bucket",
                endpoint_url="http://minio:9000",
                access_key="admin",
                secret_key="admin",
            )

            mock_s3.create_bucket.assert_called_once_with(Bucket="new-bucket")

    def test_constructor_handles_bucket_creation_failure(self):
        """Constructor does not raise when bucket creation fails.

        Only the absence of an exception and the stored bucket name are
        checked; any warning the constructor may log is not asserted here.
        """
        with _mocked_boto3() as (_, mock_s3):
            mock_s3.head_bucket.side_effect = Exception("Bucket not found")
            mock_s3.create_bucket.side_effect = Exception("Permission denied")

            # Should not raise
            backend = S3StorageBackend(
                bucket="locked-bucket",
                endpoint_url="http://minio:9000",
                access_key="admin",
                secret_key="admin",
            )
            assert backend.bucket == "locked-bucket"

    def test_constructor_passes_endpoint_and_credentials(self):
        """Constructor passes endpoint_url and credentials to boto3.client."""
        with _mocked_boto3() as (mock_boto, _):
            S3StorageBackend(
                bucket="test",
                endpoint_url="http://minio:9000",
                access_key="mykey",
                secret_key="mysecret",
            )

            mock_boto.client.assert_called_with(
                "s3",
                endpoint_url="http://minio:9000",
                aws_access_key_id="mykey",
                aws_secret_access_key="mysecret",
            )


# ---------- LocalStorageBackend ----------

class TestLocalStorageBackend:
    """Basic sanity checks for the local filesystem backend.

    Every test works inside pytest's per-test ``tmp_path`` directory.
    """

    def test_write_and_read(self, tmp_path):
        """Write and read round-trip produces identical content."""
        backend = LocalStorageBackend(base_dir=str(tmp_path))
        backend.write("test.pdf", b"hello world")

        result = backend.read("test.pdf")
        assert result == b"hello world"

    def test_read_missing_file_raises(self, tmp_path):
        """Reading a non-existent file raises FileNotFoundError."""
        backend = LocalStorageBackend(base_dir=str(tmp_path))

        with pytest.raises(FileNotFoundError):
            backend.read("nonexistent.pdf")

    def test_exists_true_for_written_file(self, tmp_path):
        """exists() returns True after writing a file."""
        backend = LocalStorageBackend(base_dir=str(tmp_path))
        backend.write("test.pdf", b"data")

        assert backend.exists("test.pdf") is True

    def test_exists_false_for_missing_file(self, tmp_path):
        """exists() returns False for non-existent file."""
        backend = LocalStorageBackend(base_dir=str(tmp_path))

        assert backend.exists("missing.pdf") is False

    def test_exists_false_for_empty_file(self, tmp_path):
        """exists() returns False for zero-length file."""
        backend = LocalStorageBackend(base_dir=str(tmp_path))
        backend.write("empty.pdf", b"")

        assert backend.exists("empty.pdf") is False

    def test_path_for_returns_full_path(self, tmp_path):
        """path_for() returns the full filesystem path."""
        backend = LocalStorageBackend(base_dir=str(tmp_path))
        path = backend.path_for("test.pdf")

        assert path == str(tmp_path / "test.pdf")


# ---------- get_storage_backend() factory ----------

class TestGetStorageBackend:
    """Tests for the storage backend factory function.

    Each test replaces ``SPARC.storage.config`` with a mock and sets only the
    attributes relevant to the backend being selected.
    """

    @patch("SPARC.storage.config")
    def test_returns_local_backend_by_default(self, mock_config):
        """storage_backend="local" returns LocalStorageBackend."""
        mock_config.storage_backend = "local"

        backend = get_storage_backend()

        assert isinstance(backend, LocalStorageBackend)

    @patch("SPARC.storage.config")
    def test_returns_s3_backend_when_configured(self, mock_config):
        """Setting storage_backend=s3 returns S3StorageBackend."""
        mock_config.storage_backend = "s3"
        mock_config.s3_bucket = "test-bucket"
        mock_config.s3_endpoint_url = "http://minio:9000"
        mock_config.s3_access_key = "key"
        mock_config.s3_secret_key = "secret"

        # Keep the real boto3 (and the network) out of backend construction.
        with _mocked_boto3():
            backend = get_storage_backend()

        assert isinstance(backend, S3StorageBackend)

    @patch("SPARC.storage.config")
    def test_case_insensitive_backend_selection(self, mock_config):
        """Backend selection is case-insensitive."""
        mock_config.storage_backend = "LOCAL"

        backend = get_storage_backend()

        assert isinstance(backend, LocalStorageBackend)