added sandbox and bash tool

This commit is contained in:
2026-02-20 20:00:52 -07:00
parent 8b62f946ca
commit 93ce413c9b
10 changed files with 419 additions and 11 deletions
+26
View File
@@ -54,3 +54,29 @@ def sample_history():
{"role": "assistance", "content": "Hi there!"},
{"role": "user", "content": "What's 2+2?"},
]
@pytest.fixture
def mock_sandbox():
    """Provide a stand-in sandbox whose async ``run`` always succeeds.

    Returns:
        A ``MagicMock`` whose ``run`` attribute is an ``AsyncMock`` that
        resolves to the text "mock output" followed by a newline.
    """
    # Keyword arguments to MagicMock are set as attributes on the new mock.
    return MagicMock(run=AsyncMock(return_value="mock output\n"))
@pytest.fixture
def crashed_sandbox():
    """Provide a stand-in sandbox whose async ``run`` always raises.

    Returns:
        A ``MagicMock`` whose ``run`` attribute is an ``AsyncMock`` that
        raises ``RuntimeError`` when awaited.
    """
    crash = RuntimeError("Container crashed unexpectedly")
    # Keyword arguments to MagicMock are set as attributes on the new mock.
    return MagicMock(run=AsyncMock(side_effect=crash))
@pytest.fixture
def mock_podman_client():
    """Patch ``PodmanClient`` in ``sandbox.session`` for the test's duration.

    Yields:
        A ``(client_class_mock, container_mock)`` tuple. The container mock
        is what ``containers.run`` returns on the patched client, and its
        ``exec_run`` returns exit code 0 with canned byte output.
    """
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        container = MagicMock()
        container.exec_run.return_value = (0, b"mock output\n")

        # Any client instance created while patched hands back this container.
        containers_api = client_cls.return_value.containers
        containers_api.run.return_value = container

        yield client_cls, container
+1 -1
View File
@@ -19,7 +19,7 @@ async def test_run_turn_basic(mock_anthropic_client):
mock_anthropic_client.messages.create.assert_called_once()
# verify message returned
assert result.content[0].text == "42"
assert result == "42"
# verify call has correct parameters
call_args = mock_anthropic_client.messages.create.call_args
+187
View File
@@ -0,0 +1,187 @@
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ─── Unit Tests ───────────────────────────────────────────────
@pytest.mark.unit
def test_sandbox_initializes():
    """Constructing a PodmanSandbox builds a client but no container yet."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()

        # The constructor is expected to instantiate the client exactly once.
        client_cls.assert_called_once()
        # No container exists before the sandbox context is entered.
        assert sandbox.container is None
@pytest.mark.unit
async def test_sandbox_starts_container():
    """Entering the sandbox context launches a container and stores it."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # The object that the patched client's containers.run() hands back.
        launched = MagicMock()
        run_container = client_cls.return_value.containers.run
        run_container.return_value = launched

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        # Exactly one container launch, and the sandbox holds on to it.
        run_container.assert_called_once()
        assert sandbox.container is launched
@pytest.mark.unit
async def test_sandbox_stops_container_on_exit():
    """Leaving the sandbox context calls ``stop()`` on the container once."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        container = MagicMock()
        client_cls.return_value.containers.run.return_value = container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        # Drive the async context manager protocol by hand: enter, then exit
        # with no exception information.
        await sandbox.__aenter__()
        await sandbox.__aexit__(None, None, None)

        container.stop.assert_called_once()
@pytest.mark.unit
async def test_sandbox_run_executes_command():
    """``run()`` forwards the command to ``exec_run`` and returns text output."""
    command = "echo 'hello from sandbox'"

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # exec_run reports (exit_code, raw_bytes) on the mocked container.
        container = MagicMock()
        container.exec_run.return_value = (0, b"hello from sandbox\n")
        client_cls.return_value.containers.run.return_value = container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()
        output = await sandbox.run(command)

        # The command must be wrapped in a shell invocation and executed
        # from the workspace directory.
        container.exec_run.assert_called_once_with(
            ["/bin/sh", "-c", command],
            workdir="/workspace",
        )
        assert output == "hello from sandbox\n"
@pytest.mark.unit
async def test_tool_call_fails_if_sandbox_crashes():
    """The bash tool reports an error string when the sandbox's run() raises."""
    from tools.bash import bash

    # Stand-in sandbox whose run() raises RuntimeError when awaited.
    failing_run = AsyncMock(side_effect=RuntimeError("Container crashed"))
    broken_sandbox = MagicMock(run=failing_run)

    outcome = await bash("ls -la", broken_sandbox)

    # The failure must come back as text rather than as a raised exception.
    lowered = outcome.lower()
    assert "error" in lowered
@pytest.mark.unit
async def test_tool_call_with_no_sandbox():
    """The bash tool reports an error string when given no sandbox at all."""
    from tools.bash import bash

    outcome = await bash("ls -la", sandbox=None)

    # A missing sandbox must be reported in the returned text, not raised.
    lowered = outcome.lower()
    assert "error" in lowered
@pytest.mark.unit
async def test_sandbox_cleanup_on_crash():
    """Leaving the sandbox context must not propagate a failing ``stop()``."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # A container whose stop() raises as soon as it is called.
        dead_container = MagicMock()
        dead_container.stop.side_effect = RuntimeError("Container already dead")
        client_cls.return_value.containers.run.return_value = dead_container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        # A RuntimeError escaping __aexit__ is turned into an explicit test
        # failure with a descriptive message.
        try:
            await sandbox.__aexit__(None, None, None)
        except RuntimeError:
            pytest.fail("__aexit__ should handle container stop failure gracefully")
# ─── Integration Tests ────────────────────────────────────────
@pytest.mark.integration
async def test_real_sandbox_starts():
    """Integration: a real sandbox comes up and can execute a command."""
    from sandbox.session import PodmanSandbox

    async with PodmanSandbox() as sandbox:
        assert sandbox.container is not None

        # Round-trip a command to show the container actually responds.
        output = await sandbox.run("echo 'sandbox started'")
        assert "sandbox started" in output
@pytest.mark.integration
async def test_real_bash_tool_executes():
    """Integration: the bash tool runs a command inside a real sandbox."""
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    async with PodmanSandbox() as sandbox:
        output = await bash("echo 'tool works'", sandbox)
        # The echoed text must appear in what the tool returns.
        assert "tool works" in output
@pytest.mark.integration
async def test_real_sandbox_workspace_mounted():
    """Integration: a file written under /workspace shows up on the host.

    Writes ``mount_test.txt`` from inside the sandbox and checks that the
    same file, with the same content, is visible under ``settings.safedir``
    on the host.
    """
    import os

    from agent.config import settings
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    host_file = os.path.join(settings.safedir, "mount_test.txt")

    async with PodmanSandbox() as sb:
        try:
            # Write the file from inside the sandbox.
            await bash("echo 'mount test' > /workspace/mount_test.txt", sb)

            # The file must be visible on the host side of the mount.
            assert os.path.exists(host_file)
            with open(host_file) as f:
                content = f.read()
            assert "mount test" in content
        finally:
            # Remove the probe file even when an assertion above fails, so a
            # failed run does not leave stray files in the shared directory.
            if os.path.exists(host_file):
                os.remove(host_file)
@pytest.mark.integration
async def test_real_sandbox_no_network():
    """Integration: the sandbox cannot reach the public internet.

    Pings a public address from inside the sandbox; the shell prints the
    marker text only when ping exits with a failure status.
    """
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    async with PodmanSandbox() as sb:
        # ``-W`` takes a timeout value, so it must be given one explicitly.
        # Without it the address is consumed as the timeout, ping has no
        # destination and fails unconditionally, and the check passes even
        # when the network is reachable.
        result = await bash(
            "ping -c 1 -W 2 8.8.8.8 2>&1 || echo 'no network'", sb
        )
        assert "no network" in result