added sandbox and bash tool

2026-02-20 20:00:52 -07:00
parent 8b62f946ca
commit 93ce413c9b
10 changed files with 419 additions and 11 deletions
@@ -1,15 +1,16 @@
 import asyncio
+from http.client import responses

 from anthropic import AsyncAnthropic

 from agent.config import settings
 from agent.history import ConversationHistory
+from agent.tools import TOOL_SCHEMAS, dispatch_tool

 client = AsyncAnthropic(api_key=settings.anthropic_api_key)
-history = ConversationHistory()


-async def run_turn(user_message: str, history: list[dict] = None) -> str:
+async def run_turn(user_message: str, history: list[dict] = None, sandbox=None) -> str:
+    """Run one user turn, executing requested tools until the model stops.
+
+    Args:
+        user_message: Text of the new user message.
+        history: Prior messages; treated as empty when None. The list is
+            not mutated (a new list is built from it).
+        sandbox: Object forwarded to dispatch_tool for tool execution.
+
+    Returns:
+        Text of the first text block of the final response, or "" when the
+        final response contains no text block.
+    """

    if history is None:
        history = []
@@ -17,21 +18,57 @@ async def run_turn(user_message: str, history: list[dict] = None) -> str:
    # add the new user message to history
    messages = history + [{"role": "user", "content": user_message}]

-    message = await client.messages.create(
+    response = await client.messages.create(
        model=settings.model,
        max_tokens=settings.max_tokens,
+        tools=TOOL_SCHEMAS,
        messages=messages,
    )

-    return message
+    # Keep answering tool requests until the model stops asking for tools.
+    while response.stop_reason == "tool_use":
+        tool_results = []
+        for block in response.content:
+            if block.type == "tool_use":
+                result = await dispatch_tool(
+                    tool_name=block.name, tool_input=block.input, sandbox=sandbox
+                )
+                tool_results.append(
+                    {"type": "tool_result", "tool_use_id": block.id, "content": result}
+                )
+
+        # Echo the assistant's tool request, then supply the results as the
+        # next user message.
+        messages = messages + [
+            {"role": "assistant", "content": response.content},
+            {"role": "user", "content": tool_results},
+        ]
+
+        response = await client.messages.create(
+            model=settings.model,
+            max_tokens=settings.max_tokens,
+            tools=TOOL_SCHEMAS,
+            messages=messages,
+        )
+
+    # A final response may carry no text block; a bare next() would raise
+    # StopIteration, which surfaces from a coroutine as RuntimeError.
+    return next(
+        (block.text for block in response.content if hasattr(block, "text")), ""
+    )


-async def run_session():
+async def run_session(sandbox=None):
+    """Run a simple CLI chat loop - temporary until the TUI is built.
+
+    Reads lines from stdin, skips empty input, stops on "/quit", and prints
+    each assistant reply.
+
+    Args:
+        sandbox: Object forwarded to run_turn for tool execution.
+    """
+    history = ConversationHistory()
+
+    print("Coding agent ready. Type /quit to quit.")
+
    while True:
-        user_input = input("You: ")
+        user_input = input("You: ").strip()
+        # UI commands
+        if not user_input:
+            continue
+        if user_input == "/quit":
+            print("Goodbye")
+            break
+
+        # run_turn appends the user message itself, so hand it the history
+        # as it was BEFORE this message; otherwise the prompt is sent twice.
+        prior_messages = list(history.get_all())
        history.add_message("user", user_input)

-        response = await run_turn(user_input, history.get_all())
-        history.add_message("assistant", response.content[0].text)
+        response = await run_turn(user_input, prior_messages, sandbox)
+        history.add_message("assistant", response)

-        print(f"Assistant: {response.content[0].text}")
+        print(f"\nAssistant: {response}")
@@ -0,0 +1,27 @@
+from tools.bash import bash
+
+# Tool definitions offered to the model. Each entry has a name, a
+# description and a JSON-schema "input_schema" for its arguments.
+# The "name" values must match the names handled in dispatch_tool.
+TOOL_SCHEMAS = [
+    {
+        "name": "bash",
+        "description": "Execute a bash command in the isolated sandbox environment. Use this to run shell commands, install packages, run scripts, etc.",
+        "input_schema": {
+            "type": "object",
+            "properties": {
+                "command": {
+                    "type": "string",
+                    "description": "The bash command to execute (e.g., 'ls -la', 'python script.py', 'pip install requests')",
+                }
+            },
+            # "command" is the only argument and is mandatory
+            "required": ["command"],
+        },
+    }
+]
+
+
+async def dispatch_tool(tool_name: str, tool_input: dict, sandbox) -> str:
+    """Route a tool call to its implementation and return its text result.
+
+    Args:
+        tool_name: Name of the requested tool.
+        tool_input: Arguments supplied for the tool.
+        sandbox: Sandbox object passed through to the tool.
+
+    Returns:
+        The tool's output, or an error string for an unknown tool or
+        malformed arguments (nothing is raised for bad input).
+    """
+
+    if tool_name == "bash":
+        # Tool input comes from the model; report malformed arguments as a
+        # tool result instead of letting a KeyError abort the turn.
+        command = tool_input.get("command") if isinstance(tool_input, dict) else None
+        if not isinstance(command, str) or not command.strip():
+            return "Error: bash tool requires a non-empty 'command' string"
+        return await bash(command=command, sandbox=sandbox)
+
+    return f"Unknown tool: {tool_name}"
@@ -1,10 +1,16 @@
 import asyncio

 from agent.loop import run_session
+from sandbox.session import PodmanSandbox


 async def run_tui():
+    """Run the CLI session inside a PodmanSandbox, printing lifecycle messages."""
-    await run_session()
+    print("Starting sandbox....")
+    # The sandbox is entered before the session starts and exited when the
+    # session returns.
+    async with PodmanSandbox() as sandbox:
+        print("Sandbox ready")
+        await run_session(sandbox)
+
+    print("Sandbox destroyed")


 def main():
@@ -0,0 +1,47 @@
+import asyncio
+from pathlib import Path
+
+import podman
+
+from agent.config import settings
+
+
+class PodmanSandbox:
+    """Async context manager around one Podman container used as a sandbox.
+
+    Entering starts a detached container with networking disabled and
+    settings.safedir bind-mounted read-write at /workspace. Exiting stops
+    the container, which is created with remove=True.
+    """
+
+    def __init__(self):
+        # connect to podman socket (rootless setup assumed)
+        self.client = podman.PodmanClient()
+        self.container = None
+
+    async def __aenter__(self):
+        """Start the sandbox container and return this sandbox."""
+        # podman-py is synchronous; run the blocking call in a worker thread
+        # so the event loop is not stalled while the container starts.
+        self.container = await asyncio.to_thread(
+            self.client.containers.run,
+            "python:3.14",
+            command=["sleep", "60h"],
+            detach=True,
+            runtime="krun",
+            network_mode="none",
+            mem_limit="512m",
+            volumes={
+                str(Path(settings.safedir).absolute()): {
+                    "bind": "/workspace",
+                    "mode": "rw",
+                }
+            },
+            working_dir="/workspace",
+            remove=True,
+        )
+        return self
+
+    async def run(self, command: str) -> str:
+        """Execute command in the sandbox via /bin/sh -c and return its output.
+
+        Args:
+            command: Shell command line to run in /workspace.
+
+        Returns:
+            The decoded output. When the command exits non-zero, a trailing
+            "[exit code: N]" line is appended so failures are visible even
+            when the command printed nothing.
+
+        Raises:
+            RuntimeError: If the sandbox has not been started.
+        """
+        if self.container is None:
+            raise RuntimeError("Sandbox is not running")
+
+        # exec_run blocks until the command finishes; keep it off the loop.
+        exit_code, output = await asyncio.to_thread(
+            self.container.exec_run,
+            ["/bin/sh", "-c", command],
+            workdir="/workspace",
+        )
+        # Commands may emit bytes that are not valid UTF-8; never raise on them.
+        text = output.decode(errors="replace")
+        if exit_code:
+            text += f"\n[exit code: {exit_code}]"
+        return text
+
+    async def __aexit__(self, *args):
+        """Stop the container; cleanup failures are reported, not raised."""
+        if self.container:
+            try:
+                # Called synchronously on purpose: cleanup should complete
+                # even if the surrounding task is being cancelled.
+                self.container.stop()
+            except Exception as e:
+                # log but don't raise, best effort cleanup
+                print(f"Warning: Container cleanup failed: {e}")
+                # possibly use logging.warning if we add logging later
+            finally:
+                # Make a later run() fail with a clear RuntimeError.
+                self.container = None
@@ -0,0 +1,54 @@
+import asyncio
+
+from sandbox.session import PodmanSandbox
+from tools.bash import bash
+
+
+async def main():
+    """Manual smoke test: run a few commands through a real sandbox."""
+    # Commands whose result is printed followed by a blank line.
+    opening_steps = (
+        ("Test 1: pwd", "pwd"),
+        ("Test 2: ls -la", "ls -la"),
+        ("Test 3: python --version", "python --version"),
+    )
+
+    print("Creating sandbox...")
+    async with PodmanSandbox() as sb:
+        print("✓ Sandbox created\n")
+
+        for label, command in opening_steps:
+            print(label)
+            output = await bash(command, sb)
+            print(f"Result: {output}\n")
+
+        # Test 4: create a file inside the sandbox workspace
+        print("Test 4: Write test file")
+        output = await bash("echo 'Hello from sandbox' > test.txt", sb)
+        print(f"Result: {output}")
+
+        # Test 5: the file should be readable from inside the sandbox
+        print("Test 5: Read test file")
+        output = await bash("cat test.txt", sb)
+        print(f"Result: {output}\n")
+
+        # Test 6: the same file should be visible through the host mount
+        print("Test 6: Verify file on host")
+        import os
+
+        from agent.config import settings
+
+        host_file = f"{settings.safedir}/test.txt"
+        if not os.path.exists(host_file):
+            print("✗ File NOT found on host")
+        else:
+            with open(host_file) as f:
+                print(f"✓ File exists on host: {f.read()}")
+
+    print("\n✓ Sandbox destroyed")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
@@ -54,3 +54,29 @@ def sample_history():
        {"role": "assistance", "content": "Hi there!"},
        {"role": "user", "content": "What's 2+2?"},
    ]
+
+
+@pytest.fixture
+def mock_sandbox():
+    """Provide a sandbox stand-in whose awaited run() returns canned output."""
+    return MagicMock(run=AsyncMock(return_value="mock output\n"))
+
+
+@pytest.fixture
+def crashed_sandbox():
+    """Provide a sandbox stand-in whose awaited run() raises RuntimeError."""
+    failure = RuntimeError("Container crashed unexpectedly")
+    return MagicMock(run=AsyncMock(side_effect=failure))
+
+
+@pytest.fixture
+def mock_podman_client():
+    """Patch PodmanClient; yield (patched class, container its run() returns)."""
+    with patch("sandbox.session.podman.PodmanClient") as client_cls:
+        # exec_run reports exit code 0 with fixed output
+        container = MagicMock(**{"exec_run.return_value": (0, b"mock output\n")})
+        client_cls.return_value.containers.run.return_value = container
+        yield client_cls, container
@@ -19,7 +19,7 @@ async def test_run_turn_basic(mock_anthropic_client):
    mock_anthropic_client.messages.create.assert_called_once()

    # verify message returned
-    assert result.content[0].text == "42"
+    assert result == "42"

    # verify call has correct parameters
    call_args = mock_anthropic_client.messages.create.call_args
@@ -0,0 +1,187 @@
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import pytest
+
+# ─── Unit Tests ───────────────────────────────────────────────
+
+
+@pytest.mark.unit
+def test_sandbox_initializes():
+    """Constructing PodmanSandbox builds one client and starts no container."""
+    with patch("sandbox.session.podman.PodmanClient") as client_cls:
+        from sandbox.session import PodmanSandbox
+
+        sandbox = PodmanSandbox()
+
+        # Only the client exists so far; nothing has been started
+        assert sandbox.container is None
+        client_cls.assert_called_once()
+
+
+@pytest.mark.unit
+async def test_sandbox_starts_container():
+    """Entering the sandbox starts exactly one container and stores it."""
+    with patch("sandbox.session.podman.PodmanClient") as client_cls:
+        fake_container = MagicMock()
+        start_container = client_cls.return_value.containers.run
+        start_container.return_value = fake_container
+
+        from sandbox.session import PodmanSandbox
+
+        sandbox = PodmanSandbox()
+        await sandbox.__aenter__()
+
+        # The client was asked once for a container, and it was kept
+        start_container.assert_called_once()
+        assert sandbox.container is fake_container
+
+
+@pytest.mark.unit
+async def test_sandbox_stops_container_on_exit():
+    """Leaving the sandbox stops the container it started."""
+    with patch("sandbox.session.podman.PodmanClient") as client_cls:
+        fake_container = MagicMock()
+        client_cls.return_value.containers.run.return_value = fake_container
+
+        from sandbox.session import PodmanSandbox
+
+        sandbox = PodmanSandbox()
+        await sandbox.__aenter__()
+        await sandbox.__aexit__(None, None, None)
+
+        # stop() must have been requested exactly once
+        fake_container.stop.assert_called_once()
+
+
+@pytest.mark.unit
+async def test_sandbox_run_executes_command():
+    """run() wraps the command in /bin/sh -c and returns decoded output."""
+    command = "echo 'hello from sandbox'"
+
+    with patch("sandbox.session.podman.PodmanClient") as client_cls:
+        # exec_run yields (exit code, raw bytes)
+        fake_container = MagicMock()
+        fake_container.exec_run.return_value = (0, b"hello from sandbox\n")
+        client_cls.return_value.containers.run.return_value = fake_container
+
+        from sandbox.session import PodmanSandbox
+
+        sandbox = PodmanSandbox()
+        await sandbox.__aenter__()
+
+        output = await sandbox.run(command)
+
+        # The command must go through the shell wrapper in /workspace
+        fake_container.exec_run.assert_called_once_with(
+            ["/bin/sh", "-c", command], workdir="/workspace"
+        )
+        assert output == "hello from sandbox\n"
+
+
+@pytest.mark.unit
+async def test_tool_call_fails_if_sandbox_crashes():
+    """The bash tool reports an error string when sandbox.run() raises."""
+    from tools.bash import bash
+
+    # Sandbox whose run() raises RuntimeError when awaited
+    failing_sandbox = MagicMock()
+    failing_sandbox.run = AsyncMock(side_effect=RuntimeError("Container crashed"))
+
+    outcome = await bash("ls -la", failing_sandbox)
+
+    # The failure is returned as text rather than propagated
+    assert "error" in outcome.lower()
+
+
+@pytest.mark.unit
+async def test_tool_call_with_no_sandbox():
+    """The bash tool reports an error string when given no sandbox."""
+    from tools.bash import bash
+
+    outcome = await bash("ls -la", sandbox=None)
+
+    # An error message comes back instead of an exception
+    assert "error" in outcome.lower()
+
+
+@pytest.mark.unit
+async def test_sandbox_cleanup_on_crash():
+    """__aexit__ swallows a failure raised by container.stop()."""
+    with patch("sandbox.session.podman.PodmanClient") as client_cls:
+        # stop() blows up, as it would for an already-dead container
+        fake_container = MagicMock()
+        fake_container.stop.side_effect = RuntimeError("Container already dead")
+        client_cls.return_value.containers.run.return_value = fake_container
+
+        from sandbox.session import PodmanSandbox
+
+        sandbox = PodmanSandbox()
+        await sandbox.__aenter__()
+
+        # Exiting must not let the stop() failure escape
+        try:
+            await sandbox.__aexit__(None, None, None)
+        except RuntimeError:
+            pytest.fail("__aexit__ should handle container stop failure gracefully")
+
+
+# ─── Integration Tests ────────────────────────────────────────
+
+
+@pytest.mark.integration
+async def test_real_sandbox_starts():
+    """Integration: a real sandbox starts and can run a command."""
+    from sandbox.session import PodmanSandbox
+
+    async with PodmanSandbox() as sandbox:
+        assert sandbox.container is not None
+
+        # A round trip through the container shows it is running
+        output = await sandbox.run("echo 'sandbox started'")
+        assert "sandbox started" in output
+
+
+@pytest.mark.integration
+async def test_real_bash_tool_executes():
+    """Integration: the bash tool runs a command in a real sandbox."""
+    from sandbox.session import PodmanSandbox
+    from tools.bash import bash
+
+    async with PodmanSandbox() as sandbox:
+        output = await bash("echo 'tool works'", sandbox)
+        assert "tool works" in output
+
+
+@pytest.mark.integration
+async def test_real_sandbox_workspace_mounted():
+    """Integration: Verify workspace is mounted correctly."""
+    from pathlib import Path
+
+    from agent.config import settings
+    from sandbox.session import PodmanSandbox
+    from tools.bash import bash
+
+    host_file = Path(settings.safedir) / "mount_test.txt"
+
+    try:
+        async with PodmanSandbox() as sb:
+            # Write file in sandbox
+            await bash("echo 'mount test' > /workspace/mount_test.txt", sb)
+
+            # Verify file exists on host with the expected content
+            assert host_file.exists()
+            assert "mount test" in host_file.read_text()
+    finally:
+        # Remove the artifact even when an assertion above fails, so a failed
+        # run does not leave files behind in the shared directory.
+        host_file.unlink(missing_ok=True)
+
+
+@pytest.mark.integration
+async def test_real_sandbox_no_network():
+    """Integration: Verify sandbox has no network access."""
+    from sandbox.session import PodmanSandbox
+    from tools.bash import bash
+
+    # Probe with the image's own Python instead of ping, so the check does
+    # not depend on ping being installed or on its option parsing. The
+    # success branch prints a distinct marker, so a working network cannot
+    # be mistaken for a blocked one.
+    probe = (
+        'python -c "import socket; '
+        "socket.create_connection(('8.8.8.8', 53), timeout=2)\" 2>&1 "
+        "&& echo 'network reachable' || echo 'no network'"
+    )
+
+    async with PodmanSandbox() as sb:
+        # Try to reach the internet (should fail)
+        result = await bash(probe, sb)
+        assert "network reachable" not in result
+        assert "no network" in result
@@ -0,0 +1,24 @@
+async def bash(command: str, sandbox=None) -> str:
+    """
+    Run a shell command through the sandbox and return the resulting text.
+
+    Failures are reported in the returned string instead of being raised:
+    a missing sandbox and any Exception from sandbox.run() both yield a
+    message starting with "Error:".
+
+    Args:
+        command: Shell command to run
+        sandbox: Object exposing an awaitable run(command); None if absent
+
+    Returns:
+        Whatever sandbox.run() returns, or an "Error: ..." message
+    """
+
+    if sandbox is None:
+        return "Error: Sandbox not available"
+
+    try:
+        return await sandbox.run(command)
+    except RuntimeError as exc:
+        # sandbox-level failure, e.g. the container went away
+        return f"Error: sandbox execution failed: {exc}"
+    except Exception as exc:
+        # anything else is reported the same way, with a different prefix
+        return f"Error: Unexpected failure: {exc}"