added sandbox and bash tool

This commit is contained in:
2026-02-20 20:00:52 -07:00
parent 8b62f946ca
commit 93ce413c9b
10 changed files with 419 additions and 11 deletions
+46 -9
View File
@@ -1,15 +1,16 @@
import asyncio import asyncio
from http.client import responses
from anthropic import AsyncAnthropic from anthropic import AsyncAnthropic
from agent.config import settings from agent.config import settings
from agent.history import ConversationHistory from agent.history import ConversationHistory
from agent.tools import TOOL_SCHEMAS, dispatch_tool
client = AsyncAnthropic(api_key=settings.anthropic_api_key) client = AsyncAnthropic(api_key=settings.anthropic_api_key)
history = ConversationHistory()
async def _request_completion(messages: list[dict]):
    """Issue one model request with the shared model/token/tool settings."""
    return await client.messages.create(
        model=settings.model,
        max_tokens=settings.max_tokens,
        tools=TOOL_SCHEMAS,
        messages=messages,
    )


async def run_turn(user_message: str, history: list[dict] = None, sandbox=None) -> str:
    """Send one user message to the model and resolve any tool calls it makes.

    Args:
        user_message: Text of the new user turn.
        history: Earlier messages to send before ``user_message``. The list is
            not mutated; a new list is built for the request.
        sandbox: Passed through unchanged to ``dispatch_tool`` for every tool
            call the model requests.

    Returns:
        The text of the first content block in the final response that has a
        ``text`` attribute, or ``""`` when the response has no such block.
    """
    if history is None:
        history = []

    # add the new user message to history
    messages = history + [{"role": "user", "content": user_message}]

    response = await _request_completion(messages)

    # Keep answering tool requests until the model stops asking for them.
    while response.stop_reason == "tool_use":
        tool_results = []
        for block in response.content:
            if block.type == "tool_use":
                result = await dispatch_tool(
                    tool_name=block.name, tool_input=block.input, sandbox=sandbox
                )
                tool_results.append(
                    {"type": "tool_result", "tool_use_id": block.id, "content": result}
                )

        # Echo the assistant turn back together with the results so each
        # tool_result can be matched to its tool_use_id.
        messages = messages + [
            {"role": "assistant", "content": response.content},
            {"role": "user", "content": tool_results},
        ]
        response = await _request_completion(messages)

    # A bare next() would raise StopIteration when no text block exists, which
    # a coroutine turns into an opaque RuntimeError; fall back to "" instead.
    return next(
        (block.text for block in response.content if hasattr(block, "text")), ""
    )
async def run_session(sandbox=None):
    """simple CLI session - temporary until TUI is built

    Reads lines from stdin, sends each one through ``run_turn`` and prints the
    reply. Blank lines are ignored; ``/quit`` or end-of-input ends the session.

    Args:
        sandbox: Passed through to ``run_turn`` for tool execution.
    """
    history = ConversationHistory()
    print("Coding agent ready. Type /quit to quit.")

    while True:
        try:
            user_input = input("You: ").strip()
        except EOFError:
            # stdin closed (Ctrl-D / piped input exhausted): leave cleanly.
            print("\nGoodbye")
            break

        # __ UI commands_______
        if not user_input:
            continue
        if user_input == "/quit":
            print("Goodbye")
            break

        # run_turn appends the new user message itself, so hand it only the
        # earlier turns; recording the message first would send it twice.
        # NOTE(review): assumes history.get_all() returns the stored messages
        # as a list of role/content dicts -- confirm against ConversationHistory.
        response = await run_turn(user_input, history.get_all(), sandbox)

        history.add_message("user", user_input)
        history.add_message("assistant", response)
        print(f"\nAssistant: {response}")
+27
View File
@@ -0,0 +1,27 @@
from tools.bash import bash
# Tool definitions advertised to the model (passed as the `tools` argument of
# the messages API call). Every "name" listed here needs a matching branch in
# dispatch_tool, otherwise the model receives "Unknown tool: <name>".
TOOL_SCHEMAS = [
    {
        "name": "bash",
        "description": "Execute a bash command in the isolated sandbox environment. Use this to run shell commands, install packages, run scripts, etc.",
        # JSON-Schema description of the arguments the model must supply.
        "input_schema": {
            "type": "object",
            "properties": {
                "command": {
                    "type": "string",
                    "description": "The bash command to execute (e.g., 'ls -la', 'python script.py', 'pip install requests')",
                }
            },
            "required": ["command"],
        },
    }
]
async def dispatch_tool(tool_name: str, tool_input: dict, sandbox) -> str:
    """Route tool calls to implementations.

    Args:
        tool_name: Name of the tool requested by the model.
        tool_input: Arguments supplied by the model for that tool.
        sandbox: Execution environment forwarded to the tool implementation.

    Returns:
        The tool's textual output, or an error string when the tool name is
        unknown or the input is malformed. Malformed input is reported as text
        rather than raised so the caller can hand it back to the model.
    """
    if tool_name == "bash":
        command = tool_input.get("command") if isinstance(tool_input, dict) else None
        if not isinstance(command, str):
            # The model controls tool_input; a missing or non-string command
            # must not abort the whole turn with KeyError/TypeError.
            return "Error: bash tool requires a string 'command' input"
        return await bash(command=command, sandbox=sandbox)

    return f"Unknown tool: {tool_name}"
+7 -1
View File
@@ -1,10 +1,16 @@
import asyncio import asyncio
from agent.loop import run_session from agent.loop import run_session
from sandbox.session import PodmanSandbox
async def run_tui():
    """Run the CLI session inside a freshly started Podman sandbox.

    The sandbox is entered as an async context manager, so it is torn down
    when the session returns.
    """
    print("Starting sandbox....")

    async with PodmanSandbox() as active_sandbox:
        print("Sandbox ready")
        await run_session(active_sandbox)

    print("Sandbox destroyed")
def main(): def main():
View File
+47
View File
@@ -0,0 +1,47 @@
from pathlib import Path
import podman
from agent.config import settings
class PodmanSandbox:
    """Async context manager around one long-lived Podman container.

    Entering starts a ``python:3.14`` container that idles on ``sleep`` with
    networking disabled, a 512m memory limit and ``settings.safedir``
    bind-mounted read-write at ``/workspace``. ``run`` executes shell commands
    inside that container; leaving the context stops it.
    """

    def __init__(self):
        # connect to podman socket (rootless)
        self.client = podman.PodmanClient()
        self.container = None

    @staticmethod
    async def _in_thread(func, *args, **kwargs):
        """Run a blocking podman call in a worker thread.

        The podman client calls are synchronous; awaiting them through a
        thread keeps the event loop free while the container works.
        """
        import asyncio  # local import: not imported at module level here

        return await asyncio.to_thread(func, *args, **kwargs)

    async def __aenter__(self):
        """Start the container and return this sandbox."""
        self.container = await self._in_thread(
            self.client.containers.run,
            "python:3.14",
            command=["sleep", "60h"],  # keep the container alive between execs
            detach=True,
            runtime="krun",
            network_mode="none",
            mem_limit="512m",
            volumes={
                str(Path(settings.safedir).absolute()): {
                    "bind": "/workspace",
                    "mode": "rw",
                }
            },
            working_dir="/workspace",
            remove=True,
        )
        return self

    async def run(self, command: str) -> str:
        """Execute a shell command in the container and return its output.

        Args:
            command: Command line handed to ``/bin/sh -c``.

        Returns:
            The command output decoded as text; bytes that are not valid
            UTF-8 are replaced instead of raising.

        Raises:
            RuntimeError: If the sandbox has not been started.
        """
        if self.container is None:
            raise RuntimeError("Sandbox container is not running")

        # The exit code is currently not reported to the caller.
        _exit_code, output = await self._in_thread(
            self.container.exec_run,
            ["/bin/sh", "-c", command],
            workdir="/workspace",
        )
        # Commands may emit arbitrary bytes; never let decoding crash a turn.
        return output.decode(errors="replace")

    async def __aexit__(self, *args):
        """Stop the container; failures are reported but never raised."""
        if self.container is not None:
            try:
                await self._in_thread(self.container.stop)
            except Exception as e:
                # log but don't raise, best effort cleanup
                print(f"Warning: Container cleanup failed: {e}")
                # possibly use logging.warning if we add logging later
            finally:
                # Make a later run() fail clearly instead of hitting a dead container.
                self.container = None
+54
View File
@@ -0,0 +1,54 @@
import asyncio
from sandbox.session import PodmanSandbox
from tools.bash import bash
async def main():
    """Manual smoke test: run a few commands in a real sandbox and print results.

    Covers basic execution, the workspace listing, the Python version, a
    write/read round trip, and a check that the written file is visible on
    the host through the bind mount.
    """
    import os

    from agent.config import settings

    # (banner, command, whether a blank line follows the result)
    steps = (
        ("Test 1: pwd", "pwd", True),
        ("Test 2: ls -la", "ls -la", True),
        ("Test 3: python --version", "python --version", True),
        ("Test 4: Write test file", "echo 'Hello from sandbox' > test.txt", False),
        ("Test 5: Read test file", "cat test.txt", True),
    )

    print("Creating sandbox...")
    async with PodmanSandbox() as sb:
        print("✓ Sandbox created\n")

        for banner, command, blank_after in steps:
            print(banner)
            result = await bash(command, sb)
            if blank_after:
                print(f"Result: {result}\n")
            else:
                print(f"Result: {result}")

        # Test 6: Check it exists on host
        print("Test 6: Verify file on host")
        host_file = f"{settings.safedir}/test.txt"
        if not os.path.exists(host_file):
            print("✗ File NOT found on host")
        else:
            with open(host_file) as f:
                print(f"✓ File exists on host: {f.read()}")

    print("\n✓ Sandbox destroyed")
if __name__ == "__main__":
asyncio.run(main())
+26
View File
@@ -54,3 +54,29 @@ def sample_history():
{"role": "assistance", "content": "Hi there!"}, {"role": "assistance", "content": "Hi there!"},
{"role": "user", "content": "What's 2+2?"}, {"role": "user", "content": "What's 2+2?"},
] ]
@pytest.fixture
def mock_sandbox():
    """Sandbox stand-in whose awaited ``run`` always yields ``"mock output\\n"``."""
    return MagicMock(run=AsyncMock(return_value="mock output\n"))
@pytest.fixture
def crashed_sandbox():
    """Sandbox stand-in whose awaited ``run`` raises RuntimeError."""
    failure = RuntimeError("Container crashed unexpectedly")
    return MagicMock(run=AsyncMock(side_effect=failure))
@pytest.fixture
def mock_podman_client():
    """Patch the Podman client class used by ``sandbox.session``.

    Yields ``(patched_class, container)``; ``containers.run`` on the patched
    client returns ``container``, whose ``exec_run`` reports exit code 0 and
    ``b"mock output\\n"``.
    """
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        container = MagicMock(**{"exec_run.return_value": (0, b"mock output\n")})
        client_cls.return_value.containers.run.return_value = container
        yield client_cls, container
+1 -1
View File
@@ -19,7 +19,7 @@ async def test_run_turn_basic(mock_anthropic_client):
mock_anthropic_client.messages.create.assert_called_once() mock_anthropic_client.messages.create.assert_called_once()
# verify message returned # verify message returned
assert result.content[0].text == "42" assert result == "42"
# verify call has correct parameters # verify call has correct parameters
call_args = mock_anthropic_client.messages.create.call_args call_args = mock_anthropic_client.messages.create.call_args
+187
View File
@@ -0,0 +1,187 @@
from unittest.mock import AsyncMock, MagicMock, patch
import pytest
# ─── Unit Tests ───────────────────────────────────────────────
@pytest.mark.unit
def test_sandbox_initializes():
    """Constructing a PodmanSandbox builds one client and starts no container."""
    from sandbox.session import PodmanSandbox

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        sandbox = PodmanSandbox()

        # Exactly one client, and nothing started yet
        client_cls.assert_called_once()
        assert sandbox.container is None
@pytest.mark.unit
async def test_sandbox_starts_container():
    """Entering the sandbox asks the client for a container and keeps it."""
    from sandbox.session import PodmanSandbox

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        fake_container = MagicMock()
        start_container = client_cls.return_value.containers.run
        start_container.return_value = fake_container

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        # One container requested, and it is the one stored on the sandbox
        start_container.assert_called_once()
        assert sandbox.container is fake_container
@pytest.mark.unit
async def test_sandbox_stops_container_on_exit():
    """Leaving the sandbox stops the container it started."""
    from sandbox.session import PodmanSandbox

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        fake_container = MagicMock()
        client_cls.return_value.containers.run.return_value = fake_container

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()
        await sandbox.__aexit__(None, None, None)

        # stop() must have been requested exactly once
        fake_container.stop.assert_called_once()
@pytest.mark.unit
async def test_sandbox_run_executes_command():
    """run() wraps the command in ``/bin/sh -c`` and returns the decoded output."""
    from sandbox.session import PodmanSandbox

    shell_command = "echo 'hello from sandbox'"

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # Container whose exec_run reports success with known bytes
        fake_container = MagicMock(
            **{"exec_run.return_value": (0, b"hello from sandbox\n")}
        )
        client_cls.return_value.containers.run.return_value = fake_container

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()
        output = await sandbox.run(shell_command)

        # The command must reach exec_run through the shell wrapper
        fake_container.exec_run.assert_called_once_with(
            ["/bin/sh", "-c", shell_command], workdir="/workspace"
        )
        assert output == "hello from sandbox\n"
@pytest.mark.unit
async def test_tool_call_fails_if_sandbox_crashes():
    """bash() turns a RuntimeError from sandbox.run into an error string."""
    from tools.bash import bash

    # Simulate a sandbox whose run() raises while executing the command
    mock_sandbox = MagicMock()
    mock_sandbox.run = AsyncMock(side_effect=RuntimeError("Container crashed"))

    result = await bash("ls -la", mock_sandbox)

    # The failure must be reported as text (not raised) and keep its cause
    assert result.startswith("Error:")
    assert "Container crashed" in result
    # and the command must actually have been handed to the sandbox
    mock_sandbox.run.assert_awaited_once_with("ls -la")
@pytest.mark.unit
async def test_tool_call_with_no_sandbox():
    """bash() reports an error string when called without a sandbox."""
    from tools.bash import bash

    outcome = await bash("ls -la", sandbox=None)

    # An error message comes back instead of an exception
    assert "error" in outcome.lower()
@pytest.mark.unit
async def test_sandbox_cleanup_on_crash():
    """__aexit__ swallows a failure raised by container.stop()."""
    from sandbox.session import PodmanSandbox

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # Container whose stop() blows up
        dead_container = MagicMock()
        dead_container.stop.side_effect = RuntimeError("Container already dead")
        client_cls.return_value.containers.run.return_value = dead_container

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        # Leaving the sandbox must not propagate the stop() error
        try:
            await sandbox.__aexit__(None, None, None)
        except RuntimeError:
            pytest.fail("__aexit__ should handle container stop failure gracefully")
# ─── Integration Tests ────────────────────────────────────────
@pytest.mark.integration
async def test_real_sandbox_starts():
    """Integration: a real sandbox starts and can execute a command."""
    from sandbox.session import PodmanSandbox

    async with PodmanSandbox() as sandbox:
        assert sandbox.container is not None

        # A round-tripped echo shows the container is really running
        echoed = await sandbox.run("echo 'sandbox started'")
        assert "sandbox started" in echoed
@pytest.mark.integration
async def test_real_bash_tool_executes():
    """Integration: the bash tool runs a command inside a real sandbox."""
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    async with PodmanSandbox() as sandbox:
        tool_output = await bash("echo 'tool works'", sandbox)
        assert "tool works" in tool_output
@pytest.mark.integration
async def test_real_sandbox_workspace_mounted():
    """Integration: a file written in /workspace appears in settings.safedir."""
    import os

    from agent.config import settings
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    host_file = os.path.join(settings.safedir, "mount_test.txt")

    try:
        async with PodmanSandbox() as sb:
            # Write file in sandbox
            await bash("echo 'mount test' > /workspace/mount_test.txt", sb)

            # Verify file exists on host
            assert os.path.exists(host_file)
            with open(host_file) as f:
                content = f.read()
            assert "mount test" in content
    finally:
        # Cleanup even when an assertion fails, so reruns start clean
        if os.path.exists(host_file):
            os.remove(host_file)
@pytest.mark.integration
async def test_real_sandbox_no_network():
    """Integration: Verify sandbox has no network access."""
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    # Probe with the interpreter the image is built around. The previous
    # `ping -c 1 -W 8.8.8.8` passed the address as the -W timeout value, so
    # ping always errored and the check passed whether or not a network existed.
    probe = (
        "python -c \"import socket; "
        "socket.create_connection(('8.8.8.8', 53), timeout=3)\" "
        ">/dev/null 2>&1 && echo 'network reachable' || echo 'no network'"
    )

    async with PodmanSandbox() as sb:
        # Try to reach the internet (should fail)
        result = await bash(probe, sb)

    assert "no network" in result
    assert "network reachable" not in result
+24
View File
@@ -0,0 +1,24 @@
async def bash(command: str, sandbox=None) -> str:
    """
    Run a shell command through the sandbox and report the outcome as text.

    Failures are returned as ``"Error: ..."`` strings rather than raised.

    Args:
        command: Shell command to run
        sandbox: Object with an awaitable ``run(command)`` method, or None

    Returns:
        Whatever ``sandbox.run`` returns, or an error message when no sandbox
        was supplied or ``sandbox.run`` raised.
    """
    if sandbox is None:
        return "Error: Sandbox not available"

    try:
        output = await sandbox.run(command)
    except RuntimeError as exc:
        return f"Error: sandbox execution failed: {exc}"
    except Exception as exc:
        return f"Error: Unexpected failure: {exc}"
    else:
        return output