added sandbox and bash tool
This commit is contained in:
+46
-9
@@ -1,15 +1,16 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
from http.client import responses
|
||||||
|
|
||||||
from anthropic import AsyncAnthropic
|
from anthropic import AsyncAnthropic
|
||||||
|
|
||||||
from agent.config import settings
|
from agent.config import settings
|
||||||
from agent.history import ConversationHistory
|
from agent.history import ConversationHistory
|
||||||
|
from agent.tools import TOOL_SCHEMAS, dispatch_tool
|
||||||
|
|
||||||
client = AsyncAnthropic(api_key=settings.anthropic_api_key)
|
client = AsyncAnthropic(api_key=settings.anthropic_api_key)
|
||||||
history = ConversationHistory()
|
|
||||||
|
|
||||||
|
|
||||||
async def run_turn(user_message: str, history: list[dict] = None, sandbox=None) -> str:
    """Send one user message to the model and resolve tool calls until it stops asking.

    Args:
        user_message: Text of the new user message.
        history: Earlier messages in API message format. The list is never
            mutated; ``None`` is treated as an empty history.
        sandbox: Passed through unchanged to ``dispatch_tool`` for every tool call.

    Returns:
        The text of the first text block in the final response, or an empty
        string when the final response carries no text block.
    """
    if history is None:
        history = []

    # add the new user message to history
    messages = history + [{"role": "user", "content": user_message}]

    while True:
        response = await client.messages.create(
            model=settings.model,
            max_tokens=settings.max_tokens,
            tools=TOOL_SCHEMAS,
            messages=messages,
        )
        if response.stop_reason != "tool_use":
            break

        # Answer every tool_use block of this response, keyed by its id.
        tool_results = []
        for block in response.content:
            if block.type != "tool_use":
                continue
            result = await dispatch_tool(
                tool_name=block.name, tool_input=block.input, sandbox=sandbox
            )
            tool_results.append(
                {"type": "tool_result", "tool_use_id": block.id, "content": result}
            )

        messages = messages + [
            {"role": "assistant", "content": response.content},
            {"role": "user", "content": tool_results},
        ]

    # A bare next() would raise StopIteration when there is no text block, and a
    # coroutine turns that into RuntimeError; fall back to an empty string.
    return next(
        (block.text for block in response.content if hasattr(block, "text")), ""
    )
|
||||||
|
|
||||||
|
|
||||||
async def run_session(sandbox=None):
    """simple CLI session - temporary until TUI is built

    Reads prompts from stdin in a loop, sends each one through ``run_turn``
    and prints the reply. Blank input is ignored and ``/quit`` ends the loop.

    Args:
        sandbox: Passed through unchanged to ``run_turn``.
    """
    history = ConversationHistory()

    print("Coding agent ready. Type /quit to quit.")

    while True:
        user_input = input("You: ").strip()
        # __ UI commands_______
        if not user_input:
            continue
        if user_input == "/quit":
            print("Goodbye")
            break

        # run_turn appends the new user message to the history it is given, so
        # hand it only the earlier messages; recording the prompt first would
        # send it to the model twice.
        response = await run_turn(user_input, history.get_all(), sandbox)

        history.add_message("user", user_input)
        history.add_message("assistant", response)

        print(f"\nAssistant: {response}")
|
||||||
|
|||||||
@@ -0,0 +1,27 @@
|
|||||||
|
from tools.bash import bash
|
||||||
|
|
||||||
|
# JSON-schema description of the arguments accepted by the bash tool.
_BASH_INPUT_SCHEMA = {
    "type": "object",
    "properties": {
        "command": {
            "type": "string",
            "description": "The bash command to execute (e.g., 'ls -la', 'python script.py', 'pip install requests')",
        }
    },
    "required": ["command"],
}

# Tool definitions handed to the model; each "name" must be handled by dispatch_tool.
TOOL_SCHEMAS = [
    {
        "name": "bash",
        "description": "Execute a bash command in the isolated sandbox environment. Use this to run shell commands, install packages, run scripts, etc.",
        "input_schema": _BASH_INPUT_SCHEMA,
    }
]
|
||||||
|
|
||||||
|
|
||||||
|
async def dispatch_tool(tool_name: str, tool_input: dict, sandbox) -> str:
    """Route tool calls to implementations.

    Args:
        tool_name: Name of the tool requested by the model.
        tool_input: Arguments supplied by the model for that tool.
        sandbox: Passed through unchanged to the tool implementation.

    Returns:
        The tool's output, or an error message when the tool is unknown or
        its arguments are malformed. Malformed calls never raise, so the
        caller can always report a result back to the model.
    """
    if tool_name == "bash":
        # The model is not guaranteed to respect the schema; a missing or
        # non-string command must not escape as KeyError/TypeError.
        command = tool_input.get("command") if isinstance(tool_input, dict) else None
        if not isinstance(command, str):
            return "Error: bash tool requires a string 'command' argument"
        return await bash(command=command, sandbox=sandbox)

    return f"Unknown tool: {tool_name}"
|
||||||
@@ -1,10 +1,16 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
|
|
||||||
from agent.loop import run_session
|
from agent.loop import run_session
|
||||||
|
from sandbox.session import PodmanSandbox
|
||||||
|
|
||||||
|
|
||||||
async def run_tui():
    """Run the CLI session inside a freshly started PodmanSandbox.

    Prints a progress line before the sandbox starts, once it is available,
    and after the sandbox context has been left.
    """
    print("Starting sandbox....")

    async with PodmanSandbox() as active_sandbox:
        print("Sandbox ready")
        await run_session(active_sandbox)

    print("Sandbox destroyed")
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
|||||||
@@ -0,0 +1,47 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import podman
|
||||||
|
|
||||||
|
from agent.config import settings
|
||||||
|
|
||||||
|
|
||||||
|
class PodmanSandbox:
    """Async context manager that runs shell commands inside one Podman container.

    Entering the context starts a detached container with ``settings.safedir``
    bind-mounted at ``/workspace``; ``run`` executes commands in it; leaving
    the context stops it.
    """

    def __init__(self):
        # connect to podman socket (rootless)
        self.client = podman.PodmanClient()
        # Set by __aenter__; None whenever no container is running.
        self.container = None

    async def __aenter__(self):
        """Start the container and return this sandbox."""
        self.container = self.client.containers.run(
            "python:3.14",
            # Long-running placeholder process so commands can be exec'd later.
            command=["sleep", "60h"],
            detach=True,
            runtime="krun",
            network_mode="none",
            mem_limit="512m",
            volumes={
                str(Path(settings.safedir).absolute()): {
                    "bind": "/workspace",
                    "mode": "rw",
                }
            },
            working_dir="/workspace",
            remove=True,
        )
        return self

    async def run(self, command: str) -> str:
        """Execute a shell command in the sandbox container.

        Args:
            command: Command line handed to ``/bin/sh -c`` in ``/workspace``.

        Returns:
            The command's output decoded as text; bytes that are not valid
            UTF-8 are replaced rather than raising.

        Raises:
            RuntimeError: If the sandbox has not been started or was stopped.
        """
        if self.container is None:
            raise RuntimeError("Sandbox is not running")

        # The exit status is not reported to callers; only the output is.
        _exit_code, output = self.container.exec_run(
            ["/bin/sh", "-c", command], workdir="/workspace"
        )
        # Command output is arbitrary bytes (e.g. `cat` on a binary file).
        return output.decode(errors="replace")

    async def __aexit__(self, *args):
        """Stop the container; cleanup failures are reported but never raised."""
        if self.container:
            try:
                self.container.stop()
            except Exception as e:
                # log but don't raise, best effort cleanup
                print(f"Warning: Container cleanup failed: {e}")
                # possibly use logging.warning if we add logging later
            finally:
                # Drop the handle so a later run() fails with a clear error.
                self.container = None
|
||||||
@@ -0,0 +1,54 @@
|
|||||||
|
import asyncio
|
||||||
|
|
||||||
|
from sandbox.session import PodmanSandbox
|
||||||
|
from tools.bash import bash
|
||||||
|
|
||||||
|
|
||||||
|
async def main():
    """Manual smoke test: run a few commands in a sandbox and check the host mount."""
    # (heading, shell command, text printed right after the result)
    command_checks = (
        ("Test 1: pwd", "pwd", "\n"),
        ("Test 2: ls -la", "ls -la", "\n"),
        ("Test 3: python --version", "python --version", "\n"),
        ("Test 4: Write test file", "echo 'Hello from sandbox' > test.txt", ""),
        ("Test 5: Read test file", "cat test.txt", "\n"),
    )

    print("Creating sandbox...")
    async with PodmanSandbox() as sb:
        print("✓ Sandbox created\n")

        # Tests 1-5: run each command through the bash tool and show its output
        for heading, shell_command, trailer in command_checks:
            print(heading)
            output = await bash(shell_command, sb)
            print(f"Result: {output}{trailer}")

        # Test 6: the file written in the sandbox should be visible on the host
        print("Test 6: Verify file on host")
        import os

        from agent.config import settings

        host_file = f"{settings.safedir}/test.txt"
        if not os.path.exists(host_file):
            print("✗ File NOT found on host")
        else:
            with open(host_file) as handle:
                print(f"✓ File exists on host: {handle.read()}")

    print("\n✓ Sandbox destroyed")


if __name__ == "__main__":
    asyncio.run(main())
|
||||||
@@ -54,3 +54,29 @@ def sample_history():
|
|||||||
{"role": "assistance", "content": "Hi there!"},
|
{"role": "assistance", "content": "Hi there!"},
|
||||||
{"role": "user", "content": "What's 2+2?"},
|
{"role": "user", "content": "What's 2+2?"},
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_sandbox():
    """Sandbox stand-in whose awaitable ``run`` always returns ``"mock output\\n"``."""
    return MagicMock(run=AsyncMock(return_value="mock output\n"))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def crashed_sandbox():
    """Sandbox stand-in whose awaitable ``run`` always raises RuntimeError."""
    failure = RuntimeError("Container crashed unexpectedly")
    return MagicMock(run=AsyncMock(side_effect=failure))
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture
def mock_podman_client():
    """Patch ``sandbox.session.podman.PodmanClient`` for the duration of a test.

    Yields:
        ``(patched_client_class, container_mock)``. The container mock is what
        ``containers.run`` returns, and its ``exec_run`` reports exit code 0
        with ``b"mock output\\n"``.
    """
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        container = MagicMock(**{"exec_run.return_value": (0, b"mock output\n")})
        client_cls.return_value.containers.run.return_value = container
        yield client_cls, container
|
||||||
|
|||||||
+1
-1
@@ -19,7 +19,7 @@ async def test_run_turn_basic(mock_anthropic_client):
|
|||||||
mock_anthropic_client.messages.create.assert_called_once()
|
mock_anthropic_client.messages.create.assert_called_once()
|
||||||
|
|
||||||
# verify message returned
|
# verify message returned
|
||||||
assert result.content[0].text == "42"
|
assert result == "42"
|
||||||
|
|
||||||
# verify call has correct parameters
|
# verify call has correct parameters
|
||||||
call_args = mock_anthropic_client.messages.create.call_args
|
call_args = mock_anthropic_client.messages.create.call_args
|
||||||
|
|||||||
@@ -0,0 +1,187 @@
|
|||||||
|
from unittest.mock import AsyncMock, MagicMock, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
# ─── Unit Tests ───────────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
def test_sandbox_initializes():
    """Constructing a PodmanSandbox creates one client and no container."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()

        # The client is built in __init__ ...
        client_cls.assert_called_once()
        # ... but the container only appears on __aenter__.
        assert sandbox.container is None
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
async def test_sandbox_starts_container():
    """Entering the sandbox calls ``containers.run`` once and keeps its result."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # Container object the patched client hands back
        fake_container = MagicMock()
        containers_run = client_cls.return_value.containers.run
        containers_run.return_value = fake_container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        # Exactly one container was requested and stored on the sandbox
        containers_run.assert_called_once()
        assert sandbox.container is fake_container
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
async def test_sandbox_stops_container_on_exit():
    """Leaving the sandbox calls ``stop()`` on the container exactly once."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        fake_container = MagicMock()
        client_cls.return_value.containers.run.return_value = fake_container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()
        await sandbox.__aexit__(None, None, None)

        # The started container must have been stopped
        fake_container.stop.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
async def test_sandbox_run_executes_command():
    """``run()`` wraps the command in ``/bin/sh -c`` and returns the decoded output."""
    shell_command = "echo 'hello from sandbox'"

    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # exec_run reports (exit_code, raw_output)
        fake_container = MagicMock(
            **{"exec_run.return_value": (0, b"hello from sandbox\n")}
        )
        client_cls.return_value.containers.run.return_value = fake_container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        output = await sandbox.run(shell_command)

        # The command must reach exec_run through the shell wrapper
        fake_container.exec_run.assert_called_once_with(
            ["/bin/sh", "-c", shell_command], workdir="/workspace"
        )
        assert output == "hello from sandbox\n"
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
async def test_tool_call_fails_if_sandbox_crashes():
    """The bash tool reports an error string when ``sandbox.run`` raises."""
    from tools.bash import bash

    # Sandbox whose run() raises RuntimeError, as if the container had died
    broken_sandbox = MagicMock(
        run=AsyncMock(side_effect=RuntimeError("Container crashed"))
    )

    message = await bash("ls -la", broken_sandbox)

    # The failure is returned as text instead of propagating
    assert "error" in message.lower()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
async def test_tool_call_with_no_sandbox():
    """The bash tool reports an error string when it is given no sandbox."""
    from tools.bash import bash

    message = await bash("ls -la", sandbox=None)

    # An error is returned as text rather than raised
    assert "error" in message.lower()
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.unit
async def test_sandbox_cleanup_on_crash():
    """``__aexit__`` swallows a failure raised by ``container.stop()``."""
    with patch("sandbox.session.podman.PodmanClient") as client_cls:
        # stop() blows up, as it would for a container that is already gone
        dead_container = MagicMock()
        dead_container.stop.side_effect = RuntimeError("Container already dead")
        client_cls.return_value.containers.run.return_value = dead_container

        from sandbox.session import PodmanSandbox

        sandbox = PodmanSandbox()
        await sandbox.__aenter__()

        # Leaving the context must not propagate the stop() failure
        try:
            await sandbox.__aexit__(None, None, None)
        except RuntimeError:
            pytest.fail("__aexit__ should handle container stop failure gracefully")
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Integration Tests ────────────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
async def test_real_sandbox_starts():
    """Integration: a real sandbox starts and can execute a command."""
    from sandbox.session import PodmanSandbox

    async with PodmanSandbox() as sandbox:
        assert sandbox.container is not None

        # A round trip through run() shows the container accepts commands
        output = await sandbox.run("echo 'sandbox started'")
        assert "sandbox started" in output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
async def test_real_bash_tool_executes():
    """Integration: the bash tool runs a command in a real sandbox."""
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    async with PodmanSandbox() as sandbox:
        output = await bash("echo 'tool works'", sandbox)
        assert "tool works" in output
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
async def test_real_sandbox_workspace_mounted():
    """Integration: a file written under /workspace appears in ``settings.safedir``."""
    import os

    from agent.config import settings
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    host_file = f"{settings.safedir}/mount_test.txt"

    async with PodmanSandbox() as sb:
        try:
            # Write file in sandbox
            await bash("echo 'mount test' > /workspace/mount_test.txt", sb)

            # Verify file exists on host
            assert os.path.exists(host_file)

            with open(host_file) as f:
                content = f.read()
            assert "mount test" in content
        finally:
            # Cleanup even when an assertion above fails, so a failed run does
            # not leave the marker file behind in the shared directory.
            if os.path.exists(host_file):
                os.remove(host_file)
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.integration
async def test_real_sandbox_no_network():
    """Integration: Verify sandbox has no network access."""
    from sandbox.session import PodmanSandbox
    from tools.bash import bash

    # The previous probe, `ping -c 1 -W 8.8.8.8`, passed the address as the
    # value of -W and gave ping no destination, so it failed (and the test
    # passed) whether or not the network was reachable. Probe with the
    # image's own Python instead and print a distinct marker on success, so
    # the assertion only holds when the connection attempt really fails.
    probe = (
        "python -c \"import socket; "
        "socket.create_connection(('8.8.8.8', 53), timeout=3)\" "
        ">/dev/null 2>&1 && echo 'network reachable' || echo 'no network'"
    )

    async with PodmanSandbox() as sb:
        # Try to reach the internet (should fail)
        result = await bash(probe, sb)
        assert "network reachable" not in result
        assert "no network" in result
|
||||||
@@ -0,0 +1,24 @@
|
|||||||
|
async def bash(command: str, sandbox=None) -> str:
    """
    Run a shell command through the sandbox and report the outcome as text.

    Failures are never raised; they are turned into messages starting with
    "Error:" so the caller always receives a string.

    Args:
        command: Shell command to run
        sandbox: Object exposing an awaitable ``run(command)``; ``None``
            means no sandbox is available

    Returns:
        Whatever ``sandbox.run`` returns, or an error message
    """
    # Guard clause: nothing to execute against
    if sandbox is None:
        return "Error: Sandbox not available"

    try:
        output = await sandbox.run(command)
    except RuntimeError as exc:
        return f"Error: sandbox execution failed: {exc}"
    except Exception as exc:
        return f"Error: Unexpected failure: {exc}"
    else:
        return output
|
||||||
Reference in New Issue
Block a user