Sandbox
Testing
Test your own sandbox-backed workflows.
Test sandbox behavior at your application boundary. Your tests should create a session, run the workflow your app exposes, assert the result, and always destroy the session.
import { afterEach, expect, test } from "vitest";
import { DockerSandbox, type SandboxSession } from "@anvia/sandbox";
// Every session a test opens is recorded here so the hook below can tear it down.
const sessions: SandboxSession[] = [];

// After each test, empty the tracking list and destroy whatever it held.
// allSettled waits for every destroy() call to finish, even when one rejects.
afterEach(async () => {
  const tracked = sessions.splice(0);
  const teardowns = tracked.map((session) => session.destroy());
  await Promise.allSettled(teardowns);
});
// End-to-end check: seed a workspace with a script through the manifest, run it
// inside the sandbox, and assert on the exit code and captured stdout.
test("runs generated code in an isolated workspace", async () => {
  const sandbox = new DockerSandbox({
    image: "node:22-bookworm",
    limits: {
      timeoutMs: 10_000,
      maxOutputBytes: 16_000,
    },
  });

  const session = await sandbox.createSession({
    manifest: {
      files: {
        "index.js": "console.log(2 + 2)",
      },
    },
  });
  // Register the session before asserting so the afterEach hook still destroys
  // it when an expectation below fails.
  sessions.push(session);

  const result = await session.exec({
    command: "node",
    args: ["index.js"],
  });

  expect(result.exitCode).toBe(0);
  // Trim so the trailing newline written by console.log does not affect the match.
  expect(result.stdout.trim()).toBe("4");
});

Agent Tool Tests
When your agent receives sandbox tools, test the tool path without relying on a model call:
import { expect, test } from "vitest";
import { createSandboxTools, DockerSandbox } from "@anvia/sandbox";
// Calls the sandbox file tools directly, so the tool wiring is exercised
// without a model call.
test("writes and reads sandbox files through tools", async () => {
  const sandbox = new DockerSandbox();
  const session = await sandbox.createSession();

  try {
    const tools = createSandboxTools(session);
    const writeFile = tools.find((tool) => tool.name === "write_file");
    const readFile = tools.find((tool) => tool.name === "read_file");
    // find() yields undefined when no tool matches; fail with a clear message
    // instead of a property access on undefined.
    if (!writeFile || !readFile) {
      throw new Error("Sandbox file tools were not created.");
    }

    await writeFile.call({
      path: "notes/result.txt",
      content: "ready",
    });
    const output = await readFile.call({
      path: "notes/result.txt",
    });

    expect(output).toContain("ready");
  } finally {
    // Destroy the session whether the assertions pass or throw.
    await session.destroy();
  }
});

Keep model behavior in separate evals. Sandbox tests should verify your command policy, manifest setup, file paths, cleanup, and tool wiring.
GitHub Actions
GitHub-hosted Linux runners include Docker, so you can run your app's sandbox tests in CI:
# Runs the application's full test suite, sandbox tests included.
name: App tests

on:
  pull_request:
  push:
    branches: [main]

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # pnpm is set up before setup-node so that `cache: pnpm` can find it.
      - uses: pnpm/action-setup@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: pnpm
      - run: pnpm install --frozen-lockfile
      - run: pnpm test

If sandbox tests are slower than your normal unit suite, split them into a dedicated script:
# Runs only the sandbox tests through the dedicated `test:sandbox` script.
name: Sandbox workflow tests

on:
  pull_request:
  workflow_dispatch:

jobs:
  sandbox:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      # pnpm is set up before setup-node so that `cache: pnpm` can find it.
      - uses: pnpm/action-setup@v4
      - uses: actions/setup-node@v4
        with:
          node-version: 22
          cache: pnpm
      - run: pnpm install --frozen-lockfile
      - run: pnpm test:sandbox