Audit and Replay

This example demonstrates using Statehouse for auditing and debugging agent behavior.

Why Audit?

AI agents make decisions that affect real systems, so you need to be able to answer questions such as:

What decisions did the agent make?
Why did it choose a particular action?
When did something go wrong?
Can we reproduce the issue?

Statehouse's event log enables complete audit trails.

Basic Audit Trail

Print all events for an agent:

from statehouse import Statehouse

def audit_agent(agent_id: str):
    """Print every replayed transaction for ``agent_id`` with its operations.

    Each event gets a header line showing its commit timestamp and
    transaction id, followed by one entry per operation: ``DELETE`` when the
    operation's value is ``None``, otherwise ``WRITE`` plus each key/value
    pair of the written value.
    """
    store = Statehouse()

    print(f"Audit trail for {agent_id}")
    print("-" * 50)

    for txn in store.replay(agent_id=agent_id):
        print(f"\n[{txn.commit_ts}] Transaction {txn.txn_id}")
        for operation in txn.operations:
            payload = operation.value
            if payload is not None:
                print(f"  WRITE {operation.key}")
                # assumes written values are dict-like (they expose .items())
                for field, field_value in payload.items():
                    print(f"    {field}: {field_value}")
                continue
            # A value of None marks the operation as a deletion.
            print(f"  DELETE {operation.key}")

# Usage: print the complete audit trail for a single agent
audit_agent("research-agent-001")

Filtering by Time

Audit specific time windows:

def audit_time_range(agent_id: str, start: int, end: int):
    """Print one line per event replayed for ``agent_id`` within a time window.

    ``start`` and ``end`` are forwarded unchanged to ``replay`` as
    ``start_ts`` and ``end_ts``.
    NOTE(review): whether the bounds are inclusive is decided by ``replay``
    and is not visible here -- confirm against the client documentation.
    """
    store = Statehouse()
    window = store.replay(agent_id=agent_id, start_ts=start, end_ts=end)
    for txn in window:
        print(f"[{txn.commit_ts}] {txn.txn_id}")

State Reconstruction

Reconstruct state at any point in time:

def reconstruct_state_at(agent_id: str, at_ts: int) -> dict:
    """Rebuild the agent's key/value state as of ``at_ts``.

    Replays events with ``end_ts=at_ts`` and applies each operation in
    order: a value of ``None`` removes the key, any other value overwrites it.

    Returns:
        A dict mapping each surviving key to its last written value.
    """
    store = Statehouse()
    snapshot = {}

    for txn in store.replay(agent_id=agent_id, end_ts=at_ts):
        for operation in txn.operations:
            key, value = operation.key, operation.value
            if value is not None:
                snapshot[key] = value
            else:
                # Deleting a key that was never written is not an error.
                snapshot.pop(key, None)

    return snapshot

# Example: what was the state of "agent-001" at timestamp 12345?
past_state = reconstruct_state_at("agent-001", 12345)

Decision Analysis

Analyze agent decisions:

def analyze_decisions(agent_id: str):
    """Print how many decisions the agent recorded and a breakdown by type.

    A decision is any written operation whose key starts with ``"step:"``
    and does not contain ``"tool"``. Delete operations (value ``None``) are
    skipped: they carry no action payload, and previously caused an
    ``AttributeError`` when the breakdown called ``.get`` on ``None``.
    """
    from collections import Counter

    client = Statehouse()

    decisions = []
    for event in client.replay(agent_id=agent_id):
        for op in event.operations:
            if op.value is None:
                # Deletion of a step key: nothing to analyze.
                continue
            if op.key.startswith("step:") and "tool" not in op.key:
                decisions.append({
                    "step": op.key,
                    "timestamp": event.commit_ts,
                    "action": op.value,
                })

    print(f"Total decisions: {len(decisions)}")

    # Count action types; Counter keeps first-seen order, matching the
    # order in which the types appear in the event log.
    # assumes each action is dict-like (exposes .get) -- TODO confirm
    action_types = Counter(
        d["action"].get("type", "unknown") for d in decisions
    )

    print("Action breakdown:")
    for action_type, count in action_types.items():
        print(f"  {action_type}: {count}")

Tool Usage Report

Track tool usage:

def tool_usage_report(agent_id: str):
    """Print, per tool, the number of recorded calls and the first three calls.

    An operation counts as a tool call when its key contains ``":tool"`` and
    its value is truthy. The tool name is read from the value's ``"tool"``
    entry (``"unknown"`` if absent) and the arguments from ``"args"``.
    """
    store = Statehouse()

    usage = {}
    for txn in store.replay(agent_id=agent_id):
        for operation in txn.operations:
            if ":tool" not in operation.key or not operation.value:
                continue
            name = operation.value.get("tool", "unknown")
            if name not in usage:
                usage[name] = {"count": 0, "calls": []}
            entry = usage[name]
            entry["count"] += 1
            entry["calls"].append({
                "timestamp": txn.commit_ts,
                "args": operation.value.get("args"),
            })

    print("Tool Usage Report")
    print("-" * 40)
    for name, entry in usage.items():
        print(f"\n{name}: {entry['count']} calls")
        # Only a sample of the earliest calls is shown to keep output short.
        for sample in entry["calls"][:3]:
            print(f"  [{sample['timestamp']}] {sample['args']}")

Error Investigation

Find when errors occurred:

def find_errors(agent_id: str):
    """Print every replayed operation whose value has a truthy ``"error"`` entry.

    Each hit is reported with the event's commit timestamp, the operation
    key, and the error value. Prints ``No errors found`` when there are none.
    """
    store = Statehouse()

    found = []
    for txn in store.replay(agent_id=agent_id):
        for operation in txn.operations:
            # Skip deletes/empty values and values without an error entry.
            if not operation.value or not operation.value.get("error"):
                continue
            found.append({
                "timestamp": txn.commit_ts,
                "key": operation.key,
                "error": operation.value["error"],
            })

    if not found:
        print("No errors found")
        return

    print(f"Found {len(found)} errors:")
    for hit in found:
        print(f"  [{hit['timestamp']}] {hit['key']}: {hit['error']}")

Export to JSON

Export audit trail for external analysis:

import json

def export_audit_json(agent_id: str, output_file: str):
    """Dump the agent's replayed events to ``output_file`` as indented JSON.

    The file holds a list with one object per event (``txn_id``,
    ``commit_ts``, ``operations``); each operation records its ``key``,
    ``value`` and ``version``. A summary line is printed when done.
    """
    store = Statehouse()

    def _as_record(txn):
        # Flatten one event into plain dicts/lists that json can serialize.
        record = {"txn_id": txn.txn_id, "commit_ts": txn.commit_ts}
        record["operations"] = [
            {"key": o.key, "value": o.value, "version": o.version}
            for o in txn.operations
        ]
        return record

    records = [_as_record(txn) for txn in store.replay(agent_id=agent_id)]

    with open(output_file, "w") as handle:
        json.dump(records, handle, indent=2)

    print(f"Exported {len(records)} events to {output_file}")

Determinism Verification

Verify that replay produces the same state as the current store:

def verify_determinism(agent_id: str):
    """Check that replaying the event log reproduces the agent's current state.

    Builds one dict by applying every replayed operation in order and a
    second by reading each listed key via ``get_state``, then prints whether
    the two are equal. On mismatch, the key sets of both sides are printed.
    """
    store = Statehouse()

    # State derived purely from the event log.
    replayed = {}
    for txn in store.replay(agent_id=agent_id):
        for operation in txn.operations:
            if operation.value is not None:
                replayed[operation.key] = operation.value
            else:
                replayed.pop(operation.key, None)

    # State as currently reported by the store.
    live = {}
    for key in store.list_keys(agent_id=agent_id):
        entry = store.get_state(agent_id=agent_id, key=key)
        if entry.exists:
            live[key] = entry.value

    if replayed != live:
        print("MISMATCH detected")
        print(f"Reconstructed keys: {set(replayed)}")
        print(f"Current keys: {set(live)}")
        return

    print("Determinism verified: replay matches current state")

Why Audit?

Basic Audit Trail

Filtering by Time

State Reconstruction

Decision Analysis

Tool Usage Report

Error Investigation

Export to JSON

Determinism Verification