Skip to main content

Audit and Replay

This example demonstrates using Statehouse for auditing and debugging agent behavior.

Why Audit?

AI agents make decisions that affect real systems. When reviewing their behavior, you need to be able to answer questions such as:

  • What decisions did the agent make?
  • Why did it choose a particular action?
  • When did something go wrong?
  • Can we reproduce the issue?

Statehouse's event log enables complete audit trails.

Basic Audit Trail

Print all events for an agent:

from statehouse import Statehouse

def audit_agent(agent_id: str):
    """Print a human-readable audit trail of every replayed event for an agent.

    For each event the commit timestamp and transaction id are printed,
    followed by one entry per operation: ``DELETE <key>`` when the operation
    carries no value, otherwise ``WRITE <key>`` plus one line per item of the
    written value.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
    """
    store = Statehouse()

    print(f"Audit trail for {agent_id}")
    print("-" * 50)

    for event in store.replay(agent_id=agent_id):
        print(f"\n[{event.commit_ts}] Transaction {event.txn_id}")
        for op in event.operations:
            # A missing value is how a delete shows up in the log.
            if op.value is None:
                print(f" DELETE {op.key}")
                continue

            print(f" WRITE {op.key}")
            # NOTE(review): assumes written values are dict-like (have .items()).
            for k, v in op.value.items():
                print(f" {k}: {v}")

# Usage: print the full audit trail for the agent "research-agent-001".
audit_agent("research-agent-001")

Filtering by Time

Audit specific time windows:

def audit_time_range(agent_id: str, start: int, end: int):
    """Print the commit timestamp and transaction id of events in a time window.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
        start: Forwarded to ``replay`` as ``start_ts``.
        end: Forwarded to ``replay`` as ``end_ts``.
    """
    # NOTE(review): whether the bounds are inclusive is decided by replay();
    # confirm against the client documentation.
    window = {"agent_id": agent_id, "start_ts": start, "end_ts": end}
    for event in Statehouse().replay(**window):
        print(f"[{event.commit_ts}] {event.txn_id}")

State Reconstruction

Reconstruct state at any point in time:

def reconstruct_state_at(agent_id: str, at_ts: int) -> dict:
    """Rebuild an agent's key/value state as of a specific timestamp.

    Replays the events returned for ``end_ts=at_ts`` in order, applying each
    operation to an initially empty mapping: a write stores the value under
    its key, and an operation without a value removes the key.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
        at_ts: Forwarded to ``replay`` as ``end_ts``.

    Returns:
        A dict mapping each surviving key to its most recently written value.
    """
    snapshot = {}
    history = Statehouse().replay(agent_id=agent_id, end_ts=at_ts)

    for event in history:
        for op in event.operations:
            if op.value is not None:
                snapshot[op.key] = op.value
            else:
                # Delete: drop the key, ignoring keys never written.
                snapshot.pop(op.key, None)

    return snapshot

# What was the state at timestamp 12345?
# past_state maps each key still present at that point to its latest value.
past_state = reconstruct_state_at("agent-001", 12345)

Decision Analysis

Analyze agent decisions:

def analyze_decisions(agent_id: str):
    """Print a summary of the decisions recorded for an agent.

    A decision is a write to a key that starts with ``"step:"`` and does not
    contain ``"tool"``. The total number of decisions is printed first,
    followed by a count per action type, taken from the ``"type"`` field of
    each written value.

    Delete operations (``op.value is None``) are skipped: they carry no
    action payload, and reading ``"type"`` from ``None`` would raise
    ``AttributeError``. Values that are not dicts are counted under
    ``"unknown"``, the same label used when the ``"type"`` field is absent.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
    """
    client = Statehouse()

    decisions = []
    for event in client.replay(agent_id=agent_id):
        for op in event.operations:
            if not op.key.startswith("step:") or "tool" in op.key:
                continue
            if op.value is None:
                # A delete of a step key has no action to analyse.
                continue
            decisions.append({
                "step": op.key,
                "timestamp": event.commit_ts,
                "action": op.value,
            })

    print(f"Total decisions: {len(decisions)}")

    # Count action types
    action_types = {}
    for d in decisions:
        action = d["action"]
        if isinstance(action, dict):
            action_type = action.get("type", "unknown")
        else:
            # Non-dict payloads have no "type" field to read.
            action_type = "unknown"
        action_types[action_type] = action_types.get(action_type, 0) + 1

    print("Action breakdown:")
    for action_type, count in action_types.items():
        print(f" {action_type}: {count}")

Tool Usage Report

Track tool usage:

def tool_usage_report(agent_id: str):
    """Print how often each tool was called, with a sample of the calls.

    Every operation whose key contains ``":tool"`` and whose value is truthy
    is grouped by the value's ``"tool"`` field (``"unknown"`` if absent).
    For each tool the call count is printed, followed by the timestamp and
    ``"args"`` of its first three recorded calls.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
    """
    usage = {}
    for event in Statehouse().replay(agent_id=agent_id):
        for op in event.operations:
            if ":tool" not in op.key or not op.value:
                continue

            name = op.value.get("tool", "unknown")
            record = usage.setdefault(name, {"count": 0, "calls": []})
            record["count"] += 1
            record["calls"].append({
                "timestamp": event.commit_ts,
                "args": op.value.get("args"),
            })

    print("Tool Usage Report")
    print("-" * 40)
    for tool, data in usage.items():
        print(f"\n{tool}: {data['count']} calls")
        # Only the first three calls are shown to keep the report short.
        sample = data["calls"][:3]
        for call in sample:
            print(f" [{call['timestamp']}] {call['args']}")

Error Investigation

Find when errors occurred:

def find_errors(agent_id: str):
    """Print every recorded operation whose value has a truthy ``"error"``.

    Each match is reported with the event's commit timestamp, the operation
    key, and the error content. If nothing matches, ``No errors found`` is
    printed instead.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
    """
    errors = [
        {
            "timestamp": event.commit_ts,
            "key": op.key,
            "error": op.value["error"],
        }
        for event in Statehouse().replay(agent_id=agent_id)
        for op in event.operations
        if op.value and op.value.get("error")
    ]

    if not errors:
        print("No errors found")
        return

    print(f"Found {len(errors)} errors:")
    for e in errors:
        print(f" [{e['timestamp']}] {e['key']}: {e['error']}")

Export to JSON

Export audit trail for external analysis:

import json

def export_audit_json(agent_id: str, output_file: str):
    """Write an agent's replayed events to a JSON file for external analysis.

    The file contains a JSON array with one object per event, holding
    ``txn_id``, ``commit_ts`` and an ``operations`` list of
    ``{"key", "value", "version"}`` objects. A summary line is printed once
    the file has been written.

    Args:
        agent_id: Identifier passed to ``Statehouse.replay``.
        output_file: Path of the file to create or overwrite.
    """
    client = Statehouse()

    events = [
        {
            "txn_id": event.txn_id,
            "commit_ts": event.commit_ts,
            "operations": [
                {
                    "key": op.key,
                    "value": op.value,
                    "version": op.version,
                }
                for op in event.operations
            ],
        }
        for event in client.replay(agent_id=agent_id)
    ]

    # Pin the encoding so the export does not depend on the platform's
    # locale default.
    with open(output_file, "w", encoding="utf-8") as f:
        # NOTE(review): assumes op.value is JSON-serializable; json.dump
        # raises TypeError otherwise.
        json.dump(events, f, indent=2)

    print(f"Exported {len(events)} events to {output_file}")

Determinism Verification

Verify that replaying the event log reproduces the current state:

def verify_determinism(agent_id: str):
    """Check that replaying the event log reproduces the agent's current state.

    Builds one mapping by replaying every event (writes set a key, operations
    without a value remove it) and a second by reading each key returned by
    ``list_keys`` through ``get_state``. It then prints whether the two
    mappings are equal, and on a mismatch prints the key sets of both.

    Args:
        agent_id: Identifier passed to the ``Statehouse`` client calls.
    """
    client = Statehouse()

    # State as implied by the event log.
    reconstructed = {}
    for event in client.replay(agent_id=agent_id):
        for op in event.operations:
            if op.value is not None:
                reconstructed[op.key] = op.value
            else:
                reconstructed.pop(op.key, None)

    # State as currently stored; keys reported as non-existent are left out.
    current = {}
    for key in client.list_keys(agent_id=agent_id):
        state = client.get_state(agent_id=agent_id, key=key)
        if not state.exists:
            continue
        current[key] = state.value

    if reconstructed != current:
        print("MISMATCH detected")
        print(f"Reconstructed keys: {set(reconstructed.keys())}")
        print(f"Current keys: {set(current.keys())}")
        return

    print("Determinism verified: replay matches current state")