diff --git a/CHANGELOG.md b/CHANGELOG.md index bc7c82d..79ba35e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,14 +5,20 @@ All notable changes to DeepWork will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## [0.2.0] - 2026-01-16 +## [0.3.0] - 2026-01-16 ### Added +- Cross-platform hook wrapper system for writing hooks once and running on multiple platforms + - `wrapper.py`: Normalizes input/output between Claude Code and Gemini CLI + - `claude_hook.sh` and `gemini_hook.sh`: Platform-specific shell wrappers + - `policy_check.py`: Cross-platform policy evaluation hook +- Platform documentation in `doc/platform/` and `doc/platforms/` with hook references and learnings +- Claude Code platform documentation (`doc/platforms/claude/`) - `update.job` for maintaining standard jobs (#41) - `make_new_job.sh` script and templates directory for job scaffolding (#37) - Default policy template file created during `deepwork install` (#42) - Full e2e test suite: define → implement → execute workflow (#45) -- Automated tests for all shell scripts (#40) +- Automated tests for all shell scripts and hook wrappers (#40) ### Changed - Standardized on "ask structured questions" phrasing across all jobs (#48) @@ -58,6 +64,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Initial version. 
-[0.2.0]: https://github.com/anthropics/deepwork/releases/tag/0.2.0 +[0.3.0]: https://github.com/anthropics/deepwork/releases/tag/0.3.0 [0.1.1]: https://github.com/anthropics/deepwork/releases/tag/0.1.1 [0.1.0]: https://github.com/anthropics/deepwork/releases/tag/0.1.0 diff --git a/README.md b/README.md index 2b2c04e..3331996 100644 --- a/README.md +++ b/README.md @@ -209,14 +209,18 @@ deepwork/ │ │ ├── parser.py # Job definition parsing │ │ ├── detector.py # Platform detection │ │ └── generator.py # Skill file generation +│ ├── hooks/ # Cross-platform hook wrappers +│ │ ├── wrapper.py # Input/output normalization +│ │ ├── claude_hook.sh # Claude Code adapter +│ │ └── gemini_hook.sh # Gemini CLI adapter │ ├── templates/ # Jinja2 templates │ │ ├── claude/ # Claude Code templates │ │ └── gemini/ # Gemini CLI templates │ ├── schemas/ # JSON schemas │ └── utils/ # Utilities (fs, yaml, git, validation) ├── tests/ -│ ├── unit/ # Unit tests (147 tests) -│ ├── integration/ # Integration tests (19 tests) +│ ├── unit/ # Unit tests +│ ├── integration/ # Integration tests │ └── fixtures/ # Test fixtures └── doc/ # Documentation ``` diff --git a/doc/architecture.md b/doc/architecture.md index b5c3a9f..33ac973 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -48,9 +48,13 @@ deepwork/ # DeepWork tool repository │ │ ├── parser.py # Job definition parsing │ │ ├── policy_parser.py # Policy definition parsing │ │ └── hooks_syncer.py # Hook syncing to platforms -│ ├── hooks/ # Hook evaluation modules +│ ├── hooks/ # Hook system and cross-platform wrappers │ │ ├── __init__.py -│ │ └── evaluate_policies.py # Policy evaluation CLI +│ │ ├── wrapper.py # Cross-platform input/output normalization +│ │ ├── claude_hook.sh # Shell wrapper for Claude Code +│ │ ├── gemini_hook.sh # Shell wrapper for Gemini CLI +│ │ ├── policy_check.py # Cross-platform policy evaluation hook +│ │ └── evaluate_policies.py # Legacy policy evaluation CLI │ ├── templates/ # Command templates for each 
platform │ │ ├── claude/ │ │ │ └── command-job-step.md.jinja @@ -1064,6 +1068,56 @@ The hooks are installed to `.claude/settings.json` during `deepwork sync`: } ``` +### Cross-Platform Hook Wrapper System + +The `hooks/` module provides a wrapper system that allows writing hooks once in Python and running them on multiple platforms. This normalizes the differences between Claude Code and Gemini CLI hook systems. + +**Architecture:** +``` +┌─────────────────┐ ┌─────────────────┐ +│ Claude Code │ │ Gemini CLI │ +│ (Stop event) │ │ (AfterAgent) │ +└────────┬────────┘ └────────┬────────┘ + │ │ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ claude_hook.sh │ │ gemini_hook.sh │ +│ (shell wrapper) │ │ (shell wrapper) │ +└────────┬────────┘ └────────┬────────┘ + │ │ + └───────────┬───────────┘ + ▼ + ┌─────────────────┐ + │ wrapper.py │ + │ (normalization) │ + └────────┬────────┘ + ▼ + ┌─────────────────┐ + │ Python Hook │ + │ (common logic) │ + └─────────────────┘ +``` + +**Key normalizations:** +- Event names: `Stop` ↔ `AfterAgent`, `PreToolUse` ↔ `BeforeTool`, `UserPromptSubmit` ↔ `BeforeAgent` +- Tool names: `Write` ↔ `write_file`, `Bash` ↔ `shell`, `Read` ↔ `read_file` +- Decision values: `block` → `deny` for Gemini CLI +- Environment variables: `CLAUDE_PROJECT_DIR` ↔ `GEMINI_PROJECT_DIR` + +**Usage:** +```python +from deepwork.hooks.wrapper import HookInput, HookOutput, NormalizedEvent, run_hook, Platform + +def my_hook(input: HookInput) -> HookOutput: + if input.event == NormalizedEvent.AFTER_AGENT: + return HookOutput(decision="block", reason="Complete X first") + return HookOutput() + +# Called via: claude_hook.sh mymodule or gemini_hook.sh mymodule +``` + +See `doc/platform/` for detailed platform-specific hook documentation. 
+ ### Policy Schema Policies are validated against a JSON Schema: diff --git a/doc/platform/README.md b/doc/platform/README.md new file mode 100644 index 0000000..6ae8b4c --- /dev/null +++ b/doc/platform/README.md @@ -0,0 +1,54 @@ +# Platform Documentation + +This directory contains internal documentation about how different AI CLI platforms behave in ways that matter for DeepWork's hook system and adapter implementations. + +## Purpose + +These documents capture: + +1. **Hook System Behavior** - Input/output formats, blocking mechanisms, event types +2. **Environment Variables** - What each platform provides to hook scripts +3. **Quirks and Edge Cases** - Platform-specific behaviors discovered during development +4. **Learnings** - Insights gained from implementing and testing adapters + +## Adding Learnings + +**IMPORTANT**: As you work on platform-specific code, document learnings here! + +When you discover something about how a platform behaves that isn't obvious from official documentation, add it to the relevant platform's folder. 
Examples: + +- "Gemini CLI's AfterAgent hook doesn't receive transcript_path when the session was resumed" +- "Claude Code's Stop hook JSON must have `decision: block` exactly, not `deny`" +- "Exit code 2 blocks in both platforms but stderr handling differs" + +## Directory Structure + +``` +doc/platform/ +├── README.md # This file +├── claude/ +│ ├── hooks.md # Claude Code hooks system documentation +│ └── learnings.md # Discovered behaviors and quirks +└── gemini/ + ├── hooks.md # Gemini CLI hooks system documentation + └── learnings.md # Discovered behaviors and quirks +``` + +## Platform Comparison Summary + +| Feature | Claude Code | Gemini CLI | +|---------|-------------|------------| +| Event: After agent | `Stop` | `AfterAgent` | +| Event: Before tool | `PreToolUse` | `BeforeTool` | +| Event: Before prompt | `UserPromptSubmit` | `BeforeAgent` | +| Project dir env var | `CLAUDE_PROJECT_DIR` | `GEMINI_PROJECT_DIR` | +| Block exit code | `2` | `2` | +| Block decision | `"block"` | `"deny"` or `"block"` | +| Input format | JSON via stdin | JSON via stdin | +| Output format | JSON via stdout | JSON via stdout | + +## Related Files + +- `src/deepwork/core/adapters.py` - Platform adapter implementations +- `src/deepwork/hooks/` - Hook wrapper scripts +- `doc/platforms/` - External platform documentation (configuration, commands) diff --git a/doc/platform/claude/hooks.md b/doc/platform/claude/hooks.md new file mode 100644 index 0000000..6dbda02 --- /dev/null +++ b/doc/platform/claude/hooks.md @@ -0,0 +1,327 @@ + + +# Claude Code Hooks System + +## Overview + +Claude Code hooks are scripts that execute at specific points in Claude's workflow. They enable intercepting and controlling tool execution, validating user input, and performing custom actions. + +## Configuration + +Hooks are configured in JSON settings files with this precedence (lowest to highest): + +1. `~/.claude/settings.json` - User settings +2. `.claude/settings.json` - Project settings +3. 
`.claude/settings.local.json` - Local project settings (not committed) +4. Managed policy settings + +### Configuration Format + +```json +{ + "hooks": { + "EventName": [ + { + "matcher": "ToolPattern", + "hooks": [ + { + "type": "command", + "command": "your-command-here", + "timeout": 60 + } + ] + } + ] + } +} +``` + +### Configuration Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `matcher` | For tool events | Pattern to match tool names (regex, `*` wildcard) | +| `type` | Yes | `"command"` for bash or `"prompt"` for LLM evaluation | +| `command` | For type=command | Bash command to execute | +| `prompt` | For type=prompt | LLM prompt to evaluate | +| `timeout` | No | Timeout in seconds (default: 60) | + +## Hook Events + +### Tool-Related Events (require matcher) + +| Event | Description | Timing | +|-------|-------------|--------| +| `PreToolUse` | Before tool execution | Can block or modify | +| `PermissionRequest` | When permission dialog shown | Can auto-approve/deny | +| `PostToolUse` | After tool completes | Can add context | + +Common matchers: `Bash`, `Write`, `Edit`, `Read`, `WebFetch`, `Task`, `mcp__*` + +### Workflow Events (no matcher needed) + +| Event | Description | +|-------|-------------| +| `UserPromptSubmit` | When user submits a prompt | +| `Stop` | When main agent finishes responding | +| `SubagentStop` | When a subagent finishes | +| `PreCompact` | Before compact operation | +| `SessionStart` | When session starts/resumes | +| `SessionEnd` | When session ends | + +### Notification Events + +| Event | Matchers | +|-------|----------| +| `Notification` | `permission_prompt`, `idle_prompt`, `auth_success`, `elicitation_dialog` | + +## Input Schema (stdin) + +All hooks receive JSON via stdin: + +```json +{ + "session_id": "abc123", + "transcript_path": "/path/to/transcript.jsonl", + "cwd": "/current/working/directory", + "permission_mode": "default", + "hook_event_name": "Stop", + "tool_name": "ToolName", + 
"tool_input": { /* tool-specific fields */ }, + "tool_use_id": "toolu_..." +} +``` + +### Common Input Fields + +| Field | Type | Description | +|-------|------|-------------| +| `session_id` | string | Current session identifier | +| `transcript_path` | string | Path to session transcript JSONL | +| `cwd` | string | Current working directory | +| `permission_mode` | string | One of: `default`, `plan`, `acceptEdits`, `dontAsk`, `bypassPermissions` | +| `hook_event_name` | string | The event that triggered this hook | +| `tool_name` | string | Name of the tool (for tool events) | +| `tool_input` | object | Tool-specific input parameters | +| `tool_use_id` | string | Unique identifier for this tool use | + +### Tool-Specific Input Examples + +**Bash Tool:** +```json +{ + "tool_name": "Bash", + "tool_input": { + "command": "npm test", + "description": "Run tests", + "timeout": 120000 + } +} +``` + +**Write Tool:** +```json +{ + "tool_name": "Write", + "tool_input": { + "file_path": "/path/to/file.txt", + "content": "file content" + } +} +``` + +**Edit Tool:** +```json +{ + "tool_name": "Edit", + "tool_input": { + "file_path": "/path/to/file.txt", + "old_string": "original text", + "new_string": "replacement text", + "replace_all": false + } +} +``` + +**Read Tool:** +```json +{ + "tool_name": "Read", + "tool_input": { + "file_path": "/path/to/file.txt", + "offset": 0, + "limit": 100 + } +} +``` + +## Output Schema (stdout) + +### Exit Codes + +| Code | Meaning | Behavior | +|------|---------|----------| +| `0` | Success | stdout parsed as JSON | +| `2` | Blocking error | stderr shown as error, operation blocked | +| Other | Warning | stderr logged, operation continues | + +### Common Output Fields + +```json +{ + "continue": true, + "stopReason": "Message shown when continue is false", + "suppressOutput": true, + "systemMessage": "Optional warning message", + "decision": "block", + "reason": "Explanation for blocking", + "hookSpecificOutput": { + "hookEventName": 
"EventName" + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `continue` | boolean | `false` terminates agent loop | +| `stopReason` | string | Message when stopping | +| `suppressOutput` | boolean | Hide from transcript | +| `systemMessage` | string | Warning to display | +| `decision` | string | `"block"` to prevent action | +| `reason` | string | Explanation for decision | + +### Event-Specific Output + +#### Stop / SubagentStop + +Block the agent from stopping: + +```json +{ + "decision": "block", + "reason": "You must complete task X before stopping" +} +``` + +Allow stopping (default): +```json +{} +``` + +#### UserPromptSubmit + +Block the prompt: +```json +{ + "decision": "block", + "reason": "Cannot process this type of request" +} +``` + +Add context (text output): +```bash +echo "Current time: $(date)" +exit 0 +``` + +#### PreToolUse + +```json +{ + "hookSpecificOutput": { + "hookEventName": "PreToolUse", + "permissionDecision": "allow", + "permissionDecisionReason": "Auto-approved", + "updatedInput": { + "field_to_modify": "new value" + } + } +} +``` + +| permissionDecision | Effect | +|--------------------|--------| +| `"allow"` | Bypass permission, execute tool | +| `"deny"` | Block tool execution | +| `"ask"` | Show permission dialog | + +#### PermissionRequest + +```json +{ + "hookSpecificOutput": { + "hookEventName": "PermissionRequest", + "decision": { + "behavior": "allow", + "updatedInput": {}, + "message": "Auto-approved", + "interrupt": false + } + } +} +``` + +#### PostToolUse + +```json +{ + "decision": "block", + "reason": "Tool output indicates error", + "hookSpecificOutput": { + "hookEventName": "PostToolUse", + "additionalContext": "Additional info for Claude" + } +} +``` + +#### SessionStart + +```json +{ + "hookSpecificOutput": { + "hookEventName": "SessionStart", + "additionalContext": "Context to load at session start" + } +} +``` + +## Environment Variables + +| Variable | Availability | Description | 
+|----------|--------------|-------------| +| `CLAUDE_PROJECT_DIR` | All hooks | Absolute path to project root | +| `CLAUDE_ENV_FILE` | SessionStart only | File path for persisting env vars | +| `CLAUDE_CODE_REMOTE` | All hooks | `"true"` in web environment | + +### Persisting Environment Variables + +In SessionStart hooks only: + +```bash +#!/bin/bash +if [ -n "$CLAUDE_ENV_FILE" ]; then + echo 'export NODE_ENV=production' >> "$CLAUDE_ENV_FILE" +fi +exit 0 +``` + +## DeepWork Event Mapping + +| DeepWork Generic | Claude Code | +|------------------|-------------| +| `after_agent` | `Stop` | +| `before_tool` | `PreToolUse` | +| `before_prompt` | `UserPromptSubmit` | + +## Key Behaviors + +1. **Exit code 2** is the primary blocking mechanism +2. **JSON with `decision: "block"`** also blocks for Stop hooks +3. **stderr** on exit code 2 is shown to the agent +4. **stdout** on exit code 0 is parsed as JSON +5. **Plain text stdout** is added as context for some events +6. **Multiple hooks** matching the same event run in parallel +7. **Timeout** default is 60 seconds per hook diff --git a/doc/platform/claude/learnings.md b/doc/platform/claude/learnings.md new file mode 100644 index 0000000..08fa641 --- /dev/null +++ b/doc/platform/claude/learnings.md @@ -0,0 +1,57 @@ +# Claude Code Platform Learnings + +This document captures behaviors, quirks, and insights discovered while implementing DeepWork's Claude Code adapter. + +## Add Your Learnings Here + +When you discover something about Claude Code behavior that isn't obvious from documentation, add it to the appropriate section below. 
+ +--- + +## Hook System + +### Stop Hook Behavior + +- **Blocking requires `decision: "block"`** - Using `"deny"` does not work +- **Exit code 2 also blocks** - stderr message is shown to the agent +- **Empty JSON `{}` allows stopping** - No explicit allow needed +- **`reason` field is shown to Claude** - Use it to explain what needs to be done + +### Transcript Path + +- **transcript_path is JSONL format** - Each line is a separate JSON object +- **Assistant messages have nested structure** - Content is in `.message.content[].text` +- **Role is at top level** - Check `.role == "assistant"` to find assistant messages + +### Environment Variables + +- **CLAUDE_PROJECT_DIR is always set** - Reliable for finding project root +- **CLAUDE_ENV_FILE only in SessionStart** - Not available in other hooks + +## JSON Format Quirks + +### Input Parsing + +- **jq works well** - Standard JSON parsing, no special escaping needed +- **tool_input varies by tool** - Check tool_name to know the structure + +### Output Formatting + +- **Use heredocs for complex JSON** - Avoids shell escaping issues +- **Newlines in reason are preserved** - Can use multi-line explanations + +## Testing Notes + +### Local Testing + +- **PYTHONPATH must include src/** - For Python module imports to work +- **Working directory matters** - Hook scripts expect to be run from project root +- **Create .deepwork directory** - Required for prompt baseline tracking + +--- + +## Date Log + +| Date | Finding | Author | +|------|---------|--------| +| 2026-01-15 | Initial documentation created | Claude | diff --git a/doc/platform/gemini/hooks.md b/doc/platform/gemini/hooks.md new file mode 100644 index 0000000..b9103a6 --- /dev/null +++ b/doc/platform/gemini/hooks.md @@ -0,0 +1,446 @@ + + +# Gemini CLI Hooks System + +## Overview + +Gemini CLI hooks are scripts that execute at specific points in the agent's lifecycle. They enable intercepting and customizing behavior without modifying CLI source code. 
+ +**Note**: Hooks are marked as experimental in Gemini CLI. + +## Configuration + +Hooks are configured in `settings.json` at various levels: + +1. System defaults: `/etc/gemini-cli/system-defaults.json` +2. User settings: `~/.gemini/settings.json` +3. Project settings: `.gemini/settings.json` +4. System settings: `/etc/gemini-cli/settings.json` +5. Extension hooks + +### Configuration Format + +```json +{ + "hooks": { + "enabled": true, + "AfterAgent": [ + { + "matcher": "*", + "hooks": [ + { + "name": "policy-check", + "type": "command", + "command": ".gemini/hooks/policy_check.sh", + "timeout": 60000, + "description": "Evaluates DeepWork policies" + } + ] + } + ] + } +} +``` + +### Configuration Fields + +| Field | Required | Description | +|-------|----------|-------------| +| `name` | Recommended | Unique identifier for enable/disable commands | +| `type` | Yes | Hook type - currently only `"command"` supported | +| `command` | Yes | Path to script or command to execute | +| `description` | No | Human-readable description for `/hooks` panel | +| `timeout` | No | Timeout in milliseconds (default: 60000) | +| `matcher` | No | Pattern to filter when hook runs | + +### Matchers + +Matchers support: +- Exact match: `write_file` +- Regex: `write_file|replace` +- Wildcard: `*` + +## Hook Events + +### All Available Events + +| Event | Description | Timing | +|-------|-------------|--------| +| `SessionStart` | Session initialization | When CLI starts or resumes | +| `SessionEnd` | Session cleanup | When CLI exits | +| `BeforeAgent` | Before agent processes input | After user prompt, before planning | +| `AfterAgent` | After agent completes | Agent loop finished | +| `BeforeModel` | Before LLM request | Can modify request | +| `AfterModel` | After LLM response | Can modify response | +| `BeforeToolSelection` | Before tool selection | Can filter available tools | +| `BeforeTool` | Before tool execution | Can block or modify | +| `AfterTool` | After tool execution | 
Can add context | +| `PreCompress` | Before context compression | Triggered by auto or manual | +| `Notification` | Permission/notification events | Various UI events | + +### Event Comparison with Claude Code + +| DeepWork Generic | Gemini CLI | Claude Code | +|------------------|------------|-------------| +| `after_agent` | `AfterAgent` | `Stop` | +| `before_tool` | `BeforeTool` | `PreToolUse` | +| `before_prompt` | `BeforeAgent` | `UserPromptSubmit` | + +## Input Schema (stdin) + +All hooks receive JSON via stdin with common base fields plus event-specific fields. + +### Universal Base Fields + +```json +{ + "session_id": "abc123", + "transcript_path": "/path/to/transcript.json", + "cwd": "/current/working/directory", + "hook_event_name": "AfterAgent", + "timestamp": "2026-01-15T10:30:00Z" +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `session_id` | string | Current CLI session identifier | +| `transcript_path` | string | Path to session's JSON transcript | +| `cwd` | string | Current working directory | +| `hook_event_name` | string | Event name that fired this hook | +| `timestamp` | string | ISO 8601 timestamp | + +### Event-Specific Input Fields + +#### Tool Events (BeforeTool, AfterTool) + +```json +{ + "tool_name": "write_file", + "tool_input": { + "file_path": "/path/to/file.txt", + "content": "file content" + }, + "tool_response": "File written successfully", + "mcp_context": { + "server": "my-mcp-server" + } +} +``` + +| Field | Event | Description | +|-------|-------|-------------| +| `tool_name` | Both | Tool identifier (e.g., `write_file`) | +| `tool_input` | Both | Tool arguments | +| `tool_response` | AfterTool only | Output from execution | +| `mcp_context` | Optional | Server identity for MCP tools | + +#### Agent Events (BeforeAgent, AfterAgent) + +```json +{ + "prompt": "User's submitted prompt", + "prompt_response": "Final model response", + "stop_hook_active": false +} +``` + +| Field | Event | Description | 
+|-------|-------|-------------| +| `prompt` | Both | User's submitted prompt | +| `prompt_response` | AfterAgent only | Final model response | +| `stop_hook_active` | AfterAgent only | Whether stop hook is preventing exit | + +#### Model Events (BeforeModel, AfterModel) + +```json +{ + "llm_request": { + "model": "gemini-2.5-pro", + "messages": [ + {"role": "user", "content": "Hello"} + ], + "config": { + "temperature": 0.7, + "maxOutputTokens": 8192, + "topP": 0.95, + "topK": 40 + }, + "toolConfig": { + "functionCallingConfig": { + "mode": "AUTO", + "allowedFunctionNames": ["read_file", "write_file"] + } + } + }, + "llm_response": { + "text": "Hello! How can I help?", + "candidates": [ + { + "content": {"role": "model", "parts": []}, + "finishReason": "STOP", + "safetyRatings": [] + } + ], + "usageMetadata": { + "promptTokenCount": 100, + "candidatesTokenCount": 50 + } + } +} +``` + +#### Session Events + +```json +{ + "source": "startup", + "reason": "exit" +} +``` + +| Field | Event | Values | +|-------|-------|--------| +| `source` | SessionStart | `startup`, `resume`, `clear` | +| `reason` | SessionEnd | `exit`, `clear`, `logout`, `prompt_input_exit`, `other` | + +#### Notification Events + +```json +{ + "notification_type": "ToolPermission", + "message": "Allow write to file?", + "details": {} +} +``` + +## Output Schema (stdout) + +### Exit Codes + +| Code | Meaning | Behavior | +|------|---------|----------| +| `0` | Success | stdout parsed as JSON; non-JSON treated as systemMessage | +| `2` | Blocking error | Operation interrupted; stderr shown to agent | +| Other | Warning | Execution continues; stderr logged as warning | + +### Common Output Fields + +```json +{ + "decision": "allow", + "reason": "Explanation for decision", + "systemMessage": "Message displayed to user", + "continue": true, + "stopReason": "Message when stopping", + "suppressOutput": false, + "hookSpecificOutput": { + "hookEventName": "AfterAgent", + "additionalContext": "Extra context 
for agent" + } +} +``` + +| Field | Type | Description | +|-------|------|-------------| +| `decision` | string | `allow`, `deny`, `block`, `ask`, `approve` | +| `reason` | string | Explanation shown to agent on deny/block | +| `systemMessage` | string | Message displayed to user in terminal | +| `continue` | boolean | `false` terminates agent loop immediately | +| `stopReason` | string | User-facing message when stopping | +| `suppressOutput` | boolean | Hide execution from transcript | +| `hookSpecificOutput` | object | Event-specific data container | + +### Event-Specific Output + +#### AfterAgent (equivalent to Claude's Stop) + +Block the agent from completing: +```json +{ + "decision": "deny", + "reason": "Policy X requires attention before completing" +} +``` + +Or use `continue: false` to force stop: +```json +{ + "continue": false, + "stopReason": "Critical error detected" +} +``` + +Allow completion (default): +```json +{} +``` + +#### BeforeTool + +Block tool execution: +```json +{ + "decision": "deny", + "reason": "Security policy violation" +} +``` + +Allow and modify input: +```json +{ + "decision": "allow", + "hookSpecificOutput": { + "hookEventName": "BeforeTool", + "additionalContext": "Proceeding with modified parameters" + } +} +``` + +#### AfterTool + +Add context after tool execution: +```json +{ + "hookSpecificOutput": { + "hookEventName": "AfterTool", + "additionalContext": "Note: File was formatted after write" + } +} +``` + +#### BeforeAgent + +Block user prompt: +```json +{ + "decision": "deny", + "reason": "This type of request is not allowed" +} +``` + +Add context: +```json +{ + "hookSpecificOutput": { + "hookEventName": "BeforeAgent", + "additionalContext": "Current branch: main, Last commit: abc123" + } +} +``` + +#### BeforeModel + +Modify the LLM request: +```json +{ + "hookSpecificOutput": { + "hookEventName": "BeforeModel", + "llm_request": { + "messages": [ + {"role": "system", "content": "Additional instructions..."} + ] + } + } 
+} +``` + +Return synthetic response (skip model call): +```json +{ + "hookSpecificOutput": { + "hookEventName": "BeforeModel", + "llm_response": { + "text": "Cached response", + "candidates": [{"content": {"role": "model", "parts": []}}] + } + } +} +``` + +#### BeforeToolSelection + +Filter available tools: +```json +{ + "hookSpecificOutput": { + "hookEventName": "BeforeToolSelection", + "toolConfig": { + "functionCallingConfig": { + "mode": "ANY", + "allowedFunctionNames": ["read_file", "write_file", "shell"] + } + } + } +} +``` + +Or output comma-separated tool names: +```bash +echo "read_file,write_file,shell" +exit 0 +``` + +## Environment Variables + +| Variable | Description | +|----------|-------------| +| `GEMINI_PROJECT_DIR` | Absolute path to project root | + +## Blocking Mechanisms + +### Exit Code 2 + +The primary blocking mechanism. stderr content is shown to the agent. + +```bash +#!/bin/bash +echo "Security violation: API key detected in file" >&2 +exit 2 +``` + +### Decision Field + +For events that support it: + +```json +{ + "decision": "deny", + "reason": "Explanation for why action is blocked" +} +``` + +### Accepted Decision Values + +| Value | Effect | +|-------|--------| +| `allow` | Permit the action | +| `deny` | Block the action | +| `block` | Block the action (alias for deny) | +| `ask` | Prompt user for confirmation | +| `approve` | Auto-approve (for permission events) | + +## Performance Notes + +- Hooks add latency to the execution pipeline +- Default timeout is 60 seconds (60000ms) +- Hooks exceeding timeout are terminated and logged as warnings +- Set appropriate timeouts based on hook complexity + +## Migration from Claude Code + +Gemini CLI includes a migration utility: + +```bash +/hooks migrate +``` + +This converts: +- Event names (Stop → AfterAgent, PreToolUse → BeforeTool) +- Environment variables (CLAUDE_PROJECT_DIR → GEMINI_PROJECT_DIR) +- Tool names (Write → write_file, Bash → shell) diff --git 
a/doc/platform/gemini/learnings.md b/doc/platform/gemini/learnings.md new file mode 100644 index 0000000..9e5acc6 --- /dev/null +++ b/doc/platform/gemini/learnings.md @@ -0,0 +1,82 @@ +# Gemini CLI Platform Learnings + +This document captures behaviors, quirks, and insights discovered while implementing DeepWork's Gemini CLI adapter. + +## Add Your Learnings Here + +When you discover something about Gemini CLI behavior that isn't obvious from documentation, add it to the appropriate section below. + +--- + +## Hook System + +### AfterAgent vs Stop + +- **`AfterAgent` is the equivalent of Claude's `Stop`** - Fires when agent loop completes +- **`decision: "deny"` blocks completion** - Different from Claude's `"block"` +- **Both `deny` and `block` work** - Gemini accepts either value +- **`continue: false` is an alternative** - Forces immediate termination + +### Tool Name Differences + +- **Gemini uses snake_case tool names** - `write_file` not `Write` +- **Shell command is `shell`** - Not `Bash` +- **Read file is `read_file`** - Not `Read` + +### Transcript Format + +- **Transcript is JSON, not JSONL** - Different from Claude's line-delimited format +- **Structure differs from Claude** - Need to parse differently + +## Configuration Differences + +### Hooks in Settings + +- **Hooks are global only** - No per-command hooks in TOML files +- **settings.json controls all hooks** - Unlike Claude's frontmatter support +- **`enabled` flag controls hooks** - Can globally enable/disable + +### Environment Variables + +- **`GEMINI_PROJECT_DIR`** - Equivalent to `CLAUDE_PROJECT_DIR` +- **No `GEMINI_ENV_FILE`** - Cannot persist env vars like Claude's SessionStart + +## JSON Format Differences + +### Input Schema + +- **Includes `timestamp` field** - ISO 8601 format, not in Claude +- **`tool_response` in AfterTool** - Claude doesn't provide this directly +- **`stop_hook_active` in AfterAgent** - Indicates if another hook is blocking + +### Output Schema + +- 
**`hookSpecificOutput` is optional** - Can just return decision/reason +- **Plain text stdout becomes `systemMessage`** - When exit code is 0 +- **`additionalContext` field** - For injecting context into agent + +## Migration Notes + +### Converting Claude Hooks + +- **Use `/hooks migrate` command** - Built-in conversion utility +- **Event names change** - Stop→AfterAgent, PreToolUse→BeforeTool +- **Tool names change** - Need to update matchers +- **Decision values change** - `"block"` → `"deny"` + +### Wrapper Script Approach + +When supporting both platforms: + +1. Use platform-specific shell wrapper +2. Normalize input JSON +3. Run common Python logic +4. Denormalize output JSON + +--- + +## Date Log + +| Date | Finding | Author | +|------|---------|--------| +| 2026-01-15 | Initial documentation created from web research | Claude | diff --git a/doc/platforms/claude/cli_configuration.md b/doc/platforms/claude/cli_configuration.md new file mode 100644 index 0000000..722464f --- /dev/null +++ b/doc/platforms/claude/cli_configuration.md @@ -0,0 +1,260 @@ + + +# Claude Code CLI Configuration + +## Overview + +Claude Code is Anthropic's official CLI for Claude, providing an agentic coding assistant that runs in your terminal. It uses markdown-based slash commands and JSON-based configuration files. + +## Configuration Files + +Claude Code uses JSON-based settings with a hierarchical precedence system. 
+ +### File Locations + +Configuration is applied in this order (lowest to highest priority): + +| Priority | File Type | Path | +|----------|-----------|------| +| 1 | Default values | Hardcoded | +| 2 | User settings | `~/.claude/settings.json` | +| 3 | Project settings | `.claude/settings.json` | +| 4 | Local settings | `.claude/settings.local.json` | +| 5 | Managed policy | Enterprise policies | + +### Configuration Format + +The `settings.json` file is a single JSON object with one top-level key per configuration section: + +```json +{ + "permissions": { + "allow": ["Bash(npm test)", "Read"], + "deny": ["Bash(rm -rf)"] + }, + "hooks": { + "Stop": [...], + "PreToolUse": [...] + }, + "env": { + "NODE_ENV": "development" + } +} +``` + +Key configuration sections: + +- **permissions**: Tool access control (allow/deny patterns) +- **hooks**: Lifecycle hook configurations +- **env**: Environment variables for the session +- **apiKeyHelper**: Custom API key provider script + +## Custom Commands/Slash Commands + +Custom commands allow you to create reusable prompts and workflows. + +### Command Location + +Commands are discovered from: + +1. **Global commands**: `~/.claude/commands/` - Available across all projects +2. **Project commands**: `.claude/commands/` (relative to the project root) - Project-specific + +Project commands override identically-named global commands. + +### Command File Format + +Commands use **Markdown format** with `.md` extension and YAML frontmatter. 
+ +### Metadata/Frontmatter + +Commands support these frontmatter fields: + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `description` | No | String | One-line description shown in `/help` | +| `hooks` | No | Object | Lifecycle hooks for this command | + +```markdown +--- +description: Review code for security issues +hooks: + Stop: + - hooks: + - type: prompt + prompt: Verify all security issues are addressed +--- + +# security-review + +Review the following code for security vulnerabilities: + +$ARGUMENTS +``` + +### Argument Handling + +#### 1. Argument Injection with `$ARGUMENTS` + +The `$ARGUMENTS` placeholder is replaced with user-provided text: + +```markdown +--- +description: Explain a concept +--- + +Explain the following in simple terms: $ARGUMENTS +``` + +Usage: `/explain recursion` + +#### 2. File Content Injection + +Reference files by path in the command arguments: + +``` +/review src/main.js +``` + +Claude will read the file content automatically. + +## Command Discovery + +### Naming & Namespacing + +Command names derive from file paths relative to the commands directory. Dots create namespaced commands: + +| File Path | Command | +|-----------|---------| +| `~/.claude/commands/test.md` | `/test` | +| `.claude/commands/git.commit.md` | `/git.commit` | +| `.claude/commands/review.security.md` | `/review.security` | + +### Discovery Order + +1. Built-in commands (prefixed with `/`) +2. User global commands (`~/.claude/commands/`) +3. Project commands (`<project>/.claude/commands/`) + +Project commands take precedence over global commands with the same name. + +## Context Files (CLAUDE.md) + +Context files provide persistent instructions to the model. + +### Loading Hierarchy + +1. **Global context**: `~/.claude/CLAUDE.md` - Instructions for all projects +2. **Project root**: `./CLAUDE.md` - Project-specific context +3. 
**Subdirectories**: `./subdir/CLAUDE.md` - Directory-specific context + +The CLI concatenates all discovered files. Files in the current working directory and its ancestors are loaded. + +## Platform-Specific Features + +### Permission System + +Claude Code uses a permission system for tool access: + +```json +{ + "permissions": { + "allow": [ + "Bash(npm *)", + "Read", + "Write(*.md)" + ], + "deny": [ + "Bash(rm -rf *)" + ] + } +} +``` + +Patterns support: +- Exact matches: `"Bash(npm test)"` +- Wildcards: `"Bash(npm *)"` +- Tool-only: `"Read"` (allows all Read operations) + +### Session Management + +- `claude` - Start new session +- `claude --resume` - Resume a specific earlier session (by session ID or interactive picker) +- `claude --continue` - Continue the most recent conversation + +### Environment Variables + +Set via settings or `.env` file: + +```json +{ + "env": { + "NODE_ENV": "development", + "DEBUG": "true" + } +} +``` + +### MCP Server Integration + +Configure Model Context Protocol servers: + +```json +{ + "mcpServers": { + "my-server": { + "command": "npx", + "args": ["-y", "@my-org/mcp-server"] + } + } +} +``` + +## Key Differences from Gemini CLI + +| Feature | Claude Code | Gemini CLI | +|---------|-------------|------------| +| Command format | Markdown | TOML | +| Command directory | `.claude/commands/` | `.gemini/commands/` | +| Context file | `CLAUDE.md` | `GEMINI.md` | +| Config format | JSON (`settings.json`) | JSON (`settings.json`) | +| Namespacing | Dot (`.`) | Colon (`:`) | +| Command-level hooks | Yes | No | +| Argument placeholder | `$ARGUMENTS` | `{{args}}` | + +## DeepWork Integration + +DeepWork integrates with Claude Code by: + +1. **Installing commands** to `.claude/commands/` as markdown files +2. **Generating hooks** in command frontmatter (YAML format) +3. **Using dot namespacing** for job.step commands (e.g., `/my_job.step_one`) +4. 
**Syncing global hooks** to `.claude/settings.json` + +### Generated Command Structure + +```markdown +--- +description: Step description +hooks: + Stop: + - hooks: + - type: prompt + prompt: Quality validation prompt... +--- + +# job_name.step_id + +Step instructions... +``` + +## References + +- [Claude Code Documentation](https://docs.anthropic.com/en/docs/claude-code) +- [Claude Code Settings](https://docs.anthropic.com/en/docs/claude-code/settings) +- [Claude Code Slash Commands](https://docs.anthropic.com/en/docs/claude-code/slash-commands) +- DeepWork adapter: `src/deepwork/core/adapters.py` diff --git a/doc/platforms/claude/hooks_system.md b/doc/platforms/claude/hooks_system.md new file mode 100644 index 0000000..037e253 --- /dev/null +++ b/doc/platforms/claude/hooks_system.md @@ -0,0 +1,267 @@ + + +# Claude Code Hooks System (Command Definitions) + +## Overview + +Claude Code supports **command-level hooks** within slash command definitions. This is a key differentiator from platforms like Gemini CLI, where hooks are only configurable globally. + +Hooks in command definitions allow per-command quality validation, input preprocessing, and output verification. These hooks are defined in the YAML frontmatter of markdown command files. + +## Command-Level Hook Support + +Claude Code slash commands (defined in `.md` files) support hooks in the YAML frontmatter: + +- `hooks.Stop` - Triggered when the agent finishes responding +- `hooks.PreToolUse` - Triggered before a tool is used +- `hooks.UserPromptSubmit` - Triggered when the user submits a prompt + +### Command File Format + +```markdown +--- +description: Brief description of the command +hooks: + Stop: + - hooks: + - type: command + command: "./scripts/validate.sh" + - type: prompt + prompt: | + Validate the output meets criteria... +--- + +# Command Name + +Instructions for the command... 
+``` + +### Hook Configuration Fields + +| Field | Required | Type | Description | +|-------|----------|------|-------------| +| `hooks` | No | Object | Container for lifecycle hooks | +| `hooks.<EventName>` | No | Array | Array of hook configurations for the event | +| `type` | Yes | String | `"command"` for shell scripts, `"prompt"` for LLM evaluation | +| `command` | For type=command | String | Path to shell script to execute | +| `prompt` | For type=prompt | String | Prompt for LLM to evaluate | +| `timeout` | No | Number | Timeout in seconds (default: 60) | + +## Available Hook Events + +### Stop + +Triggered when the main agent finishes responding. Use for: +- Quality validation loops +- Output verification +- Completion criteria checking + +```yaml +hooks: + Stop: + - hooks: + - type: prompt + prompt: | + Verify all acceptance criteria are met. + If met, respond: {"ok": true} + If not met, respond: {"ok": false, "reason": "..."} +``` + +**Blocking behavior**: Return JSON with `{"decision": "block", "reason": "..."}` or exit code 2 with stderr message. + +### PreToolUse + +Triggered before the agent uses a tool. Use for: +- Tool input validation +- Security checks +- Pre-processing + +```yaml +hooks: + PreToolUse: + - matcher: "Bash|Write|Edit" + hooks: + - type: command + command: "./hooks/security-check.sh" +``` + +**Note**: PreToolUse hooks require a `matcher` field to specify which tools to intercept. + +**Blocking behavior**: Return `{"hookSpecificOutput": {"permissionDecision": "deny"}}` or exit code 2. + +### UserPromptSubmit + +Triggered when the user submits a prompt. Use for: +- Input validation +- Context injection +- Session initialization + +```yaml +hooks: + UserPromptSubmit: + - hooks: + - type: command + command: "./hooks/inject-context.sh" +``` + +**Blocking behavior**: Return `{"decision": "block", "reason": "..."}` or exit code 2. 
+ +## Hook Input/Output Contract + +### Input (stdin) + +All hooks receive JSON via stdin: + +```json +{ + "session_id": "abc123", + "transcript_path": "/path/to/transcript.jsonl", + "cwd": "/current/working/directory", + "permission_mode": "default", + "hook_event_name": "Stop", + "tool_name": "ToolName", + "tool_input": {} +} +``` + +### Output (stdout) + +Hooks return JSON via stdout: + +```json +{ + "ok": true +} +``` + +Or to block: + +```json +{ + "decision": "block", + "reason": "Explanation of what needs to be done" +} +``` + +### Exit Codes + +| Code | Meaning | Behavior | +|------|---------|----------| +| `0` | Success | stdout parsed as JSON | +| `2` | Blocking error | stderr shown, operation blocked | +| Other | Warning | stderr logged, continues | + +## DeepWork Generic Event Mapping + +DeepWork uses generic event names that map to Claude Code's platform-specific names: + +| DeepWork Generic | Claude Code Event | +|------------------|-------------------| +| `after_agent` | `Stop` | +| `before_tool` | `PreToolUse` | +| `before_prompt` | `UserPromptSubmit` | + +## Hook Types + +### Command Hooks + +Execute a shell script: + +```yaml +hooks: + Stop: + - hooks: + - type: command + command: ".deepwork/jobs/my_job/hooks/validate.sh" + timeout: 30 +``` + +### Prompt Hooks + +Use LLM evaluation: + +```yaml +hooks: + Stop: + - hooks: + - type: prompt + prompt: | + Evaluate whether the response meets all criteria. + Respond with {"ok": true} or {"ok": false, "reason": "..."} +``` + +## Quality Validation Loop Pattern + +Claude Code's Stop hooks enable iterative quality validation: + +1. Agent completes its response +2. Stop hook evaluates quality criteria +3. If criteria not met, agent continues working +4. Loop repeats until criteria are satisfied + +This pattern is unique to Claude Code among DeepWork-supported platforms. + +### Implementation Example + +```yaml +hooks: + Stop: + - hooks: + - type: prompt + prompt: | + ## Quality Criteria + 1. 
All tests pass + 2. Code follows style guide + 3. Documentation updated + + Review the conversation. If ALL criteria met and + <promise> tag present, respond: {"ok": true} + + Otherwise respond: {"ok": false, "reason": "..."} +``` + +## Comparison with Other Platforms + +| Feature | Claude Code | Gemini CLI | +|---------|-------------|------------| +| Command-level hooks | Yes | No | +| Global hooks | Yes | Yes | +| Hook types | `command`, `prompt` | `command` only | +| Quality validation loops | Yes (Stop hooks) | No (workarounds only) | +| Per-command customization | Full | None | + +## Implications for DeepWork + +Since Claude Code fully supports command-level hooks: + +1. **`stop_hooks` are fully supported** - Quality validation loops work as designed +2. **Job definitions** can use `hooks.after_agent` (maps to Stop) +3. **Platform adapter** implements all hook mappings +4. **Command templates** generate YAML frontmatter with hook configurations + +## Environment Variables + +Available to hook scripts: + +| Variable | Description | +|----------|-------------| +| `CLAUDE_PROJECT_DIR` | Absolute path to project root | +| `CLAUDE_ENV_FILE` | Path to env file (SessionStart only) | +| `CLAUDE_CODE_REMOTE` | `"true"` in web environment | + +## Limitations + +1. **Prompt hooks are evaluated by the model** - May have latency +2. **Timeout default is 60 seconds** - Long-running hooks may fail +3. **Multiple hooks run in parallel** - Cannot depend on order +4. 
**Transcript path is JSONL** - Requires line-by-line parsing + +## References + +- [Claude Code Hooks Documentation](https://docs.anthropic.com/en/docs/claude-code/hooks) +- [Claude Code Settings](https://docs.anthropic.com/en/docs/claude-code/settings) +- DeepWork adapter: `src/deepwork/core/adapters.py` diff --git a/pyproject.toml b/pyproject.toml index 06b5151..f3d38af 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "deepwork" -version = "0.2.0" +version = "0.3.0" description = "Framework for enabling AI agents to perform complex, multi-step work tasks" readme = "README.md" requires-python = ">=3.11" diff --git a/src/deepwork/core/adapters.py b/src/deepwork/core/adapters.py index 82afa86..9e8e27c 100644 --- a/src/deepwork/core/adapters.py +++ b/src/deepwork/core/adapters.py @@ -244,7 +244,9 @@ def _hook_already_present(hooks: list[dict[str, Any]], script_path: str) -> bool # Use an empty dict {} for platforms that don't support command-level hooks. # # Hook support reviewed: -# - Claude Code: Full support (Stop, PreToolUse, UserPromptSubmit) - 2025-01 +# - Claude Code: Full support (Stop, PreToolUse, UserPromptSubmit) - reviewed 2026-01-16 +# All three command lifecycle hooks are supported in markdown frontmatter. +# See: doc/platforms/claude/hooks_system.md # - Gemini CLI: No command-level hooks (reviewed 2026-01-12) # Gemini's hooks are global/project-level in settings.json, not per-command. # TOML command files only support 'prompt' and 'description' fields. diff --git a/src/deepwork/hooks/README.md b/src/deepwork/hooks/README.md new file mode 100644 index 0000000..7cf5155 --- /dev/null +++ b/src/deepwork/hooks/README.md @@ -0,0 +1,183 @@ +# DeepWork Hooks + +This directory contains the cross-platform hook system for DeepWork. Hooks allow validating and controlling AI agent behavior during execution. + +## Overview + +The hook system provides: + +1. 
**Platform-specific shell wrappers** that normalize input/output: + - `claude_hook.sh` - For Claude Code + - `gemini_hook.sh` - For Gemini CLI + +2. **Common Python module** (`wrapper.py`) that handles: + - Input normalization (event names, tool names, JSON structure) + - Output denormalization (decision values, JSON structure) + - Cross-platform compatibility + +3. **Hook implementations**: + - `policy_check.py` - Evaluates DeepWork policies on `after_agent` events + - `evaluate_policies.py` - Legacy Claude-specific policy evaluation + +## Usage + +### Registering Hooks + +#### Claude Code (`.claude/settings.json`) + +```json +{ + "hooks": { + "Stop": [ + { + "hooks": [ + { + "type": "command", + "command": "path/to/claude_hook.sh deepwork.hooks.policy_check" + } + ] + } + ] + } +} +``` + +#### Gemini CLI (`.gemini/settings.json`) + +```json +{ + "hooks": { + "AfterAgent": [ + { + "hooks": [ + { + "type": "command", + "command": "path/to/gemini_hook.sh deepwork.hooks.policy_check" + } + ] + } + ] + } +} +``` + +### Writing Custom Hooks + +1. 
Create a new Python module in `deepwork/hooks/`: + +```python +"""my_custom_hook.py - Example custom hook.""" + +import os +import sys + +from deepwork.hooks.wrapper import ( + HookInput, + HookOutput, + NormalizedEvent, + Platform, + run_hook, +) + + +def my_hook(hook_input: HookInput) -> HookOutput: + """Hook logic that works on any platform.""" + + # Check the normalized event type + if hook_input.event == NormalizedEvent.AFTER_AGENT: + # Example: block if certain condition is met + if some_condition(): + return HookOutput( + decision="block", + reason="Cannot complete until X is done" + ) + + elif hook_input.event == NormalizedEvent.BEFORE_TOOL: + # Example: validate tool usage + if hook_input.tool_name == "write_file": + file_path = hook_input.tool_input.get("file_path", "") + if "/secrets/" in file_path: + return HookOutput( + decision="deny", + reason="Cannot write to secrets directory" + ) + + # Allow the action + return HookOutput() + + +def main() -> None: + """Entry point called by shell wrappers.""" + platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude") + try: + platform = Platform(platform_str) + except ValueError: + platform = Platform.CLAUDE + + exit_code = run_hook(my_hook, platform) + sys.exit(exit_code) + + +if __name__ == "__main__": + main() +``` + +2. Register the hook using the appropriate shell wrapper. 
+ +## Event Mapping + +| DeepWork Normalized | Claude Code | Gemini CLI | +|---------------------|-------------|------------| +| `after_agent` | Stop | AfterAgent | +| `before_tool` | PreToolUse | BeforeTool | +| `after_tool` | PostToolUse | AfterTool | +| `before_prompt` | UserPromptSubmit | BeforeAgent | +| `session_start` | SessionStart | SessionStart | +| `session_end` | SessionEnd | SessionEnd | + +## Tool Name Mapping + +| Normalized | Claude Code | Gemini CLI | +|------------|-------------|------------| +| `write_file` | Write | write_file | +| `read_file` | Read | read_file | +| `edit_file` | Edit | edit_file | +| `shell` | Bash | shell | +| `glob` | Glob | glob | +| `grep` | Grep | grep | + +## Decision Values + +| Effect | Claude Code | Gemini CLI | +|--------|-------------|------------| +| Block action | `"block"` | `"deny"` (auto-converted) | +| Allow action | `"allow"` or `{}` | `"allow"` or `{}` | +| Deny tool use | `"deny"` | `"deny"` | + +The wrapper automatically converts `"block"` to `"deny"` for Gemini CLI. 
+ +## Exit Codes + +| Code | Meaning | +|------|---------| +| 0 | Success (allow action) | +| 2 | Blocking error (prevent action) | + +## Testing + +Run the hook wrapper tests: + +```bash +pytest tests/unit/test_hook_wrapper.py -v +pytest tests/shell_script_tests/test_hook_wrappers.py -v +``` + +## Files + +| File | Purpose | +|------|---------| +| `wrapper.py` | Cross-platform input/output normalization | +| `claude_hook.sh` | Shell wrapper for Claude Code | +| `gemini_hook.sh` | Shell wrapper for Gemini CLI | +| `policy_check.py` | Cross-platform policy evaluation hook | +| `evaluate_policies.py` | Legacy Claude-specific policy evaluation | diff --git a/src/deepwork/hooks/__init__.py b/src/deepwork/hooks/__init__.py index ed52e43..277080b 100644 --- a/src/deepwork/hooks/__init__.py +++ b/src/deepwork/hooks/__init__.py @@ -1 +1,78 @@ -"""DeepWork hooks package for policy enforcement and lifecycle events.""" +"""DeepWork hooks package for policy enforcement and lifecycle events. + +This package provides: + +1. Cross-platform hook wrapper system: + - wrapper.py: Normalizes input/output between Claude Code and Gemini CLI + - claude_hook.sh: Shell wrapper for Claude Code hooks + - gemini_hook.sh: Shell wrapper for Gemini CLI hooks + +2. 
Hook implementations: + - policy_check.py: Evaluates policies on after_agent events + - evaluate_policies.py: Legacy policy evaluation (Claude-specific) + +Usage with wrapper system: + # Register hook in .claude/settings.json: + { + "hooks": { + "Stop": [{ + "hooks": [{ + "type": "command", + "command": ".deepwork/hooks/claude_hook.sh deepwork.hooks.policy_check" + }] + }] + } + } + + # Register hook in .gemini/settings.json: + { + "hooks": { + "AfterAgent": [{ + "hooks": [{ + "type": "command", + "command": ".gemini/hooks/gemini_hook.sh deepwork.hooks.policy_check" + }] + }] + } + } + +Writing custom hooks: + from deepwork.hooks.wrapper import ( + HookInput, + HookOutput, + NormalizedEvent, + Platform, + run_hook, + ) + + def my_hook(input: HookInput) -> HookOutput: + if input.event == NormalizedEvent.AFTER_AGENT: + if should_block(): + return HookOutput(decision="block", reason="Complete X first") + return HookOutput() + + if __name__ == "__main__": + import os, sys + platform = Platform(os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude")) + sys.exit(run_hook(my_hook, platform)) +""" + +from deepwork.hooks.wrapper import ( + HookInput, + HookOutput, + NormalizedEvent, + Platform, + denormalize_output, + normalize_input, + run_hook, +) + +__all__ = [ + "HookInput", + "HookOutput", + "NormalizedEvent", + "Platform", + "normalize_input", + "denormalize_output", + "run_hook", +] diff --git a/src/deepwork/hooks/claude_hook.sh b/src/deepwork/hooks/claude_hook.sh new file mode 100755 index 0000000..b9c4fd3 --- /dev/null +++ b/src/deepwork/hooks/claude_hook.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# claude_hook.sh - Claude Code hook wrapper +# +# This script wraps Python hooks to work with Claude Code's hook system. +# It handles input/output normalization so Python hooks can be written once +# and work on any supported platform. 
+# +# Usage: +# claude_hook.sh +# +# Example: +# claude_hook.sh deepwork.hooks.policy_check +# +# The Python module should implement a main() function that: +# 1. Calls deepwork.hooks.wrapper.run_hook() with a hook function +# 2. The hook function receives HookInput and returns HookOutput +# +# Environment variables set by Claude Code: +# CLAUDE_PROJECT_DIR - Absolute path to project root +# +# Input (stdin): JSON from Claude Code hook system +# Output (stdout): JSON response for Claude Code +# Exit codes: +# 0 - Success (allow action) +# 2 - Blocking error (prevent action) + +set -e + +# Get the Python module to run +PYTHON_MODULE="${1:-}" + +if [ -z "${PYTHON_MODULE}" ]; then + echo "Usage: claude_hook.sh " >&2 + echo "Example: claude_hook.sh deepwork.hooks.policy_check" >&2 + exit 1 +fi + +# Read stdin into variable +HOOK_INPUT="" +if [ ! -t 0 ]; then + HOOK_INPUT=$(cat) +fi + +# Set platform environment variable for the Python module +export DEEPWORK_HOOK_PLATFORM="claude" + +# Run the Python module, passing the input via stdin +# The Python module is responsible for: +# 1. Reading stdin (normalized by wrapper) +# 2. Processing the hook logic +# 3. Writing JSON to stdout +echo "${HOOK_INPUT}" | python -m "${PYTHON_MODULE}" +exit_code=$? + +exit ${exit_code} diff --git a/src/deepwork/hooks/gemini_hook.sh b/src/deepwork/hooks/gemini_hook.sh new file mode 100755 index 0000000..add66df --- /dev/null +++ b/src/deepwork/hooks/gemini_hook.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# gemini_hook.sh - Gemini CLI hook wrapper +# +# This script wraps Python hooks to work with Gemini CLI's hook system. +# It handles input/output normalization so Python hooks can be written once +# and work on any supported platform. +# +# Usage: +# gemini_hook.sh +# +# Example: +# gemini_hook.sh deepwork.hooks.policy_check +# +# The Python module should implement a main() function that: +# 1. Calls deepwork.hooks.wrapper.run_hook() with a hook function +# 2. 
The hook function receives HookInput and returns HookOutput +# +# Environment variables set by Gemini CLI: +# GEMINI_PROJECT_DIR - Absolute path to project root +# +# Input (stdin): JSON from Gemini CLI hook system +# Output (stdout): JSON response for Gemini CLI +# Exit codes: +# 0 - Success (allow action) +# 2 - Blocking error (prevent action) + +set -e + +# Get the Python module to run +PYTHON_MODULE="${1:-}" + +if [ -z "${PYTHON_MODULE}" ]; then + echo "Usage: gemini_hook.sh " >&2 + echo "Example: gemini_hook.sh deepwork.hooks.policy_check" >&2 + exit 1 +fi + +# Read stdin into variable +HOOK_INPUT="" +if [ ! -t 0 ]; then + HOOK_INPUT=$(cat) +fi + +# Set platform environment variable for the Python module +export DEEPWORK_HOOK_PLATFORM="gemini" + +# Run the Python module, passing the input via stdin +# The Python module is responsible for: +# 1. Reading stdin (normalized by wrapper) +# 2. Processing the hook logic +# 3. Writing JSON to stdout +echo "${HOOK_INPUT}" | python -m "${PYTHON_MODULE}" +exit_code=$? + +exit ${exit_code} diff --git a/src/deepwork/hooks/policy_check.py b/src/deepwork/hooks/policy_check.py new file mode 100644 index 0000000..287852b --- /dev/null +++ b/src/deepwork/hooks/policy_check.py @@ -0,0 +1,347 @@ +""" +Policy check hook for DeepWork. + +This hook evaluates policies when the agent finishes (after_agent event). +It uses the wrapper system for cross-platform compatibility. 
+ +Usage (via shell wrapper): + claude_hook.sh deepwork.hooks.policy_check + gemini_hook.sh deepwork.hooks.policy_check + +Or directly with platform environment variable: + DEEPWORK_HOOK_PLATFORM=claude python -m deepwork.hooks.policy_check +""" + +from __future__ import annotations + +import json +import os +import re +import subprocess +import sys +from pathlib import Path + +from deepwork.core.policy_parser import ( + Policy, + PolicyParseError, + evaluate_policy, + parse_policy_file, +) +from deepwork.hooks.wrapper import ( + HookInput, + HookOutput, + NormalizedEvent, + Platform, + run_hook, +) + + +def get_default_branch() -> str: + """Get the default branch name (main or master).""" + try: + result = subprocess.run( + ["git", "symbolic-ref", "refs/remotes/origin/HEAD"], + capture_output=True, + text=True, + check=True, + ) + return result.stdout.strip().split("/")[-1] + except subprocess.CalledProcessError: + pass + + for branch in ["main", "master"]: + try: + subprocess.run( + ["git", "rev-parse", "--verify", f"origin/{branch}"], + capture_output=True, + check=True, + ) + return branch + except subprocess.CalledProcessError: + continue + + return "main" + + +def get_changed_files_base() -> list[str]: + """Get files changed relative to branch base.""" + default_branch = get_default_branch() + + try: + result = subprocess.run( + ["git", "merge-base", "HEAD", f"origin/{default_branch}"], + capture_output=True, + text=True, + check=True, + ) + merge_base = result.stdout.strip() + + subprocess.run(["git", "add", "-A"], capture_output=True, check=False) + + result = subprocess.run( + ["git", "diff", "--name-only", merge_base, "HEAD"], + capture_output=True, + text=True, + check=True, + ) + committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() + + result = subprocess.run( + ["git", "diff", "--name-only", "--cached"], + capture_output=True, + text=True, + check=False, + ) + staged_files = set(result.stdout.strip().split("\n")) 
if result.stdout.strip() else set() + + result = subprocess.run( + ["git", "ls-files", "--others", "--exclude-standard"], + capture_output=True, + text=True, + check=False, + ) + untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() + + all_files = committed_files | staged_files | untracked_files + return sorted([f for f in all_files if f]) + + except subprocess.CalledProcessError: + return [] + + +def get_changed_files_default_tip() -> list[str]: + """Get files changed compared to default branch tip.""" + default_branch = get_default_branch() + + try: + subprocess.run(["git", "add", "-A"], capture_output=True, check=False) + + result = subprocess.run( + ["git", "diff", "--name-only", f"origin/{default_branch}..HEAD"], + capture_output=True, + text=True, + check=True, + ) + committed_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() + + result = subprocess.run( + ["git", "diff", "--name-only", "--cached"], + capture_output=True, + text=True, + check=False, + ) + staged_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() + + result = subprocess.run( + ["git", "ls-files", "--others", "--exclude-standard"], + capture_output=True, + text=True, + check=False, + ) + untracked_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() + + all_files = committed_files | staged_files | untracked_files + return sorted([f for f in all_files if f]) + + except subprocess.CalledProcessError: + return [] + + +def get_changed_files_prompt() -> list[str]: + """Get files changed since prompt was submitted.""" + baseline_path = Path(".deepwork/.last_work_tree") + + try: + subprocess.run(["git", "add", "-A"], capture_output=True, check=False) + + result = subprocess.run( + ["git", "diff", "--name-only", "--cached"], + capture_output=True, + text=True, + check=False, + ) + current_files = set(result.stdout.strip().split("\n")) if result.stdout.strip() else set() 
+ current_files = {f for f in current_files if f} + + if baseline_path.exists(): + baseline_files = set(baseline_path.read_text().strip().split("\n")) + baseline_files = {f for f in baseline_files if f} + new_files = current_files - baseline_files + return sorted(new_files) + else: + return sorted(current_files) + + except (subprocess.CalledProcessError, OSError): + return [] + + +def get_changed_files_for_mode(mode: str) -> list[str]: + """Get changed files for a specific compare_to mode.""" + if mode == "base": + return get_changed_files_base() + elif mode == "default_tip": + return get_changed_files_default_tip() + elif mode == "prompt": + return get_changed_files_prompt() + else: + return get_changed_files_base() + + +def extract_promise_tags(text: str) -> set[str]: + """Extract policy names from tags in text.""" + pattern = r"✓\s*([^<]+)" + matches = re.findall(pattern, text, re.IGNORECASE | re.DOTALL) + return {m.strip() for m in matches} + + +def extract_conversation_from_transcript(transcript_path: str, platform: Platform) -> str: + """ + Extract conversation text from a transcript file. + + Handles platform-specific transcript formats. 
+ """ + if not transcript_path or not Path(transcript_path).exists(): + return "" + + try: + content = Path(transcript_path).read_text() + + if platform == Platform.CLAUDE: + # Claude uses JSONL format - each line is a JSON object + conversation_parts = [] + for line in content.strip().split("\n"): + if not line.strip(): + continue + try: + entry = json.loads(line) + if entry.get("role") == "assistant": + message_content = entry.get("message", {}).get("content", []) + for part in message_content: + if part.get("type") == "text": + conversation_parts.append(part.get("text", "")) + except json.JSONDecodeError: + continue + return "\n".join(conversation_parts) + + elif platform == Platform.GEMINI: + # Gemini uses JSON format + try: + data = json.loads(content) + # Extract text from messages + conversation_parts = [] + messages = data.get("messages", []) + for msg in messages: + if msg.get("role") == "model": + parts = msg.get("parts", []) + for part in parts: + if isinstance(part, dict) and "text" in part: + conversation_parts.append(part["text"]) + elif isinstance(part, str): + conversation_parts.append(part) + return "\n".join(conversation_parts) + except json.JSONDecodeError: + return "" + + return "" + except Exception: + return "" + + +def format_policy_message(policies: list[Policy]) -> str: + """Format triggered policies into a message for the agent.""" + lines = ["## DeepWork Policies Triggered", ""] + lines.append( + "Comply with the following policies. " + "To mark a policy as addressed, include `✓ Policy Name` " + "in your response (replace Policy Name with the actual policy name)." + ) + lines.append("") + + for policy in policies: + lines.append(f"### Policy: {policy.name}") + lines.append("") + lines.append(policy.instructions.strip()) + lines.append("") + + return "\n".join(lines) + + +def policy_check_hook(hook_input: HookInput) -> HookOutput: + """ + Main hook logic for policy evaluation. 
+ + This is called for after_agent events to check if policies need attention + before allowing the agent to complete. + """ + # Only process after_agent events + if hook_input.event != NormalizedEvent.AFTER_AGENT: + return HookOutput() + + # Check if policy file exists + policy_path = Path(".deepwork.policy.yml") + if not policy_path.exists(): + return HookOutput() + + # Extract conversation context from transcript + conversation_context = extract_conversation_from_transcript( + hook_input.transcript_path, hook_input.platform + ) + + # Extract promise tags + promised_policies = extract_promise_tags(conversation_context) + + # Parse policies + try: + policies = parse_policy_file(policy_path) + except PolicyParseError as e: + print(f"Error parsing policy file: {e}", file=sys.stderr) + return HookOutput() + + if not policies: + return HookOutput() + + # Group policies by compare_to mode + policies_by_mode: dict[str, list[Policy]] = {} + for policy in policies: + mode = policy.compare_to + if mode not in policies_by_mode: + policies_by_mode[mode] = [] + policies_by_mode[mode].append(policy) + + # Evaluate policies + fired_policies: list[Policy] = [] + for mode, mode_policies in policies_by_mode.items(): + changed_files = get_changed_files_for_mode(mode) + if not changed_files: + continue + + for policy in mode_policies: + if policy.name in promised_policies: + continue + if evaluate_policy(policy, changed_files): + fired_policies.append(policy) + + if not fired_policies: + return HookOutput() + + # Format message and return blocking response + message = format_policy_message(fired_policies) + return HookOutput(decision="block", reason=message) + + +def main() -> None: + """Entry point for the policy check hook.""" + # Determine platform from environment + platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude") + try: + platform = Platform(platform_str) + except ValueError: + platform = Platform.CLAUDE + + # Run the hook with the wrapper + exit_code = 
class Platform(str, Enum):
    """AI CLI platforms whose hook payloads this module can translate."""

    CLAUDE = "claude"
    GEMINI = "gemini"


class NormalizedEvent(str, Enum):
    """Platform-neutral hook event names that Python hooks are written against."""

    AFTER_AGENT = "after_agent"
    BEFORE_TOOL = "before_tool"
    BEFORE_PROMPT = "before_prompt"
    SESSION_START = "session_start"
    SESSION_END = "session_end"
    AFTER_TOOL = "after_tool"
    BEFORE_MODEL = "before_model"
    AFTER_MODEL = "after_model"


# Event name mappings from platform-specific to normalized
EVENT_TO_NORMALIZED: dict[Platform, dict[str, NormalizedEvent]] = {
    Platform.CLAUDE: {
        "Stop": NormalizedEvent.AFTER_AGENT,
        "SubagentStop": NormalizedEvent.AFTER_AGENT,
        "PreToolUse": NormalizedEvent.BEFORE_TOOL,
        "PostToolUse": NormalizedEvent.AFTER_TOOL,
        "UserPromptSubmit": NormalizedEvent.BEFORE_PROMPT,
        "SessionStart": NormalizedEvent.SESSION_START,
        "SessionEnd": NormalizedEvent.SESSION_END,
    },
    Platform.GEMINI: {
        "AfterAgent": NormalizedEvent.AFTER_AGENT,
        "BeforeTool": NormalizedEvent.BEFORE_TOOL,
        "AfterTool": NormalizedEvent.AFTER_TOOL,
        "BeforeAgent": NormalizedEvent.BEFORE_PROMPT,
        "SessionStart": NormalizedEvent.SESSION_START,
        "SessionEnd": NormalizedEvent.SESSION_END,
        "BeforeModel": NormalizedEvent.BEFORE_MODEL,
        "AfterModel": NormalizedEvent.AFTER_MODEL,
    },
}

# Normalized event to platform-specific event name.
# Kept explicit (not derived) because the Claude table above is many-to-one:
# both "Stop" and "SubagentStop" normalize to AFTER_AGENT, and "Stop" is the
# name chosen for the reverse direction.
NORMALIZED_TO_EVENT: dict[Platform, dict[NormalizedEvent, str]] = {
    Platform.CLAUDE: {
        NormalizedEvent.AFTER_AGENT: "Stop",
        NormalizedEvent.BEFORE_TOOL: "PreToolUse",
        NormalizedEvent.AFTER_TOOL: "PostToolUse",
        NormalizedEvent.BEFORE_PROMPT: "UserPromptSubmit",
        NormalizedEvent.SESSION_START: "SessionStart",
        NormalizedEvent.SESSION_END: "SessionEnd",
    },
    Platform.GEMINI: {
        NormalizedEvent.AFTER_AGENT: "AfterAgent",
        NormalizedEvent.BEFORE_TOOL: "BeforeTool",
        NormalizedEvent.AFTER_TOOL: "AfterTool",
        NormalizedEvent.BEFORE_PROMPT: "BeforeAgent",
        NormalizedEvent.SESSION_START: "SessionStart",
        NormalizedEvent.SESSION_END: "SessionEnd",
        NormalizedEvent.BEFORE_MODEL: "BeforeModel",
        NormalizedEvent.AFTER_MODEL: "AfterModel",
    },
}

# Tool name mappings from platform-specific to normalized (snake_case)
TOOL_TO_NORMALIZED: dict[Platform, dict[str, str]] = {
    Platform.CLAUDE: {
        "Write": "write_file",
        "Edit": "edit_file",
        "Read": "read_file",
        "Bash": "shell",
        "Glob": "glob",
        "Grep": "grep",
        "WebFetch": "web_fetch",
        "WebSearch": "web_search",
        "Task": "task",
    },
    Platform.GEMINI: {
        # Gemini already uses snake_case
        "write_file": "write_file",
        "edit_file": "edit_file",
        "read_file": "read_file",
        "shell": "shell",
        "glob": "glob",
        "grep": "grep",
        "web_fetch": "web_fetch",
        "web_search": "web_search",
    },
}

# Normalized tool names to platform-specific.
# Each platform's tool table above is one-to-one, so the reverse table is
# derived from it; the two directions can no longer drift apart.
NORMALIZED_TO_TOOL: dict[Platform, dict[str, str]] = {
    platform: {normalized: raw for raw, normalized in tools.items()}
    for platform, tools in TOOL_TO_NORMALIZED.items()
}


def _or_default(data: dict[str, Any], key: str, default: Any) -> Any:
    """Return ``data[key]``, or *default* when the key is absent or JSON ``null``."""
    value = data.get(key)
    return default if value is None else value


@dataclass
class HookInput:
    """Normalized hook input data."""

    platform: Platform
    event: NormalizedEvent
    session_id: str = ""
    transcript_path: str = ""
    cwd: str = ""
    tool_name: str = ""
    tool_input: dict[str, Any] = field(default_factory=dict)
    tool_response: str = ""
    prompt: str = ""
    raw_input: dict[str, Any] = field(default_factory=dict)

    @classmethod
    def from_dict(cls, data: dict[str, Any], platform: Platform) -> HookInput:
        """Create HookInput from raw platform-specific input.

        Args:
            data: Decoded hook payload. Missing keys and ``null`` values both
                fall back to the field defaults.
            platform: Platform whose event/tool vocabulary *data* uses.

        Returns:
            HookInput with the event and tool name translated through
            ``EVENT_TO_NORMALIZED`` / ``TOOL_TO_NORMALIZED``; *data* itself is
            kept unchanged in ``raw_input``.
        """
        # An absent or unrecognized event name is treated as AFTER_AGENT.
        # NOTE(review): this means payloads without "hook_event_name" reach
        # after-agent hooks — confirm that is the intended default.
        raw_event = str(_or_default(data, "hook_event_name", ""))
        event = EVENT_TO_NORMALIZED.get(platform, {}).get(raw_event, NormalizedEvent.AFTER_AGENT)

        # Tools without a table entry keep their own name, lower-cased.
        raw_tool = str(_or_default(data, "tool_name", ""))
        tool_name = TOOL_TO_NORMALIZED.get(platform, {}).get(raw_tool, raw_tool.lower())

        return cls(
            platform=platform,
            event=event,
            session_id=_or_default(data, "session_id", ""),
            transcript_path=_or_default(data, "transcript_path", ""),
            cwd=_or_default(data, "cwd", ""),
            tool_name=tool_name,
            tool_input=_or_default(data, "tool_input", {}),
            tool_response=_or_default(data, "tool_response", ""),
            prompt=_or_default(data, "prompt", ""),
            raw_input=data,
        )


@dataclass
class HookOutput:
    """Normalized hook output data."""

    decision: str = ""  # 'block', 'allow', 'deny', '' (empty = allow)
    reason: str = ""  # Explanation for blocking
    context: str = ""  # Additional context to add
    continue_loop: bool = True  # False to terminate agent loop
    stop_reason: str = ""  # Message when stopping
    suppress_output: bool = False  # Hide from transcript
    raw_output: dict[str, Any] = field(default_factory=dict)

    def to_dict(self, platform: Platform, event: NormalizedEvent) -> dict[str, Any]:
        """Convert to platform-specific output format.

        Only fields that differ from their defaults are emitted, so a default
        ``HookOutput()`` produces an empty dict.

        Args:
            platform: Target platform.
            event: Normalized event being answered; selects where ``context``
                is placed in the output.

        Returns:
            Dict ready to be serialized as the hook's JSON response.
        """
        result: dict[str, Any] = {}

        if self.decision:
            # Gemini prefers 'deny' over 'block'; every other value passes through.
            gemini_block = platform == Platform.GEMINI and self.decision == "block"
            result["decision"] = "deny" if gemini_block else self.decision

        if self.reason:
            result["reason"] = self.reason

        if not self.continue_loop:
            result["continue"] = False
            if self.stop_reason:
                result["stopReason"] = self.stop_reason

        if self.suppress_output:
            result["suppressOutput"] = True

        if self.context:
            if platform == Platform.CLAUDE and event != NormalizedEvent.SESSION_START:
                # Claude: every event except SessionStart carries context here.
                result["systemMessage"] = self.context
            else:
                # Claude SessionStart and all Gemini events.
                # Fall back to the event's value: str() on a str-mixin Enum
                # member gives "NormalizedEvent.X", not the event name.
                fallback = event.value if isinstance(event, Enum) else str(event)
                result["hookSpecificOutput"] = {
                    "hookEventName": NORMALIZED_TO_EVENT.get(platform, {}).get(event, fallback),
                    "additionalContext": self.context,
                }

        # Raw output only fills gaps; it never overrides a normalized field.
        for key, value in self.raw_output.items():
            result.setdefault(key, value)

        return result


def normalize_input(raw_json: str, platform: Platform) -> HookInput:
    """
    Parse raw JSON input and normalize it.

    Blank input, malformed JSON, and JSON whose top level is not an object
    are all treated as an empty payload rather than raising.

    Args:
        raw_json: JSON string from stdin
        platform: Source platform

    Returns:
        Normalized HookInput
    """
    try:
        data = json.loads(raw_json) if raw_json.strip() else {}
    except json.JSONDecodeError:
        data = {}

    # Valid JSON is not necessarily an object (e.g. "[]" or "3").
    if not isinstance(data, dict):
        data = {}

    return HookInput.from_dict(data, platform)


def denormalize_output(output: HookOutput, platform: Platform, event: NormalizedEvent) -> str:
    """
    Convert normalized output to platform-specific JSON.

    Args:
        output: Normalized HookOutput
        platform: Target platform
        event: The event being processed

    Returns:
        JSON string for stdout ("{}" when there is nothing to report)
    """
    return json.dumps(output.to_dict(platform, event))


def read_stdin() -> str:
    """Read all input from stdin.

    Returns:
        The full stdin contents, or "" when stdin is missing, is a terminal,
        or cannot be read.
    """
    stdin = sys.stdin
    if stdin is None:
        return ""
    try:
        if stdin.isatty():
            return ""
        return stdin.read()
    except (OSError, ValueError) as e:
        # Best effort: an unreadable stdin is treated as "no payload", but the
        # cause is reported instead of being swallowed silently.
        # ValueError covers closed streams and undecodable bytes.
        print(f"Hook warning: could not read stdin: {e}", file=sys.stderr)
        return ""


def write_stdout(data: str) -> None:
    """Write output to stdout."""
    print(data)


def run_hook(
    hook_fn: Callable[[HookInput], HookOutput],
    platform: Platform,
) -> int:
    """
    Run a hook function with normalized input/output.

    This is the main entry point for Python hooks. It:
    1. Reads raw input from stdin
    2. Normalizes the input
    3. Calls the hook function
    4. Denormalizes the output
    5. Writes to stdout

    A hook that raises, or that returns something other than a HookOutput,
    is reported on stderr and treated as "allow".

    Args:
        hook_fn: Function that takes HookInput and returns HookOutput
        platform: The platform calling this hook

    Returns:
        Exit code (0 for success, 2 for blocking)
    """
    hook_input = normalize_input(read_stdin(), platform)

    try:
        hook_output = hook_fn(hook_input)
    except Exception as e:
        # On error, allow the action but log
        print(f"Hook error: {e}", file=sys.stderr)
        hook_output = HookOutput()
    else:
        if not isinstance(hook_output, HookOutput):
            returned = type(hook_output).__name__
            print(f"Hook error: expected HookOutput, got {returned}", file=sys.stderr)
            hook_output = HookOutput()

    write_stdout(denormalize_output(hook_output, platform, hook_input.event))

    # NOTE(review): the block reason is only written to stdout as JSON. Confirm
    # the shell wrappers / platforms read it from there when the exit code is 2.
    if hook_output.decision in ("block", "deny"):
        return 2
    return 0
The `learn` command reflects on conversations where DeepWork jobs were run, identifies @@ -22,6 +22,8 @@ changelog: changes: "Added make_new_job.sh script and templates directory; updated instructions to reference templates instead of inline examples" - version: "0.4.0" changes: "Removed implementation_summary and learning_summary outputs; simplified step outputs" + - version: "0.5.0" + changes: "Standardized on 'ask structured questions' phrasing for user input; Updated quality criteria hooks to verify phrase usage; Added guidance in implement.md to use phrase in generated instructions" steps: - id: define @@ -39,13 +41,14 @@ steps: - prompt: | Verify the job.yml output meets ALL quality criteria before completing: - 1. **User Understanding**: Did you fully understand the user's workflow through interactive Q&A? - 2. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? - 3. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? - 4. **Concise Summary**: Is the summary under 200 characters and descriptive? - 5. **Rich Description**: Does the description provide enough context for future refinement? - 6. **Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? - 7. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? + 1. **User Understanding**: Did you fully understand the user's workflow by asking structured questions? + 2. **Structured Questions Used**: Did you ask structured questions (using the AskUserQuestion tool) to gather user input? + 3. **Clear Inputs/Outputs**: Does every step have clearly defined inputs and outputs? + 4. **Logical Dependencies**: Do step dependencies make sense and avoid circular references? + 5. **Concise Summary**: Is the summary under 200 characters and descriptive? + 6. **Rich Description**: Does the description provide enough context for future refinement? + 7. 
**Valid Schema**: Does the job.yml follow the required schema (name, version, summary, steps)? + 8. **File Created**: Has the job.yml file been created in `.deepwork/jobs/[job_name]/job.yml`? If ANY criterion is not met, continue working to address it. If ALL criteria are satisfied, include `✓ Quality Criteria Met` in your response. @@ -71,9 +74,10 @@ steps: 3. **Specific & Actionable**: Are instructions tailored to each step's purpose, not generic? 4. **Output Examples**: Does each instruction file show what good output looks like? 5. **Quality Criteria**: Does each instruction file define quality criteria for its outputs? - 6. **Sync Complete**: Has `deepwork sync` been run successfully? - 7. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? - 8. **Policies Considered**: Have you thought about whether policies would benefit this job? + 6. **Ask Structured Questions**: Do step instructions that gather user input explicitly use the phrase "ask structured questions"? + 7. **Sync Complete**: Has `deepwork sync` been run successfully? + 8. **Commands Available**: Are the slash-commands generated in `.claude/commands/`? + 9. **Policies Considered**: Have you thought about whether policies would benefit this job? - If relevant policies were identified, did you explain them and offer to run `/deepwork_policy.define`? - Not every job needs policies - only suggest when genuinely helpful. diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md index 0a79561..2b0b19f 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/define.md @@ -6,13 +6,15 @@ Create a `job.yml` specification file that defines the structure of a new DeepWo ## Task -Guide the user through defining a job specification by asking clarifying questions. 
**Do not attempt to create the specification without first fully understanding the user's needs.** +Guide the user through defining a job specification by asking structured questions. **Do not attempt to create the specification without first fully understanding the user's needs.** + +**Important**: Use the AskUserQuestion tool to ask structured questions when gathering information from the user. This provides a better user experience with clear options and guided choices. The output of this step is **only** the `job.yml` file - a complete specification of the workflow. The actual step instruction files will be created in the next step (`implement`). ### Step 1: Understand the Job Purpose -Start by asking questions to understand what the user wants to accomplish: +Start by asking structured questions to understand what the user wants to accomplish: 1. **What is the overall goal of this workflow?** - What complex task are they trying to accomplish? @@ -31,7 +33,7 @@ Start by asking questions to understand what the user wants to accomplish: ### Step 2: Define Each Step -For each major phase they mentioned, ask detailed questions: +For each major phase they mentioned, ask structured questions to gather details: 1. **Step Purpose** - What exactly does this step accomplish? @@ -92,7 +94,7 @@ After gathering information about all steps: For each step, consider whether it would benefit from **quality validation loops**. Stop hooks allow the AI agent to iteratively refine its work until quality criteria are met. -**Ask the user about quality validation:** +**Ask structured questions about quality validation:** - "Are there specific quality criteria that must be met for this step?" - "Would you like the agent to validate its work before completing?" - "What would make you send the work back for revision?" @@ -281,11 +283,11 @@ Run `/deepwork_jobs.implement` to generate the instruction files for each step b ## Important Guidelines 1. 
**Focus on specification only** - Don't create instruction files yet -2. **Ask clarifying questions** - Never skip the discovery phase +2. **Ask structured questions** - Never skip the discovery phase; use the AskUserQuestion tool 3. **Rich context in description** - This helps with future refinement 4. **Validate understanding** - Summarize and confirm before creating 5. **Use examples** - Help users understand what good specifications look like -6. **Understand file organization** - Always ask where outputs should be saved and if subdirectories are needed +6. **Understand file organization** - Always ask structured questions about where outputs should be saved and if subdirectories are needed ## Validation Rules @@ -319,6 +321,7 @@ After creating the file: ## Quality Criteria +- Asked structured questions to fully understand user requirements - User fully understands what job they're creating - All steps have clear inputs and outputs - Dependencies make logical sense diff --git a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md index 05967b6..a3a790f 100644 --- a/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md +++ b/src/deepwork/standard_jobs/deepwork_jobs/steps/implement.md @@ -69,6 +69,7 @@ For each step in the job.yml, create a comprehensive instruction file at `.deepw 4. **Explain the "why"** - Help the user understand the step's role in the workflow 5. **Quality over quantity** - Detailed, actionable instructions are better than vague ones 6. **Align with stop hooks** - If the step has `stop_hooks` defined, ensure the quality criteria in the instruction file match the validation criteria in the hooks +7. **Ask structured questions** - When a step has user inputs, the instructions MUST explicitly tell the agent to "ask structured questions" using the AskUserQuestion tool to gather that information. 
Never use generic phrasing like "ask the user" - always use "ask structured questions" ### Handling Stop Hooks @@ -231,6 +232,7 @@ Before marking this step complete, ensure: - Instructions are specific and actionable - Output examples are provided in each instruction file - Quality criteria defined for each step +- Steps with user inputs explicitly use "ask structured questions" phrasing - Sync completed successfully - Commands available for use - Thoughtfully considered relevant policies for the job domain diff --git a/src/deepwork/standard_jobs/deepwork_policy/job.yml b/src/deepwork/standard_jobs/deepwork_policy/job.yml index 0aacc87..777894e 100644 --- a/src/deepwork/standard_jobs/deepwork_policy/job.yml +++ b/src/deepwork/standard_jobs/deepwork_policy/job.yml @@ -1,5 +1,5 @@ name: deepwork_policy -version: "0.1.0" +version: "0.2.0" summary: "Policy enforcement for AI agent sessions" description: | Manages policies that automatically trigger when certain files change during an AI agent session. @@ -21,6 +21,8 @@ description: | changelog: - version: "0.1.0" changes: "Initial version" + - version: "0.2.0" + changes: "Standardized on 'ask structured questions' phrasing for user input" steps: - id: define diff --git a/src/deepwork/standard_jobs/deepwork_policy/steps/define.md b/src/deepwork/standard_jobs/deepwork_policy/steps/define.md index 85c2b63..302eda7 100644 --- a/src/deepwork/standard_jobs/deepwork_policy/steps/define.md +++ b/src/deepwork/standard_jobs/deepwork_policy/steps/define.md @@ -6,11 +6,13 @@ Create or update policy entries in the `.deepwork.policy.yml` file to enforce te ## Task -Guide the user through defining a new policy by asking clarifying questions. **Do not create the policy without first understanding what they want to enforce.** +Guide the user through defining a new policy by asking structured questions. 
"""Tests for the platform hook wrapper shell scripts.

These tests verify that claude_hook.sh and gemini_hook.sh correctly
invoke Python hooks and handle input/output.
"""

import json
import os
import subprocess
from pathlib import Path

import pytest


@pytest.fixture
def hooks_dir() -> Path:
    """Return the path to the hooks directory."""
    return Path(__file__).parent.parent.parent / "src" / "deepwork" / "hooks"


@pytest.fixture
def src_dir() -> Path:
    """Return the path to the src directory for PYTHONPATH."""
    return Path(__file__).parent.parent.parent / "src"


def run_hook_script(
    script_path: Path,
    python_module: str,
    hook_input: dict,
    platform: str,
    src_dir: Path,
    extra_pythonpath: tuple[Path, ...] = (),
) -> tuple[str, str, int]:
    """
    Run a hook wrapper script with the given input.

    Args:
        script_path: Path to the wrapper script (claude_hook.sh or gemini_hook.sh)
        python_module: Python module to invoke
        hook_input: JSON input to pass via stdin
        platform: Platform identifier for env var
        src_dir: Path to src directory for PYTHONPATH
        extra_pythonpath: Further directories appended to PYTHONPATH after
            src_dir (e.g. a tmp dir holding a throwaway hook module)

    Returns:
        Tuple of (stdout, stderr, return_code)
    """
    env = os.environ.copy()
    # os.pathsep keeps the search path valid on every OS.
    env["PYTHONPATH"] = os.pathsep.join(str(path) for path in (src_dir, *extra_pythonpath))
    env["DEEPWORK_HOOK_PLATFORM"] = platform

    result = subprocess.run(
        ["bash", str(script_path), python_module],
        capture_output=True,
        text=True,
        input=json.dumps(hook_input),
        env=env,
    )

    return result.stdout, result.stderr, result.returncode


def run_script_without_module(script_path: Path, src_dir: Path) -> tuple[str, str, int]:
    """
    Run a hook wrapper script with no module argument and no stdin payload.

    Args:
        script_path: Path to the wrapper script
        src_dir: Path to src directory for PYTHONPATH

    Returns:
        Tuple of (stdout, stderr, return_code)
    """
    env = os.environ.copy()
    env["PYTHONPATH"] = str(src_dir)

    result = subprocess.run(
        ["bash", str(script_path)],
        capture_output=True,
        text=True,
        env=env,
    )

    return result.stdout, result.stderr, result.returncode


class TestClaudeHookWrapper:
    """Tests for claude_hook.sh wrapper script."""

    def test_script_exists_and_is_executable(self, hooks_dir: Path) -> None:
        """Test that the Claude hook script exists and is executable."""
        script_path = hooks_dir / "claude_hook.sh"
        assert script_path.exists(), "claude_hook.sh should exist"
        assert os.access(script_path, os.X_OK), "claude_hook.sh should be executable"

    def test_usage_error_without_module(self, hooks_dir: Path, src_dir: Path) -> None:
        """Test that script shows usage error when no module provided."""
        _, stderr, returncode = run_script_without_module(hooks_dir / "claude_hook.sh", src_dir)

        assert returncode == 1
        assert "Usage:" in stderr

    def test_sets_platform_environment_variable(self, hooks_dir: Path) -> None:
        """Test that the script sets DEEPWORK_HOOK_PLATFORM correctly."""
        # Static check of the script text: it only proves the assignment is
        # present, not that it is exported to the Python process.
        content = (hooks_dir / "claude_hook.sh").read_text()
        assert 'DEEPWORK_HOOK_PLATFORM="claude"' in content


class TestGeminiHookWrapper:
    """Tests for gemini_hook.sh wrapper script."""

    def test_script_exists_and_is_executable(self, hooks_dir: Path) -> None:
        """Test that the Gemini hook script exists and is executable."""
        script_path = hooks_dir / "gemini_hook.sh"
        assert script_path.exists(), "gemini_hook.sh should exist"
        assert os.access(script_path, os.X_OK), "gemini_hook.sh should be executable"

    def test_usage_error_without_module(self, hooks_dir: Path, src_dir: Path) -> None:
        """Test that script shows usage error when no module provided."""
        _, stderr, returncode = run_script_without_module(hooks_dir / "gemini_hook.sh", src_dir)

        assert returncode == 1
        assert "Usage:" in stderr

    def test_sets_platform_environment_variable(self, hooks_dir: Path) -> None:
        """Test that the script sets DEEPWORK_HOOK_PLATFORM correctly."""
        # Static check of the script text, mirroring the Claude test.
        content = (hooks_dir / "gemini_hook.sh").read_text()
        assert 'DEEPWORK_HOOK_PLATFORM="gemini"' in content


class TestHookWrapperIntegration:
    """Integration tests for hook wrappers with actual Python hooks."""

    @pytest.fixture
    def test_hook_module(self, tmp_path: Path) -> tuple[Path, str]:
        """Create a temporary test hook module.

        Returns:
            Tuple of (directory to add to PYTHONPATH, dotted module name)
        """
        module_dir = tmp_path / "test_hooks"
        module_dir.mkdir(parents=True)

        # Create __init__.py
        (module_dir / "__init__.py").write_text("")

        # Create the hook module
        hook_code = '''
"""Test hook module."""
import os
import sys

from deepwork.hooks.wrapper import (
    HookInput,
    HookOutput,
    NormalizedEvent,
    Platform,
    run_hook,
)


def test_hook(hook_input: HookInput) -> HookOutput:
    """Test hook that blocks for after_agent events."""
    if hook_input.event == NormalizedEvent.AFTER_AGENT:
        return HookOutput(decision="block", reason="Test block reason")
    return HookOutput()


def main() -> None:
    platform_str = os.environ.get("DEEPWORK_HOOK_PLATFORM", "claude")
    try:
        platform = Platform(platform_str)
    except ValueError:
        platform = Platform.CLAUDE

    exit_code = run_hook(test_hook, platform)
    sys.exit(exit_code)


if __name__ == "__main__":
    main()
'''
        (module_dir / "test_hook.py").write_text(hook_code)

        return tmp_path, "test_hooks.test_hook"

    def test_claude_wrapper_with_stop_event(
        self,
        hooks_dir: Path,
        src_dir: Path,
        test_hook_module: tuple[Path, str],
    ) -> None:
        """Test Claude wrapper processes Stop event correctly."""
        tmp_path, module_name = test_hook_module

        hook_input = {
            "session_id": "test123",
            "hook_event_name": "Stop",
            "cwd": "/project",
        }

        stdout, stderr, returncode = run_hook_script(
            hooks_dir / "claude_hook.sh",
            module_name,
            hook_input,
            "claude",
            src_dir,
            extra_pythonpath=(tmp_path,),
        )

        assert returncode == 2, f"Expected exit code 2 for blocking. stderr: {stderr}"

        output = json.loads(stdout.strip())
        assert output["decision"] == "block"
        assert "Test block reason" in output["reason"]

    def test_gemini_wrapper_with_afteragent_event(
        self,
        hooks_dir: Path,
        src_dir: Path,
        test_hook_module: tuple[Path, str],
    ) -> None:
        """Test Gemini wrapper processes AfterAgent event correctly."""
        tmp_path, module_name = test_hook_module

        hook_input = {
            "session_id": "test456",
            "hook_event_name": "AfterAgent",
            "cwd": "/project",
        }

        stdout, stderr, returncode = run_hook_script(
            hooks_dir / "gemini_hook.sh",
            module_name,
            hook_input,
            "gemini",
            src_dir,
            extra_pythonpath=(tmp_path,),
        )

        assert returncode == 2, f"Expected exit code 2 for blocking. stderr: {stderr}"

        output = json.loads(stdout.strip())
        # Gemini should get "deny" instead of "block"
        assert output["decision"] == "deny"
        assert "Test block reason" in output["reason"]

    def test_non_blocking_event(
        self,
        hooks_dir: Path,
        src_dir: Path,
        test_hook_module: tuple[Path, str],
    ) -> None:
        """Test that non-blocking events return exit code 0."""
        tmp_path, module_name = test_hook_module

        # SessionStart is not blocked by the test hook
        hook_input = {
            "session_id": "test789",
            "hook_event_name": "SessionStart",
            "cwd": "/project",
        }

        stdout, stderr, returncode = run_hook_script(
            hooks_dir / "claude_hook.sh",
            module_name,
            hook_input,
            "claude",
            src_dir,
            extra_pythonpath=(tmp_path,),
        )

        assert returncode == 0, f"Expected exit code 0. stderr: {stderr}"
        output = json.loads(stdout.strip())
        assert output == {} or output.get("decision", "") not in ("block", "deny")


class TestPolicyCheckHook:
    """Tests for the policy_check hook module."""

    def test_module_imports(self) -> None:
        """Test that the policy_check module can be imported."""
        from deepwork.hooks import policy_check

        assert hasattr(policy_check, "main")
        assert hasattr(policy_check, "policy_check_hook")

    def test_hook_function_returns_output(self) -> None:
        """Test that policy_check_hook returns a HookOutput."""
        from deepwork.hooks.policy_check import policy_check_hook
        from deepwork.hooks.wrapper import HookInput, HookOutput, NormalizedEvent, Platform

        # Create a minimal hook input
        hook_input = HookInput(
            platform=Platform.CLAUDE,
            event=NormalizedEvent.BEFORE_PROMPT,  # Not after_agent, so no blocking
            session_id="test",
        )

        output = policy_check_hook(hook_input)

        assert isinstance(output, HookOutput)
        # Should not block for before_prompt event
        assert output.decision != "block"
+""" + +import json + +from deepwork.hooks.wrapper import ( + EVENT_TO_NORMALIZED, + NORMALIZED_TO_EVENT, + TOOL_TO_NORMALIZED, + HookInput, + HookOutput, + NormalizedEvent, + Platform, + denormalize_output, + normalize_input, +) + + +class TestHookInput: + """Tests for HookInput normalization.""" + + def test_from_claude_stop_event(self) -> None: + """Test normalizing Claude Stop event.""" + raw_data = { + "session_id": "sess123", + "transcript_path": "/path/to/transcript.jsonl", + "cwd": "/project", + "hook_event_name": "Stop", + "tool_name": "", + "tool_input": {}, + } + + hook_input = HookInput.from_dict(raw_data, Platform.CLAUDE) + + assert hook_input.platform == Platform.CLAUDE + assert hook_input.event == NormalizedEvent.AFTER_AGENT + assert hook_input.session_id == "sess123" + assert hook_input.transcript_path == "/path/to/transcript.jsonl" + assert hook_input.cwd == "/project" + assert hook_input.raw_input == raw_data + + def test_from_gemini_after_agent_event(self) -> None: + """Test normalizing Gemini AfterAgent event.""" + raw_data = { + "session_id": "sess456", + "transcript_path": "/path/to/transcript.json", + "cwd": "/project", + "hook_event_name": "AfterAgent", + "timestamp": "2026-01-15T10:00:00Z", + } + + hook_input = HookInput.from_dict(raw_data, Platform.GEMINI) + + assert hook_input.platform == Platform.GEMINI + assert hook_input.event == NormalizedEvent.AFTER_AGENT + assert hook_input.session_id == "sess456" + + def test_from_claude_pretooluse_event(self) -> None: + """Test normalizing Claude PreToolUse event.""" + raw_data = { + "session_id": "sess123", + "hook_event_name": "PreToolUse", + "tool_name": "Write", + "tool_input": { + "file_path": "/path/to/file.txt", + "content": "hello world", + }, + } + + hook_input = HookInput.from_dict(raw_data, Platform.CLAUDE) + + assert hook_input.event == NormalizedEvent.BEFORE_TOOL + assert hook_input.tool_name == "write_file" + assert hook_input.tool_input["file_path"] == "/path/to/file.txt" + + def 
test_from_gemini_beforetool_event(self) -> None: + """Test normalizing Gemini BeforeTool event.""" + raw_data = { + "session_id": "sess456", + "hook_event_name": "BeforeTool", + "tool_name": "write_file", + "tool_input": { + "file_path": "/path/to/file.txt", + "content": "hello world", + }, + } + + hook_input = HookInput.from_dict(raw_data, Platform.GEMINI) + + assert hook_input.event == NormalizedEvent.BEFORE_TOOL + assert hook_input.tool_name == "write_file" + + def test_tool_name_normalization_claude(self) -> None: + """Test Claude tool names are normalized to snake_case.""" + test_cases = [ + ("Write", "write_file"), + ("Read", "read_file"), + ("Edit", "edit_file"), + ("Bash", "shell"), + ("Glob", "glob"), + ("Grep", "grep"), + ] + + for claude_name, expected in test_cases: + raw_data = { + "hook_event_name": "PreToolUse", + "tool_name": claude_name, + } + hook_input = HookInput.from_dict(raw_data, Platform.CLAUDE) + assert hook_input.tool_name == expected, f"Expected {expected} for {claude_name}" + + def test_event_normalization_claude(self) -> None: + """Test all Claude events are normalized correctly.""" + test_cases = [ + ("Stop", NormalizedEvent.AFTER_AGENT), + ("SubagentStop", NormalizedEvent.AFTER_AGENT), + ("PreToolUse", NormalizedEvent.BEFORE_TOOL), + ("PostToolUse", NormalizedEvent.AFTER_TOOL), + ("UserPromptSubmit", NormalizedEvent.BEFORE_PROMPT), + ("SessionStart", NormalizedEvent.SESSION_START), + ("SessionEnd", NormalizedEvent.SESSION_END), + ] + + for claude_event, expected in test_cases: + raw_data = {"hook_event_name": claude_event} + hook_input = HookInput.from_dict(raw_data, Platform.CLAUDE) + assert hook_input.event == expected, f"Expected {expected} for {claude_event}" + + def test_event_normalization_gemini(self) -> None: + """Test all Gemini events are normalized correctly.""" + test_cases = [ + ("AfterAgent", NormalizedEvent.AFTER_AGENT), + ("BeforeTool", NormalizedEvent.BEFORE_TOOL), + ("AfterTool", NormalizedEvent.AFTER_TOOL), + 
("BeforeAgent", NormalizedEvent.BEFORE_PROMPT), + ("SessionStart", NormalizedEvent.SESSION_START), + ("SessionEnd", NormalizedEvent.SESSION_END), + ("BeforeModel", NormalizedEvent.BEFORE_MODEL), + ("AfterModel", NormalizedEvent.AFTER_MODEL), + ] + + for gemini_event, expected in test_cases: + raw_data = {"hook_event_name": gemini_event} + hook_input = HookInput.from_dict(raw_data, Platform.GEMINI) + assert hook_input.event == expected, f"Expected {expected} for {gemini_event}" + + def test_empty_input(self) -> None: + """Test handling of empty input.""" + hook_input = HookInput.from_dict({}, Platform.CLAUDE) + + assert hook_input.session_id == "" + assert hook_input.transcript_path == "" + assert hook_input.cwd == "" + assert hook_input.tool_name == "" + + +class TestHookOutput: + """Tests for HookOutput denormalization.""" + + def test_empty_output_produces_empty_json(self) -> None: + """Test that empty HookOutput produces empty dict.""" + output = HookOutput() + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.AFTER_AGENT) + + assert result == {} + + def test_block_decision_claude(self) -> None: + """Test blocking output for Claude.""" + output = HookOutput(decision="block", reason="Must complete X first") + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.AFTER_AGENT) + + assert result["decision"] == "block" + assert result["reason"] == "Must complete X first" + + def test_block_decision_gemini_converts_to_deny(self) -> None: + """Test that 'block' is converted to 'deny' for Gemini.""" + output = HookOutput(decision="block", reason="Must complete X first") + result = output.to_dict(Platform.GEMINI, NormalizedEvent.AFTER_AGENT) + + assert result["decision"] == "deny" + assert result["reason"] == "Must complete X first" + + def test_deny_decision_stays_deny(self) -> None: + """Test that 'deny' stays as 'deny' on both platforms.""" + output = HookOutput(decision="deny", reason="Not allowed") + + claude_result = output.to_dict(Platform.CLAUDE, 
NormalizedEvent.BEFORE_TOOL) + assert claude_result["decision"] == "deny" + + gemini_result = output.to_dict(Platform.GEMINI, NormalizedEvent.BEFORE_TOOL) + assert gemini_result["decision"] == "deny" + + def test_allow_decision(self) -> None: + """Test allow decision.""" + output = HookOutput(decision="allow") + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.BEFORE_TOOL) + + assert result["decision"] == "allow" + + def test_continue_false(self) -> None: + """Test continue=false output.""" + output = HookOutput(continue_loop=False, stop_reason="Critical error") + result = output.to_dict(Platform.GEMINI, NormalizedEvent.AFTER_AGENT) + + assert result["continue"] is False + assert result["stopReason"] == "Critical error" + + def test_suppress_output(self) -> None: + """Test suppressOutput flag.""" + output = HookOutput(suppress_output=True) + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.BEFORE_TOOL) + + assert result["suppressOutput"] is True + + def test_context_for_claude_session_start(self) -> None: + """Test context handling for Claude SessionStart.""" + output = HookOutput(context="Additional context here") + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.SESSION_START) + + assert "hookSpecificOutput" in result + assert result["hookSpecificOutput"]["hookEventName"] == "SessionStart" + assert result["hookSpecificOutput"]["additionalContext"] == "Additional context here" + + def test_context_for_claude_other_events(self) -> None: + """Test context handling for Claude non-SessionStart events.""" + output = HookOutput(context="Warning message") + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.AFTER_AGENT) + + assert result["systemMessage"] == "Warning message" + + def test_context_for_gemini(self) -> None: + """Test context handling for Gemini.""" + output = HookOutput(context="Additional context") + result = output.to_dict(Platform.GEMINI, NormalizedEvent.AFTER_AGENT) + + assert "hookSpecificOutput" in result + assert 
result["hookSpecificOutput"]["additionalContext"] == "Additional context" + + def test_raw_output_merged(self) -> None: + """Test that raw_output is merged into result.""" + output = HookOutput( + decision="allow", + raw_output={"customField": "customValue"}, + ) + result = output.to_dict(Platform.CLAUDE, NormalizedEvent.BEFORE_TOOL) + + assert result["decision"] == "allow" + assert result["customField"] == "customValue" + + +class TestNormalizeInput: + """Tests for the normalize_input function.""" + + def test_valid_json(self) -> None: + """Test normalizing valid JSON input.""" + raw_json = '{"hook_event_name": "Stop", "session_id": "123"}' + hook_input = normalize_input(raw_json, Platform.CLAUDE) + + assert hook_input.event == NormalizedEvent.AFTER_AGENT + assert hook_input.session_id == "123" + + def test_empty_json(self) -> None: + """Test normalizing empty JSON input.""" + hook_input = normalize_input("{}", Platform.CLAUDE) + + assert hook_input.session_id == "" + assert hook_input.event == NormalizedEvent.AFTER_AGENT # Default + + def test_empty_string(self) -> None: + """Test normalizing empty string input.""" + hook_input = normalize_input("", Platform.CLAUDE) + + assert hook_input.session_id == "" + + def test_whitespace_only(self) -> None: + """Test normalizing whitespace-only input.""" + hook_input = normalize_input(" \n ", Platform.CLAUDE) + + assert hook_input.session_id == "" + + def test_invalid_json(self) -> None: + """Test normalizing invalid JSON input.""" + hook_input = normalize_input("not valid json", Platform.CLAUDE) + + assert hook_input.session_id == "" + + +class TestDenormalizeOutput: + """Tests for the denormalize_output function.""" + + def test_produces_valid_json(self) -> None: + """Test that output is valid JSON.""" + output = HookOutput(decision="block", reason="test") + json_str = denormalize_output(output, Platform.CLAUDE, NormalizedEvent.AFTER_AGENT) + + # Should not raise + parsed = json.loads(json_str) + assert 
parsed["decision"] == "block" + + def test_empty_output_produces_empty_object(self) -> None: + """Test that empty output produces '{}'.""" + output = HookOutput() + json_str = denormalize_output(output, Platform.CLAUDE, NormalizedEvent.AFTER_AGENT) + + assert json_str == "{}" + + +class TestEventMappings: + """Tests for event name mappings.""" + + def test_all_claude_events_have_normalized_mapping(self) -> None: + """Test that all Claude events have normalized mappings.""" + claude_events = [ + "Stop", + "SubagentStop", + "PreToolUse", + "PostToolUse", + "UserPromptSubmit", + "SessionStart", + "SessionEnd", + ] + + for event in claude_events: + assert event in EVENT_TO_NORMALIZED[Platform.CLAUDE], f"Missing mapping for {event}" + + def test_all_gemini_events_have_normalized_mapping(self) -> None: + """Test that all Gemini events have normalized mappings.""" + gemini_events = [ + "AfterAgent", + "BeforeTool", + "AfterTool", + "BeforeAgent", + "SessionStart", + "SessionEnd", + "BeforeModel", + "AfterModel", + ] + + for event in gemini_events: + assert event in EVENT_TO_NORMALIZED[Platform.GEMINI], f"Missing mapping for {event}" + + def test_normalized_to_event_roundtrip_claude(self) -> None: + """Test that Claude events can be normalized and denormalized.""" + for _platform_event, normalized in EVENT_TO_NORMALIZED[Platform.CLAUDE].items(): + if normalized in NORMALIZED_TO_EVENT[Platform.CLAUDE]: + # SubagentStop maps to AFTER_AGENT but denormalizes to Stop + denormalized = NORMALIZED_TO_EVENT[Platform.CLAUDE][normalized] + # Just verify we get a valid event name back + assert denormalized in EVENT_TO_NORMALIZED[Platform.CLAUDE] + + +class TestToolMappings: + """Tests for tool name mappings.""" + + def test_claude_tools_normalize_to_snake_case(self) -> None: + """Test Claude tool names normalize to snake_case.""" + for _claude_tool, normalized in TOOL_TO_NORMALIZED[Platform.CLAUDE].items(): + assert "_" in normalized or normalized.islower(), f"{normalized} should be 
snake_case" + + def test_gemini_tools_are_already_snake_case(self) -> None: + """Test Gemini tool names are already snake_case.""" + for gemini_tool, normalized in TOOL_TO_NORMALIZED[Platform.GEMINI].items(): + assert gemini_tool == normalized, "Gemini tools should be identity mapping" + + def test_common_tools_map_to_same_normalized_name(self) -> None: + """Test that common tools map to the same normalized name across platforms.""" + common_tools = ["write_file", "read_file", "shell", "glob", "grep"] + + for tool in common_tools: + # Find Claude tool that maps to this normalized name + claude_tool = None + for k, v in TOOL_TO_NORMALIZED[Platform.CLAUDE].items(): + if v == tool: + claude_tool = k + break + + gemini_tool = None + for k, v in TOOL_TO_NORMALIZED[Platform.GEMINI].items(): + if v == tool: + gemini_tool = k + break + + if claude_tool and gemini_tool: + # Both platforms should normalize to the same name + assert TOOL_TO_NORMALIZED[Platform.CLAUDE][claude_tool] == tool + assert TOOL_TO_NORMALIZED[Platform.GEMINI][gemini_tool] == tool + + +class TestIntegration: + """Integration tests for the full normalization flow.""" + + def test_claude_stop_hook_flow(self) -> None: + """Test complete flow for Claude Stop hook.""" + # Input from Claude + raw_input = json.dumps( + { + "session_id": "sess123", + "transcript_path": "/path/transcript.jsonl", + "cwd": "/project", + "hook_event_name": "Stop", + } + ) + + # Normalize + hook_input = normalize_input(raw_input, Platform.CLAUDE) + assert hook_input.event == NormalizedEvent.AFTER_AGENT + + # Process (would call hook function here) + hook_output = HookOutput(decision="block", reason="Policy X requires attention") + + # Denormalize + output_json = denormalize_output(hook_output, Platform.CLAUDE, hook_input.event) + result = json.loads(output_json) + + assert result["decision"] == "block" + assert "Policy X" in result["reason"] + + def test_gemini_afteragent_hook_flow(self) -> None: + """Test complete flow for Gemini 
AfterAgent hook.""" + # Input from Gemini + raw_input = json.dumps( + { + "session_id": "sess456", + "transcript_path": "/path/transcript.json", + "cwd": "/project", + "hook_event_name": "AfterAgent", + "timestamp": "2026-01-15T10:00:00Z", + } + ) + + # Normalize + hook_input = normalize_input(raw_input, Platform.GEMINI) + assert hook_input.event == NormalizedEvent.AFTER_AGENT + + # Process (would call hook function here) + hook_output = HookOutput(decision="block", reason="Policy Y requires attention") + + # Denormalize + output_json = denormalize_output(hook_output, Platform.GEMINI, hook_input.event) + result = json.loads(output_json) + + # Gemini should get "deny" instead of "block" + assert result["decision"] == "deny" + assert "Policy Y" in result["reason"] + + def test_cross_platform_same_hook_logic(self) -> None: + """Test that the same hook logic produces correct output for both platforms.""" + + def sample_hook(hook_input: HookInput) -> HookOutput: + """Sample hook that blocks if event is after_agent.""" + if hook_input.event == NormalizedEvent.AFTER_AGENT: + return HookOutput(decision="block", reason="Must review first") + return HookOutput() + + # Test with Claude + claude_input = normalize_input( + '{"hook_event_name": "Stop"}', + Platform.CLAUDE, + ) + claude_output = sample_hook(claude_input) + claude_json = denormalize_output(claude_output, Platform.CLAUDE, claude_input.event) + claude_result = json.loads(claude_json) + + # Test with Gemini + gemini_input = normalize_input( + '{"hook_event_name": "AfterAgent"}', + Platform.GEMINI, + ) + gemini_output = sample_hook(gemini_input) + gemini_json = denormalize_output(gemini_output, Platform.GEMINI, gemini_input.event) + gemini_result = json.loads(gemini_json) + + # Both should block, but with platform-appropriate decision value + assert claude_result["decision"] == "block" + assert gemini_result["decision"] == "deny" + assert claude_result["reason"] == gemini_result["reason"] diff --git a/uv.lock 
b/uv.lock index 744b19b..c4091ca 100644 --- a/uv.lock +++ b/uv.lock @@ -126,7 +126,7 @@ toml = [ [[package]] name = "deepwork" -version = "0.2.0" +version = "0.3.0" source = { editable = "." } dependencies = [ { name = "click" },