diff --git a/.claude/commands/deepwork_policy.define.md b/.claude/commands/deepwork_policy.define.md index b671de1..3b6fb96 100644 --- a/.claude/commands/deepwork_policy.define.md +++ b/.claude/commands/deepwork_policy.define.md @@ -88,6 +88,22 @@ If there are files that, when also changed, mean the policy shouldn't fire: - Trigger: `src/auth/**/*` - Safety: `SECURITY.md`, `docs/security_review.md` +### Step 3b: Choose the Comparison Mode (Optional) + +The `compare_to` field controls what baseline is used when detecting "changed files": + +**Options:** +- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch. +- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production. +- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response. + +**When to use each:** +- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review +- **default_tip**: For policies about what's different from production/main +- **prompt**: For policies that should only consider changes made since the most recent prompt (i.e., during the current agent response) + +Most policies should use the default (`base`) and don't need to specify `compare_to`. + ### Step 4: Write the Instructions Create clear, actionable instructions for what the agent should do when the policy fires. @@ -118,6 +134,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" # or array: ["pattern1", "pattern2"] safety: "[glob pattern]" # optional, or array + compare_to: "base" # optional: "base" (default), "default_tip", or "prompt" instructions: | [Multi-line instructions for the agent...] 
``` @@ -127,6 +144,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" safety: "[glob pattern]" + compare_to: "base" # optional instructions_file: "path/to/instructions.md" ``` @@ -198,7 +216,10 @@ Create or update this file at the project root with the new policy entry. ## Context Policies are evaluated automatically when you finish working on a task. The system: -1. Tracks which files you changed during the session +1. Determines which files have changed based on each policy's `compare_to` setting: + - `base` (default): Files changed since the branch diverged from main/master + - `default_tip`: Files different from the current main/master branch + - `prompt`: Files changed since the last prompt submission 2. Checks if any changes match policy trigger patterns 3. Skips policies where safety patterns also matched 4. Prompts you with instructions for any triggered policies diff --git a/.deepwork/jobs/deepwork_policy/hooks/capture_work_tree.sh b/.deepwork/jobs/deepwork_policy/hooks/capture_prompt_work_tree.sh similarity index 78% rename from .deepwork/jobs/deepwork_policy/hooks/capture_work_tree.sh rename to .deepwork/jobs/deepwork_policy/hooks/capture_prompt_work_tree.sh index 04d9a97..a3323e4 100755 --- a/.deepwork/jobs/deepwork_policy/hooks/capture_work_tree.sh +++ b/.deepwork/jobs/deepwork_policy/hooks/capture_prompt_work_tree.sh @@ -1,9 +1,10 @@ #!/bin/bash -# capture_work_tree.sh - Captures the current git work tree state +# capture_prompt_work_tree.sh - Captures the git work tree state at prompt submission # # This script creates a snapshot of the current git state by recording # all files that have been modified, added, or deleted. This baseline -# is used later to detect what changed during an agent session. +# is used for policies with compare_to: prompt to detect what changed +# during an agent response (between user prompts). 
set -e diff --git a/.deepwork/jobs/deepwork_policy/hooks/get_changed_files.sh b/.deepwork/jobs/deepwork_policy/hooks/get_changed_files.sh deleted file mode 100755 index 03f70d9..0000000 --- a/.deepwork/jobs/deepwork_policy/hooks/get_changed_files.sh +++ /dev/null @@ -1,30 +0,0 @@ -#!/bin/bash -# get_changed_files.sh - Gets files that changed since the last work tree capture -# -# This script compares the current git state against the baseline captured -# at the start of the session to determine what files were modified. - -set -e - -# Stage all current changes -git add -A 2>/dev/null || true - -# Get current state -current_files=$(git diff --name-only HEAD 2>/dev/null || echo "") -untracked=$(git ls-files --others --exclude-standard 2>/dev/null || echo "") - -# Combine and deduplicate current files -all_current=$(echo -e "${current_files}\n${untracked}" | sort -u | grep -v '^$' || true) - -if [ -f .deepwork/.last_work_tree ]; then - # Compare with baseline - files that are new or different - # Get files in current that weren't in baseline - last_files=$(cat .deepwork/.last_work_tree 2>/dev/null || echo "") - - # Output files that are in current state - # This includes both newly changed files and files that were already changed - echo "${all_current}" -else - # No baseline exists - return all currently changed files - echo "${all_current}" -fi diff --git a/.deepwork/jobs/deepwork_policy/hooks/policy_stop_hook.sh b/.deepwork/jobs/deepwork_policy/hooks/policy_stop_hook.sh index 6d598a3..b12d456 100755 --- a/.deepwork/jobs/deepwork_policy/hooks/policy_stop_hook.sh +++ b/.deepwork/jobs/deepwork_policy/hooks/policy_stop_hook.sh @@ -2,16 +2,13 @@ # policy_stop_hook.sh - Evaluates policies when the agent stops # # This script is called as a Claude Code Stop hook. It: -# 1. Gets the list of files changed during the session -# 2. Evaluates policies from .deepwork.policy.yml +# 1. Evaluates policies from .deepwork.policy.yml +# 2. 
Computes changed files based on each policy's compare_to setting # 3. Checks for tags in the conversation transcript # 4. Returns JSON to block stop if policies need attention -# 5. Resets the work tree baseline for the next iteration set -e -SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" - # Check if policy file exists if [ ! -f .deepwork.policy.yml ]; then # No policies defined, nothing to do @@ -31,16 +28,6 @@ if [ -n "${HOOK_INPUT}" ]; then TRANSCRIPT_PATH=$(echo "${HOOK_INPUT}" | jq -r '.transcript_path // empty' 2>/dev/null || echo "") fi -# Get changed files -changed_files=$("${SCRIPT_DIR}/get_changed_files.sh" 2>/dev/null || echo "") - -# If no files changed, nothing to evaluate -if [ -z "${changed_files}" ]; then - # Reset baseline for next iteration - "${SCRIPT_DIR}/capture_work_tree.sh" 2>/dev/null || true - exit 0 -fi - # Extract conversation text from the JSONL transcript # The transcript is JSONL format - each line is a JSON object # We need to extract the text content from assistant messages @@ -57,16 +44,13 @@ fi # Call the Python evaluator # The Python module handles: # - Parsing the policy file +# - Computing changed files based on each policy's compare_to setting # - Matching changed files against triggers/safety patterns # - Checking for promise tags in the conversation context # - Generating appropriate JSON output result=$(echo "${conversation_context}" | python -m deepwork.hooks.evaluate_policies \ --policy-file .deepwork.policy.yml \ - --changed-files "${changed_files}" \ 2>/dev/null || echo '{}') -# Reset the work tree baseline for the next iteration -"${SCRIPT_DIR}/capture_work_tree.sh" 2>/dev/null || true - # Output the result (JSON for Claude Code hooks) echo "${result}" diff --git a/.deepwork/jobs/deepwork_policy/hooks/user_prompt_submit.sh b/.deepwork/jobs/deepwork_policy/hooks/user_prompt_submit.sh index 970be76..486ad83 100755 --- a/.deepwork/jobs/deepwork_policy/hooks/user_prompt_submit.sh +++ 
b/.deepwork/jobs/deepwork_policy/hooks/user_prompt_submit.sh @@ -1,17 +1,16 @@ #!/bin/bash # user_prompt_submit.sh - Runs on every user prompt submission # -# This script captures the work tree baseline if it doesn't exist yet. -# This ensures we have a baseline to compare against when evaluating policies. +# This script captures the work tree state at each prompt submission. +# This baseline is used for policies with compare_to: prompt to detect +# what changed during an agent response. set -e SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)" -# Only capture if no baseline exists yet (first prompt of session) -if [ ! -f .deepwork/.last_work_tree ]; then - "${SCRIPT_DIR}/capture_work_tree.sh" -fi +# Capture work tree state at each prompt for compare_to: prompt policies +"${SCRIPT_DIR}/capture_prompt_work_tree.sh" # Exit successfully - don't block the prompt exit 0 diff --git a/.deepwork/jobs/deepwork_policy/steps/define.md b/.deepwork/jobs/deepwork_policy/steps/define.md index 0a47e55..85c2b63 100644 --- a/.deepwork/jobs/deepwork_policy/steps/define.md +++ b/.deepwork/jobs/deepwork_policy/steps/define.md @@ -56,6 +56,22 @@ If there are files that, when also changed, mean the policy shouldn't fire: - Trigger: `src/auth/**/*` - Safety: `SECURITY.md`, `docs/security_review.md` +### Step 3b: Choose the Comparison Mode (Optional) + +The `compare_to` field controls what baseline is used when detecting "changed files": + +**Options:** +- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch. +- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production. +- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response. 
+ +**When to use each:** +- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review +- **default_tip**: For policies about what's different from production/main +- **prompt**: For policies that should only consider changes made since the most recent prompt (i.e., during the current agent response) + +Most policies should use the default (`base`) and don't need to specify `compare_to`. + ### Step 4: Write the Instructions Create clear, actionable instructions for what the agent should do when the policy fires. @@ -86,6 +102,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" # or array: ["pattern1", "pattern2"] safety: "[glob pattern]" # optional, or array + compare_to: "base" # optional: "base" (default), "default_tip", or "prompt" instructions: | [Multi-line instructions for the agent...] ``` @@ -95,6 +112,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" safety: "[glob pattern]" + compare_to: "base" # optional instructions_file: "path/to/instructions.md" ``` @@ -166,7 +184,10 @@ Create or update this file at the project root with the new policy entry. ## Context Policies are evaluated automatically when you finish working on a task. The system: -1. Tracks which files you changed during the session +1. Determines which files have changed based on each policy's `compare_to` setting: + - `base` (default): Files changed since the branch diverged from main/master + - `default_tip`: Files different from the current main/master branch + - `prompt`: Files changed since the last prompt submission 2. Checks if any changes match policy trigger patterns 3. Skips policies where safety patterns also matched 4. 
Prompts you with instructions for any triggered policies diff --git a/.gemini/commands/deepwork_jobs/learn.toml b/.gemini/commands/deepwork_jobs/learn.toml index 5330428..f901e96 100644 --- a/.gemini/commands/deepwork_jobs/learn.toml +++ b/.gemini/commands/deepwork_jobs/learn.toml @@ -174,7 +174,42 @@ The AGENTS.md file captures project-specific knowledge that helps future agent r - This keeps AGENTS.md in sync as the codebase evolves - Pattern: "See `path/to/file.ext` for [description]" -3. **AGENTS.md structure**: See `.deepwork/jobs/deepwork_jobs/templates/agents.md.template` for the standard format. +3. **AGENTS.md structure**: + +```markdown +# Project Context for [Job Name] + +## Codebase Structure + + +- Project structure: See `README.md` for overview +- API documentation: See `docs/api.md` +- Configuration: See `config/README.md` + +## Conventions + +### Naming Conventions +- [Convention]: See example in `path/to/example.ext:LINE` + +### File Organization +- [Pattern]: Reference `path/to/pattern/` + +## Job-Specific Context + +### [Job Name] + +#### [Step Name] +- [Learning]: Reference `relevant/file.ext` +- [Context]: [Brief explanation with file reference] + +## Known Issues and Workarounds + +- [Issue]: [Workaround with file reference if applicable] + +## Last Updated +- Date: [YYYY-MM-DD] +- From conversation about: [Brief description] +``` 4. **Writing entries** - Be concise but specific @@ -196,14 +231,36 @@ If instruction files were modified: changes: "Improved [step] instructions based on execution learnings: [brief description]" ``` -### Step 7: Sync and Relay Instructions +### Step 7: Sync and Summarize 1. **Run deepwork sync** (if instructions were modified) ```bash deepwork sync ``` -2. **If commands were regenerated**, look at the "To use the new commands" section in the `deepwork sync` output and **relay these exact reload instructions to the user** (e.g., "Type 'exit' then run 'claude --resume'" for Claude Code) +2. 
**Create learning_summary.md** in the working folder: + ```markdown + # Learning Summary + + ## Job Analyzed + - Job: [job_name] + - Steps executed: [list of steps] + + ## Generalizable Improvements Made + - [Step]: [What was improved] + + ## Bespoke Learnings Captured + - Location: [path to AGENTS.md] + - Entries added: [list of entries] + + ## Files Modified + - [List of files changed] + + ## Recommendations + - [Any additional suggestions] + ``` + +3. **If commands were regenerated**, look at the "To use the new commands" section in the `deepwork sync` output and **relay these exact reload instructions to the user** (e.g., "Type 'exit' then run 'claude --resume'" for Claude Code) ## File Reference Patterns @@ -234,6 +291,7 @@ When adding entries to AGENTS.md, prefer these patterns: - AGENTS.md created/updated with bespoke learnings - File references used instead of duplicating content - AGENTS.md is in the correct working folder +- learning_summary.md documents all changes - When all criteria are met, include `✓ Quality Criteria Met` ## Example Dialog @@ -293,7 +351,7 @@ I found the following job executions: **Summary** -Updated job instructions and created AGENTS.md with bespoke learnings. To get the updated commands, type 'exit' then run 'claude --resume'. +Created `learning_summary.md` documenting all changes. To get the updated commands, type 'exit' then run 'claude --resume'. ``` ## Handling Edge Cases @@ -345,7 +403,7 @@ All work for this job should be done on a dedicated work branch: ## Output Requirements Create the following output(s): -- `AGENTS.md` +- `learning_summary.md` Ensure all outputs are: - Well-formatted and complete @@ -359,7 +417,7 @@ After completing this step: 2. 
**Inform the user**: - The learn command is complete - - Outputs created: AGENTS.md + - Outputs created: learning_summary.md - This command can be run again anytime to make further changes ## Command Complete diff --git a/.gemini/commands/deepwork_policy/define.toml b/.gemini/commands/deepwork_policy/define.toml index 821e928..7deb844 100644 --- a/.gemini/commands/deepwork_policy/define.toml +++ b/.gemini/commands/deepwork_policy/define.toml @@ -93,6 +93,22 @@ If there are files that, when also changed, mean the policy shouldn't fire: - Trigger: `src/auth/**/*` - Safety: `SECURITY.md`, `docs/security_review.md` +### Step 3b: Choose the Comparison Mode (Optional) + +The `compare_to` field controls what baseline is used when detecting "changed files": + +**Options:** +- `base` (default) - Compares to the base of the current branch (merge-base with main/master). This is the most common choice for feature branches, as it shows all changes made on the branch. +- `default_tip` - Compares to the current tip of the default branch (main/master). Useful when you want to see the difference from what's currently in production. +- `prompt` - Compares to the state at the start of each prompt. Useful for policies that should only fire based on changes made during a single agent response. + +**When to use each:** +- **base**: Best for most policies. "Did this branch change config files?" → trigger docs review +- **default_tip**: For policies about what's different from production/main +- **prompt**: For policies that should only consider changes made since the most recent prompt (i.e., during the current agent response) + +Most policies should use the default (`base`) and don't need to specify `compare_to`. + ### Step 4: Write the Instructions Create clear, actionable instructions for what the agent should do when the policy fires. @@ -123,6 +139,7 @@ Create or update `.deepwork.policy.yml` in the project root. 
- name: "[Friendly name for the policy]" trigger: "[glob pattern]" # or array: ["pattern1", "pattern2"] safety: "[glob pattern]" # optional, or array + compare_to: "base" # optional: "base" (default), "default_tip", or "prompt" instructions: | [Multi-line instructions for the agent...] ``` @@ -132,6 +149,7 @@ Create or update `.deepwork.policy.yml` in the project root. - name: "[Friendly name for the policy]" trigger: "[glob pattern]" safety: "[glob pattern]" + compare_to: "base" # optional instructions_file: "path/to/instructions.md" ``` @@ -203,7 +221,10 @@ Create or update this file at the project root with the new policy entry. ## Context Policies are evaluated automatically when you finish working on a task. The system: -1. Tracks which files you changed during the session +1. Determines which files have changed based on each policy's `compare_to` setting: + - `base` (default): Files changed since the branch diverged from main/master + - `default_tip`: Files different from the current main/master branch + - `prompt`: Files changed since the last prompt submission 2. Checks if any changes match policy trigger patterns 3. Skips policies where safety patterns also matched 4. 
Prompts you with instructions for any triggered policies diff --git a/CHANGELOG.md b/CHANGELOG.md index b68f0d4..15b421c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Updated README with PyPI install instructions using pipx, uv, and pip (#22) - Updated deepwork_jobs job version to 0.2.0 +### Fixed +- Stop hooks now correctly return blocking JSON when policies fire +- Added shell script tests to verify stop hook blocking behavior + ### Removed - `refine` step (replaced by `learn` command) (#27) - `get_changed_files.sh` hook (logic moved to Python policy evaluator) (#34) diff --git a/claude.md b/claude.md index 75077a1..34a4c01 100644 --- a/claude.md +++ b/claude.md @@ -73,6 +73,21 @@ uv sync # Install dependencies uv run pytest # Run tests ``` +## Running DeepWork CLI (Claude Code Web Environment) + +When running in Claude Code on the web (not local installations), the `deepwork` CLI may not be available. To run DeepWork commands: + +```bash +# Install the package in editable mode (one-time setup) +pip install -e . + +# Then run commands normally +deepwork install --platform claude +deepwork sync +``` + +**Note**: In web environments, you may also need to install dependencies like `jsonschema`, `pyyaml`, `gitpython`, `jinja2`, and `click` if they're not already available. + ## How DeepWork Works ### 1. 
Installation diff --git a/doc/architecture.md b/doc/architecture.md index cd30f35..30339c1 100644 --- a/doc/architecture.md +++ b/doc/architecture.md @@ -67,8 +67,7 @@ deepwork/ # DeepWork tool repository │ │ └── hooks/ # Hook scripts │ │ ├── global_hooks.yml │ │ ├── user_prompt_submit.sh -│ │ ├── capture_work_tree.sh -│ │ ├── get_changed_files.sh +│ │ ├── capture_prompt_work_tree.sh │ │ └── policy_stop_hook.sh │ ├── schemas/ # Definition schemas │ │ ├── job_schema.py @@ -291,8 +290,7 @@ my-project/ # User's project (target) │ │ └── hooks/ # Hook scripts (installed from standard_jobs) │ │ ├── global_hooks.yml │ │ ├── user_prompt_submit.sh -│ │ ├── capture_work_tree.sh -│ │ ├── get_changed_files.sh +│ │ ├── capture_prompt_work_tree.sh │ │ └── policy_stop_hook.sh │ ├── competitive_research/ │ │ ├── job.yml # Job metadata @@ -1041,11 +1039,10 @@ Policies are implemented using Claude Code's hooks system. The `deepwork_policy` ``` .deepwork/jobs/deepwork_policy/hooks/ -├── global_hooks.yml # Maps lifecycle events to scripts -├── user_prompt_submit.sh # Captures baseline on first prompt -├── capture_work_tree.sh # Creates git state snapshot -├── get_changed_files.sh # Computes changed files -└── policy_stop_hook.sh # Evaluates policies on stop +├── global_hooks.yml # Maps lifecycle events to scripts +├── user_prompt_submit.sh # Captures baseline at each prompt +├── capture_prompt_work_tree.sh # Creates git state snapshot for compare_to: prompt +└── policy_stop_hook.sh # Evaluates policies on stop (calls Python evaluator) ``` The hooks are installed to `.claude/settings.json` during `deepwork sync`: diff --git a/tests/shell_script_tests/__init__.py b/tests/shell_script_tests/__init__.py new file mode 100644 index 0000000..1b0c335 --- /dev/null +++ b/tests/shell_script_tests/__init__.py @@ -0,0 +1 @@ +"""Shell script tests for DeepWork hooks.""" diff --git a/tests/shell_script_tests/test_policy_stop_hook.py b/tests/shell_script_tests/test_policy_stop_hook.py new file mode 
100644 index 0000000..1134b26 --- /dev/null +++ b/tests/shell_script_tests/test_policy_stop_hook.py @@ -0,0 +1,344 @@ +"""Tests for policy_stop_hook.sh shell script. + +These tests verify that the policy stop hook correctly outputs JSON +to block or allow the stop event in Claude Code. +""" + +import json +import os +import subprocess +import tempfile +from pathlib import Path + +import pytest +from git import Repo + + +@pytest.fixture +def shell_scripts_dir() -> Path: + """Return the path to the source shell scripts directory.""" + return ( + Path(__file__).parent.parent.parent + / "src" + / "deepwork" + / "standard_jobs" + / "deepwork_policy" + / "hooks" + ) + + +@pytest.fixture +def git_repo_with_policy(tmp_path: Path) -> Path: + """Create a git repo with a policy file and trigger a policy.""" + # Initialize git repo + repo = Repo.init(tmp_path) + + # Create initial commit + readme = tmp_path / "README.md" + readme.write_text("# Test Project\n") + repo.index.add(["README.md"]) + repo.index.commit("Initial commit") + + # Create a policy file that triggers on src/** changes + # Use compare_to: prompt since test repos don't have origin remote + policy_file = tmp_path / ".deepwork.policy.yml" + policy_file.write_text( + """- name: "Test Policy" + trigger: "src/**/*" + compare_to: prompt + instructions: | + This is a test policy that fires when src/ files change. + Please address this policy. 
+""" + ) + + # Create .deepwork directory with empty baseline + # (so new files are detected as "changed since prompt") + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir(exist_ok=True) + # Empty baseline means all current files are "new" + (deepwork_dir / ".last_work_tree").write_text("") + + return tmp_path + + +@pytest.fixture +def git_repo_no_policy(tmp_path: Path) -> Path: + """Create a git repo without a policy file.""" + repo = Repo.init(tmp_path) + + readme = tmp_path / "README.md" + readme.write_text("# Test Project\n") + repo.index.add(["README.md"]) + repo.index.commit("Initial commit") + + return tmp_path + + +def run_stop_hook( + script_path: Path, + cwd: Path, + hook_input: dict | None = None, +) -> tuple[str, str, int]: + """ + Run the policy_stop_hook.sh script and return its output. + + Args: + script_path: Path to the policy_stop_hook.sh script + cwd: Working directory to run the script in + hook_input: Optional JSON input to pass via stdin + + Returns: + Tuple of (stdout, stderr, return_code) + """ + env = os.environ.copy() + # Ensure Python can find the deepwork module + env["PYTHONPATH"] = str(Path(__file__).parent.parent.parent / "src") + + stdin_data = json.dumps(hook_input) if hook_input else "" + + result = subprocess.run( + ["bash", str(script_path)], + cwd=cwd, + capture_output=True, + text=True, + input=stdin_data, + env=env, + ) + + return result.stdout, result.stderr, result.returncode + + +class TestPolicyStopHookBlocking: + """Tests for policy_stop_hook.sh blocking behavior.""" + + def test_outputs_block_json_when_policy_fires( + self, shell_scripts_dir: Path, git_repo_with_policy: Path + ) -> None: + """Test that the hook outputs blocking JSON when a policy fires.""" + # Create a file that triggers the policy + src_dir = git_repo_with_policy / "src" + src_dir.mkdir(exist_ok=True) + (src_dir / "main.py").write_text("# New file\n") + + # Stage the change + repo = Repo(git_repo_with_policy) + repo.index.add(["src/main.py"]) + 
+ # Run the stop hook + script_path = shell_scripts_dir / "policy_stop_hook.sh" + stdout, stderr, code = run_stop_hook(script_path, git_repo_with_policy) + + # Parse the output as JSON + output = stdout.strip() + assert output, f"Expected JSON output but got empty string. stderr: {stderr}" + + try: + result = json.loads(output) + except json.JSONDecodeError as e: + pytest.fail(f"Output is not valid JSON: {output!r}. Error: {e}") + + # Verify the JSON has the blocking structure + assert "decision" in result, f"Expected 'decision' key in JSON: {result}" + assert result["decision"] == "block", f"Expected decision='block', got: {result}" + assert "reason" in result, f"Expected 'reason' key in JSON: {result}" + assert "Test Policy" in result["reason"], f"Policy name not in reason: {result}" + + def test_outputs_empty_json_when_no_policy_fires( + self, shell_scripts_dir: Path, git_repo_with_policy: Path + ) -> None: + """Test that the hook outputs empty JSON when no policy fires.""" + # Don't create any files that would trigger the policy + # (policy triggers on src/** but we haven't created anything in src/) + + # Run the stop hook + script_path = shell_scripts_dir / "policy_stop_hook.sh" + stdout, stderr, code = run_stop_hook(script_path, git_repo_with_policy) + + # Parse the output as JSON + output = stdout.strip() + assert output, f"Expected JSON output but got empty string. stderr: {stderr}" + + try: + result = json.loads(output) + except json.JSONDecodeError as e: + pytest.fail(f"Output is not valid JSON: {output!r}. 
Error: {e}") + + # Should be empty JSON (no blocking) + assert result == {}, f"Expected empty JSON when no policies fire, got: {result}" + + def test_exits_early_when_no_policy_file( + self, shell_scripts_dir: Path, git_repo_no_policy: Path + ) -> None: + """Test that the hook exits cleanly when no policy file exists.""" + script_path = shell_scripts_dir / "policy_stop_hook.sh" + stdout, stderr, code = run_stop_hook(script_path, git_repo_no_policy) + + # Should exit with code 0 and produce no output (or empty) + assert code == 0, f"Expected exit code 0, got {code}. stderr: {stderr}" + # No output is fine when there's no policy file + output = stdout.strip() + if output: + # If there is output, it should be valid JSON + try: + result = json.loads(output) + assert result == {}, f"Expected empty JSON, got: {result}" + except json.JSONDecodeError: + # Empty or no output is acceptable + pass + + def test_respects_promise_tags( + self, shell_scripts_dir: Path, git_repo_with_policy: Path + ) -> None: + """Test that promised policies are not re-triggered.""" + # Create a file that triggers the policy + src_dir = git_repo_with_policy / "src" + src_dir.mkdir(exist_ok=True) + (src_dir / "main.py").write_text("# New file\n") + + # Stage the change + repo = Repo(git_repo_with_policy) + repo.index.add(["src/main.py"]) + + # Create a mock transcript with the promise tag + with tempfile.NamedTemporaryFile(mode="w", suffix=".jsonl", delete=False) as f: + transcript_path = f.name + # Write a mock assistant message with the promise tag + f.write( + json.dumps( + { + "role": "assistant", + "message": { + "content": [ + { + "type": "text", + "text": "I've addressed the policy. 
✓ Test Policy", + } + ] + }, + } + ) + ) + f.write("\n") + + try: + # Run the stop hook with transcript path + script_path = shell_scripts_dir / "policy_stop_hook.sh" + hook_input = {"transcript_path": transcript_path} + stdout, stderr, code = run_stop_hook(script_path, git_repo_with_policy, hook_input) + + # Parse the output + output = stdout.strip() + assert output, f"Expected JSON output. stderr: {stderr}" + + result = json.loads(output) + + # Should be empty JSON because the policy was promised + assert result == {}, f"Expected empty JSON when policy is promised, got: {result}" + finally: + os.unlink(transcript_path) + + def test_safety_pattern_prevents_firing(self, shell_scripts_dir: Path, tmp_path: Path) -> None: + """Test that safety patterns prevent policies from firing.""" + # Initialize git repo + repo = Repo.init(tmp_path) + + readme = tmp_path / "README.md" + readme.write_text("# Test Project\n") + repo.index.add(["README.md"]) + repo.index.commit("Initial commit") + + # Create a policy with a safety pattern + # Use compare_to: prompt since test repos don't have origin remote + policy_file = tmp_path / ".deepwork.policy.yml" + policy_file.write_text( + """- name: "Documentation Policy" + trigger: "src/**/*" + safety: "docs/**/*" + compare_to: prompt + instructions: | + Update documentation when changing source files. 
+""" + ) + + # Create .deepwork directory with empty baseline + deepwork_dir = tmp_path / ".deepwork" + deepwork_dir.mkdir(exist_ok=True) + (deepwork_dir / ".last_work_tree").write_text("") + + # Create both trigger and safety files + src_dir = tmp_path / "src" + src_dir.mkdir(exist_ok=True) + (src_dir / "main.py").write_text("# Source file\n") + + docs_dir = tmp_path / "docs" + docs_dir.mkdir(exist_ok=True) + (docs_dir / "api.md").write_text("# API docs\n") + + # Stage both changes so they appear in git diff --cached + repo.index.add(["src/main.py", "docs/api.md"]) + + # Run the stop hook + script_path = shell_scripts_dir / "policy_stop_hook.sh" + stdout, stderr, code = run_stop_hook(script_path, tmp_path) + + # Parse the output + output = stdout.strip() + assert output, f"Expected JSON output. stderr: {stderr}" + + result = json.loads(output) + + # Should be empty JSON because safety pattern matched + assert result == {}, f"Expected empty JSON when safety pattern matches, got: {result}" + + +class TestPolicyStopHookJsonFormat: + """Tests for the JSON output format of policy_stop_hook.sh.""" + + def test_json_has_correct_structure( + self, shell_scripts_dir: Path, git_repo_with_policy: Path + ) -> None: + """Test that blocking JSON has the correct Claude Code structure.""" + # Create a file that triggers the policy + src_dir = git_repo_with_policy / "src" + src_dir.mkdir(exist_ok=True) + (src_dir / "main.py").write_text("# New file\n") + + repo = Repo(git_repo_with_policy) + repo.index.add(["src/main.py"]) + + script_path = shell_scripts_dir / "policy_stop_hook.sh" + stdout, stderr, code = run_stop_hook(script_path, git_repo_with_policy) + + result = json.loads(stdout.strip()) + + # Verify exact structure expected by Claude Code + assert set(result.keys()) == { + "decision", + "reason", + }, f"Unexpected keys in JSON: {result.keys()}" + assert result["decision"] == "block" + assert isinstance(result["reason"], str) + assert len(result["reason"]) > 0 + + def 
test_reason_contains_policy_instructions( + self, shell_scripts_dir: Path, git_repo_with_policy: Path + ) -> None: + """Test that the reason includes the policy instructions.""" + src_dir = git_repo_with_policy / "src" + src_dir.mkdir(exist_ok=True) + (src_dir / "main.py").write_text("# New file\n") + + repo = Repo(git_repo_with_policy) + repo.index.add(["src/main.py"]) + + script_path = shell_scripts_dir / "policy_stop_hook.sh" + stdout, stderr, code = run_stop_hook(script_path, git_repo_with_policy) + + result = json.loads(stdout.strip()) + + # Check that the reason contains the policy content + reason = result["reason"] + assert "DeepWork Policies Triggered" in reason + assert "Test Policy" in reason + assert "test policy that fires" in reason