Merge pull request #18 from axiomhq/sync/gilfoyle #48
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Workflow: re-run skill evals and record the resulting baseline IDs.
name: Update Eval Baselines

on:
  # Automatic trigger: pushes to main that touch skills or the eval tooling.
  push:
    branches: [main]
    paths:
      - "skills/**"
      - "eval-tooling/**"
  # Manual trigger from the Actions UI / API.
  workflow_dispatch:
    inputs:
      skill:
        description: "Skill to update baseline for (leave empty for all changed)"
        required: false
        type: string
      force:
        description: "Force update even if skill files unchanged"
        required: false
        type: boolean
        default: false

jobs:
# Job: detect-skills -- decides which skills need a fresh eval baseline.
  detect-skills:
    runs-on: ubuntu-latest
    # Outputs consumed by the update-baselines job:
    #   skills     - compact JSON array of skill names (used as the matrix)
    #   has_skills - 'true' / 'false', gates the downstream job
    outputs:
      skills: ${{ steps.detect.outputs.skills }}
      has_skills: ${{ steps.detect.outputs.has_skills }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Full history so the commit that preceded this push can be diffed
          # against, even when the push contains several commits.
          fetch-depth: 0
      - name: Detect skills to update
        id: detect
        # Event data is handed to the script through environment variables and
        # never interpolated into the script text, so a crafted input cannot
        # inject shell commands.
        env:
          INPUT_SKILL: ${{ inputs.skill }}
          INPUT_FORCE: ${{ inputs.force }}
          BEFORE_SHA: ${{ github.event.before }}
        run: |
          # Write both step outputs in one place.
          emit() {
            echo "skills=$1" >> "$GITHUB_OUTPUT"
            echo "has_skills=$2" >> "$GITHUB_OUTPUT"
          }

          # A skill is eligible only if it ships an eval file.
          has_eval() {
            [ -f "skills/$1/.meta/$1.eval.ts" ]
          }

          # If skill input is provided, use it directly
          if [ -n "$INPUT_SKILL" ]; then
            skill="$INPUT_SKILL"
            # Reject names that could escape skills/ or corrupt the JSON output.
            if [[ ! "$skill" =~ ^[A-Za-z0-9][A-Za-z0-9._-]*$ ]]; then
              echo "::error::Invalid skill name: ${skill}"
              exit 1
            fi
            if has_eval "$skill"; then
              # jq produces correctly escaped JSON instead of hand-built quotes.
              emit "$(jq -cn --arg s "$skill" '[$s]')" true
              echo "Updating baseline for: ${skill}"
            else
              echo "No eval file found for skill: ${skill}"
              emit '[]' false
            fi
            exit 0
          fi

          if [ "$INPUT_FORCE" = "true" ]; then
            # Forced run: consider every entry under skills/, changed or not.
            candidates=$(ls -1 skills 2>/dev/null | sort -u)
          else
            # Get changed files in this push. Prefer the pre-push commit so that
            # multi-commit pushes are fully covered; fall back to the last
            # commit when it is missing (manual run, new branch, force-push).
            if [ -n "$BEFORE_SHA" ] \
              && [ "$BEFORE_SHA" != "0000000000000000000000000000000000000000" ] \
              && git cat-file -e "${BEFORE_SHA}^{commit}" 2>/dev/null; then
              changed_files=$(git diff --name-only "$BEFORE_SHA" HEAD)
            else
              changed_files=$(git diff --name-only HEAD~1 HEAD 2>/dev/null || git diff --name-only HEAD)
            fi
            # Extract unique skill names from changed paths
            candidates=$(echo "$changed_files" | grep '^skills/' | cut -d'/' -f2 | sort -u)
          fi

          # Filter to only skills that have eval files (one name per line).
          selected=""
          while IFS= read -r skill; do
            [ -n "$skill" ] || continue
            if has_eval "$skill"; then
              selected="${selected:+${selected}$'\n'}${skill}"
            fi
          done <<< "$candidates"

          if [ -n "$selected" ]; then
            json_array=$(printf '%s\n' "$selected" | jq -R . | jq -s -c .)
            emit "$json_array" true
            echo "Skills to update baselines: $(printf '%s\n' "$selected" | paste -sd, -)"
          else
            emit '[]' false
            echo "No skills with evals changed"
          fi
# Job: update-baselines -- runs each selected skill's eval and commits the new baseline ID.
  update-baselines:
    needs: detect-skills
    if: needs.detect-skills.outputs.has_skills == 'true'
    runs-on: ubuntu-latest
    permissions:
      # Needed to push the updated .eval-baselines.json back to the repository.
      contents: write
    strategy:
      fail-fast: false
      # One skill at a time: every matrix job commits to the same file on main.
      max-parallel: 1
      matrix:
        skill: ${{ fromJson(needs.detect-skills.outputs.skills) }}
    steps:
      - uses: actions/checkout@v4
        with:
          # Always start from the tip of main so earlier matrix jobs' commits
          # are already present.
          ref: main
          token: ${{ secrets.GITHUB_TOKEN }}
      - uses: pnpm/action-setup@v4
        with:
          version: 9
      - uses: actions/setup-node@v4
        with:
          node-version: '20'
          cache: 'pnpm'
          cache-dependency-path: eval-tooling/pnpm-lock.yaml
      - name: Install dependencies
        working-directory: eval-tooling
        run: pnpm install
      - name: Run eval for ${{ matrix.skill }}
        id: run-eval
        working-directory: eval-tooling
        env:
          # Passed through the environment instead of being interpolated into
          # the script, so an unusual skill name cannot alter the commands.
          SKILL_NAME: ${{ matrix.skill }}
          COMMIT_SHA: ${{ github.sha }}
          AI_GATEWAY_API_KEY: ${{ secrets.AI_GATEWAY_API_KEY }}
          AXIOM_TOKEN: ${{ secrets.AXIOM_TOKEN }}
          AXIOM_DATASET: ${{ secrets.AXIOM_DATASET }}
          AXIOM_URL: ${{ secrets.AXIOM_URL }}
          AXIOM_PLAY_URL: ${{ secrets.AXIOM_PLAY_URL }}
          AXIOM_PLAY_TOKEN: ${{ secrets.AXIOM_PLAY_TOKEN }}
          AXIOM_PLAY_ORG_ID: ${{ secrets.AXIOM_PLAY_ORG_ID }}
        run: |
          echo "Running eval for skill: ${SKILL_NAME}"
          # NOTE(review): the pipe into tee means this step does not fail on the
          # eval command's own exit status (the default run shell has no
          # pipefail) -- confirm that is intended.
          pnpm exec axiom eval "../skills/${SKILL_NAME}/.meta/${SKILL_NAME}.eval.ts" \
            --flag.git.branch=main \
            --flag.git.commit="${COMMIT_SHA}" \
            2>&1 | tee eval-output-raw.txt
          # Strip ANSI escape codes
          sed 's/\x1b\[[0-9;]*m//g' eval-output-raw.txt > eval-output.txt
          # Extract baseline ID from output
          # URL format: https://app.axiom.co/.../evaluations/<eval-name>/<runId>?baselineId=<id>
          # The --baseline flag needs the baselineId query param value
          baseline_id=$(grep -oE 'baselineId=[a-f0-9]+' eval-output.txt | head -1 | cut -d= -f2 || true)
          if [ -n "$baseline_id" ]; then
            echo "baseline_id=$baseline_id" >> "$GITHUB_OUTPUT"
            echo "Extracted baseline ID: $baseline_id"
          else
            echo "Failed to extract baseline ID from eval output"
            cat eval-output.txt
            exit 1
          fi
      - name: Update baseline file
        env:
          SKILL_NAME: ${{ matrix.skill }}
          BASELINE_ID: ${{ steps.run-eval.outputs.baseline_id }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          timestamp=$(date -u +"%Y-%m-%dT%H:%M:%SZ")
          # Seed the file on first use so jq has valid input to read.
          if [ ! -f .eval-baselines.json ]; then
            echo '{"baselines":{}}' > .eval-baselines.json
          fi
          # Update the baseline file using jq (write to a temp file, then swap,
          # so a jq failure never truncates the real file).
          jq --arg skill "$SKILL_NAME" \
            --arg baseline_id "$BASELINE_ID" \
            --arg commit "$COMMIT_SHA" \
            --arg timestamp "$timestamp" \
            '.baselines[$skill] = {baselineId: $baseline_id, commit: $commit, updatedAt: $timestamp}' \
            .eval-baselines.json > .eval-baselines.json.tmp
          mv .eval-baselines.json.tmp .eval-baselines.json
          echo "Updated baseline for $SKILL_NAME:"
          cat .eval-baselines.json
      - name: Commit baseline update
        env:
          SKILL_NAME: ${{ matrix.skill }}
          BASELINE_ID: ${{ steps.run-eval.outputs.baseline_id }}
          COMMIT_SHA: ${{ github.sha }}
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          git add .eval-baselines.json
          if git diff --staged --quiet; then
            echo "No changes to commit"
          else
            git commit -m "chore: update eval baseline for ${SKILL_NAME}" \
              -m "Baseline ID: ${BASELINE_ID}" \
              -m "Commit: ${COMMIT_SHA}"
            # main may have advanced while the eval was running; replay the
            # baseline commit on top of it so the push is not rejected.
            git pull --rebase origin main
            git push
          fi