Add blazor skills to dotnet-blazor plugin #3704
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Unified evaluation workflow for all PRs (same-repo and fork) and scheduled runs. | |
| # | |
| # IMPORTANT: The /evaluate command uses the `issue_comment` trigger, which | |
| # ALWAYS runs the workflow YAML from the default branch (main), NOT from the | |
| # PR branch. Changes to this file in a PR will not take effect until merged. | |
| # The skill-validator binary IS built from the PR branch for same-repo PRs, | |
| # so eng/skill-validator/ changes are tested before merge. | |
| # | |
| # For PRs (same-repo and fork): | |
| # - On PR open/sync, the `pr-status` job posts an initial commit status: | |
| # - "success" if no skills changed (required check passes immediately) | |
| # - "pending" if skills changed (maintainer must post /evaluate to trigger) | |
| # - When a maintainer posts "/evaluate" on the PR, the `gate` job validates | |
| # permissions and triggers the full evaluation pipeline. | |
| # | |
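| # For scheduled runs: | |
| # - Runs daily (only in dotnet/skills) and evaluates all plugins that have both skills and | |
| # tests, except plugins excluded from scheduled runs (currently dotnet-experimental). | |
| # - The evaluation is skipped when the previous scheduled run succeeded at the same commit. | |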
| # | |
| # Security model for fork PRs: | |
| # - Workflow YAML: always from the default branch (enforced by issue_comment | |
| # and pull_request_target triggers) | |
| # - Validator binary: built from the default branch (fork PRs) or PR branch | |
| # (same-repo PRs, selected via needs.gate.outputs.is_fork in the ref: expression) | |
| # - Skill/test content: checked out from the fork PR (untrusted data, read-only) | |
| # - Secret access: only users with write+ permission can trigger evaluation | |
| name: evaluation | |
| on: | |
| # Manual trigger for one-off deploys (e.g., AGENTVIZ SPA update) | |
| workflow_dispatch: | |
| # Same-repo PRs: post initial status | |
| pull_request: | |
| # Fork PRs: post initial status (runs from base branch for security) | |
| pull_request_target: | |
| # /evaluate command trigger | |
| issue_comment: | |
| types: [created] | |
| # Daily scheduled evaluation | |
| schedule: | |
| - cron: '0 0 * * *' # Once daily at midnight UTC | |
| # Concurrency group, always prefixed with the workflow name: | |
| # - comment starting with "/evaluate": eval-<issue number>, so a newer /evaluate on the | |
| # same PR cancels the run that is still in flight | |
| # - any other comment: eval-noop-<issue number>-<comment id>, unique per comment | |
| # - pull_request: eval-status-<PR number> | |
| # - pull_request_target: eval-fork-status-<PR number> | |
| # - everything else: the run id, unique per run | |
| concurrency: | |
| group: ${{ github.workflow }}-${{ github.event_name == 'issue_comment' && (startsWith(github.event.comment.body, '/evaluate') && format('eval-{0}', github.event.issue.number) || format('eval-noop-{0}-{1}', github.event.issue.number, github.event.comment.id)) || (github.event_name == 'pull_request' && format('eval-status-{0}', github.event.pull_request.number) || (github.event_name == 'pull_request_target' && format('eval-fork-status-{0}', github.event.pull_request.number) || github.run_id)) }} | |
| cancel-in-progress: true | |
| # MODEL and JUDGE_MODEL are passed to skill-validator as --model / --judge-model. | |
| # DASHBOARD_RETENTION_DAYS is not referenced in the jobs shown here; presumably a later job uses it. | |
| env: | |
| DASHBOARD_RETENTION_DAYS: 14 | |
| MODEL: claude-opus-4.6 | |
| JUDGE_MODEL: claude-opus-4.6 | |
| # Workflow-level token permissions; the jobs below declare their own, narrower sets. | |
| permissions: | |
| contents: write | |
| pull-requests: write | |
| statuses: write | |
| jobs: | |
| # ========================================================================== | |
| # PR STATUS JOBS | |
| # Post initial commit status so the required check is never stuck as "Expected". | |
| # Posts success (no skills) or pending (needs /evaluate). | |
| # ========================================================================== | |
| # Same-repo PRs: use pull_request trigger (has direct access to PR content) | |
| pr-status: | |
| # Only for pull_request events whose head branch lives in this repository (not a fork). | |
| if: >- | |
| github.event_name == 'pull_request' && | |
| github.event.pull_request.head.repo.full_name == github.repository | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| statuses: write | |
| steps: | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| # Full history so the merge-base and diff in the next step can be computed. | |
| fetch-depth: 0 | |
| persist-credentials: false | |
| - name: Discover changes requiring evaluation | |
| id: discover | |
| shell: pwsh | |
| run: | | |
| # Diff the PR head against its merge-base with the base commit. | |
| # --diff-filter=ACMR keeps added/copied/modified/renamed files and ignores deletions. | |
| $base = "${{ github.event.pull_request.base.sha }}" | |
| $head = "${{ github.event.pull_request.head.sha }}" | |
| $mergeBase = git merge-base $base $head | |
| $changedFiles = git diff --name-only --diff-filter=ACMR $mergeBase $head | |
| # A skill change is any file below plugins/<plugin>/skills/<dir>/ or tests/<plugin>/<dir>/. | |
| $hasSkillChanges = $changedFiles | | |
| Where-Object { $_ -match '^(plugins/[^/]+/skills|tests/[^/]+)/[^/]+/' } | | |
| Select-Object -First 1 | |
| # Skill-validator, vally-adapter and evaluation workflow changes need evaluation. | |
| # README.md and docs/ under eng/skill-validator/src/ don't affect evaluation. | |
| $hasInfraChanges = $changedFiles | | |
| Where-Object { | |
| ($_ -match '^eng/skill-validator/' -and $_ -notmatch '^eng/skill-validator/src/(README\.md|docs/)') -or | |
| ($_ -match '^eng/vally-adapter/') -or | |
| $_ -match '^\.github/workflows/(evaluation|vally-evaluation)\.yml$' | |
| } | | |
| Select-Object -First 1 | |
| # Step output needs_eval is "true" when either kind of change was found. | |
| if ($hasSkillChanges -or $hasInfraChanges) { | |
| echo "needs_eval=true" >> $env:GITHUB_OUTPUT | |
| } else { | |
| echo "needs_eval=false" >> $env:GITHUB_OUTPUT | |
| } | |
| - name: Post evaluation commit status | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # "pending" waits for /evaluate; "success" satisfies the check when nothing needs evaluation. | |
| if [[ "${{ steps.discover.outputs.needs_eval }}" == "true" ]]; then | |
| STATE="pending" | |
| DESC="Post /evaluate to trigger evaluation" | |
| else | |
| STATE="success" | |
| DESC="No skills to evaluate" | |
| fi | |
| # Commit status with context "evaluation-status" on the PR head commit, linking to this run. | |
| gh api "repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha }}" \ | |
| -f state="$STATE" \ | |
| -f context="evaluation-status" \ | |
| -f description="$DESC" \ | |
| -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # Fork PRs: use pull_request_target (runs from base branch, fetches PR metadata safely) | |
| fork-pr-status: | |
| # Only for pull_request_target events whose head repository differs from this one (a fork). | |
| if: >- | |
| github.event_name == 'pull_request_target' && | |
| github.event.pull_request.head.repo.full_name != github.repository | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| statuses: write | |
| steps: | |
| - name: Checkout base branch | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| # The working tree is the base commit; the fork's commits are only fetched, never checked out. | |
| ref: ${{ github.event.pull_request.base.sha }} | |
| fetch-depth: 0 | |
| persist-credentials: false | |
| - name: Fetch PR head for diff | |
| run: git fetch origin +refs/pull/${{ github.event.pull_request.number }}/head | |
| - name: Discover changes requiring evaluation | |
| id: discover | |
| shell: pwsh | |
| run: | | |
| # NOTE(review): the diff uses FETCH_HEAD (whatever refs/pull/<n>/head pointed to at fetch | |
| # time) while the status below is posted on the event's head.sha - confirm they cannot diverge. | |
| $base = "${{ github.event.pull_request.base.sha }}" | |
| $head = "FETCH_HEAD" | |
| $mergeBase = git merge-base $base $head | |
| # --diff-filter=ACMR keeps added/copied/modified/renamed files and ignores deletions. | |
| $changedFiles = git diff --name-only --diff-filter=ACMR $mergeBase $head | |
| # A skill change is any file below plugins/<plugin>/skills/<dir>/ or tests/<plugin>/<dir>/. | |
| $hasSkillChanges = $changedFiles | | |
| Where-Object { $_ -match '^(plugins/[^/]+/skills|tests/[^/]+)/[^/]+/' } | | |
| Select-Object -First 1 | |
| # Infrastructure changes: skill-validator (minus src/README.md and src/docs/), vally-adapter, | |
| # and the two evaluation workflow files. | |
| $hasInfraChanges = $changedFiles | | |
| Where-Object { | |
| ($_ -match '^eng/skill-validator/' -and $_ -notmatch '^eng/skill-validator/src/(README\.md|docs/)') -or | |
| ($_ -match '^eng/vally-adapter/') -or | |
| $_ -match '^\.github/workflows/(evaluation|vally-evaluation)\.yml$' | |
| } | | |
| Select-Object -First 1 | |
| # Step output needs_eval is "true" when either kind of change was found. | |
| if ($hasSkillChanges -or $hasInfraChanges) { | |
| echo "needs_eval=true" >> $env:GITHUB_OUTPUT | |
| } else { | |
| echo "needs_eval=false" >> $env:GITHUB_OUTPUT | |
| } | |
| - name: Post evaluation commit status | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # "pending" waits for /evaluate; "success" satisfies the check when nothing needs evaluation. | |
| if [[ "${{ steps.discover.outputs.needs_eval }}" == "true" ]]; then | |
| STATE="pending" | |
| DESC="Fork PR — post /evaluate to trigger evaluation" | |
| else | |
| STATE="success" | |
| DESC="No skills to evaluate" | |
| fi | |
| # Commit status with context "evaluation-status" on the PR head commit, linking to this run. | |
| gh api "repos/${{ github.repository }}/statuses/${{ github.event.pull_request.head.sha }}" \ | |
| -f state="$STATE" \ | |
| -f context="evaluation-status" \ | |
| -f description="$DESC" \ | |
| -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # ========================================================================== | |
| # GATE JOB | |
| # Validate /evaluate command: must be on a PR from a user with write+ permissions. | |
| # ========================================================================== | |
| gate: | |
| # Runs only for comments on pull requests whose body starts with "/evaluate". | |
| # Outputs the PR number, the head/base SHAs read from the API at this moment, and whether | |
| # the PR comes from a fork; later jobs key off these values. | |
| if: >- | |
| github.event.issue.pull_request && | |
| startsWith(github.event.comment.body, '/evaluate') | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| pull-requests: write | |
| statuses: write | |
| issues: write | |
| outputs: | |
| head_sha: ${{ steps.pr.outputs.head_sha }} | |
| base_sha: ${{ steps.pr.outputs.base_sha }} | |
| pr_number: ${{ steps.pr.outputs.pr_number }} | |
| is_fork: ${{ steps.pr.outputs.is_fork }} | |
| steps: | |
| - name: Check commenter permissions | |
| id: perms | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| # Event payload values are passed through the environment instead of being expanded | |
| # into the script text, so they can never be interpreted as shell syntax. | |
| COMMENTER: ${{ github.event.comment.user.login }} | |
| run: | | |
| PERMISSION=$(gh api "repos/${{ github.repository }}/collaborators/${COMMENTER}/permission" --jq '.permission') | |
| echo "Commenter ${COMMENTER} has permission: $PERMISSION" | |
| # Only admin, maintain and write may trigger an evaluation (it uses repository secrets). | |
| if [[ "$PERMISSION" != "admin" && "$PERMISSION" != "write" && "$PERMISSION" != "maintain" ]]; then | |
| echo "::error::User does not have write access" | |
| exit 1 | |
| fi | |
| - name: Get PR details | |
| id: pr | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| PR_NUMBER=${{ github.event.issue.number }} | |
| PR_DATA=$(gh api "repos/${{ github.repository }}/pulls/${PR_NUMBER}") | |
| HEAD_SHA=$(jq -r '.head.sha' <<<"$PR_DATA") | |
| HEAD_REPO=$(jq -r '.head.repo.full_name' <<<"$PR_DATA") | |
| BASE_REPO=$(jq -r '.base.repo.full_name' <<<"$PR_DATA") | |
| BASE_SHA=$(jq -r '.base.sha' <<<"$PR_DATA") | |
| # A PR is a fork PR when its head repository is not the base repository. | |
| if [[ "$HEAD_REPO" != "$BASE_REPO" ]]; then | |
| IS_FORK=true | |
| else | |
| IS_FORK=false | |
| fi | |
| echo "PR #${PR_NUMBER}: head=${HEAD_SHA} base=${BASE_SHA} fork=${IS_FORK}" | |
| { | |
| echo "is_fork=${IS_FORK}" | |
| echo "head_sha=${HEAD_SHA}" | |
| echo "base_sha=${BASE_SHA}" | |
| echo "pr_number=${PR_NUMBER}" | |
| } >> "$GITHUB_OUTPUT" | |
| - name: Add reaction to comment | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Best effort: a failed reaction must not fail the gate. | |
| gh api "repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" \ | |
| -X POST -f content='eyes' || true | |
| - name: Set pending commit status | |
| continue-on-error: true | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| gh api "repos/${{ github.repository }}/statuses/${{ steps.pr.outputs.head_sha }}" \ | |
| -f state=pending \ | |
| -f context="evaluation-status" \ | |
| -f description="Evaluation in progress..." \ | |
| -f target_url="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # ========================================================================== | |
| # DISCOVER JOB | |
| # Find skills to evaluate based on changed files. | |
| # ========================================================================== | |
| discover: | |
| # Runs after a successful `gate` (/evaluate comments) or on schedule. always() is required | |
| # because `gate` is skipped for scheduled runs. Scheduled runs are limited to dotnet/skills. | |
| needs: gate | |
| if: >- | |
| always() && | |
| (needs.gate.result == 'success' || github.event_name == 'schedule') && | |
| (github.event_name != 'schedule' || github.repository == 'dotnet/skills') | |
| runs-on: ubuntu-latest | |
| permissions: | |
| actions: read | |
| contents: read | |
| # All outputs come from the `find` step; they are empty when that step is skipped. | |
| outputs: | |
| entries: ${{ steps.find.outputs.entries }} | |
| has_entries: ${{ steps.find.outputs.has_entries }} | |
| is_infra: ${{ steps.find.outputs.is_infra }} | |
| plugins: ${{ steps.find.outputs.plugins }} | |
| has_plugins: ${{ steps.find.outputs.has_plugins }} | |
| steps: | |
| - name: Check for new commits since last evaluation | |
| if: github.event_name == 'schedule' | |
| id: check-changes | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Bypass the skip guard on manual reruns so "Re-run jobs" always executes. | |
| if [ "${GITHUB_RUN_ATTEMPT}" != "1" ]; then | |
| echo "Manual rerun (attempt ${GITHUB_RUN_ATTEMPT}) — bypassing skip guard" | |
| echo "has_changes=true" >> $GITHUB_OUTPUT | |
| exit 0 | |
| fi | |
| # Determine whether a new evaluation is needed by inspecting the most | |
| # recent completed scheduled run. | |
| # LATEST becomes "<head_sha> <conclusion>", or empty when there is no such run or the call fails. | |
| LATEST=$(gh api "repos/${{ github.repository }}/actions/workflows/evaluation.yml/runs?event=schedule&status=completed&per_page=1" \ | |
| --jq '(.workflow_runs[0] // empty) | "\(.head_sha) \(.conclusion)"' 2>/dev/null) || LATEST="" | |
| if [ -z "$LATEST" ]; then | |
| echo "No previous completed scheduled run found — proceeding" | |
| echo "has_changes=true" >> $GITHUB_OUTPUT | |
| exit 0 | |
| fi | |
| # Split LATEST at the space: text before it is the SHA, text after it the conclusion. | |
| LAST_SHA="${LATEST%% *}" | |
| LAST_CONCLUSION="${LATEST##* }" | |
| CURRENT_SHA="${{ github.sha }}" | |
| # Skip only when the same commit was already evaluated successfully. | |
| if [ "$LAST_SHA" = "$CURRENT_SHA" ] && [ "$LAST_CONCLUSION" = "success" ]; then | |
| echo "Last scheduled evaluation at $LAST_SHA succeeded — skipping" | |
| echo "has_changes=false" >> $GITHUB_OUTPUT | |
| else | |
| if [ "$LAST_SHA" != "$CURRENT_SHA" ]; then | |
| # The commit count is informational only; a failed lookup is reported as "unknown". | |
| COUNT=$(gh api "repos/${{ github.repository }}/compare/${LAST_SHA}...${CURRENT_SHA}" --jq '.total_commits' 2>/dev/null) || COUNT="unknown" | |
| echo "$COUNT new commit(s) since last evaluation ($LAST_SHA)" | |
| else | |
| echo "Last scheduled evaluation at $LAST_SHA concluded with '$LAST_CONCLUSION' — retrying" | |
| fi | |
| echo "has_changes=true" >> $GITHUB_OUTPUT | |
| fi | |
| # Skipped on schedule when the step above reported has_changes=false. | |
| - name: Checkout repository | |
| if: github.event_name != 'schedule' || steps.check-changes.outputs.has_changes == 'true' | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| fetch-depth: 0 | |
| persist-credentials: false | |
| # Makes the PR head available locally as origin/pr-head for the next step. | |
| - name: Fetch PR head | |
| if: github.event_name == 'issue_comment' | |
| run: git fetch origin +refs/pull/${{ needs.gate.outputs.pr_number }}/head:refs/remotes/origin/pr-head | |
| # Builds the evaluation matrix. Step outputs: | |
| # entries / has_entries - JSON array of { name, plugin, skills_path } objects for the evaluate matrix | |
| # plugins / has_plugins - JSON array of the distinct plugin names | |
| # is_infra - "true" only when the PR changes infrastructure but no skills | |
| - name: Find skills to evaluate | |
| if: github.event_name != 'schedule' || steps.check-changes.outputs.has_changes == 'true' | |
| id: find | |
| run: | | |
| $entries = @() | |
| $plugins = @() | |
| if ("${{ github.event_name }}" -eq "issue_comment") { | |
| # /evaluate command: detect individual changed skills using gate outputs | |
| # NOTE(review): $head is whatever origin/pr-head points to after the fetch, not the | |
| # head_sha recorded by the gate job - confirm a push in between cannot cause a mismatch. | |
| $base = "${{ needs.gate.outputs.base_sha }}" | |
| $head = (git rev-parse origin/pr-head) | |
| # Use a worktree so Test-Path checks are against PR content | |
| git worktree add /tmp/pr-content origin/pr-head 2>$null | |
| $contentRoot = "/tmp/pr-content" | |
| $mergeBase = git merge-base $base $head | |
| # --diff-filter=ACMR keeps added/copied/modified/renamed files and ignores deletions. | |
| $changedFiles = git diff --name-only --diff-filter=ACMR $mergeBase $head | |
| # Check if any changed files are in infrastructure paths | |
| # README.md and docs/ under eng/skill-validator/src/ don't affect evaluation. | |
| $hasInfraChanges = $changedFiles | | |
| Where-Object { | |
| ($_ -match '^eng/skill-validator/' -and $_ -notmatch '^eng/skill-validator/src/(README\.md|docs/)') -or | |
| ($_ -match '^eng/vally-adapter/') -or | |
| $_ -match '^\.github/workflows/(evaluation|vally-evaluation)\.yml$' | |
| } | | |
| Select-Object -First 1 | |
| # Also check for skill/test changes so we don't lose them | |
| $hasSkillChanges = $changedFiles | | |
| Where-Object { $_ -match '^(?:plugins/([^/]+)/skills|tests/([^/]+))/([^/]+)/' } | | |
| Select-Object -First 1 | |
| if ($hasInfraChanges -and -not $hasSkillChanges) { | |
| echo "is_infra=true" >> $env:GITHUB_OUTPUT | |
| # Infra-only: evaluate a small random subset of plugins to keep | |
| # the smoke-test fast while still catching regressions. | |
| # Candidates are plugins in the PR content that have both a skills/ and a tests/<plugin> directory. | |
| $allPlugins = @(Get-ChildItem -Path (Join-Path $contentRoot "plugins") -Directory -ErrorAction SilentlyContinue | | |
| Where-Object { | |
| (Test-Path (Join-Path $_.FullName "skills")) -and | |
| (Test-Path (Join-Path $contentRoot "tests" $_.Name)) | |
| } | | |
| Select-Object -ExpandProperty Name) | |
| # At most two plugins are picked. | |
| $plugins = @($allPlugins | Get-Random -Count ([Math]::Min(2, $allPlugins.Count))) | |
| Write-Host "Infrastructure changes detected, evaluating random subset: $($plugins -join ', ')" | |
| # One entry per plugin, covering the plugin's whole skills directory. | |
| $entries = @($plugins | ForEach-Object { | |
| @{ name = $_; plugin = $_; skills_path = "plugins/$_/skills" } | |
| }) | |
| } else { | |
| # Extract unique plugin/skill pairs from changed files | |
| # Capture group 1 is the plugin for plugins/ paths, group 2 the plugin for tests/ paths, | |
| # and group 3 the directory that follows (treated as the skill name). | |
| $changedPairs = @($changedFiles | | |
| Where-Object { $_ -match '^(?:plugins/([^/]+)/skills|tests/([^/]+))/([^/]+)/' } | | |
| ForEach-Object { | |
| $p = if ($Matches[1]) { $Matches[1] } else { $Matches[2] } | |
| "$p/$($Matches[3])" | |
| } | | |
| Sort-Object -Unique) | |
| # Filter to skills that have a SKILL.md and a tests directory | |
| $entries = @($changedPairs | ForEach-Object { | |
| $parts = $_ -split '/' | |
| $plugin = $parts[0] | |
| $skill = $parts[1] | |
| $skillMd = Join-Path $contentRoot "plugins" $plugin "skills" $skill "SKILL.md" | |
| $testsDir = Join-Path $contentRoot "tests" $plugin | |
| if ((Test-Path $skillMd) -and (Test-Path $testsDir)) { | |
| @{ | |
| name = "$plugin--$skill" | |
| plugin = $plugin | |
| skills_path = "plugins/$plugin/skills/$skill" | |
| } | |
| } | |
| } | Where-Object { $_ }) | |
| $plugins = @($entries | ForEach-Object { $_.plugin } | Sort-Object -Unique) | |
| } | |
| # The temporary worktree is no longer needed once the entries are computed. | |
| git worktree remove /tmp/pr-content --force 2>$null | |
| } else { | |
| # Schedule: evaluate all plugins with skills and tests | |
| # Exclude experimental plugins from scheduled runs — they are | |
| # evaluated only on-demand via /evaluate on PRs. | |
| $excludeFromSchedule = @('dotnet-experimental') | |
| $plugins = @(Get-ChildItem -Path "plugins" -Directory | | |
| Where-Object { (Test-Path (Join-Path $_.FullName "skills")) -and (Test-Path (Join-Path "tests" $_.Name)) -and ($_.Name -notin $excludeFromSchedule) } | | |
| Select-Object -ExpandProperty Name) | |
| $entries = @($plugins | ForEach-Object { | |
| @{ name = $_; plugin = $_; skills_path = "plugins/$_/skills" } | |
| }) | |
| } | |
| # Output entries for evaluate matrix | |
| if (-not $entries -or $entries.Count -eq 0) { | |
| Write-Host "No entries to evaluate" | |
| echo "entries=[]" >> $env:GITHUB_OUTPUT | |
| echo "has_entries=false" >> $env:GITHUB_OUTPUT | |
| } else { | |
| # -AsArray keeps the JSON an array even when there is a single entry. | |
| $json = $entries | ConvertTo-Json -Compress -AsArray | |
| Write-Host "Entries to evaluate: $json" | |
| echo "entries=$json" >> $env:GITHUB_OUTPUT | |
| echo "has_entries=true" >> $env:GITHUB_OUTPUT | |
| } | |
| # Output plugins for publish jobs | |
| if (-not $plugins -or $plugins.Count -eq 0) { | |
| echo "plugins=[]" >> $env:GITHUB_OUTPUT | |
| echo "has_plugins=false" >> $env:GITHUB_OUTPUT | |
| } else { | |
| $cjson = $plugins | ConvertTo-Json -Compress -AsArray | |
| echo "plugins=$cjson" >> $env:GITHUB_OUTPUT | |
| echo "has_plugins=true" >> $env:GITHUB_OUTPUT | |
| } | |
| shell: pwsh | |
| # ========================================================================== | |
| # BUILD VALIDATOR | |
| # Build the skill-validator from the appropriate ref. | |
| # ========================================================================== | |
| build-validator: | |
| # Runs only when discover succeeded and produced entries. always() is required because | |
| # `gate` is skipped for scheduled runs. | |
| needs: [gate, discover] | |
| if: >- | |
| always() && | |
| needs.discover.outputs.has_entries == 'true' && | |
| needs.discover.result == 'success' && | |
| (needs.gate.result == 'success' || github.event_name == 'schedule') | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| # cache-key lets downstream jobs restore the same validator archive from the cache. | |
| outputs: | |
| cache-key: ${{ steps.compute-key.outputs.cache-key }} | |
| steps: | |
| # For same-repo PRs (trusted), build from the PR branch so skill-validator | |
| # changes are tested. For fork PRs (untrusted), always build from the | |
| # base branch to prevent untrusted code from modifying tooling. | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| # Same-repo PR: the PR head SHA. Fork PR or schedule: empty string, which leaves the | |
| # choice to actions/checkout's default ref for the triggering event. | |
| ref: ${{ needs.gate.outputs.is_fork != 'true' && needs.gate.outputs.head_sha || '' }} | |
| persist-credentials: false | |
| # Key = runner OS + hash of the validator sources, Directory.Build.props and global.json. | |
| - name: Compute cache key | |
| id: compute-key | |
| run: echo "cache-key=skill-validator-${{ runner.os }}-${{ hashFiles('eng/skill-validator/src/**', 'eng/skill-validator/Directory.Build.props', 'global.json') }}" >> "$GITHUB_OUTPUT" | |
| - name: Cache validator archive | |
| id: cache-validator | |
| uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v4 | |
| with: | |
| path: skill-validator-dist.tar.gz | |
| key: ${{ steps.compute-key.outputs.cache-key }} | |
| # Everything below runs only on a cache miss. On a hit nothing is uploaded, and the | |
| # downstream jobs restore the archive from the cache instead of downloading the artifact. | |
| - name: Setup .NET SDK | |
| if: steps.cache-validator.outputs.cache-hit != 'true' | |
| uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5 | |
| with: | |
| global-json-file: global.json | |
| - name: Build skill-validator | |
| if: steps.cache-validator.outputs.cache-hit != 'true' | |
| run: dotnet publish eng/skill-validator/src/SkillValidator.csproj | |
| # Packs the publish output directory into the archive named by the cache path above. | |
| - name: Create validator archive | |
| if: steps.cache-validator.outputs.cache-hit != 'true' | |
| run: tar -czf skill-validator-dist.tar.gz -C artifacts/publish/SkillValidator/release . | |
| - name: Upload built validator | |
| if: steps.cache-validator.outputs.cache-hit != 'true' | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 | |
| with: | |
| name: skill-validator-dist.tar.gz | |
| path: skill-validator-dist.tar.gz | |
| archive: false | |
| retention-days: 1 | |
| # ========================================================================== | |
| # EVALUATE | |
| # Run skill-validator across changed skills (matrix). | |
| # ========================================================================== | |
| evaluate: | |
| # One matrix job per discovered entry; runs only when the validator build succeeded. | |
| needs: [gate, discover, build-validator] | |
| if: always() && !cancelled() && needs.build-validator.result == 'success' | |
| runs-on: ubuntu-latest | |
| permissions: | |
| contents: read | |
| timeout-minutes: 180 | |
| name: evaluate (${{ matrix.entry.name }}) | |
| strategy: | |
| # A failing entry must not cancel the evaluation of the other entries. | |
| fail-fast: false | |
| matrix: | |
| # Falls back to an empty list when discover produced no entries output. | |
| entry: ${{ fromJson(needs.discover.outputs.entries || '[]') }} | |
| steps: | |
| - name: Checkout skills content | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| # PR head commit for /evaluate runs; empty on schedule (gate skipped), which leaves | |
| # the choice to actions/checkout's default ref. | |
| ref: ${{ needs.gate.outputs.head_sha || '' }} | |
| persist-credentials: false | |
| - name: Setup .NET SDK | |
| uses: actions/setup-dotnet@c2fa09f4bde5ebb9d1777cf28262a3eb3db3ced7 # v5 | |
| with: | |
| global-json-file: global.json | |
| # The validator archive comes from the cache when present, otherwise from the artifact | |
| # uploaded by build-validator. | |
| - name: Restore cached validator | |
| id: cache-validator | |
| uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v4 | |
| with: | |
| path: skill-validator-dist.tar.gz | |
| key: ${{ needs.build-validator.outputs.cache-key }} | |
| - name: Download built validator | |
| if: steps.cache-validator.outputs.cache-hit != 'true' | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 | |
| with: | |
| name: skill-validator-dist.tar.gz | |
| # Unpack to the same path the validator was published to in build-validator. | |
| - name: Extract validator | |
| run: | | |
| mkdir -p artifacts/publish/SkillValidator/release | |
| tar -xzf skill-validator-dist.tar.gz -C artifacts/publish/SkillValidator/release | |
| # Picks one of the configured Copilot token secrets for this matrix job and exposes it as | |
| # the step output `token`. The choice is by matrix job index, with a random fallback. | |
| - name: Select random Copilot token | |
| id: select-token | |
| env: | |
| TOKEN_1: ${{ secrets.COPILOT_GITHUB_TOKEN }} | |
| TOKEN_2: ${{ secrets.COPILOT_GITHUB_TOKEN_2 }} | |
| TOKEN_3: ${{ secrets.COPILOT_GITHUB_TOKEN_3 }} | |
| TOKEN_4: ${{ secrets.COPILOT_GITHUB_TOKEN_4 }} | |
| TOKEN_5: ${{ secrets.COPILOT_GITHUB_TOKEN_5 }} | |
| TOKEN_6: ${{ secrets.COPILOT_GITHUB_TOKEN_6 }} | |
| TOKEN_7: ${{ secrets.COPILOT_GITHUB_TOKEN_7 }} | |
| TOKEN_8: ${{ secrets.COPILOT_GITHUB_TOKEN_8 }} | |
| run: | | |
| # Gather every non-empty token together with the name of the secret it came from. | |
| available_tokens=() | |
| secret_names=() | |
| for n in {1..8}; do | |
| ref="TOKEN_${n}" | |
| [[ -n "${!ref}" ]] || continue | |
| available_tokens+=("${!ref}") | |
| # The first secret has no numeric suffix. | |
| if (( n == 1 )); then | |
| secret_names+=("COPILOT_GITHUB_TOKEN") | |
| else | |
| secret_names+=("COPILOT_GITHUB_TOKEN_${n}") | |
| fi | |
| done | |
| total=${#available_tokens[@]} | |
| if (( total == 0 )); then | |
| echo "::error::No COPILOT_GITHUB_TOKEN secrets are configured" | |
| exit 1 | |
| fi | |
| # Spread matrix jobs across tokens by job index so parallel jobs avoid sharing one; | |
| # use RANDOM only when strategy.job-index is empty. | |
| job_index="${{ strategy.job-index }}" | |
| if [[ -z "$job_index" ]]; then | |
| pick=$(( RANDOM % total )) | |
| else | |
| pick=$(( job_index % total )) | |
| fi | |
| echo "Selected ${secret_names[$pick]} (1 of ${total} available tokens, job-index=${job_index:-random})" | |
| # Register the value as a mask before writing it to the step output. | |
| echo "::add-mask::${available_tokens[$pick]}" | |
| echo "token=${available_tokens[$pick]}" >> "$GITHUB_OUTPUT" | |
| # Runs the validator for one matrix entry with the selected Copilot token. | |
| # RUNS and the PARALLEL_* limits depend on the run type: infra-only smoke test, schedule, or | |
| # a regular /evaluate on changed skills. | |
| - name: Run skill-validator | |
| env: | |
| GITHUB_TOKEN: ${{ steps.select-token.outputs.token }} | |
| RESULTS_PATH: artifacts/TestResults/skill-validator/${{ matrix.entry.name }} | |
| RUNS: ${{ needs.discover.outputs.is_infra == 'true' && '1' || (github.event_name == 'schedule' && '5' || '3') }} | |
| PARALLEL_SKILLS: ${{ (needs.discover.outputs.is_infra == 'true' || github.event_name == 'schedule') && '2' || '5' }} | |
| PARALLEL_SCENARIOS: ${{ (needs.discover.outputs.is_infra == 'true' || github.event_name == 'schedule') && '3' || '5' }} | |
| PARALLEL_RUNS: ${{ (needs.discover.outputs.is_infra == 'true' || github.event_name == 'schedule') && '3' || '5' }} | |
| # The matrix values are built from directory names found in the PR, which are | |
| # attacker-controlled for fork PRs. They are passed through the environment so they are | |
| # never expanded into the script text (a name such as "x$(cmd)" would otherwise execute). | |
| ENTRY_PLUGIN: ${{ matrix.entry.plugin }} | |
| ENTRY_SKILLS_PATH: ${{ matrix.entry.skills_path }} | |
| run: | | |
| # Build the argument list as an array so every value stays one literal argument, | |
| # even if it contains spaces or shell metacharacters. | |
| ARGS=( | |
| --verdict-warn-only --verbose | |
| --results-dir "$RESULTS_PATH" | |
| --reporter console --reporter json --reporter markdown | |
| --model "${{ env.MODEL }}" | |
| --judge-model "${{ env.JUDGE_MODEL }}" | |
| --runs "$RUNS" | |
| --parallel-skills "$PARALLEL_SKILLS" | |
| --parallel-scenarios "$PARALLEL_SCENARIOS" | |
| --parallel-runs "$PARALLEL_RUNS" | |
| --keep-sessions | |
| ) | |
| artifacts/publish/SkillValidator/release/skill-validator evaluate "${ARGS[@]}" --tests-dir "./tests/${ENTRY_PLUGIN}" "./${ENTRY_SKILLS_PATH}" | |
| # Runs even when the validator step failed, so partial results are still uploaded. | |
| # The artifact name prefix "skill-validator-results-" is what comment-on-pr downloads by pattern. | |
| - name: Upload results | |
| if: always() | |
| uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7 | |
| with: | |
| name: skill-validator-results-${{ matrix.entry.name }} | |
| path: artifacts/TestResults/skill-validator/${{ matrix.entry.name }}/ | |
| include-hidden-files: true | |
| retention-days: 30 | |
| # ========================================================================== | |
| # VALLY EVALUATION | |
| # Runs vally evaluations in parallel with skill-validator. | |
| # NOTE: These results do not gate PRs for now. | |
| # ========================================================================== | |
| vally-evaluate: | |
| # Calls the reusable vally workflow with the same entries as the skill-validator matrix. | |
| # It does not wait for build-validator, so it runs alongside it. | |
| needs: [gate, discover] | |
| if: >- | |
| always() && !cancelled() && | |
| needs.discover.outputs.has_entries == 'true' && | |
| needs.discover.result == 'success' | |
| uses: ./.github/workflows/vally-evaluation.yml | |
| with: | |
| entries: ${{ needs.discover.outputs.entries }} | |
| # Empty on scheduled runs, where gate is skipped. | |
| # NOTE(review): confirm the called workflow handles an empty head_sha. | |
| head_sha: ${{ needs.gate.outputs.head_sha }} | |
| secrets: inherit | |
| # ========================================================================== | |
| # COMMENT ON PR | |
| # Post consolidated evaluation results as a PR comment. | |
| # ========================================================================== | |
| comment-on-pr: | |
| # Runs for /evaluate runs (a PR number is known) that had entries, unless evaluate was | |
| # cancelled. It still runs when evaluate failed or was skipped, so the PR gets a failure comment. | |
| # publish-session-data is defined outside the portion of the file shown here. | |
| needs: [gate, discover, build-validator, evaluate, publish-session-data] | |
| if: >- | |
| always() && | |
| needs.evaluate.result != 'cancelled' && | |
| needs.gate.outputs.pr_number != '' && | |
| needs.discover.outputs.has_entries == 'true' | |
| runs-on: ubuntu-latest | |
| permissions: | |
| pull-requests: write | |
| steps: | |
| # Each result artifact is downloaded into its own subdirectory of all-results/. | |
| # Download problems are tolerated unless evaluate fully succeeded. | |
| - name: Download all result artifacts | |
| if: needs.evaluate.result != 'skipped' | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 | |
| with: | |
| pattern: skill-validator-results-* | |
| path: all-results/ | |
| merge-multiple: false | |
| continue-on-error: ${{ needs.evaluate.result != 'success' }} | |
| # The validator binary is needed again here for its `evaluate consolidate` command. | |
| - name: Restore cached validator | |
| if: needs.evaluate.result != 'skipped' | |
| id: cache-validator | |
| uses: actions/cache/restore@27d5ce7f107fe9357f9df03efb73ab90386fccae # v4 | |
| with: | |
| path: skill-validator-dist.tar.gz | |
| key: ${{ needs.build-validator.outputs.cache-key }} | |
| continue-on-error: true | |
| - name: Download built validator | |
| if: needs.evaluate.result != 'skipped' && steps.cache-validator.outputs.cache-hit != 'true' | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 | |
| with: | |
| name: skill-validator-dist.tar.gz | |
| continue-on-error: ${{ needs.evaluate.result != 'success' }} | |
| # Extract only when the archive actually exists (hashFiles returns '' when no file matches). | |
| - name: Extract validator | |
| if: needs.evaluate.result != 'skipped' && hashFiles('skill-validator-dist.tar.gz') != '' | |
| run: | | |
| mkdir -p artifacts/publish/SkillValidator/release | |
| tar -xzf skill-validator-dist.tar.gz -C artifacts/publish/SkillValidator/release | |
| # Builds the PR comment (simplified table plus links) and the step summary (full table), | |
| # or posts a short failure comment when there are no results to consolidate. | |
| - name: Consolidate and post results | |
| continue-on-error: true | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| PR_NUMBER=${{ needs.gate.outputs.pr_number }} | |
| RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # If results exist, consolidate them into a summary comment | |
| # NOTE(review): JSON_FILES is a whitespace-separated list that is expanded unquoted below, | |
| # so a results path containing whitespace would be split - confirm entry names cannot contain it. | |
| JSON_FILES=$(find all-results/ -name results.json 2>/dev/null || true) | |
| VALIDATOR="artifacts/publish/SkillValidator/release/skill-validator" | |
| if [ -n "$JSON_FILES" ] && [ -x "$VALIDATOR" ]; then | |
| "$VALIDATOR" evaluate consolidate \ | |
| --output summary-body.md \ | |
| $JSON_FILES | |
| # Create a simplified table for the PR comment by stripping | |
| # "Quality (Plugin)" and "Agents Invoked" columns (GH UI concern). | |
| # The full table is preserved in the workflow step summary. | |
| # The script reads summary-body.md and writes simplified-body.md; it also renames the | |
| # "Quality (Isolated)" header to "Quality". | |
| python3 - <<'PY' | |
| lines = open('summary-body.md').read().split('\n') | |
| result, remove, prev = [], set(), False | |
| for line in lines: | |
| is_tbl = line.startswith('|') | |
| if is_tbl and not prev: | |
| parts = line.split('|') | |
| remove = {i for i, p in enumerate(parts) if p.strip() in ('Quality (Plugin)', 'Agents Invoked')} | |
| parts = [p.replace('Quality (Isolated)', 'Quality') for p in parts] | |
| result.append('|'.join(p for i, p in enumerate(parts) if i not in remove)) | |
| elif is_tbl: | |
| parts = line.split('|') | |
| result.append('|'.join(p for i, p in enumerate(parts) if i not in remove)) | |
| else: | |
| result.append(line) | |
| prev = is_tbl | |
| open('simplified-body.md', 'w').write('\n'.join(result)) | |
| PY | |
| INVESTIGATE_PROMPT="" | |
| # If any skill failed, build a copy-paste prompt for AI-assisted investigation | |
| # jq -e exits non-zero when the expression is false, so the branch is taken only on failures. | |
| if jq -se '[.[].verdicts[] | select(.passed == false)] | length > 0' $JSON_FILES > /dev/null 2>&1; then | |
| RUN_ID="${{ github.run_id }}" | |
| INVESTIGATE_PROMPT=$(printf '\n> **To investigate failures**, paste this to your AI coding agent:\n>\n> _For PR %s in %s, download eval artifacts with `gh run download %s --repo %s --pattern "skill-validator-results-*" --dir ./eval-results`, then fetch https://raw.githubusercontent.com/%s/%s/eng/skill-validator/src/docs/InvestigatingResults.md and follow it to analyze the results.json files. Diagnose each failure, suggest fixes to the eval.yaml and skill content, and tell me what to fix first._' \ | |
| "${PR_NUMBER}" "${{ github.repository }}" "${RUN_ID}" "${{ github.repository }}" "${{ github.repository }}" "${{ needs.gate.outputs.head_sha }}") | |
| fi | |
| # PR comment: simplified table + "Full results" link | |
| { | |
| cat simplified-body.md | |
| echo "" | |
| echo "[🔍 Full Results - additional metrics and failure investigation steps]($RUN_URL)" | |
| } > consolidated-comment.md | |
| # Append AGENTVIZ replay link if session data was published | |
| if [[ "${{ needs.publish-session-data.result }}" == "success" ]]; then | |
| MANIFEST_URL="https://raw.githubusercontent.com/${{ github.repository }}/dashboard-session-data/data/manifest.json" | |
| # Derive the Pages base URL from the repository (org.github.io/repo) | |
| ORG=$(echo "${{ github.repository }}" | cut -d/ -f1) | |
| REPO=$(echo "${{ github.repository }}" | cut -d/ -f2) | |
| REPLAY_URL="https://${ORG}.github.io/${REPO}/replay/index.html" | |
| # Percent-encode the manifest URL so it can be used as a query parameter value. | |
| MANIFEST_ENCODED=$(printf '%s' "$MANIFEST_URL" | jq -sRr @uri) | |
| FULL_URL="${REPLAY_URL}?manifest=${MANIFEST_ENCODED}&tag=pr-${PR_NUMBER}" | |
| echo "" >> consolidated-comment.md | |
| echo "**[▶ Sessions Visualisation](${FULL_URL})** -- interactive replay of all evaluation sessions" >> consolidated-comment.md | |
| fi | |
| # Action summary: full table (all columns) + investigation prompt | |
| { | |
| cat summary-body.md | |
| if [ -n "$INVESTIGATE_PROMPT" ]; then | |
| echo "$INVESTIGATE_PROMPT" | |
| fi | |
| } >> "$GITHUB_STEP_SUMMARY" | |
| # Post the comment, reading the body from the file. | |
| gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" \ | |
| -X POST -F "body=@consolidated-comment.md" | |
| else | |
| # No results — evaluate was skipped or failed before producing artifacts | |
| if [[ "${{ needs.evaluate.result }}" == "skipped" ]]; then | |
| BODY="❌ Evaluation did not complete (upstream job failed or was skipped). [View workflow run](${RUN_URL})" | |
| else | |
| BODY="❌ Evaluation failed. [View workflow run](${RUN_URL})" | |
| fi | |
| gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" -X POST -f body="$BODY" | |
| fi | |
| # ========================================================================== | |
| # REPORT STATUS | |
| # Post final evaluation status via commit status API. | |
| # ========================================================================== | |
| # Job header for report-status. | |
| # Eligible only for issue_comment-triggered runs whose gate job succeeded. | |
| # always() keeps the job eligible even when a job listed in `needs` failed or | |
| # was skipped, so a final status can still be posted. | |
| report-status: | |
| needs: [gate, discover, build-validator, evaluate] | |
| if: always() && github.event_name == 'issue_comment' && needs.gate.result == 'success' | |
| runs-on: ubuntu-latest | |
| # statuses: used by the commit status call in this job; issues/pull-requests: | |
| # presumably needed for the reaction and comment API calls (confirm against token scopes). | |
| permissions: | |
| statuses: write | |
| pull-requests: write | |
| issues: write | |
| steps: | |
| - name: Remove eyes reaction from trigger comment | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Best-effort cleanup: find the "eyes" reaction that github-actions[bot] left | |
| # on the triggering comment and delete it. A failed delete is ignored. | |
| REACTIONS_API="repos/${{ github.repository }}/issues/comments/${{ github.event.comment.id }}/reactions" | |
| REACTION_ID=$(gh api "$REACTIONS_API" \ | |
| --jq '.[] | select(.content == "eyes" and .user.login == "github-actions[bot]") | .id' | head -1 || echo "") | |
| if [[ -n "$REACTION_ID" && "$REACTION_ID" != "null" ]]; then | |
| gh api "${REACTIONS_API}/${REACTION_ID}" \ | |
| -X DELETE || true | |
| fi | |
| - name: Set final commit status | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Snapshot the upstream job outcomes once so the decision chain reads cleanly. | |
| EVALUATE_RESULT="${{ needs.evaluate.result }}" | |
| BUILD_RESULT="${{ needs.build-validator.result }}" | |
| DISCOVER_RESULT="${{ needs.discover.result }}" | |
| HAS_ENTRIES="${{ needs.discover.outputs.has_entries }}" | |
| # Checked in order: evaluation passed, nothing to evaluate, validator build | |
| # failed, evaluation failed; anything else is reported as "error". | |
| if [[ "$EVALUATE_RESULT" == "success" ]]; then | |
| STATE="success" | |
| DESC="Evaluation passed" | |
| elif [[ "$EVALUATE_RESULT" == "skipped" && "$DISCOVER_RESULT" == "success" && "$HAS_ENTRIES" != "true" ]]; then | |
| STATE="success" | |
| DESC="No skills to evaluate" | |
| elif [[ "$BUILD_RESULT" == "failure" ]]; then | |
| STATE="failure" | |
| DESC="Validator build failed" | |
| elif [[ "$EVALUATE_RESULT" == "failure" ]]; then | |
| STATE="failure" | |
| DESC="Evaluation failed" | |
| else | |
| STATE="error" | |
| DESC="Evaluation did not complete (build: ${BUILD_RESULT}, evaluate: ${EVALUATE_RESULT}, discover: ${DISCOVER_RESULT})" | |
| fi | |
| # Post the status on the PR head commit under the "evaluation-status" context. | |
| STATUS_API="repos/${{ github.repository }}/statuses/${{ needs.gate.outputs.head_sha }}" | |
| RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| gh api "$STATUS_API" \ | |
| -f state="$STATE" \ | |
| -f context="evaluation-status" \ | |
| -f description="$DESC" \ | |
| -f target_url="$RUN_URL" | |
| - name: Post completion comment for skipped evaluation | |
| if: needs.evaluate.result == 'skipped' | |
| continue-on-error: true | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| PR_NUMBER=${{ needs.gate.outputs.pr_number }} | |
| RUN_URL="${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}" | |
| # Start from the generic "did not complete" message and replace it only when | |
| # discovery succeeded but reported no entries to evaluate. | |
| BODY="❌ Evaluation did not complete (upstream job failed or was skipped). [View workflow run](${RUN_URL})" | |
| if [[ "${{ needs.discover.result }}" == "success" && "${{ needs.discover.outputs.has_entries }}" != "true" ]]; then | |
| BODY="⏭️ No skills to evaluate — no changed skills with tests were found in this PR. [View workflow run](${RUN_URL})" | |
| fi | |
| gh api "repos/${{ github.repository }}/issues/${PR_NUMBER}/comments" -X POST -f body="$BODY" | |
| # ========================================================================== | |
| # PUBLISH TOKEN DATA | |
| # Both scheduled and PR runs: generate token-usage data → dashboard-token-data | |
| # ========================================================================== | |
| # Job header and setup steps for publish-token-data. | |
| # Eligible when discovery succeeded and reported plugins, and the run is either | |
| # on refs/heads/main or an issue_comment-triggered run whose gate job succeeded. | |
| # always() && !cancelled() keeps it eligible when evaluate failed or was skipped. | |
| # Concurrency: cancel-in-progress is false, so a run already in progress in the | |
| # publish-token-data group is not cancelled by a newer one. | |
| publish-token-data: | |
| needs: [gate, discover, evaluate] | |
| if: >- | |
| always() && | |
| !cancelled() && | |
| needs.discover.result == 'success' && | |
| needs.discover.outputs.has_plugins == 'true' && | |
| ( | |
| github.ref == 'refs/heads/main' || | |
| (github.event_name == 'issue_comment' && needs.gate.result == 'success') | |
| ) | |
| concurrency: | |
| group: publish-token-data | |
| cancel-in-progress: false | |
| runs-on: ubuntu-latest | |
| steps: | |
| # Checkout without persisting credentials in the local git config. | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| persist-credentials: false | |
| # Download every skill-validator-results-* artifact under all-results/. | |
| # merge-multiple is false; the later PowerShell step looks for one directory | |
| # per artifact name. A download failure is tolerated only when evaluate did | |
| # not succeed (see continue-on-error). | |
| - name: Download evaluation artifacts | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 | |
| with: | |
| pattern: skill-validator-results-* | |
| path: all-results/ | |
| merge-multiple: false | |
| continue-on-error: ${{ needs.evaluate.result != 'success' }} | |
| - name: Fetch existing token data from dashboard-token-data | |
| run: | | |
| # Best effort: seed /tmp/token-data/data with the token-usage.json already on | |
| # the dashboard-token-data branch. Every failure here is swallowed, leaving | |
| # the directory empty. | |
| git fetch origin dashboard-token-data:dashboard-token-data 2>/dev/null || true | |
| mkdir -p /tmp/token-data/data | |
| if git checkout dashboard-token-data -- data/token-usage.json 2>/dev/null; then | |
| # Copy the file out, then restore the tracked working tree from HEAD. | |
| cp data/token-usage.json /tmp/token-data/data/ && git checkout HEAD -- . || true | |
| fi | |
| - name: Get PR title | |
| if: github.event_name == 'issue_comment' | |
| id: pr-info | |
| env: | |
| GH_TOKEN: ${{ github.token }} | |
| run: | | |
| # Look up the PR title; fall back to an empty title if the API call fails. | |
| PR_TITLE=$(gh api "repos/${{ github.repository }}/pulls/${{ needs.gate.outputs.pr_number }}" --jq '.title' 2>/dev/null) || PR_TITLE="" | |
| # The title is free-form user text: emit it with printf and quote the output | |
| # file path, matching how other steps in this workflow write to GITHUB_OUTPUT. | |
| printf 'pr_title=%s\n' "$PR_TITLE" >> "$GITHUB_OUTPUT" | |
| - name: Generate token usage data | |
| env: | |
| PR_TITLE: ${{ steps.pr-info.outputs.pr_title }} | |
| # Passed as environment data instead of being spliced into the script text, | |
| # so a quote in a plugin name or an empty PR number cannot alter the script. | |
| PLUGINS_JSON: ${{ needs.discover.outputs.plugins }} | |
| PR_NUMBER: ${{ needs.gate.outputs.pr_number }} | |
| run: | | |
| # Collects token usage for every discovered plugin by running | |
| # generate-benchmark-data.ps1 against the newest results.json of each | |
| # matching artifact directory. Output goes to /tmp/token-data/data. | |
| $source = if ("${{ github.event_name }}" -eq "issue_comment") { "pr" } else { "scheduled" } | |
| $plugins = $env:PLUGINS_JSON | ConvertFrom-Json | |
| foreach ($plugin in $plugins) { | |
| # For scheduled runs and infra-change PRs, artifacts are named | |
| # skill-validator-results-$plugin. For individual skill-change PRs, | |
| # artifacts are named skill-validator-results-${plugin}--${skill}. | |
| # Search for both patterns so token data is captured in all cases. | |
| $artifactDirs = @(Get-ChildItem -Path "all-results" -Directory -ErrorAction SilentlyContinue | | |
| Where-Object { $_.Name -eq "skill-validator-results-$plugin" -or $_.Name -like "skill-validator-results-$plugin--*" }) | |
| if ($artifactDirs.Count -eq 0) { | |
| Write-Warning "No artifacts found for $plugin, skipping" | |
| continue | |
| } | |
| foreach ($artifactDir in $artifactDirs) { | |
| # Run directories are named yyyyMMdd-HHmmss style (8 digits, dash, 6 digits); | |
| # sorting by name descending and taking the first selects the latest one. | |
| $runDir = Get-ChildItem -Path $artifactDir.FullName -Directory -ErrorAction SilentlyContinue | | |
| Where-Object { $_.Name -match '^\d{8}-\d{6}$' } | | |
| Sort-Object Name -Descending | | |
| Select-Object -First 1 | |
| if (-not $runDir) { | |
| Write-Warning "No run results found in $($artifactDir.Name), skipping" | |
| continue | |
| } | |
| $resultsFile = Join-Path $runDir.FullName "results.json" | |
| Write-Host "`n=== Collecting token usage for: $plugin (from $($artifactDir.Name)) ===" | |
| $params = @{ | |
| ResultsFile = $resultsFile | |
| PluginName = $plugin | |
| OutputDir = "/tmp/token-data/data" | |
| Source = $source | |
| RetentionDays = $env:DASHBOARD_RETENTION_DAYS | |
| } | |
| if ($source -eq "pr") { | |
| # Only evaluated for PR runs, where the gate job supplies a number. | |
| $params.PRNumber = [int]$env:PR_NUMBER | |
| $params.PRTitle = $env:PR_TITLE | |
| } | |
| $params.SkipBenchmarkData = $true | |
| & ./eng/dashboard/generate-benchmark-data.ps1 @params | |
| } | |
| } | |
| shell: pwsh | |
| - name: Push to dashboard-token-data branch | |
| run: | | |
| # Publishes /tmp/token-data/data/token-usage.json as the only data file on | |
| # the dashboard-token-data branch, creating the branch if it does not exist. | |
| cd /tmp | |
| # Token-authenticated HTTPS remote on github.com; the remote named origin | |
| # carries this URL, so the final push authenticates with it. | |
| REPO_URL="https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" | |
| if git ls-remote --exit-code --heads "$REPO_URL" dashboard-token-data > /dev/null 2>&1; then | |
| git clone --branch dashboard-token-data --single-branch "$REPO_URL" token-deploy | |
| else | |
| # Branch does not exist yet: start it from a fresh repository. | |
| mkdir token-deploy && cd token-deploy && git init && git checkout -b dashboard-token-data | |
| git remote add origin "$REPO_URL" | |
| cd /tmp | |
| fi | |
| cd /tmp/token-deploy | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| # Remove any stale files so this branch stays token-only | |
| git rm -rf data/ --ignore-unmatch --quiet 2>/dev/null || true | |
| mkdir -p data | |
| # Ensure token-usage.json exists; create an empty fallback if missing | |
| if [ ! -f /tmp/token-data/data/token-usage.json ]; then | |
| mkdir -p /tmp/token-data/data | |
| printf '{ "entries": [] }\n' > /tmp/token-data/data/token-usage.json | |
| fi | |
| cp /tmp/token-data/data/token-usage.json data/ | |
| git add data/token-usage.json | |
| # Nothing staged means the published data is already current. | |
| git diff --cached --quiet && echo "No changes to deploy" && exit 0 | |
| if [[ "${{ github.event_name }}" == "issue_comment" ]]; then | |
| git commit -m "Update PR token usage data (PR #${{ needs.gate.outputs.pr_number }})" | |
| else | |
| git commit -m "Update scheduled token usage data" | |
| fi | |
| git push origin dashboard-token-data | |
| # ========================================================================== | |
| # PUBLISH SESSION DATA | |
| # Both scheduled and PR runs: flatten JSONL sessions → dashboard-session-data | |
| # ========================================================================== | |
| # Job header and setup steps for publish-session-data. | |
| # Eligible when discovery succeeded and reported plugins, and the run is either | |
| # on refs/heads/main or an issue_comment-triggered run whose gate job succeeded. | |
| # always() && !cancelled() keeps it eligible when evaluate failed or was skipped. | |
| # Concurrency: cancel-in-progress is false, so a run already in progress in the | |
| # publish-session-data group is not cancelled by a newer one. | |
| publish-session-data: | |
| needs: [gate, discover, evaluate] | |
| if: >- | |
| always() && !cancelled() && | |
| needs.discover.result == 'success' && | |
| needs.discover.outputs.has_plugins == 'true' && | |
| ( | |
| github.ref == 'refs/heads/main' || | |
| (github.event_name == 'issue_comment' && needs.gate.result == 'success') | |
| ) | |
| runs-on: ubuntu-latest | |
| concurrency: | |
| group: publish-session-data | |
| cancel-in-progress: false | |
| steps: | |
| # Checkout without persisting credentials in the local git config. | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| persist-credentials: false | |
| # Download every skill-validator-results-* artifact under all-results/. | |
| # A download failure is tolerated only when evaluate did not succeed | |
| # (see continue-on-error). | |
| - name: Download evaluation artifacts | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 | |
| with: | |
| pattern: skill-validator-results-* | |
| path: all-results/ | |
| merge-multiple: false | |
| continue-on-error: ${{ needs.evaluate.result != 'success' }} | |
| - name: Inspect downloaded artifacts | |
| if: always() | |
| run: | | |
| # Diagnostic output only: print what was downloaded so a missing or | |
| # unexpectedly shaped artifact tree is visible in the job log. | |
| echo "=== all-results/ directory listing (3 levels) ===" | |
| if [ ! -d all-results ]; then | |
| echo "all-results/ directory does not exist!" | |
| else | |
| find all-results -maxdepth 3 -ls 2>/dev/null | head -80 | |
| echo "---" | |
| echo "sessions.db files:" | |
| find all-results -name 'sessions.db' -ls 2>/dev/null | |
| echo "events.jsonl files:" | |
| find all-results -name 'events.jsonl' 2>/dev/null | head -20 | |
| fi | |
| - name: Determine source metadata | |
| id: meta | |
| run: | | |
| # Emit three step outputs: source (pr|scheduled), pr_number (empty for | |
| # scheduled runs) and subdir (pr/<number> or scheduled/<UTC date>). | |
| if [[ "${{ github.event_name }}" == "issue_comment" ]]; then | |
| { | |
| echo "source=pr" | |
| echo "pr_number=${{ needs.gate.outputs.pr_number }}" | |
| echo "subdir=pr/${{ needs.gate.outputs.pr_number }}" | |
| } >> "$GITHUB_OUTPUT" | |
| else | |
| { | |
| echo "source=scheduled" | |
| echo "pr_number=" | |
| echo "subdir=scheduled/$(date -u +%Y-%m-%d)" | |
| } >> "$GITHUB_OUTPUT" | |
| fi | |
| - name: Build session manifest | |
| shell: pwsh | |
| run: | | |
| # Run build-replay-sessions.ps1 over the downloaded artifacts (all-results), | |
| # writing into the local "staging" directory. Source and PR number come from | |
| # the "meta" step outputs; the PR number is empty for scheduled runs. | |
| # The command is one logical line joined with backtick continuations. | |
| ./eng/dashboard/build-replay-sessions.ps1 ` | |
| -ResultsDir all-results ` | |
| -OutputDir staging ` | |
| -Source ${{ steps.meta.outputs.source }} ` | |
| -PrNumber "${{ steps.meta.outputs.pr_number }}" | |
| - name: Clone existing session data branch | |
| run: | | |
| # Prepare /tmp/session-deploy as a checkout of dashboard-session-data, or as | |
| # a fresh repository on that branch name if the branch does not exist yet. | |
| cd /tmp | |
| # Token-authenticated HTTPS remote on github.com; later steps push through | |
| # the remote named origin, which carries this URL. | |
| REPO_URL="https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" | |
| if git ls-remote --exit-code --heads "$REPO_URL" dashboard-session-data > /dev/null 2>&1; then | |
| git clone --branch dashboard-session-data --single-branch "$REPO_URL" session-deploy | |
| else | |
| mkdir session-deploy && cd session-deploy && git init && git checkout -b dashboard-session-data | |
| git remote add origin "$REPO_URL" | |
| cd /tmp | |
| fi | |
| cd /tmp/session-deploy | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| - name: Merge and purge old sessions | |
| shell: pwsh | |
| run: | | |
| # Run purge-replay-sessions.ps1 with the cloned branch's data directory as | |
| # both the existing input and the output, the "staging" directory as the new | |
| # input, and a 7-day retention value. | |
| # The command is one logical line joined with backtick continuations. | |
| ./eng/dashboard/purge-replay-sessions.ps1 ` | |
| -ExistingDir /tmp/session-deploy/data ` | |
| -NewDir staging ` | |
| -OutputDir /tmp/session-deploy/data ` | |
| -RetentionDays 7 | |
| - name: Push to dashboard-session-data branch | |
| run: | | |
| # Commit and push whatever changed under data/ in the session-deploy clone. | |
| cd /tmp/session-deploy | |
| git add data/ | |
| # Nothing staged means the branch is already up to date; finish successfully. | |
| if git diff --cached --quiet; then | |
| echo "No changes to deploy" | |
| exit 0 | |
| fi | |
| # PR-triggered runs record the PR number in the commit message. | |
| if [[ "${{ github.event_name }}" == "issue_comment" ]]; then | |
| COMMIT_MSG="Update session data (PR #${{ needs.gate.outputs.pr_number }})" | |
| else | |
| COMMIT_MSG="Update scheduled session data" | |
| fi | |
| git commit -m "$COMMIT_MSG" | |
| git push origin dashboard-session-data | |
| # ========================================================================== | |
| # PUBLISH EVAL DATA | |
| # Scheduled runs only: generate benchmark data → dashboard-eval-data | |
| # ========================================================================== | |
| # Job header and setup steps for publish-eval-data. | |
| # Eligible only for schedule-triggered runs where discovery succeeded and | |
| # reported plugins. always() && !cancelled() keeps it eligible when evaluate | |
| # failed or was skipped. | |
| # Concurrency: cancel-in-progress is false, so a run already in progress in the | |
| # publish-eval-data group is not cancelled by a newer one. | |
| publish-eval-data: | |
| needs: [gate, discover, evaluate] | |
| if: >- | |
| always() && | |
| !cancelled() && | |
| needs.discover.result == 'success' && | |
| needs.discover.outputs.has_plugins == 'true' && | |
| github.event_name == 'schedule' | |
| concurrency: | |
| group: publish-eval-data | |
| cancel-in-progress: false | |
| runs-on: ubuntu-latest | |
| steps: | |
| # Checkout without persisting credentials in the local git config. | |
| - name: Checkout repository | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| persist-credentials: false | |
| # Download every skill-validator-results-* artifact under all-results/. | |
| # A download failure is tolerated only when evaluate did not succeed | |
| # (see continue-on-error). | |
| - name: Download evaluation artifacts | |
| uses: actions/download-artifact@3e5f45b2cfb9172054b4087a40e8e0b5a5461e7c # v8 | |
| with: | |
| pattern: skill-validator-results-* | |
| path: all-results/ | |
| merge-multiple: false | |
| continue-on-error: ${{ needs.evaluate.result != 'success' }} | |
| - name: Fetch existing eval data from dashboard-eval-data | |
| run: | | |
| # Best effort: seed /tmp/eval-data/data with the data/ directory already on | |
| # the dashboard-eval-data branch. Every failure here is swallowed, leaving | |
| # the directory empty. | |
| git fetch origin dashboard-eval-data:dashboard-eval-data 2>/dev/null || true | |
| mkdir -p /tmp/eval-data/data | |
| if git checkout dashboard-eval-data -- data/ 2>/dev/null; then | |
| # Copy the files out, then restore the tracked working tree from HEAD. | |
| cp -r data/* /tmp/eval-data/data/ && git checkout HEAD -- . || true | |
| fi | |
| - name: Generate benchmark data | |
| env: | |
| # Passed as environment data instead of being spliced into the script text, | |
| # so a quote in a plugin name cannot alter the script. | |
| PLUGINS_JSON: ${{ needs.discover.outputs.plugins }} | |
| run: | | |
| # Builds per-plugin benchmark JSON under /tmp/eval-data/data from the newest | |
| # results.json of each plugin artifact, purges stale entries, then writes a | |
| # components.json manifest listing the resulting data files. | |
| $sha = "${{ github.sha }}" | |
| $commitMsg = git log -1 --format='%s' $sha | |
| $commitTimestamp = git log -1 --format='%aI' $sha | |
| $commitAuthor = git log -1 --format='%an' $sha | |
| # Commit descriptor handed to the generator script as compact JSON. | |
| $commitJson = @{ | |
| id = $sha | |
| message = $commitMsg | |
| timestamp = $commitTimestamp | |
| url = "https://github.com/${{ github.repository }}/commit/$sha" | |
| author = @{ name = $commitAuthor; username = "${{ github.actor }}" } | |
| } | ConvertTo-Json -Compress | |
| $plugins = $env:PLUGINS_JSON | ConvertFrom-Json | |
| foreach ($plugin in $plugins) { | |
| $artifactDir = "all-results/skill-validator-results-$plugin" | |
| # Run directories are named with 8 digits, a dash and 6 digits; sorting by | |
| # name descending and taking the first selects the latest one. | |
| $runDir = Get-ChildItem -Path $artifactDir -Directory -ErrorAction SilentlyContinue | | |
| Where-Object { $_.Name -match '^\d{8}-\d{6}$' } | | |
| Sort-Object Name -Descending | | |
| Select-Object -First 1 | |
| if (-not $runDir) { | |
| Write-Warning "No run results found for $plugin, skipping" | |
| continue | |
| } | |
| $resultsFile = Join-Path $runDir.FullName "results.json" | |
| Write-Host "`n=== Generating benchmark data for: $plugin ===" | |
| $existingFile = "/tmp/eval-data/data/$plugin.json" | |
| $params = @{ | |
| ResultsFile = $resultsFile | |
| PluginName = $plugin | |
| OutputDir = "/tmp/eval-data/data" | |
| CommitJson = $commitJson | |
| RetentionDays = $env:DASHBOARD_RETENTION_DAYS | |
| Source = 'scheduled' | |
| SkipTokenUsage = $true | |
| } | |
| # Only pass the existing data file when it is present and non-empty. | |
| if ((Test-Path $existingFile) -and (Get-Content $existingFile -Raw -ErrorAction SilentlyContinue)) { | |
| $params.ExistingDataFile = $existingFile | |
| } | |
| & ./eng/dashboard/generate-benchmark-data.ps1 @params | |
| } | |
| # Purge entries older than retention window | |
| & ./eng/dashboard/generate-benchmark-data.ps1 -PurgeStaleFiles -DataDir "/tmp/eval-data/data" -RetentionDays $env:DASHBOARD_RETENTION_DAYS | |
| # Generate components.json manifest (exclude token-usage.json if present) | |
| $componentNames = Get-ChildItem -Path "/tmp/eval-data/data" -Filter "*.json" -File -ErrorAction SilentlyContinue | | |
| Where-Object { $_.Name -notin @("components.json", "token-usage.json") } | | |
| ForEach-Object { $_.BaseName } | |
| @($componentNames) | ConvertTo-Json -AsArray | Out-File -FilePath "/tmp/eval-data/data/components.json" -Encoding utf8 | |
| shell: pwsh | |
| - name: Push to dashboard-eval-data branch | |
| run: | | |
| # Publishes the JSON files from /tmp/eval-data/data (except token-usage.json) | |
| # to the dashboard-eval-data branch, creating the branch if it does not exist. | |
| cd /tmp | |
| # Token-authenticated HTTPS remote on github.com; the remote named origin | |
| # carries this URL, so the final push authenticates with it. | |
| REPO_URL="https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" | |
| if git ls-remote --exit-code --heads "$REPO_URL" dashboard-eval-data > /dev/null 2>&1; then | |
| git clone --branch dashboard-eval-data --single-branch "$REPO_URL" eval-deploy | |
| else | |
| # Branch does not exist yet: start it from a fresh repository. | |
| mkdir eval-deploy && cd eval-deploy && git init && git checkout -b dashboard-eval-data | |
| git remote add origin "$REPO_URL" | |
| cd /tmp | |
| fi | |
| cd /tmp/eval-deploy | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| # Remove stale files so this branch only contains current eval data | |
| git rm -rf data/ --ignore-unmatch --quiet 2>/dev/null || true | |
| mkdir -p data | |
| # Copy only eval data files (not token-usage.json) | |
| # The explicit copy fails the step if the components.json manifest is missing. | |
| cp /tmp/eval-data/data/components.json data/ | |
| for f in /tmp/eval-data/data/*.json; do | |
| fname=$(basename "$f") | |
| [ "$fname" = "token-usage.json" ] && continue | |
| cp "$f" "data/$fname" | |
| done | |
| git add data/ | |
| # Nothing staged means the published data is already current. | |
| git diff --cached --quiet && echo "No changes to deploy" && exit 0 | |
| git commit -m "Update benchmark data" | |
| git push origin dashboard-eval-data | |
| # ========================================================================== | |
| # DEPLOY DASHBOARD | |
| # Scheduled runs + manual dispatch on main: assemble data from both | |
| # branches + UI → gh-pages. No data generation — pure copy and deploy. | |
| # ========================================================================== | |
| # Job header and setup steps for deploy-dashboard. | |
| # Eligible for schedule-triggered runs where discovery succeeded and reported | |
| # plugins, or for workflow_dispatch runs on refs/heads/main. | |
| # always() && !cancelled() keeps it eligible when a publish job listed in | |
| # `needs` failed or was skipped. | |
| # Concurrency: cancel-in-progress is false, so a run already in progress in the | |
| # deploy-dashboard group is not cancelled by a newer one. | |
| deploy-dashboard: | |
| needs: [discover, publish-token-data, publish-eval-data, publish-session-data] | |
| if: >- | |
| always() && | |
| !cancelled() && | |
| ( | |
| (needs.discover.result == 'success' && needs.discover.outputs.has_plugins == 'true' && github.event_name == 'schedule') || | |
| (github.event_name == 'workflow_dispatch' && github.ref == 'refs/heads/main') | |
| ) | |
| concurrency: | |
| group: deploy-dashboard | |
| cancel-in-progress: false | |
| runs-on: ubuntu-latest | |
| steps: | |
| # Checkout without persisting credentials in the local git config. | |
| - name: Checkout repository (for dashboard UI files) | |
| uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6 | |
| with: | |
| persist-credentials: false | |
| # Node 20 for the npm ci / npm run build commands in the AGENTVIZ build step. | |
| - uses: actions/setup-node@48b55a011bda9f5d6aeb4c2d9c7362e8dae4041e # v6.4.0 | |
| with: | |
| node-version: 20 | |
| - name: Fetch eval data from dashboard-eval-data branch | |
| run: | | |
| # Best effort: copy the data/ directory from the dashboard-eval-data branch | |
| # into /tmp/gh-pages/data. Every failure here is swallowed, leaving the | |
| # directory without eval data. | |
| mkdir -p /tmp/gh-pages/data | |
| git fetch origin dashboard-eval-data:dashboard-eval-data 2>/dev/null || true | |
| if git checkout dashboard-eval-data -- data/ 2>/dev/null; then | |
| # Copy the files out, then restore the tracked working tree from HEAD. | |
| cp -r data/* /tmp/gh-pages/data/ && git checkout HEAD -- . || true | |
| fi | |
| - name: Fetch token data from dashboard-token-data branch | |
| run: | | |
| git fetch origin dashboard-token-data:dashboard-token-data 2>/dev/null || true | |
| # Read token-usage.json straight from the branch; if that fails, write an | |
| # empty entries document instead. | |
| if ! git show dashboard-token-data:data/token-usage.json > /tmp/gh-pages/data/token-usage.json 2>/dev/null; then | |
| echo '{"entries":[]}' > /tmp/gh-pages/data/token-usage.json | |
| fi | |
| - name: Check if AGENTVIZ SPA needs update | |
| id: check-replay | |
| run: | | |
| # Compares the latest AGENTVIZ commit with the commit recorded on gh-pages. | |
| # Step outputs: target_sha, deployed_sha, and skip (true when they match). | |
| AGENTVIZ_REPO="https://github.com/jayparikh/agentviz.git" | |
| AGENTVIZ_BRANCH="main" | |
| # Resolve the latest commit on the AGENTVIZ branch (no clone needed) | |
| TARGET_SHA=$(git ls-remote "$AGENTVIZ_REPO" "refs/heads/$AGENTVIZ_BRANCH" | cut -f1) | |
| if [ -z "$TARGET_SHA" ]; then | |
| echo "::error::Could not resolve AGENTVIZ branch $AGENTVIZ_BRANCH" | |
| exit 1 | |
| fi | |
| echo "target_sha=$TARGET_SHA" >> "$GITHUB_OUTPUT" | |
| echo "AGENTVIZ target commit: $TARGET_SHA" | |
| # Read the currently deployed commit SHA from gh-pages (no clone needed) | |
| # A failed download leaves DEPLOYED_SHA empty, which forces an update. | |
| DEPLOYED_SHA="" | |
| DEPLOYED_SHA=$(curl -fsSL \ | |
| "https://raw.githubusercontent.com/${{ github.repository }}/gh-pages/replay/.agentviz-commit" \ | |
| 2>/dev/null) || true | |
| echo "deployed_sha=$DEPLOYED_SHA" >> "$GITHUB_OUTPUT" | |
| if [ "$TARGET_SHA" = "$DEPLOYED_SHA" ]; then | |
| echo "skip=true" >> "$GITHUB_OUTPUT" | |
| echo "AGENTVIZ SPA is up-to-date (commit $TARGET_SHA), skipping build." | |
| else | |
| echo "skip=false" >> "$GITHUB_OUTPUT" | |
| echo "AGENTVIZ SPA needs update: deployed=$DEPLOYED_SHA target=$TARGET_SHA" | |
| fi | |
| # Despite its name, this step does not build anything: it configures the | |
| # actions/cache entry for /tmp/agentviz-dist, keyed by the resolved AGENTVIZ | |
| # commit. The actual build is the following step, which runs only when this | |
| # step's cache-hit output is not 'true'. | |
| - name: Build AGENTVIZ SPA | |
| if: steps.check-replay.outputs.skip != 'true' | |
| uses: actions/cache@27d5ce7f107fe9357f9df03efb73ab90386fccae # v4 | |
| id: agentviz-cache | |
| with: | |
| path: /tmp/agentviz-dist | |
| key: agentviz-dist-${{ steps.check-replay.outputs.target_sha }} | |
| - name: Build AGENTVIZ SPA (on cache miss) | |
| if: steps.check-replay.outputs.skip != 'true' && steps.agentviz-cache.outputs.cache-hit != 'true' | |
| run: | | |
| # Build AGENTVIZ at the commit resolved by the check-replay step and place | |
| # the build output in /tmp/agentviz-dist (the cached path). | |
| TARGET_SHA="${{ steps.check-replay.outputs.target_sha }}" | |
| git clone https://github.com/jayparikh/agentviz.git /tmp/agentviz-src | |
| cd /tmp/agentviz-src | |
| # Check out the exact resolved commit for deterministic builds | |
| if ! git checkout "$TARGET_SHA"; then | |
| echo "::error::Failed to check out AGENTVIZ commit $TARGET_SHA" | |
| exit 1 | |
| fi | |
| npm ci | |
| npm run build | |
| mkdir -p /tmp/agentviz-dist | |
| cp -r dist/* /tmp/agentviz-dist/ | |
| - name: Deploy to GitHub Pages | |
| run: | | |
| # Assembles the gh-pages content (data JSON, dashboard UI files and, when | |
| # needed, the AGENTVIZ SPA under replay/) and pushes it to gh-pages. | |
| cd /tmp | |
| # Token-authenticated HTTPS remote on github.com; the remote named origin | |
| # carries this URL, so the final push authenticates with it. | |
| REPO_URL="https://x-access-token:${{ secrets.GITHUB_TOKEN }}@github.com/${{ github.repository }}.git" | |
| if git ls-remote --exit-code --heads "$REPO_URL" gh-pages > /dev/null 2>&1; then | |
| git clone --branch gh-pages --single-branch "$REPO_URL" deploy | |
| else | |
| # Branch does not exist yet: start it from a fresh repository. | |
| mkdir deploy && cd deploy && git init && git checkout -b gh-pages | |
| git remote add origin "$REPO_URL" | |
| cd /tmp | |
| fi | |
| cd /tmp/deploy | |
| # Identity is configured once here for both the cloned and the new repository. | |
| git config user.name "github-actions[bot]" | |
| git config user.email "github-actions[bot]@users.noreply.github.com" | |
| # Copy data from both branches | |
| mkdir -p data | |
| cp /tmp/gh-pages/data/*.json data/ | |
| # Copy dashboard UI from source tree | |
| cp "${{ github.workspace }}/eng/dashboard/dashboard.html" index.html | |
| cp "${{ github.workspace }}/eng/dashboard/dashboard.js" dashboard.js | |
| cp "${{ github.workspace }}/eng/dashboard/token-usage.js" token-usage.js | |
| # Deploy AGENTVIZ SPA (skip if already present and unchanged) | |
| if [ "${{ steps.check-replay.outputs.skip }}" != "true" ]; then | |
| rm -rf replay | |
| mkdir -p replay | |
| if [ -d /tmp/agentviz-dist ]; then | |
| cp -r /tmp/agentviz-dist/* replay/ | |
| else | |
| echo "::error::No AGENTVIZ build artifacts found" | |
| exit 1 | |
| fi | |
| # Record the deployed commit; the check-replay step reads this file back. | |
| echo "${{ steps.check-replay.outputs.target_sha }}" > replay/.agentviz-commit | |
| fi | |
| git add . | |
| # Nothing staged means the published site is already current. | |
| git diff --cached --quiet && echo "No changes to deploy" && exit 0 | |
| git commit -m "Update dashboard" | |
| git push origin gh-pages | |