modelcontextprotocol · koic · Mar 1, 2026
diff --git a/.github/workflows/conformance.yml b/.github/workflows/conformance.yml
@@ -0,0 +1,29 @@
+name: Conformance Tests
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+  workflow_dispatch:
+
+concurrency:
+  group: conformance-${{ github.ref }}
+  cancel-in-progress: true # A newer run in the same group cancels the one still in progress.
+
+permissions:
+  contents: read
+
+jobs:
+  server-conformance:
+    runs-on: ubuntu-latest
+    continue-on-error: true # A failure of this job does not fail the workflow run.
+    steps:
+      - uses: actions/checkout@v6
+      - uses: ruby/setup-ruby@v1
+        with:
+          ruby-version: '4.0' # Specify the latest supported Ruby version.
+          bundler-cache: true
+      - uses: actions/setup-node@v4 # Provides `npx`, which `rake conformance` needs to run the suite.
+        with:
+          node-version: '24' # Specify the latest Node.js version.
+      - run: bundle exec rake conformance
diff --git a/README.md b/README.md
@@ -1028,6 +1028,12 @@ The client provides a wrapper class for tools returned by the server:
 
 This class provides easy access to tool properties like name, description, input schema, and output schema.
 
+## Conformance Testing
+
+The `conformance/` directory contains a test server and runner that validate the SDK against the MCP specification using [`@modelcontextprotocol/conformance`](https://github.com/modelcontextprotocol/conformance).
+
+See [conformance/README.md](conformance/README.md) for usage instructions.
+
 ## Documentation
 
 - [SDK API documentation](https://rubydoc.info/gems/mcp)

diff --git a/Rakefile b/Rakefile
@@ -14,4 +14,46 @@ require "rubocop/rake_task"
 
 RuboCop::RakeTask.new
 
-task default: [:test, :rubocop]
+task default: [:rubocop, :test, :conformance]
+
+desc "Run MCP conformance tests (PORT, SCENARIO, SPEC_VERSION, VERBOSE)"
+task :conformance do |t|
+  next unless npx_available?(t.name)
+
+  require_relative "conformance/runner"
+
+  options = {}
+  options[:port] = Integer(ENV["PORT"]) if ENV["PORT"]
+  options[:scenario] = ENV["SCENARIO"] if ENV["SCENARIO"]
+  options[:spec_version] = ENV["SPEC_VERSION"] if ENV["SPEC_VERSION"]
+  options[:verbose] = true if ENV["VERBOSE"]
+
+  Conformance::Runner.new(**options).run
+end
+
+desc "List available conformance scenarios"
+task :conformance_list do |t|
+  next unless npx_available?(t.name)
+
+  system("npx", "--yes", "@modelcontextprotocol/conformance", "list", "--server")
+end
+
+desc "Start the conformance server (PORT)"
+task :conformance_server do
+  require_relative "conformance/server"
+
+  options = {}
+  options[:port] = Integer(ENV["PORT"]) if ENV["PORT"]
+
+  Conformance::Server.new(**options).start
+rescue Interrupt
+  puts "Exiting..."
+  exit
+end
+
+def npx_available?(task_name)
+  return true if system("which", "npx", out: File::NULL, err: File::NULL)
+
+  warn("Skipping #{task_name}: npx is not installed. Install Node.js to run this task: https://nodejs.org/")
+  false
+end
diff --git a/conformance/README.md b/conformance/README.md
@@ -0,0 +1,103 @@
+# MCP Conformance Tests
+
+Validates the Ruby SDK's conformance to the MCP specification using [`@modelcontextprotocol/conformance`](https://github.com/modelcontextprotocol/conformance).
+
+## Prerequisites
+
+- Node.js (for `npx`)
+- `bundle install` completed
+
+## Running the Tests
+
+### Run all scenarios
+
+```bash
+bundle exec rake conformance
+```
+
+Starts the conformance server, runs all active scenarios against it, prints a pass/fail
+summary for each scenario, and exits with a non-zero status code if any unexpected failures
+are detected. Scenarios listed in `expected_failures.yml` are allowed to fail without
+affecting the exit code.
+
+### Environment variables
+
+| Variable       | Description                          | Default |
+|----------------|--------------------------------------|---------|
+| `PORT`         | Server port                          | `9292`  |
+| `SCENARIO`     | Run a single scenario by name        | (all)   |
+| `SPEC_VERSION` | Filter scenarios by spec version     | (all)   |
+| `VERBOSE`      | Raw JSON output when set (any value) | (off)   |
+
+```bash
+# Run a single scenario
+bundle exec rake conformance SCENARIO=ping
+
+# Use a different port with verbose output
+bundle exec rake conformance PORT=3000 VERBOSE=1
+
+# Start the server on a specific port
+bundle exec rake conformance_server PORT=3000
+```
+
+### Start the server and test separately
+
+```bash
+# Terminal 1: start the server
+bundle exec rake conformance_server
+
+# Terminal 2: run all scenarios
+npx @modelcontextprotocol/conformance server --url http://localhost:9292/mcp
+
+# Terminal 2: run a single scenario
+npx @modelcontextprotocol/conformance server --url http://localhost:9292/mcp --scenario ping
+```
+
+Keeps the server alive between test runs, which avoids the startup overhead when iterating
+on a single scenario. Stop the server with Ctrl+C when done.
+
+### List available scenarios
+
+```bash
+bundle exec rake conformance_list
+```
+
+Prints all scenario names that can be passed to `SCENARIO`.
+
+## SDK Tier Report
+
+The [MCP SDK Tier system](https://modelcontextprotocol.io/community/sdk-tiers) requires SDK
+maintainers to self-assess and report results to the SDK Working Group via
+[modelcontextprotocol/modelcontextprotocol issues](https://github.com/modelcontextprotocol/modelcontextprotocol/issues).
+
+To generate a full tier assessment report, use the `/mcp-sdk-tier-audit` slash command from
+the [modelcontextprotocol/conformance](https://github.com/modelcontextprotocol/conformance)
+repository with the conformance server running:
+
+```bash
+# Terminal 1 (this repository): start the conformance server
+bundle exec rake conformance_server
+
+# Terminal 2 (conformance repository): run the tier audit skill as a slash command in Claude Code
+/mcp-sdk-tier-audit /path/to/modelcontextprotocol/ruby-sdk http://localhost:9292/mcp
+```
+
+The skill evaluates conformance pass rate, issue label taxonomy, triage metrics, documentation
+coverage, and policy compliance, then produces a markdown report suitable for tier advancement
+submissions.
+
+## File Structure
+
+```
+conformance/
+  server.rb              # Conformance server (Rack + Puma, default port 9292)
+  runner.rb              # Starts the server, runs npx conformance, exits with result code
+  expected_failures.yml  # Baseline of known-failing scenarios
+  README.md              # This file
+```
+
+## Known Limitations
+
+Known-failing scenarios are registered in `conformance/expected_failures.yml`. They are allowed to
+fail without affecting the exit code and are tracked to catch regressions.
+The expected failures are listed in the output of `bundle exec rake conformance`.
diff --git a/conformance/expected_failures.yml b/conformance/expected_failures.yml
@@ -0,0 +1,9 @@
+server: # The runner passes this file to the conformance CLI via --expected-failures.
+  # Server-to-client requests (sampling/createMessage, elicitation/create) are not
+  # implemented: `Transport#send_request` does not exist in the current SDK.
+  - tools-call-sampling
+  - tools-call-elicitation
+  - elicitation-sep1034-defaults
+  - elicitation-sep1330-enums
+  # TODO: The SDK does not extract `_meta.progressToken` from tool call requests or deliver `notifications/progress` to tools.
+  - tools-call-with-progress
diff --git a/conformance/runner.rb b/conformance/runner.rb
@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+# Starts the conformance server and runs `npx @modelcontextprotocol/conformance` against it.
+require "English"
+require "net/http"
+require_relative "server"
+
+module Conformance
+  class Runner
+    # Empirical timeout: Puma typically starts well within this window.
+    SERVER_START_TIMEOUT = 20
+    SERVER_POLL_INTERVAL = 0.5
+    SERVER_HEALTH_CHECK_RETRIES = (SERVER_START_TIMEOUT / SERVER_POLL_INTERVAL).to_i
+
+    def initialize(port: Server::DEFAULT_PORT, scenario: nil, spec_version: nil, verbose: false)
+      @port = port
+      @scenario = scenario
+      @spec_version = spec_version
+      @verbose = verbose
+    end
+
+    def run
+      command = build_command
+      server_pid = start_server
+
+      run_conformance(command, server_pid: server_pid)
+    end
+
+    private
+
+    def build_command
+      expected_failures_yml = File.expand_path("expected_failures.yml", __dir__)
+
+      npx_command = ["npx", "--yes", "@modelcontextprotocol/conformance", "server", "--url", "http://localhost:#{@port}/mcp"]
+      npx_command += ["--scenario", @scenario] if @scenario
+      npx_command += ["--spec-version", @spec_version] if @spec_version
+      npx_command += ["--verbose"] if @verbose
+      npx_command += ["--expected-failures", expected_failures_yml]
+      npx_command
+    end
+
+    def start_server
+      puts "Starting conformance server on port #{@port}..."
+
+      server_pid = fork do
+        Conformance::Server.new(port: @port).start
+      end
+
+      health_url = URI("http://localhost:#{@port}/health")
+      ready = false
+      SERVER_HEALTH_CHECK_RETRIES.times do
+        begin
+          response = Net::HTTP.get_response(health_url)
+          if response.code == "200"
+            ready = true
+            break
+          end
+        rescue Errno::ECONNREFUSED, Errno::ECONNRESET, Net::ReadTimeout
+          # not ready yet
+        end
+        sleep(SERVER_POLL_INTERVAL)
+      end
+
+      unless ready
+        warn("ERROR: Conformance server did not start within #{SERVER_START_TIMEOUT} seconds")
+        terminate_server(server_pid)
+        exit(1)
+      end
+
+      puts "Server ready. Running conformance tests..."
+
+      server_pid
+    end
+
+    def run_conformance(command, server_pid:)
+      puts "Command: #{command.join(" ")}\n\n"
+
+      conformance_exit_code = nil
+      begin
+        system(*command)
+        conformance_exit_code = $CHILD_STATUS.exitstatus
+      ensure
+        terminate_server(server_pid)
+      end
+
+      exit(conformance_exit_code || 1)
+    end
+
+    def terminate_server(pid)
+      Process.kill("TERM", pid)
+    rescue Errno::ESRCH
+      # process already exited
+    ensure
+      begin
+        Process.wait(pid)
+      rescue Errno::ECHILD
+        # already reaped
+      end
+    end
+  end
+end