diff --git a/.ci/README.md b/.ci/README.md new file mode 100644 index 0000000..0bd59bd --- /dev/null +++ b/.ci/README.md @@ -0,0 +1,92 @@ +# .ci — CI 镜像与流水线 + +``` +.ci/ +├── config.yaml # 统一配置(镜像、job 定义) +├── build.py # 镜像构建 +├── run.py # CI 流水线执行 +└── images/ + ├── nvidia/Dockerfile + └── ascend/Dockerfile +``` + +**前置依赖**:Docker、Python 3.10+、`pip install pyyaml` + +--- + +## 配置文件 `config.yaml` + +```yaml +repo: + url: https://github.com/InfiniTensor/InfiniOps.git + branch: master + +images: + nvidia: + dockerfile: .ci/images/nvidia/ + build_args: + BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 + +jobs: + nvidia_gpu: + image: latest # latest | + platform: nvidia + resources: + gpu_ids: "0" # "0" | "0,2" | "all" + memory: 32GB + shm_size: 16g # 避免 PyTorch SHMEM 不足 + timeout: 3600 # 容器内脚本最大运行秒数 + setup: pip install .[dev] + env: # 可选,注入容器环境变量 + MY_VAR: value + stages: + - name: test + run: pytest tests/ -n auto -v --tb=short --junitxml=/workspace/results/test-results.xml +``` + +--- + +## 镜像构建 `build.py` + +| 参数 | 说明 | +|---|---| +| `--platform nvidia\|ascend\|all` | 构建平台,默认 `all` | +| `--force` | 跳过 Dockerfile 变更检测 | +| `--dry-run` | 打印命令不执行 | + +```bash +# 检测变更后构建(无变更自动跳过) +python .ci/build.py --platform nvidia + +# 强制构建 +python .ci/build.py --platform nvidia --force +``` + +构建产物以宿主机本地镜像 tag 存储:`infiniops-ci/:` 和 `:latest`。 +代理、`no_proxy` 自动从宿主机环境变量透传到 `docker build`。 + +> `--push` 为预留功能,需在 `config.yaml` 中配置 `registry` 段后方可使用。 + +--- + +## 流水线执行 `run.py` + +| 参数 | 说明 | +|---|---| +| `--branch` | 覆盖克隆分支 | +| `--stage` | 只运行指定 stage | +| `--image-tag` | 覆盖镜像 tag | +| `--gpu-id` | 覆盖 GPU 设备 ID | +| `--results-dir` | 宿主机目录,挂载到容器 `/workspace/results` | +| `--dry-run` | 打印 docker 命令不执行 | + +```bash +# 运行默认 job +python .ci/run.py --branch feat/my-feature --results-dir ./ci-results + +# 只跑 test stage,预览命令 +python .ci/run.py --stage test --dry-run +``` + +容器内执行流程:`git clone` → `checkout` → `setup` → stages。 +代理从宿主机透传,测试结果写入 `--results-dir`。每次运行均为干净环境(不挂载宿主机 pip 缓存)。 diff 
--git a/.ci/build.py b/.ci/build.py new file mode 100644 index 0000000..2339319 --- /dev/null +++ b/.ci/build.py @@ -0,0 +1,285 @@ +#!/usr/bin/env python3 +"""CI image builder: detect changes, build, tag, and optionally push Docker images.""" + +import argparse +import json +import os +import shlex +import subprocess +import sys +from pathlib import Path + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. Install with: pip install pyyaml", file=sys.stderr + ) + sys.exit(1) + + +def load_config(path): + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) + + +def get_git_commit(ref="HEAD"): + result = subprocess.run( + ["git", "rev-parse", "--short", ref], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + print(f"error: failed to get commit hash for `{ref}`", file=sys.stderr) + sys.exit(1) + + return result.stdout.strip() + + +def has_dockerfile_changed(dockerfile_dir, base_ref="HEAD~1"): + """Check if any file under `dockerfile_dir` changed since `base_ref`.""" + result = subprocess.run( + ["git", "diff", "--name-only", base_ref, "--", dockerfile_dir], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + print( + "warning: git diff failed (shallow clone or initial commit?);" + " assuming Dockerfile changed", + file=sys.stderr, + ) + return True + + return bool(result.stdout.strip()) + + +def docker_login(registry_cfg, dry_run): + """Log in to the registry using `credentials_env` token. + + Returns True on success. + + NOTE: Registry support is currently unused (`config.yaml` has no registry + section). Retained for future integration with an external image management + system. 
+ """ + credentials_env = registry_cfg.get("credentials_env") + registry_url = registry_cfg.get("url", "") + + if not credentials_env or not registry_url: + return True + + token = os.environ.get(credentials_env) + + if not token: + print( + f"error: {credentials_env} not set, cannot login", + file=sys.stderr, + ) + return False + + if dry_run: + print( + f"[dry-run] echo | docker login {registry_url}" + " --username token --password-stdin" + ) + return True + + result = subprocess.run( + ["docker", "login", registry_url, "--username", "token", "--password-stdin"], + input=token, + text=True, + ) + + if result.returncode != 0: + print("error: docker login failed", file=sys.stderr) + return False + + return True + + +def build_image_tag(registry_url, project, platform, tag): + if registry_url: + return f"{registry_url}/{project}/{platform}:{tag}" + + return f"{project}-ci/{platform}:{tag}" + + +def build_image(platform, platform_cfg, registry_cfg, commit, push, dry_run, logged_in): + """Build a single platform image. 
Returns True on success.""" + registry_url = registry_cfg.get("url", "") + project = registry_cfg.get("project", "infiniops") + dockerfile_dir = platform_cfg["dockerfile"] + commit_tag = build_image_tag(registry_url, project, platform, commit) + latest_tag = build_image_tag(registry_url, project, platform, "latest") + + build_args_cfg = platform_cfg.get("build_args", {}) + build_cmd = ["docker", "build", "--network", "host"] + + for key, value in build_args_cfg.items(): + build_cmd.extend(["--build-arg", f"{key}={value}"]) + + for proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"): + proxy_val = os.environ.get(proxy_var) or os.environ.get(proxy_var.lower()) + + if proxy_val: + build_cmd.extend(["--build-arg", f"{proxy_var}={proxy_val}"]) + build_cmd.extend(["--build-arg", f"{proxy_var.lower()}={proxy_val}"]) + + private_sdk = platform_cfg.get("private_sdk", {}) + + if private_sdk: + source_env = private_sdk.get("source_env", "") + sdk_url = os.environ.get(source_env, "") if source_env else "" + + if sdk_url: + build_cmd.extend(["--build-arg", f"PRIVATE_SDK_URL={sdk_url}"]) + + build_cmd.extend(["-t", commit_tag, "-t", latest_tag, dockerfile_dir]) + + if dry_run: + print(f"[dry-run] {shlex.join(build_cmd)}") + + if push: + if not logged_in: + print("[dry-run] (skipping push: docker login failed)") + else: + print(f"[dry-run] docker push {commit_tag}") + print(f"[dry-run] docker push {latest_tag}") + + return True + + print(f"==> building {platform}: {commit_tag}", file=sys.stderr) + result = subprocess.run(build_cmd) + + if result.returncode != 0: + error = { + "stage": "build", + "platform": platform, + "tag": commit_tag, + "exit_code": result.returncode, + } + print(json.dumps(error), file=sys.stderr) + + return False + + if push: + if not logged_in: + print("error: docker login failed, cannot push", file=sys.stderr) + return False + + for tag in (commit_tag, latest_tag): + print(f"==> pushing {tag}", file=sys.stderr) + push_result = subprocess.run(["docker", 
"push", tag]) + + if push_result.returncode != 0: + error = { + "stage": "push", + "platform": platform, + "tag": tag, + "exit_code": push_result.returncode, + } + print(json.dumps(error), file=sys.stderr) + + return False + + return True + + +def main(): + parser = argparse.ArgumentParser(description="Build CI Docker images") + parser.add_argument( + "--platform", + type=str, + default="all", + help="Platform to build: nvidia, ascend, or all (default: all)", + ) + parser.add_argument( + "--config", + type=Path, + default=Path(__file__).resolve().parent / "config.yaml", + help="Path to config.yaml", + ) + parser.add_argument( + "--commit", + type=str, + default="HEAD", + help="Git ref for tagging the image (default: HEAD)", + ) + parser.add_argument( + "--push", + action="store_true", + help="Push images to registry after building (requires registry in config)", + ) + parser.add_argument( + "--force", + action="store_true", + help="Skip change detection and force build", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print commands without executing", + ) + args = parser.parse_args() + + config = load_config(args.config) + registry_cfg = config.get("registry", {}) + images_cfg = config.get("images", {}) + + if not images_cfg: + print("error: no `images` section in config", file=sys.stderr) + sys.exit(1) + + if args.platform == "all": + platforms = list(images_cfg.keys()) + else: + if args.platform not in images_cfg: + print( + f"error: platform `{args.platform}` not found in config", + file=sys.stderr, + ) + sys.exit(1) + platforms = [args.platform] + + commit = get_git_commit(args.commit) + logged_in = docker_login(registry_cfg, args.dry_run) if args.push else True + failed = False + + for platform in platforms: + platform_cfg = images_cfg[platform] + dockerfile_dir = platform_cfg["dockerfile"] + + if not Path(dockerfile_dir).is_dir(): + print( + f"warning: dockerfile directory `{dockerfile_dir}` does not exist," + f" skipping 
{platform}", + file=sys.stderr, + ) + continue + + if not args.force and not has_dockerfile_changed(dockerfile_dir): + print(f"==> {platform}: no changes detected, skipping", file=sys.stderr) + continue + + ok = build_image( + platform, + platform_cfg, + registry_cfg, + commit, + args.push, + args.dry_run, + logged_in=logged_in, + ) + + if not ok: + failed = True + + if failed: + sys.exit(1) + + +if __name__ == "__main__": + main() diff --git a/.ci/config.yaml b/.ci/config.yaml new file mode 100644 index 0000000..a86174a --- /dev/null +++ b/.ci/config.yaml @@ -0,0 +1,33 @@ +repo: + url: https://github.com/InfiniTensor/InfiniOps.git + branch: master + +images: + nvidia: + dockerfile: .ci/images/nvidia/ + build_args: + BASE_IMAGE: nvcr.io/nvidia/pytorch:24.10-py3 + ascend: # TODO: Ascend image is not ready yet + dockerfile: .ci/images/ascend/ + build_args: + BASE_IMAGE: ascendhub.huawei.com/public-ascendhub/ascend-pytorch:24.0.0 + private_sdk: + source_env: PRIVATE_SDK_URL + +jobs: + nvidia_gpu: + image: latest + platform: nvidia + resources: + gpu_ids: "0" # 指定 GPU ID,如 "0" "0,2" "all" + memory: 32GB + shm_size: 16g # 避免 PyTorch 默认 64MB SHMEM 不足 + timeout: 3600 + + setup: pip install .[dev] + # env: # 可选,注入容器环境变量 + # MY_VAR: value + + stages: + - name: test + run: pytest tests/ -n 8 -v --tb=short --junitxml=/workspace/results/test-results.xml diff --git a/.ci/images/ascend/Dockerfile b/.ci/images/ascend/Dockerfile new file mode 100644 index 0000000..66392eb --- /dev/null +++ b/.ci/images/ascend/Dockerfile @@ -0,0 +1,39 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +ENV DEBIAN_FRONTEND=noninteractive + +ARG HTTP_PROXY +ARG HTTPS_PROXY +ARG NO_PROXY +ARG http_proxy +ARG https_proxy +ARG no_proxy + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + cmake \ + ninja-build \ + coreutils \ + curl \ + libclang-dev \ + && rm -rf /var/lib/apt/lists/* + +ARG PRIVATE_SDK_URL +RUN if [ -n "$PRIVATE_SDK_URL" ]; then \ + curl -fSL "$PRIVATE_SDK_URL" -o 
/tmp/sdk.run && \ + chmod +x /tmp/sdk.run && /tmp/sdk.run --quiet && \ + rm /tmp/sdk.run; \ + fi + +RUN pip install --no-cache-dir \ + scikit-build-core \ + pybind11 \ + libclang \ + pytest \ + pytest-cov \ + pytest-xdist \ + pyyaml + +WORKDIR /workspace diff --git a/.ci/images/nvidia/Dockerfile b/.ci/images/nvidia/Dockerfile new file mode 100644 index 0000000..74ccfd1 --- /dev/null +++ b/.ci/images/nvidia/Dockerfile @@ -0,0 +1,31 @@ +ARG BASE_IMAGE +FROM ${BASE_IMAGE} + +ENV DEBIAN_FRONTEND=noninteractive + +ARG HTTP_PROXY +ARG HTTPS_PROXY +ARG NO_PROXY +ARG http_proxy +ARG https_proxy +ARG no_proxy + +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + git \ + cmake \ + ninja-build \ + coreutils \ + libclang-dev \ + && rm -rf /var/lib/apt/lists/* + +RUN pip install --no-cache-dir \ + scikit-build-core \ + pybind11 \ + libclang \ + pytest \ + pytest-cov \ + pytest-xdist \ + pyyaml + +WORKDIR /workspace diff --git a/.ci/run.py b/.ci/run.py new file mode 100644 index 0000000..0c8d648 --- /dev/null +++ b/.ci/run.py @@ -0,0 +1,289 @@ +#!/usr/bin/env python3 +"""Standalone Docker CI runner: clone repo, setup, run stages. Output to stdout.""" + +import argparse +import os +import shlex +import subprocess +import sys +from datetime import datetime +from pathlib import Path + +try: + import yaml +except ImportError: + print( + "error: pyyaml is required. 
Install with: pip install pyyaml", file=sys.stderr + ) + sys.exit(1) + + +def load_config(path): + with open(path, encoding="utf-8") as f: + return yaml.safe_load(f) + + +def get_git_commit(ref="HEAD"): + result = subprocess.run( + ["git", "rev-parse", "--short", ref], + capture_output=True, + text=True, + ) + + if result.returncode != 0: + return "unknown" + + return result.stdout.strip() + + +def build_results_dir(base, platform, stages, commit): + """Build a results directory path: `{base}/{platform}_{stages}_{commit}_{timestamp}`.""" + stage_names = "+".join(s["name"] for s in stages) + timestamp = datetime.now().strftime("%Y%m%d-%H%M%S") + dirname = f"{platform}_{stage_names}_{commit}_{timestamp}" + + return Path(base) / dirname + + +def resolve_image(config, platform, image_tag): + """Resolve an image reference to a full image name. + + Accepts `stable`, `latest`, or a commit hash as `image_tag`. When config + contains a registry section, returns a registry-prefixed URL. Otherwise + returns a local tag (current default). 
+ """ + registry = config.get("registry", {}) + registry_url = registry.get("url", "") + project = registry.get("project", "infiniops") + + if not registry_url: + return f"{project}-ci/{platform}:{image_tag}" + + return f"{registry_url}/{project}/{platform}:{image_tag}" + + +def build_runner_script(): + return r""" +set -e +cd /workspace +mkdir -p /workspace/results +git clone "$REPO_URL" repo +cd repo +git checkout "$BRANCH" +echo "========== Setup ==========" +eval "$SETUP_CMD" +set +e +failed=0 +for i in $(seq 1 "$NUM_STAGES"); do + name_var="STAGE_${i}_NAME" + cmd_var="STAGE_${i}_CMD" + name="${!name_var}" + cmd="${!cmd_var}" + echo "========== Stage: $name ==========" + eval "$cmd" || failed=1 +done +echo "========== Summary ==========" +if [ -n "$HOST_UID" ] && [ -n "$HOST_GID" ]; then + chown -R "$HOST_UID:$HOST_GID" /workspace/results 2>/dev/null || true +fi +exit $failed +""" + + +def build_docker_args( + config, + job_name, + repo_url, + branch, + stages, + workdir, + image_tag_override, + gpu_id_override=None, + results_dir=None, +): + job = config["jobs"][job_name] + platform = job.get("platform", "nvidia") + image_tag = image_tag_override or job.get("image", "latest") + image = resolve_image(config, platform, image_tag) + resources = job.get("resources", {}) + setup_raw = job.get("setup", "pip install .[dev]") + + if isinstance(setup_raw, list): + setup_cmd = "\n".join(setup_raw) + else: + setup_cmd = setup_raw + + args = [ + "docker", + "run", + "--rm", + "--network", + "host", + "-i", + "-w", + workdir, + "-e", + f"REPO_URL={repo_url}", + "-e", + f"BRANCH={branch}", + "-e", + f"SETUP_CMD={setup_cmd}", + "-e", + f"NUM_STAGES={len(stages)}", + "-e", + f"HOST_UID={os.getuid()}", + "-e", + f"HOST_GID={os.getgid()}", + ] + + for proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"): + proxy_val = os.environ.get(proxy_var) or os.environ.get(proxy_var.lower()) + + if proxy_val: + args.extend(["-e", f"{proxy_var}={proxy_val}"]) + args.extend(["-e", 
f"{proxy_var.lower()}={proxy_val}"]) + + for key, value in job.get("env", {}).items(): + args.extend(["-e", f"{key}={value}"]) + + if results_dir: + args.extend(["-v", f"{results_dir.resolve()}:/workspace/results"]) + + for i, s in enumerate(stages): + args.append("-e") + args.append(f"STAGE_{i + 1}_NAME={s['name']}") + args.append("-e") + args.append(f"STAGE_{i + 1}_CMD={s['run']}") + + gpu_id = gpu_id_override or str(resources.get("gpu_ids", "")) + gpu_count = resources.get("gpu_count", 0) + + if gpu_id: + if gpu_id == "all": + args.extend(["--gpus", "all"]) + else: + args.extend(["--gpus", f'"device={gpu_id}"']) + elif gpu_count and gpu_count > 0: + args.extend(["--gpus", f"count={gpu_count}"]) + + memory = resources.get("memory") + + if memory: + mem = str(memory).lower().replace("gb", "g").replace("mb", "m") + + if not mem.endswith("g") and not mem.endswith("m"): + mem = f"{mem}g" + + args.extend(["--memory", mem]) + + shm_size = resources.get("shm_size") + + if shm_size: + args.extend(["--shm-size", str(shm_size)]) + + timeout_sec = resources.get("timeout") + args.append(image) + + if timeout_sec: + # Requires coreutils `timeout` inside the container image. 
+ args.extend(["timeout", str(timeout_sec)]) + + args.extend(["bash", "-c", build_runner_script().strip()]) + + return args + + +def main(): + parser = argparse.ArgumentParser(description="Run Docker CI pipeline") + parser.add_argument( + "--config", + type=Path, + default=Path(__file__).resolve().parent / "config.yaml", + help="Path to config.yaml", + ) + parser.add_argument("--branch", type=str, help="Override repo branch") + parser.add_argument("--job", type=str, help="Job name to run (default: first job)") + parser.add_argument( + "--stage", + type=str, + help="Run only this stage name (still runs setup first)", + ) + parser.add_argument( + "--image-tag", + type=str, + help="Override image tag (stable, latest, or commit hash)", + ) + parser.add_argument( + "--gpu-id", + type=str, + help='GPU device IDs to use, e.g. "0", "0,2", "all"', + ) + parser.add_argument( + "--results-dir", + type=Path, + default=Path("ci-results"), + help="Base directory for test results (default: ./ci-results)", + ) + parser.add_argument( + "--dry-run", + action="store_true", + help="Print docker command and exit", + ) + args = parser.parse_args() + + config = load_config(args.config) + repo = config.get("repo", {}) + repo_url = repo.get("url", "https://github.com/InfiniTensor/InfiniOps.git") + branch = args.branch or repo.get("branch", "master") + + jobs = config.get("jobs", {}) + + if not jobs: + print("error: no jobs in config", file=sys.stderr) + sys.exit(1) + + job_name = args.job or next(iter(jobs)) + + if job_name not in jobs: + print(f"error: job {job_name!r} not in config", file=sys.stderr) + sys.exit(1) + + job = jobs[job_name] + all_stages = job.get("stages", []) + + if args.stage: + stages = [s for s in all_stages if s["name"] == args.stage] + + if not stages: + print(f"error: stage {args.stage!r} not found", file=sys.stderr) + sys.exit(1) + else: + stages = all_stages + + platform = job.get("platform", "nvidia") + commit = get_git_commit() + results_dir = 
build_results_dir(args.results_dir, platform, stages, commit) + + workdir = "/workspace" + docker_args = build_docker_args( + config, + job_name, + repo_url, + branch, + stages, + workdir, + args.image_tag, + gpu_id_override=args.gpu_id, + results_dir=results_dir, + ) + + if args.dry_run: + print(shlex.join(docker_args)) + return + + results_dir.mkdir(parents=True, exist_ok=True) + sys.exit(subprocess.run(docker_args).returncode) + + +if __name__ == "__main__": + main() diff --git a/.ci/tests/__init__.py b/.ci/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/.ci/tests/conftest.py b/.ci/tests/conftest.py new file mode 100644 index 0000000..98079cd --- /dev/null +++ b/.ci/tests/conftest.py @@ -0,0 +1,42 @@ +import sys +from pathlib import Path + +# Allow `import run` and `import build` directly. +sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) + +import pytest + + +@pytest.fixture +def minimal_config(): + return { + "repo": { + "url": "https://github.com/InfiniTensor/InfiniOps.git", + "branch": "master", + }, + "images": { + "nvidia": { + "dockerfile": ".ci/images/nvidia/", + "build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, + } + }, + "jobs": { + "nvidia_gpu": { + "image": "latest", + "platform": "nvidia", + "resources": { + "gpu_ids": "0", + "memory": "32GB", + "shm_size": "16g", + "timeout": 3600, + }, + "setup": "pip install .[dev]", + "stages": [ + { + "name": "test", + "run": "pytest tests/ -v", + } + ], + } + }, + } diff --git a/.ci/tests/test_build.py b/.ci/tests/test_build.py new file mode 100644 index 0000000..fa2f292 --- /dev/null +++ b/.ci/tests/test_build.py @@ -0,0 +1,186 @@ +import build + + +# --------------------------------------------------------------------------- +# build_image_tag +# --------------------------------------------------------------------------- + + +def test_build_image_tag_with_registry(): + tag = build.build_image_tag("localhost:5000", "infiniops", "nvidia", "latest") + 
assert tag == "localhost:5000/infiniops/nvidia:latest" + + +def test_build_image_tag_without_registry(): + tag = build.build_image_tag("", "infiniops", "nvidia", "abc1234") + assert tag == "infiniops-ci/nvidia:abc1234" + + +def test_build_image_tag_commit_hash(): + tag = build.build_image_tag( + "registry.example.com:5000", "proj", "ascend", "deadbeef" + ) + assert tag == "registry.example.com:5000/proj/ascend:deadbeef" + + +# --------------------------------------------------------------------------- +# has_dockerfile_changed +# --------------------------------------------------------------------------- + + +def test_has_dockerfile_changed_true_when_stdout_nonempty(mocker): + mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0, stdout="Dockerfile\n"), + ) + assert build.has_dockerfile_changed(".ci/images/nvidia/") is True + + +def test_has_dockerfile_changed_false_when_stdout_empty(mocker): + mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0, stdout=""), + ) + assert build.has_dockerfile_changed(".ci/images/nvidia/") is False + + +def test_has_dockerfile_changed_true_on_git_error(mocker): + # Shallow clone or initial commit: `git diff` returns non-zero. 
+ mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=128, stdout=""), + ) + assert build.has_dockerfile_changed(".ci/images/nvidia/") is True + + +# --------------------------------------------------------------------------- +# docker_login +# --------------------------------------------------------------------------- + + +def test_docker_login_no_credentials_env(mocker): + run_mock = mocker.patch("subprocess.run") + result = build.docker_login({"url": "localhost:5000"}, dry_run=False) + assert result is True + run_mock.assert_not_called() + + +def test_docker_login_token_not_set(mocker, monkeypatch, capsys): + monkeypatch.delenv("REGISTRY_TOKEN", raising=False) + run_mock = mocker.patch("subprocess.run") + cfg = {"url": "localhost:5000", "credentials_env": "REGISTRY_TOKEN"} + result = build.docker_login(cfg, dry_run=False) + assert result is False + run_mock.assert_not_called() + + +def test_docker_login_dry_run_does_not_call_subprocess(mocker, monkeypatch): + monkeypatch.setenv("REGISTRY_TOKEN", "mytoken") + run_mock = mocker.patch("subprocess.run") + cfg = {"url": "localhost:5000", "credentials_env": "REGISTRY_TOKEN"} + result = build.docker_login(cfg, dry_run=True) + assert result is True + run_mock.assert_not_called() + + +def test_docker_login_success(mocker, monkeypatch): + monkeypatch.setenv("REGISTRY_TOKEN", "mytoken") + run_mock = mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0), + ) + cfg = {"url": "localhost:5000", "credentials_env": "REGISTRY_TOKEN"} + result = build.docker_login(cfg, dry_run=False) + assert result is True + run_mock.assert_called_once() + cmd = run_mock.call_args[0][0] + assert "docker" in cmd + assert "login" in cmd + + +# --------------------------------------------------------------------------- +# build_image — dry_run and proxy +# --------------------------------------------------------------------------- + + +def _platform_cfg(): + return { + "dockerfile": ".ci/images/nvidia/", + 
"build_args": {"BASE_IMAGE": "nvcr.io/nvidia/pytorch:24.10-py3"}, + } + + +def _registry_cfg(): + return {"url": "localhost:5000", "project": "infiniops"} + + +def test_build_image_dry_run_no_subprocess(mocker, monkeypatch, capsys): + monkeypatch.delenv("HTTP_PROXY", raising=False) + run_mock = mocker.patch("subprocess.run") + build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=True, + logged_in=True, + ) + run_mock.assert_not_called() + captured = capsys.readouterr() + assert "[dry-run]" in captured.out + + +def test_build_image_dry_run_output_contains_image_tag(mocker, monkeypatch, capsys): + monkeypatch.delenv("HTTP_PROXY", raising=False) + mocker.patch("subprocess.run") + build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=True, + logged_in=True, + ) + captured = capsys.readouterr() + assert "abc1234" in captured.out + + +def test_build_image_proxy_in_build_args(mocker, monkeypatch): + monkeypatch.setenv("HTTP_PROXY", "http://proxy.test:3128") + run_mock = mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=0), + ) + build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=False, + logged_in=True, + ) + called_cmd = run_mock.call_args[0][0] + joined = " ".join(called_cmd) + assert "HTTP_PROXY=http://proxy.test:3128" in joined + assert "http_proxy=http://proxy.test:3128" in joined + + +def test_build_image_returns_false_on_docker_error(mocker, monkeypatch): + monkeypatch.delenv("HTTP_PROXY", raising=False) + mocker.patch( + "subprocess.run", + return_value=mocker.Mock(returncode=1), + ) + result = build.build_image( + "nvidia", + _platform_cfg(), + _registry_cfg(), + "abc1234", + push=False, + dry_run=False, + logged_in=True, + ) + assert result is False diff --git a/.ci/tests/test_run.py b/.ci/tests/test_run.py new file mode 100644 index 0000000..075546e --- /dev/null +++ 
b/.ci/tests/test_run.py @@ -0,0 +1,298 @@ +from pathlib import Path + +import pytest + +import run + + +# --------------------------------------------------------------------------- +# resolve_image +# --------------------------------------------------------------------------- + + +def test_resolve_image_with_registry(): + cfg = {"registry": {"url": "localhost:5000", "project": "infiniops"}} + img = run.resolve_image(cfg, "nvidia", "latest") + assert img == "localhost:5000/infiniops/nvidia:latest" + + +def test_resolve_image_without_registry(minimal_config): + img = run.resolve_image(minimal_config, "nvidia", "abc1234") + assert img == "infiniops-ci/nvidia:abc1234" + + +# --------------------------------------------------------------------------- +# build_runner_script +# --------------------------------------------------------------------------- + + +def test_runner_script_contains_git_clone(): + script = run.build_runner_script() + assert "git clone" in script + + +def test_runner_script_contains_setup_cmd(): + script = run.build_runner_script() + assert "SETUP_CMD" in script + + +def test_runner_script_exits_on_failure(): + script = run.build_runner_script() + assert "exit $failed" in script + + +def test_runner_script_creates_results_dir(): + script = run.build_runner_script() + assert "mkdir -p /workspace/results" in script + + +# --------------------------------------------------------------------------- +# build_docker_args — basic structure +# --------------------------------------------------------------------------- + + +def test_docker_args_basic_structure(minimal_config): + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert args[0] == "docker" + assert "run" in args + assert "--rm" in args + + +def test_docker_args_correct_image(minimal_config): + args = run.build_docker_args( + minimal_config, + 
"nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert "infiniops-ci/nvidia:latest" in args + + +def test_docker_args_image_tag_override(minimal_config): + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + "abc1234", + ) + assert "infiniops-ci/nvidia:abc1234" in args + + +# --------------------------------------------------------------------------- +# build_docker_args — proxy passthrough +# --------------------------------------------------------------------------- + + +def test_docker_args_proxy_present_when_set(minimal_config, monkeypatch): + monkeypatch.setenv("HTTP_PROXY", "http://proxy.example.com:8080") + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert "-e" in args + assert "HTTP_PROXY=http://proxy.example.com:8080" in args + assert "http_proxy=http://proxy.example.com:8080" in args + + +def test_docker_args_proxy_absent_when_not_set(minimal_config, monkeypatch): + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.delenv("http_proxy", raising=False) + monkeypatch.delenv("HTTPS_PROXY", raising=False) + monkeypatch.delenv("https_proxy", raising=False) + monkeypatch.delenv("NO_PROXY", raising=False) + monkeypatch.delenv("no_proxy", raising=False) + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + + for arg in args: + assert not arg.startswith("HTTP_PROXY=") + assert not arg.startswith("http_proxy=") + assert not arg.startswith("HTTPS_PROXY=") + assert not arg.startswith("https_proxy=") + assert not 
arg.startswith("NO_PROXY=") + assert not arg.startswith("no_proxy=") + + +def test_docker_args_proxy_lowercase_fallback(minimal_config, monkeypatch): + monkeypatch.delenv("HTTP_PROXY", raising=False) + monkeypatch.setenv("http_proxy", "http://lowercase.proxy:3128") + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + ) + assert "HTTP_PROXY=http://lowercase.proxy:3128" in args + assert "http_proxy=http://lowercase.proxy:3128" in args + + +# --------------------------------------------------------------------------- +# build_docker_args — GPU flags +# --------------------------------------------------------------------------- + + +def _make_args(config, gpu_id_override=None): + return run.build_docker_args( + config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + gpu_id_override=gpu_id_override, + ) + + +def test_docker_args_gpu_device(minimal_config): + args = _make_args(minimal_config) + idx = args.index("--gpus") + assert "device=0" in args[idx + 1] + + +def test_docker_args_gpu_all(minimal_config): + minimal_config["jobs"]["nvidia_gpu"]["resources"]["gpu_ids"] = "all" + args = _make_args(minimal_config) + idx = args.index("--gpus") + assert args[idx + 1] == "all" + + +def test_docker_args_no_gpu(minimal_config): + minimal_config["jobs"]["nvidia_gpu"]["resources"]["gpu_ids"] = "" + minimal_config["jobs"]["nvidia_gpu"]["resources"].pop("gpu_count", None) + args = _make_args(minimal_config) + assert "--gpus" not in args + + +def test_docker_args_gpu_override(minimal_config): + args = _make_args(minimal_config, gpu_id_override="2,3") + idx = args.index("--gpus") + assert "2,3" in args[idx + 1] + + +# --------------------------------------------------------------------------- +# build_docker_args — memory format +# 
--------------------------------------------------------------------------- + + +@pytest.mark.parametrize( + "raw,expected", + [ + ("32GB", "32g"), + ("512MB", "512m"), + ("8", "8g"), + ("16gb", "16g"), + ("256mb", "256m"), + ], +) +def test_docker_args_memory_format(minimal_config, raw, expected): + minimal_config["jobs"]["nvidia_gpu"]["resources"]["memory"] = raw + args = _make_args(minimal_config) + idx = args.index("--memory") + assert args[idx + 1] == expected + + +# --------------------------------------------------------------------------- +# build_docker_args — stages encoding +# --------------------------------------------------------------------------- + + +def test_docker_args_num_stages(minimal_config): + args = _make_args(minimal_config) + assert "NUM_STAGES=1" in args + + +def test_docker_args_stage_name_cmd(minimal_config): + args = _make_args(minimal_config) + assert "STAGE_1_NAME=test" in args + assert any(a.startswith("STAGE_1_CMD=") for a in args) + + +def test_docker_args_multiple_stages(minimal_config): + minimal_config["jobs"]["nvidia_gpu"]["stages"] = [ + {"name": "lint", "run": "ruff check ."}, + {"name": "test", "run": "pytest tests/"}, + ] + args = _make_args(minimal_config) + assert "NUM_STAGES=2" in args + assert "STAGE_1_NAME=lint" in args + assert "STAGE_2_NAME=test" in args + + +# --------------------------------------------------------------------------- +# build_docker_args — results_dir mount +# --------------------------------------------------------------------------- + + +def test_docker_args_results_dir(minimal_config, tmp_path): + args = run.build_docker_args( + minimal_config, + "nvidia_gpu", + "https://github.com/example/repo.git", + "master", + minimal_config["jobs"]["nvidia_gpu"]["stages"], + "/workspace", + None, + results_dir=tmp_path, + ) + joined = " ".join(str(a) for a in args) + assert "-v" in args + assert "/workspace/results" in joined + + +# 
---------------------------------------------------------------------------
# build_results_dir
# ---------------------------------------------------------------------------


def test_build_results_dir_contains_platform():
    # The directory name embeds the platform so runs on different
    # platforms land in distinct result folders.
    stages = [{"name": "test", "run": "pytest"}]
    d = run.build_results_dir("ci-results", "nvidia", stages, "abc1234")
    assert "nvidia" in d.name


def test_build_results_dir_contains_commit():
    # The short commit hash in the name ties results back to a revision.
    stages = [{"name": "test", "run": "pytest"}]
    d = run.build_results_dir("ci-results", "nvidia", stages, "abc1234")
    assert "abc1234" in d.name


def test_build_results_dir_contains_stage_names():
    # Multiple stage names are joined with '+' in the directory name.
    stages = [{"name": "lint", "run": "ruff"}, {"name": "test", "run": "pytest"}]
    d = run.build_results_dir("ci-results", "nvidia", stages, "abc1234")
    assert "lint+test" in d.name


def test_build_results_dir_under_base():
    # The generated directory is a direct child of the requested base dir.
    stages = [{"name": "test", "run": "pytest"}]
    d = run.build_results_dir("/tmp/my-results", "ascend", stages, "def5678")
    assert d.parent == Path("/tmp/my-results")
diff --git a/pyproject.toml b/pyproject.toml
index 765b90a..3dbc186 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@
 name = "InfiniOps"
 version = "0.1.0"
 
 [project.optional-dependencies]
-dev = ["pytest", "pytest-cov", "pytest-xdist", "ruff", "torch"]
+dev = ["pytest", "pytest-cov", "pytest-xdist", "ruff", "torch", "pyyaml"]
 
 [tool.scikit-build.wheel]
 install-dir = "infini"
diff --git a/tests/test_add.py b/tests/test_add.py
index 1c98d91..61d6715 100644
--- a/tests/test_add.py
+++ b/tests/test_add.py
@@ -4,15 +4,39 @@
 from tests.utils import Payload, empty_strided, randint_strided, randn_strided
 
-_INT_DTYPES = (
-    torch.int16,
-    torch.uint16,
-    torch.int32,
-    torch.uint32,
-    torch.int64,
-    torch.uint64,
+_INT_DTYPES = tuple(
+    d
+    for d in (
+        torch.int16,
+        torch.int32,
+        torch.int64,
+    )
+    if d is not None
 )
+_UINT_DTYPES = tuple(
+    d
+    for d in (
+        getattr(torch, "uint16", None),
+        getattr(torch, "uint32", None),
+        getattr(torch, "uint64", 
None), + ) + if d is not None +) + +def _dtype_parametrize(): + candidates = [ + (torch.float32, 1e-7, 1e-7), + (torch.float16, 1e-3, 1e-3), + (torch.bfloat16, 1e-2, 5e-3), + (torch.int16, 0, 0), + (torch.int32, 0, 0), + (getattr(torch, "uint32", None), 0, 0), + (torch.int64, 0, 0), + (getattr(torch, "uint64", None), 0, 0), + ] + return tuple((d, r, a) for (d, r, a) in candidates if d is not None) + @pytest.mark.auto_act_and_assert @pytest.mark.parametrize( @@ -32,30 +56,11 @@ ((4, 4, 5632), (45056, 5632, 1), (45056, 5632, 1), (45056, 5632, 1)), ), ) -@pytest.mark.parametrize( - ("dtype", "rtol", "atol"), - ( - (torch.float32, 1e-7, 1e-7), - (torch.float16, 1e-3, 1e-3), - (torch.bfloat16, 1e-2, 5e-3), - (torch.int16, 0, 0), - (torch.uint16, 0, 0), - (torch.int32, 0, 0), - (torch.uint32, 0, 0), - (torch.int64, 0, 0), - (torch.uint64, 0, 0), - ), -) -def test_add( - shape, input_strides, other_strides, out_strides, dtype, device, rtol, atol -): - if dtype in _INT_DTYPES: - input = randint_strided( - 0, 100, shape, input_strides, dtype=dtype, device=device - ) - other = randint_strided( - 0, 100, shape, other_strides, dtype=dtype, device=device - ) +@pytest.mark.parametrize(("dtype", "rtol", "atol"), _dtype_parametrize()) +def test_add(shape, input_strides, other_strides, out_strides, dtype, device, rtol, atol): + if dtype in _INT_DTYPES or dtype in _UINT_DTYPES: + input = randint_strided(0, 100, shape, input_strides, dtype=dtype, device=device) + other = randint_strided(0, 100, shape, other_strides, dtype=dtype, device=device) else: input = randn_strided(shape, input_strides, dtype=dtype, device=device) other = randn_strided(shape, other_strides, dtype=dtype, device=device) @@ -72,10 +77,10 @@ def _add(input, other, out): def _torch_add(input, other, out): - if input.dtype in (torch.uint16, torch.uint32, torch.uint64): + if input.dtype in _UINT_DTYPES: input = input.to(torch.int64) - if other.dtype in (torch.uint16, torch.uint32, torch.uint64): + if other.dtype in 
_UINT_DTYPES: other = other.to(torch.int64) res = torch.add(input, other) diff --git a/tests/test_rms_norm.py b/tests/test_rms_norm.py index f447091..b0c9c5d 100644 --- a/tests/test_rms_norm.py +++ b/tests/test_rms_norm.py @@ -59,4 +59,13 @@ def _rms_norm(input, weight, *, eps=1e-6, out=None): def _torch_rms_norm(input, weight, *, eps=1e-6, out=None): - return torch.nn.functional.rms_norm(input, input.shape[-1:], weight=weight, eps=eps) + rms_norm_fn = getattr(torch.nn.functional, "rms_norm", None) + if rms_norm_fn is not None: + return rms_norm_fn(input, input.shape[-1:], weight=weight, eps=eps) + # Fallback for PyTorch < 2.3: RMS norm = (x / sqrt(mean(x^2) + eps)) * weight + rms = torch.sqrt(torch.mean(input * input, dim=-1, keepdim=True) + eps) + result = (input / rms) * weight + if out is not None: + out.copy_(result) + return out + return result