-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathjustfile
More file actions
191 lines (158 loc) · 7.85 KB
/
justfile
File metadata and controls
191 lines (158 loc) · 7.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# KernelBench RL Training Commands
#
# Just recipes for launching, monitoring and managing KernelBench RL
# training runs. Training recipes start jobs detached (nohup + &) so they
# survive the shell closing; each run writes logs/metrics/checkpoints
# under runs_dir/<run>.

# Default run name (can be overridden: just train run_name=my_experiment)
# The backtick runs `date` once, at justfile parse time.
run_name := "run_" + `date +%Y%m%d_%H%M%S`

# Default training config (YAML); use `train-config` to pass another one.
config := "kernel_rl/config/rl_kernelbench.yaml"

# Root directory holding one subdirectory per training run.
runs_dir := "./runs"
# Running bare `just` falls through to this first recipe.
# List available commands
default:
    @just --list
# === Training ===

# NOTE: just (unlike make) passes `$` to the shell unescaped, so command
# substitution is `$(...)`, not `$$(...)` — `$$` would expand to the shell
# PID. The pgrep pattern uses the `[l]og_path` bracket trick so it does not
# match the `sh -c` wrapper executing this very recipe line (whose command
# line contains the pattern text), which would make the check always pass.

# Start training (detached, survives shell close)
train run=run_name:
    @mkdir -p {{runs_dir}}
    @echo "Starting training: {{run}}"
    nohup uv run python -m kernel_rl.scripts.train_kernel_rl \
        --config {{config}} \
        log_path={{runs_dir}}/{{run}} \
        > {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pgrep -f "[l]og_path={{runs_dir}}/{{run}}" > /dev/null && echo "✓ Training started (PID: $(pgrep -f '[l]og_path={{runs_dir}}/{{run}}'))" || echo "✗ Failed to start"
    @echo "Logs: {{runs_dir}}/{{run}}/logs.log"
    @echo "Metrics: {{runs_dir}}/{{run}}/metrics.jsonl"
# `[l]og_path` bracket trick: prevents pgrep from matching the `sh -c`
# wrapper running this recipe line (its own command line contains the
# pattern), which made the success check always report "started".

# Start training with custom config
train-config config_path run=run_name:
    @mkdir -p {{runs_dir}}
    @echo "Starting training: {{run}} with config: {{config_path}}"
    nohup uv run python -m kernel_rl.scripts.train_kernel_rl \
        --config {{config_path}} \
        log_path={{runs_dir}}/{{run}} \
        > {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pgrep -f "[l]og_path={{runs_dir}}/{{run}}" > /dev/null && echo "✓ Training started" || echo "✗ Failed to start"
# `[l]og_path` bracket trick: keeps pgrep from matching the `sh -c`
# wrapper that runs this line, so the liveness check is meaningful.

# Resume training from checkpoint
resume run:
    @echo "Resuming training: {{run}}"
    nohup uv run python -m kernel_rl.scripts.train_kernel_rl \
        --config {{config}} \
        log_path={{runs_dir}}/{{run}} \
        load_checkpoint_path={{runs_dir}}/{{run}} \
        > {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pgrep -f "[l]og_path={{runs_dir}}/{{run}}" > /dev/null && echo "✓ Training resumed" || echo "✗ Failed to start"
# === Monitoring ===

# Follows the run's log file until interrupted (Ctrl-C).
# Show live logs for a run
logs run:
    @tail -f {{runs_dir}}/{{run}}/logs.log
# One-shot tail; `n` defaults to 50 (override: just logs-tail myrun 200).
# Show last N lines of logs
logs-tail run n="50":
    @tail -n {{n}} {{runs_dir}}/{{run}}/logs.log
# Prints batch count, then one formatted line per batch parsed from the
# run's metrics.jsonl (one JSON object per line, keyed by step/reward/
# kernel fields).
# Show batch metrics for a run
metrics run:
    @echo "=== Metrics for {{run}} ==="
    @wc -l < {{runs_dir}}/{{run}}/metrics.jsonl | xargs -I {} echo "Batches completed: {}"
    @echo "---"
    @cat {{runs_dir}}/{{run}}/metrics.jsonl | uv run python3 -c "import sys,json; [print(f\"Batch {d['step']}: reward={d['reward/mean']:.3f} (±{d['reward/std']:.3f}), compile={d['kernel/compile_rate']*100:.1f}%, correct={d['kernel/correct_rate']*100:.1f}%\") for d in (json.loads(l) for l in sys.stdin)]"
# The whole watch command is single-quoted, so the embedded python cannot
# use single-quoted dict keys; %-formatting avoids nesting double quotes
# inside an f-string, which is only valid on Python >= 3.12 (PEP 701) and
# previously made this recipe silently print "No metrics yet" forever on
# older interpreters.

# Watch metrics update live
watch-metrics run:
    watch -n 10 'echo "=== {{run}} ===" && \
        wc -l < {{runs_dir}}/{{run}}/metrics.jsonl | xargs -I {} echo "Batches: {}" && \
        tail -1 {{runs_dir}}/{{run}}/metrics.jsonl 2>/dev/null | uv run python3 -c "import sys,json; d=json.loads(sys.stdin.read()); print(\"Latest: reward=%.3f, compile=%.1f%%, correct=%.1f%%\" % (d[\"reward/mean\"], d[\"kernel/compile_rate\"]*100, d[\"kernel/correct_rate\"]*100))" 2>/dev/null || echo "No metrics yet"'
# NOTE: just passes `$` to the shell unescaped (unlike make), so shell
# variables and command substitutions are `$var` / `$(...)`. The previous
# make-style `$$var` expanded to the shell PID plus literal text, breaking
# every variable reference in this loop.

# Show summary of all runs
summary:
    @echo "=== All Runs ==="
    @for dir in {{runs_dir}}/*/; do \
        name=$(basename "$dir"); \
        if [ -f "$dir/metrics.jsonl" ]; then \
            batches=$(wc -l < "$dir/metrics.jsonl"); \
            echo "$name: $batches batches"; \
        fi; \
    done
# === Process Management ===

# NOTE(review): `grep -v grep` looks like cargo-cult from `ps | grep`, but
# it is load-bearing here — the `sh -c` wrapper executing this line has
# "train_kernel_rl" (and "grep") in its own command line, so pgrep -f
# matches it; filtering lines containing "grep" removes that self-match.
# Check if training is running
status:
    @echo "=== Running Training Jobs ==="
    @pgrep -fa "train_kernel_rl" | grep -v grep || echo "No training jobs running"
# `[l]og_path` bracket trick: without it pkill -f also matches (and kills)
# the `sh -c` wrapper running this very line, since the pattern text
# appears in its command line.

# Stop a specific run
stop run:
    @echo "Stopping {{run}}..."
    @pkill -f "[l]og_path={{runs_dir}}/{{run}}" && echo "✓ Stopped" || echo "Not running"
# `[t]rain_kernel_rl` bracket trick: prevents pkill from matching the
# `sh -c` wrapper executing this line (its command line contains the
# pattern) and killing the recipe's own shell.

# Stop all training jobs
stop-all:
    @echo "Stopping all training jobs..."
    @pkill -f "[t]rain_kernel_rl" && echo "✓ All stopped" || echo "No jobs running"
# === TensorBoard ===

# Serves the run's event files; override the port: just tensorboard myrun 6007.
# Launch TensorBoard for a run
tensorboard run port="6006":
    uv run tensorboard --logdir {{runs_dir}}/{{run}}/tensorboard --port {{port}}
# Points TensorBoard at the runs root so every run appears side by side.
# Launch TensorBoard for all runs
tensorboard-all port="6006":
    uv run tensorboard --logdir {{runs_dir}} --port {{port}}
# === Utilities ===

# Most recently modified first; output capped at 20 lines.
# List all runs
list:
    @ls -lt {{runs_dir}} | head -20
# Per-run totals sorted smallest-first; stderr silenced when no runs exist.
# Show disk usage of runs
disk:
    @du -sh {{runs_dir}}/*/ 2>/dev/null | sort -h
# Fixes: just passes `$` through to the shell, so the previous `$$confirm`
# expanded to "<PID>confirm" and the comparison could never equal "y" —
# deletion was impossible. Also `read -p` is a bashism (fails under dash);
# POSIX printf + read is equivalent and portable under just's default
# `sh -cu` shell.

# Clean up a run (DANGEROUS)
clean run:
    @echo "This will delete {{runs_dir}}/{{run}}"
    @printf "Are you sure? [y/N] " && read confirm && [ "$confirm" = "y" ] && rm -rf {{runs_dir}}/{{run}} && echo "✓ Deleted" || echo "Cancelled"
# Fixes double output: `grep -c` already prints "0" on no matches but
# exits 1, so the old `|| echo "0"` printed a second "0". Now the fallback
# "0" is printed only when the log file is missing; `|| true` keeps the
# recipe from failing on grep's exit status 1 (match count zero).

# Show uniform reward warnings count
uniform-warnings run:
    @echo "Uniform reward warnings in {{run}}:"
    @if [ -f {{runs_dir}}/{{run}}/logs.log ]; then grep -c "All rewards are uniform" {{runs_dir}}/{{run}}/logs.log || true; else echo "0"; fi
# === RAG Index ===

# Default output location for the retrieval index consumed by the
# RA-ICL and Kevin training recipes below.
rag_index_dir := "./kernel_rag_index"

# Build RAG index from KernelBook + Sakana datasets (both Triton and CUDA)
build-rag-index output=rag_index_dir:
    @echo "Building RAG index at {{output}}..."
    uv run python -m kernel_rl.scripts.build_rag_index \
        --output {{output}}
# Same builder restricted via --triton-only.
# Build RAG index for Triton only (KernelBook)
build-rag-index-triton output=rag_index_dir:
    @echo "Building Triton-only RAG index at {{output}}..."
    uv run python -m kernel_rl.scripts.build_rag_index \
        --output {{output}} \
        --triton-only
# Same builder restricted via --cuda-only.
# Build RAG index for CUDA only (Sakana AI-CUDA-Engineer)
build-rag-index-cuda output=rag_index_dir:
    @echo "Building CUDA-only RAG index at {{output}}..."
    uv run python -m kernel_rl.scripts.build_rag_index \
        --output {{output}} \
        --cuda-only
# NOTE: just passes `$` through unescaped, so command substitution is
# `$(...)`, not make-style `$$(...)`. The `[l]og_path` bracket trick keeps
# pgrep from matching the `sh -c` wrapper running this line.

# Train with RA-ICL prompts (requires RAG index)
train-raicl run=run_name rag_index=rag_index_dir:
    @mkdir -p {{runs_dir}}
    @if [ ! -d "{{rag_index}}" ]; then echo "Error: RAG index not found at {{rag_index}}. Run 'just build-rag-index' first."; exit 1; fi
    @echo "Starting RA-ICL training: {{run}}"
    nohup uv run python -m kernel_rl.scripts.train_kernel_rl \
        --config kernel_rl/config/rl_kernelbench_raicl.yaml \
        log_path={{runs_dir}}/{{run}} \
        dataset_builder.rag_index_path={{rag_index}} \
        > {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pgrep -f "[l]og_path={{runs_dir}}/{{run}}" > /dev/null && echo "✓ Training started (PID: $(pgrep -f '[l]og_path={{runs_dir}}/{{run}}'))" || echo "✗ Failed to start"
    @echo "Logs: {{runs_dir}}/{{run}}/logs.log"
# === Kevin Mode (Multi-Turn) ===

# NOTE: just passes `$` through unescaped, so command substitution is
# `$(...)`, not make-style `$$(...)`. The `[l]og_path` bracket trick keeps
# pgrep from matching the `sh -c` wrapper running this line.

# Train with Kevin mode (multi-turn refinement, requires RAG index)
train-kevin run=run_name rag_index=rag_index_dir:
    @mkdir -p {{runs_dir}}
    @if [ ! -d "{{rag_index}}" ]; then echo "Error: RAG index not found at {{rag_index}}. Run 'just build-rag-index' first."; exit 1; fi
    @echo "Starting Kevin mode training: {{run}}"
    @echo " Mode: multi_turn"
    @echo " Max turns: 4"
    @echo " Gamma: 0.4"
    nohup uv run python -m kernel_rl.scripts.train_kernel_rl \
        --config kernel_rl/config/rl_kernelbench_kevin.yaml \
        log_path={{runs_dir}}/{{run}} \
        dataset_builder.rag_index_path={{rag_index}} \
        > {{runs_dir}}/{{run}}_nohup.log 2>&1 &
    @sleep 2
    @pgrep -f "[l]og_path={{runs_dir}}/{{run}}" > /dev/null && echo "✓ Training started (PID: $(pgrep -f '[l]og_path={{runs_dir}}/{{run}}'))" || echo "✗ Failed to start"
    @echo "Logs: {{runs_dir}}/{{run}}/logs.log"
# The outer watch argument is single-quoted, so the embedded python cannot
# use single-quoted keys; %-formatting avoids nesting double quotes inside
# an f-string (only valid on Python >= 3.12, PEP 701), which previously
# made this recipe show "No metrics yet" on older interpreters.

# Watch Kevin mode metrics (multi-turn specific)
watch-kevin run:
    watch -n 10 'echo "=== {{run}} (Kevin Mode) ===" && \
        wc -l < {{runs_dir}}/{{run}}/metrics.jsonl | xargs -I {} echo "Batches: {}" && \
        tail -1 {{runs_dir}}/{{run}}/metrics.jsonl 2>/dev/null | uv run python3 -c "import sys,json; d=json.loads(sys.stdin.read()); print(\"Latest: step_score=%.3f, compile=%.1f%%, correct=%.1f%%, success=%.1f%%, avg_turns=%.1f\" % (d.get(\"multiturn/step_score_mean\",0), d.get(\"multiturn/compile_rate\",0)*100, d.get(\"multiturn/correct_rate\",0)*100, d.get(\"multiturn/success_rate\",0)*100, d.get(\"multiturn/avg_turns\",0)))" 2>/dev/null || echo "No metrics yet"'