Sync docs from GitHub
Browse files- .env.template +57 -0
- .gitattributes +1 -0
- GCS_UPLOAD_GUIDE.md +323 -0
- LAMBDA_MANUAL_SETUP.md +317 -0
- QUICKSTART.md +288 -0
- README.md +615 -599
- aquarat2.png +3 -0
- launch_lambda.py +296 -0
- scripts/launch_hyperbolic_training.py +701 -0
- scripts/launch_lambda_training.py +535 -0
.env.template
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# nanochatAquaRat Environment Template
|
| 2 |
+
# Copy this file to .env and fill in actual values before running any scripts.
|
| 3 |
+
|
| 4 |
+
# -----------------------------------------------------------------------------
|
| 5 |
+
# Cloud GPU Providers
|
| 6 |
+
# -----------------------------------------------------------------------------
|
| 7 |
+
|
| 8 |
+
# Lambda Labs automation (scripts/launch_lambda_training.py, launch_lambda.py)
|
| 9 |
+
# https://cloud.lambdalabs.com/api-keys
|
| 10 |
+
LAMBDA_API_KEY=your-lambda-api-key-here
|
| 11 |
+
|
| 12 |
+
# Hyperbolic Labs automation (scripts/launch_hyperbolic_training.py)
|
| 13 |
+
# https://app.hyperbolic.ai/settings/api-keys
|
| 14 |
+
HYPERBOLIC_API_KEY=your-hyperbolic-api-key-here
|
| 15 |
+
|
| 16 |
+
# Optional Hyperbolic defaults (override with CLI args if needed)
|
| 17 |
+
# HYPERBOLIC_REGION=us-east
|
| 18 |
+
# HYPERBOLIC_MAX_PRICE=6.00
|
| 19 |
+
|
| 20 |
+
# -----------------------------------------------------------------------------
|
| 21 |
+
# Experiment Tracking (Weights & Biases)
|
| 22 |
+
# -----------------------------------------------------------------------------
|
| 23 |
+
# Get your key at https://wandb.ai/authorize
|
| 24 |
+
WANDB_API_KEY=your-wandb-api-key-here
|
| 25 |
+
WANDB_PROJECT=nanochat-aquarat
|
| 26 |
+
WANDB_ENTITY=your-wandb-username-or-team-name
|
| 27 |
+
|
| 28 |
+
# Optional run metadata
|
| 29 |
+
WANDB_RUN=aquarat-$(date +%Y%m%d-%H%M%S)
|
| 30 |
+
WANDB_MODE=online # online | offline | disabled
|
| 31 |
+
|
| 32 |
+
# -----------------------------------------------------------------------------
|
| 33 |
+
# Google Cloud Storage Uploads (scripts/upload_to_gcs.sh)
|
| 34 |
+
# See GCS_UPLOAD_GUIDE.md for detailed instructions
|
| 35 |
+
# -----------------------------------------------------------------------------
|
| 36 |
+
|
| 37 |
+
# GCP project that owns your storage bucket
|
| 38 |
+
GCP_PROJECT_ID=your-gcp-project-id
|
| 39 |
+
|
| 40 |
+
# Default bucket for model artifacts (used by upload scripts and automation)
|
| 41 |
+
GCS_BUCKET=gs://your-model-bucket
|
| 42 |
+
|
| 43 |
+
# Service account credentials (recommended for automation). Point to the JSON
|
| 44 |
+
# key you download with `gcloud iam service-accounts keys create ...`
|
| 45 |
+
GOOGLE_APPLICATION_CREDENTIALS=/path/to/nanochat-gcs-key.json
|
| 46 |
+
|
| 47 |
+
# -----------------------------------------------------------------------------
|
| 48 |
+
# Optional cache/directories
|
| 49 |
+
# -----------------------------------------------------------------------------
|
| 50 |
+
|
| 51 |
+
# Override the default ~/.cache/nanochat location (used for checkpoints/data)
|
| 52 |
+
# NANOCHAT_BASE_DIR=/mnt/nanochat-cache
|
| 53 |
+
|
| 54 |
+
# If you pre-convert AQuA-RAT with scripts/prepare_aqua.py, set this so tasks
|
| 55 |
+
# and training scripts reuse the cached JSONL splits instead of downloading.
|
| 56 |
+
# AQUA_DATA_DIR=/mnt/datasets/aqua
|
| 57 |
+
|
.gitattributes
CHANGED
|
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
aquarat2.png filter=lfs diff=lfs merge=lfs -text
|
GCS_UPLOAD_GUIDE.md
ADDED
|
@@ -0,0 +1,323 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Google Cloud Storage Upload Guide
|
| 2 |
+
|
| 3 |
+
After training your nanochat model on Lambda Labs, use this guide to upload all weights and artifacts to Google Cloud Storage.
|
| 4 |
+
|
| 5 |
+
## Quick Start
|
| 6 |
+
|
| 7 |
+
```bash
|
| 8 |
+
# After training completes, SSH to your Lambda instance
|
| 9 |
+
ssh ubuntu@<INSTANCE_IP>
|
| 10 |
+
|
| 11 |
+
# Navigate to project directory
|
| 12 |
+
cd ~/nanochatAquaRat
|
| 13 |
+
|
| 14 |
+
# Run upload script
|
| 15 |
+
bash scripts/upload_to_gcs.sh --bucket gs://your-bucket-name
|
| 16 |
+
```
|
| 17 |
+
|
| 18 |
+
The script will:
|
| 19 |
+
1. Check/install gcloud CLI if needed
|
| 20 |
+
2. Verify authentication and bucket access
|
| 21 |
+
3. Show what will be uploaded and ask for confirmation
|
| 22 |
+
4. Upload all artifacts with progress
|
| 23 |
+
5. Ask if you want to terminate the Lambda instance
|
| 24 |
+
|
| 25 |
+
## Prerequisites
|
| 26 |
+
|
| 27 |
+
### 1. Create a GCS Bucket
|
| 28 |
+
|
| 29 |
+
```bash
|
| 30 |
+
# From your local machine
|
| 31 |
+
gcloud storage buckets create gs://your-bucket-name \
|
| 32 |
+
--location=us-central1 \
|
| 33 |
+
--uniform-bucket-level-access
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
Or create via console: https://console.cloud.google.com/storage/create-bucket
|
| 37 |
+
|
| 38 |
+
### 2. Set Up Authentication
|
| 39 |
+
|
| 40 |
+
#### Option A: Service Account (Recommended for Automation)
|
| 41 |
+
|
| 42 |
+
On your local machine:
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
# Create service account
|
| 46 |
+
gcloud iam service-accounts create nanochat-uploader \
|
| 47 |
+
--display-name="Nanochat Model Uploader"
|
| 48 |
+
|
| 49 |
+
# Grant storage permissions
|
| 50 |
+
gcloud projects add-iam-policy-binding YOUR_PROJECT_ID \
|
| 51 |
+
--member="serviceAccount:nanochat-uploader@YOUR_PROJECT_ID.iam.gserviceaccount.com" \
|
| 52 |
+
  --role="roles/storage.objectAdmin"  # objectAdmin, not objectCreator: gsutil rsync also needs list/get/delete
|
| 53 |
+
|
| 54 |
+
# Create and download key
|
| 55 |
+
gcloud iam service-accounts keys create ~/nanochat-key.json \
|
| 56 |
+
--iam-account=nanochat-uploader@YOUR_PROJECT_ID.iam.gserviceaccount.com
|
| 57 |
+
```
|
| 58 |
+
|
| 59 |
+
Copy key to Lambda instance:
|
| 60 |
+
|
| 61 |
+
```bash
|
| 62 |
+
scp ~/nanochat-key.json ubuntu@<INSTANCE_IP>:~/
|
| 63 |
+
```
|
| 64 |
+
|
| 65 |
+
On Lambda instance:
|
| 66 |
+
|
| 67 |
+
```bash
|
| 68 |
+
gcloud auth activate-service-account --key-file="$HOME/nanochat-key.json"
|
| 69 |
+
```
|
| 70 |
+
|
| 71 |
+
#### Option B: User Account (Simpler for Manual Use)
|
| 72 |
+
|
| 73 |
+
On Lambda instance:
|
| 74 |
+
|
| 75 |
+
```bash
|
| 76 |
+
gcloud auth login
|
| 77 |
+
# Follow the prompts in your browser
|
| 78 |
+
```
|
| 79 |
+
|
| 80 |
+
## Usage
|
| 81 |
+
|
| 82 |
+
### Basic Upload
|
| 83 |
+
|
| 84 |
+
```bash
|
| 85 |
+
bash scripts/upload_to_gcs.sh --bucket gs://my-models
|
| 86 |
+
```
|
| 87 |
+
|
| 88 |
+
### Custom Run Name
|
| 89 |
+
|
| 90 |
+
```bash
|
| 91 |
+
bash scripts/upload_to_gcs.sh \
|
| 92 |
+
--bucket gs://my-models \
|
| 93 |
+
--run-name depth20-experiment1
|
| 94 |
+
```
|
| 95 |
+
|
| 96 |
+
### Exclude Large Dataset Files
|
| 97 |
+
|
| 98 |
+
```bash
|
| 99 |
+
bash scripts/upload_to_gcs.sh \
|
| 100 |
+
--bucket gs://my-models \
|
| 101 |
+
--exclude-data
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
### Dry Run (Preview Only)
|
| 105 |
+
|
| 106 |
+
```bash
|
| 107 |
+
bash scripts/upload_to_gcs.sh \
|
| 108 |
+
--bucket gs://my-models \
|
| 109 |
+
--dry-run
|
| 110 |
+
```
|
| 111 |
+
|
| 112 |
+
### Auto-Terminate After Upload
|
| 113 |
+
|
| 114 |
+
```bash
|
| 115 |
+
bash scripts/upload_to_gcs.sh \
|
| 116 |
+
--bucket gs://my-models \
|
| 117 |
+
--auto-terminate
|
| 118 |
+
```
|
| 119 |
+
|
| 120 |
+
## What Gets Uploaded
|
| 121 |
+
|
| 122 |
+
From `~/.cache/nanochat/`:
|
| 123 |
+
|
| 124 |
+
| Directory | Contents | Typical Size |
|
| 125 |
+
|-----------|----------|--------------|
|
| 126 |
+
| `checkpoints/` | Model weights (.pt, .pkl files) | 500MB - 2GB |
|
| 127 |
+
| `report/` | Training reports and markdown summaries | 1-10MB |
|
| 128 |
+
| `tokenizer/` | BPE tokenizer files | 10-50MB |
|
| 129 |
+
| `eval_bundle/` | Evaluation datasets | 50-200MB |
|
| 130 |
+
| `aqua/` | AQuA-RAT dataset (optional) | 100-500MB |
|
| 131 |
+
| `mechanistic_interpretability/` | DeepMind interp tools | 10-100MB |
|
| 132 |
+
|
| 133 |
+
**Total**: Typically 1-5 GB per training run
|
| 134 |
+
|
| 135 |
+
## Upload Structure
|
| 136 |
+
|
| 137 |
+
Files are organized in GCS as:
|
| 138 |
+
|
| 139 |
+
```
|
| 140 |
+
gs://your-bucket/
|
| 141 |
+
└── runs/
|
| 142 |
+
├── aquarat-20251023-143022/
|
| 143 |
+
│ ├── checkpoints/
|
| 144 |
+
│ │ ├── base_final.pt
|
| 145 |
+
│ │ ├── mid_final.pt
|
| 146 |
+
│ │ ├── sft_final.pt
|
| 147 |
+
│ │ └── rl_final.pt
|
| 148 |
+
│ ├── report/
|
| 149 |
+
│ │ └── report.md
|
| 150 |
+
│ ├── tokenizer/
|
| 151 |
+
│ └── ...
|
| 152 |
+
└── depth20-experiment1/
|
| 153 |
+
└── ...
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
## Download Weights Later
|
| 157 |
+
|
| 158 |
+
### Download Entire Run
|
| 159 |
+
|
| 160 |
+
```bash
|
| 161 |
+
gsutil -m rsync -r \
|
| 162 |
+
gs://your-bucket/runs/aquarat-20251023-143022/ \
|
| 163 |
+
./local_checkpoints/
|
| 164 |
+
```
|
| 165 |
+
|
| 166 |
+
### Download Just Checkpoints
|
| 167 |
+
|
| 168 |
+
```bash
|
| 169 |
+
gsutil -m cp -r \
|
| 170 |
+
gs://your-bucket/runs/aquarat-20251023-143022/checkpoints/ \
|
| 171 |
+
./checkpoints/
|
| 172 |
+
```
|
| 173 |
+
|
| 174 |
+
### Download Single File
|
| 175 |
+
|
| 176 |
+
```bash
|
| 177 |
+
gsutil cp \
|
| 178 |
+
gs://your-bucket/runs/aquarat-20251023-143022/checkpoints/rl_final.pt \
|
| 179 |
+
./rl_final.pt
|
| 180 |
+
```
|
| 181 |
+
|
| 182 |
+
## Cost Considerations
|
| 183 |
+
|
| 184 |
+
### Storage Costs
|
| 185 |
+
|
| 186 |
+
- Standard storage: ~$0.02/GB/month
|
| 187 |
+
- Nearline storage (30+ days): ~$0.01/GB/month
|
| 188 |
+
- Coldline storage (90+ days): ~$0.004/GB/month
|
| 189 |
+
|
| 190 |
+
**Example**: 2GB model stored for 1 month = $0.04
|
| 191 |
+
|
| 192 |
+
### Network Egress
|
| 193 |
+
|
| 194 |
+
- Upload (ingress): **Free**
|
| 195 |
+
- Download to same region: **Free**
|
| 196 |
+
- Download to internet: ~$0.12/GB
|
| 197 |
+
|
| 198 |
+
**Tip**: Keep your GCS bucket in the same region as your compute for free transfers.
|
| 199 |
+
|
| 200 |
+
### Lifecycle Management
|
| 201 |
+
|
| 202 |
+
Auto-delete or move to cheaper storage after 90 days:
|
| 203 |
+
|
| 204 |
+
```bash
|
| 205 |
+
cat > lifecycle.json << EOF
|
| 206 |
+
{
|
| 207 |
+
"lifecycle": {
|
| 208 |
+
"rule": [
|
| 209 |
+
{
|
| 210 |
+
"action": {"type": "Delete"},
|
| 211 |
+
"condition": {"age": 90}
|
| 212 |
+
}
|
| 213 |
+
]
|
| 214 |
+
}
|
| 215 |
+
}
|
| 216 |
+
EOF
|
| 217 |
+
|
| 218 |
+
gsutil lifecycle set lifecycle.json gs://your-bucket
|
| 219 |
+
```
|
| 220 |
+
|
| 221 |
+
## Troubleshooting
|
| 222 |
+
|
| 223 |
+
### "gcloud: command not found"
|
| 224 |
+
|
| 225 |
+
The script auto-installs gcloud on Linux. If it fails:
|
| 226 |
+
|
| 227 |
+
```bash
|
| 228 |
+
curl https://sdk.cloud.google.com | bash
|
| 229 |
+
exec -l $SHELL
|
| 230 |
+
```
|
| 231 |
+
|
| 232 |
+
### "Permission denied" Error
|
| 233 |
+
|
| 234 |
+
Check your service account has sufficient storage permissions (e.g. `roles/storage.objectAdmin`, since `gsutil rsync` needs list/get/delete in addition to create):
|
| 235 |
+
|
| 236 |
+
```bash
|
| 237 |
+
gcloud projects get-iam-policy YOUR_PROJECT_ID \
|
| 238 |
+
--flatten="bindings[].members" \
|
| 239 |
+
--filter="bindings.members:serviceAccount:nanochat-uploader*"
|
| 240 |
+
```
|
| 241 |
+
|
| 242 |
+
### Upload Interrupted
|
| 243 |
+
|
| 244 |
+
The script uses `gsutil rsync`, so re-running will resume:
|
| 245 |
+
|
| 246 |
+
```bash
|
| 247 |
+
bash scripts/upload_to_gcs.sh --bucket gs://your-bucket
|
| 248 |
+
# Will skip already-uploaded files
|
| 249 |
+
```
|
| 250 |
+
|
| 251 |
+
### Verify Upload
|
| 252 |
+
|
| 253 |
+
```bash
|
| 254 |
+
# List all files in the run
|
| 255 |
+
gsutil ls -r gs://your-bucket/runs/your-run-name/
|
| 256 |
+
|
| 257 |
+
# Check specific checkpoints
|
| 258 |
+
gsutil ls gs://your-bucket/runs/your-run-name/checkpoints/
|
| 259 |
+
```
|
| 260 |
+
|
| 261 |
+
## Integration with Lambda Launcher
|
| 262 |
+
|
| 263 |
+
You can add GCS credentials to the automated launcher:
|
| 264 |
+
|
| 265 |
+
```python
|
| 266 |
+
# In scripts/launch_lambda_training.py
|
| 267 |
+
# Add to the cloud-init user-data:
|
| 268 |
+
|
| 269 |
+
write_files:
|
| 270 |
+
- path: /home/ubuntu/.config/gcloud/application_default_credentials.json
|
| 271 |
+
content: |
|
| 272 |
+
{your service account key JSON}
|
| 273 |
+
```
|
| 274 |
+
|
| 275 |
+
Or pass as environment variable:
|
| 276 |
+
|
| 277 |
+
```bash
|
| 278 |
+
export GOOGLE_APPLICATION_CREDENTIALS=/path/to/key.json
|
| 279 |
+
|
| 280 |
+
python scripts/launch_lambda_training.py \
|
| 281 |
+
--inject-env GOOGLE_APPLICATION_CREDENTIALS \
|
| 282 |
+
...
|
| 283 |
+
```
|
| 284 |
+
|
| 285 |
+
## Best Practices
|
| 286 |
+
|
| 287 |
+
1. **Name runs descriptively**: Use `--run-name depth20-lr1e4-batch32`
|
| 288 |
+
2. **Exclude data when iterating**: Use `--exclude-data` to save bandwidth
|
| 289 |
+
3. **Dry run first**: Always use `--dry-run` to preview
|
| 290 |
+
4. **Service accounts for automation**: Easier than user auth
|
| 291 |
+
5. **Regional buckets**: Match Lambda instance region when possible
|
| 292 |
+
6. **Lifecycle policies**: Auto-archive old models
|
| 293 |
+
7. **Download to Lambda**: If re-training, download previous checkpoints to Lambda first
|
| 294 |
+
|
| 295 |
+
## Security Notes
|
| 296 |
+
|
| 297 |
+
- Service account keys are sensitive - treat like passwords
|
| 298 |
+
- Use least-privilege IAM roles (don't grant `roles/owner`)
|
| 299 |
+
- Rotate service account keys regularly
|
| 300 |
+
- Consider Workload Identity if using GKE
|
| 301 |
+
- Don't commit keys to git (add to `.gitignore`)
|
| 302 |
+
|
| 303 |
+
## Support
|
| 304 |
+
|
| 305 |
+
- GCS Documentation: https://cloud.google.com/storage/docs
|
| 306 |
+
- gsutil Reference: https://cloud.google.com/storage/docs/gsutil
|
| 307 |
+
- IAM Permissions: https://cloud.google.com/storage/docs/access-control/iam-permissions
|
| 308 |
+
|
| 309 |
+
---
|
| 310 |
+
|
| 311 |
+
**Quick Reference**:
|
| 312 |
+
```bash
|
| 313 |
+
# Upload
|
| 314 |
+
bash scripts/upload_to_gcs.sh --bucket gs://my-bucket
|
| 315 |
+
|
| 316 |
+
# Download
|
| 317 |
+
gsutil -m cp -r gs://my-bucket/runs/NAME/checkpoints/ ./
|
| 318 |
+
|
| 319 |
+
# List runs
|
| 320 |
+
gsutil ls gs://my-bucket/runs/
|
| 321 |
+
|
| 322 |
+
# Delete old run
|
| 323 |
+
gsutil -m rm -r gs://my-bucket/runs/old-run/
|
LAMBDA_MANUAL_SETUP.md
ADDED
|
@@ -0,0 +1,317 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Manual Setup Guide for Lambda Labs
|
| 2 |
+
|
| 3 |
+
This guide walks you through manually launching and configuring a Lambda Labs GPU instance for training the nanochatAquaRat model with RL on AQuA-RAT.
|
| 4 |
+
|
| 5 |
+
## Prerequisites
|
| 6 |
+
|
| 7 |
+
1. **Lambda Labs Account**: Sign up at https://cloud.lambdalabs.com
|
| 8 |
+
2. **SSH Key**: Add your SSH public key to Lambda Labs
|
| 9 |
+
3. **W&B Account**: Sign up at https://wandb.ai and get your API key
|
| 10 |
+
4. **Sufficient Credits**: Ensure you have enough credits (~$24/hour on 8x H100, so roughly $192 for an 8-hour run)
|
| 11 |
+
|
| 12 |
+
## Step 1: Add SSH Key to Lambda Labs
|
| 13 |
+
|
| 14 |
+
1. Go to https://cloud.lambdalabs.com/ssh-keys
|
| 15 |
+
2. Click "Add SSH Key"
|
| 16 |
+
3. Paste your public SSH key (from `~/.ssh/id_rsa.pub` or `~/.ssh/id_ed25519.pub`)
|
| 17 |
+
4. Give it a name (e.g., "my-laptop")
|
| 18 |
+
5. Click "Add SSH Key"
|
| 19 |
+
|
| 20 |
+
## Step 2: Launch Instance via Web Dashboard
|
| 21 |
+
|
| 22 |
+
1. Navigate to https://cloud.lambdalabs.com/instances
|
| 23 |
+
2. Click **"Launch instance"**
|
| 24 |
+
3. Configure your instance:
|
| 25 |
+
- **Instance type**: Select `gpu_8x_h100_sxm5` (8x NVIDIA H100 80GB SXM5)
|
| 26 |
+
- For testing: Use `gpu_1x_a10` or smaller
|
| 27 |
+
- **Region**: Choose a region with availability (e.g., `us-west-1`)
|
| 28 |
+
- **SSH Keys**: Select your SSH key
|
| 29 |
+
- **Filesystem**: (Optional) If you have persistent storage
|
| 30 |
+
4. Click **"Launch instance"**
|
| 31 |
+
5. Wait 1-2 minutes for the instance to boot
|
| 32 |
+
|
| 33 |
+
## Step 3: Note Instance Details
|
| 34 |
+
|
| 35 |
+
Once the instance is running, note:
|
| 36 |
+
- **Instance ID**: (e.g., `0123456789abcdef`)
|
| 37 |
+
- **IP Address**: (e.g., `123.45.67.89`)
|
| 38 |
+
- **SSH Command**: Shown in the web interface
|
| 39 |
+
|
| 40 |
+
## Step 4: Connect to Instance
|
| 41 |
+
|
| 42 |
+
Open your terminal and connect:
|
| 43 |
+
|
| 44 |
+
```bash
|
| 45 |
+
ssh ubuntu@<INSTANCE_IP>
|
| 46 |
+
```
|
| 47 |
+
|
| 48 |
+
Example:
|
| 49 |
+
```bash
|
| 50 |
+
ssh ubuntu@123.45.67.89
|
| 51 |
+
```
|
| 52 |
+
|
| 53 |
+
## Step 5: Set Up Environment
|
| 54 |
+
|
| 55 |
+
Once connected, run these commands:
|
| 56 |
+
|
| 57 |
+
### 5.1 Create Environment File
|
| 58 |
+
|
| 59 |
+
```bash
|
| 60 |
+
# Create .env file with your credentials
|
| 61 |
+
cat > ~/.env << 'EOF'
|
| 62 |
+
WANDB_API_KEY=your-wandb-api-key-here
|
| 63 |
+
WANDB_PROJECT=nanochat-aquarat
|
| 64 |
+
WANDB_ENTITY=your-wandb-username-or-team
|
| 65 |
+
EOF
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
Replace `your-wandb-api-key-here` with your actual W&B API key (get it from https://wandb.ai/authorize)
|
| 69 |
+
|
| 70 |
+
### 5.2 Clone Repository
|
| 71 |
+
|
| 72 |
+
```bash
|
| 73 |
+
cd ~
|
| 74 |
+
git clone https://github.com/HarleyCoops/nanochatAquaRat.git
|
| 75 |
+
cd nanochatAquaRat
|
| 76 |
+
```
|
| 77 |
+
|
| 78 |
+
### 5.3 Copy Environment Variables
|
| 79 |
+
|
| 80 |
+
```bash
|
| 81 |
+
# Copy the .env file to the project directory
|
| 82 |
+
cp ~/.env .env
|
| 83 |
+
```
|
| 84 |
+
|
| 85 |
+
## Step 6: Start Training
|
| 86 |
+
|
| 87 |
+
You have two options for running the training:
|
| 88 |
+
|
| 89 |
+
### Option A: Run in Screen Session (Recommended)
|
| 90 |
+
|
| 91 |
+
This allows you to detach and let training continue even if you disconnect:
|
| 92 |
+
|
| 93 |
+
```bash
|
| 94 |
+
# Start a screen session
|
| 95 |
+
screen -S training
|
| 96 |
+
|
| 97 |
+
# Run the training script
|
| 98 |
+
bash run_aquarat_small.sh
|
| 99 |
+
```
|
| 100 |
+
|
| 101 |
+
**Screen Commands:**
|
| 102 |
+
- **Detach from screen**: Press `Ctrl+A` then `D`
|
| 103 |
+
- **Reattach to screen**: `screen -r training`
|
| 104 |
+
- **List all screen sessions**: `screen -ls`
|
| 105 |
+
- **Kill a screen session**: `screen -X -S training quit`
|
| 106 |
+
|
| 107 |
+
### Option B: Run Directly (Blocks Terminal)
|
| 108 |
+
|
| 109 |
+
```bash
|
| 110 |
+
# Run training directly (terminal will be blocked)
|
| 111 |
+
bash run_aquarat_small.sh 2>&1 | tee training.log
|
| 112 |
+
```
|
| 113 |
+
|
| 114 |
+
This saves output to `training.log` for later review.
|
| 115 |
+
|
| 116 |
+
## Step 7: Monitor Training
|
| 117 |
+
|
| 118 |
+
### Monitor via Terminal
|
| 119 |
+
|
| 120 |
+
If using screen:
|
| 121 |
+
```bash
|
| 122 |
+
# Reattach to see live output
|
| 123 |
+
screen -r training
|
| 124 |
+
|
| 125 |
+
# Or tail the report
|
| 126 |
+
tail -f ~/.cache/nanochat/report/report.md
|
| 127 |
+
```
|
| 128 |
+
|
| 129 |
+
### Monitor via Weights & Biases
|
| 130 |
+
|
| 131 |
+
1. Go to https://wandb.ai
|
| 132 |
+
2. Navigate to your project: `nanochat-aquarat`
|
| 133 |
+
3. View real-time metrics, losses, and generated samples
|
| 134 |
+
|
| 135 |
+
Key metrics to watch:
|
| 136 |
+
- `rl/acc` - Accuracy on AQuA-RAT
|
| 137 |
+
- `rl/mean_reward` - Average reward per sample
|
| 138 |
+
- `rl/kl_letter_mean` - KL divergence from initial policy
|
| 139 |
+
- `rl/letter_margin_mean` - Confidence in letter choices
|
| 140 |
+
- `attn/entropy_mean` - Attention mechanism entropy
|
| 141 |
+
|
| 142 |
+
## Step 8: Training Timeline
|
| 143 |
+
|
| 144 |
+
For the **small (depth=8) model**:
|
| 145 |
+
- **Base pretraining**: ~1-2 hours
|
| 146 |
+
- **Mid-training**: ~30 minutes
|
| 147 |
+
- **SFT**: ~30 minutes
|
| 148 |
+
- **RL**: ~30 minutes
|
| 149 |
+
- **Total**: ~3-4 hours
|
| 150 |
+
|
| 151 |
+
For the **d-20 model** (561M params):
|
| 152 |
+
- **Base pretraining**: ~3-4 hours
|
| 153 |
+
- **Mid-training**: ~1 hour
|
| 154 |
+
- **SFT**: ~1 hour
|
| 155 |
+
- **RL**: ~1 hour
|
| 156 |
+
- **Total**: ~6-8 hours
|
| 157 |
+
|
| 158 |
+
## Step 9: Check Results
|
| 159 |
+
|
| 160 |
+
After training completes:
|
| 161 |
+
|
| 162 |
+
```bash
|
| 163 |
+
# View the final report
|
| 164 |
+
cat ~/.cache/nanochat/report/report.md
|
| 165 |
+
|
| 166 |
+
# Check RL checkpoint
|
| 167 |
+
ls -lh ~/.cache/nanochat/checkpoints/
|
| 168 |
+
|
| 169 |
+
# View evaluation results
|
| 170 |
+
ls ~/.cache/nanochat/evals/
|
| 171 |
+
```
|
| 172 |
+
|
| 173 |
+
## Step 10: Download Artifacts (Optional)
|
| 174 |
+
|
| 175 |
+
If you want to save the trained model locally:
|
| 176 |
+
|
| 177 |
+
```bash
|
| 178 |
+
# From your local machine (not on the Lambda instance):
|
| 179 |
+
# Download checkpoints
|
| 180 |
+
scp -r ubuntu@<INSTANCE_IP>:~/.cache/nanochat/checkpoints ./local_checkpoints/
|
| 181 |
+
|
| 182 |
+
# Download reports
|
| 183 |
+
scp -r ubuntu@<INSTANCE_IP>:~/.cache/nanochat/report ./local_reports/
|
| 184 |
+
|
| 185 |
+
# Download training logs
|
| 186 |
+
scp ubuntu@<INSTANCE_IP>:~/nanochatAquaRat/training.log ./training.log
|
| 187 |
+
```
|
| 188 |
+
|
| 189 |
+
## Step 11: Terminate Instance
|
| 190 |
+
|
| 191 |
+
**IMPORTANT**: Remember to terminate your instance when done to avoid charges!
|
| 192 |
+
|
| 193 |
+
### Via Web Dashboard:
|
| 194 |
+
1. Go to https://cloud.lambdalabs.com/instances
|
| 195 |
+
2. Find your instance
|
| 196 |
+
3. Click the **"..."** menu
|
| 197 |
+
4. Select **"Terminate"**
|
| 198 |
+
5. Confirm termination
|
| 199 |
+
|
| 200 |
+
### Via SSH (before disconnecting):
|
| 201 |
+
```bash
|
| 202 |
+
# NOTE: shutting down only powers the VM off — Lambda continues billing until you terminate the instance via the dashboard or API
|
| 203 |
+
sudo shutdown -h now
|
| 204 |
+
```
|
| 205 |
+
|
| 206 |
+
## Troubleshooting
|
| 207 |
+
|
| 208 |
+
### Issue: "Out of memory" Error
|
| 209 |
+
|
| 210 |
+
**Solution**: Reduce batch size in the training script
|
| 211 |
+
```bash
|
| 212 |
+
# Edit run_aquarat_small.sh and add these flags to the torchrun commands:
|
| 213 |
+
--device_batch_size=2 # Reduce from default
|
| 214 |
+
```
|
| 215 |
+
|
| 216 |
+
### Issue: W&B Not Logging
|
| 217 |
+
|
| 218 |
+
**Solution**: Check your API key
|
| 219 |
+
```bash
|
| 220 |
+
# Test W&B login
|
| 221 |
+
wandb login
|
| 222 |
+
|
| 223 |
+
# Verify environment variable
|
| 224 |
+
echo $WANDB_API_KEY
|
| 225 |
+
|
| 226 |
+
# Re-run with explicit login
|
| 227 |
+
export WANDB_API_KEY=your-key-here
|
| 228 |
+
bash run_aquarat_small.sh
|
| 229 |
+
```
|
| 230 |
+
|
| 231 |
+
### Issue: Screen Session Lost
|
| 232 |
+
|
| 233 |
+
**Solution**: Reattach to screen
|
| 234 |
+
```bash
|
| 235 |
+
# List all screen sessions
|
| 236 |
+
screen -ls
|
| 237 |
+
|
| 238 |
+
# Reattach to the training session
|
| 239 |
+
screen -r training
|
| 240 |
+
|
| 241 |
+
# If screen says "Detached", force attach
|
| 242 |
+
screen -d -r training
|
| 243 |
+
```
|
| 244 |
+
|
| 245 |
+
### Issue: Dataset Download Slow
|
| 246 |
+
|
| 247 |
+
**Solution**: The script downloads data in parallel. Wait for completion or reduce number of shards.
|
| 248 |
+
|
| 249 |
+
### Issue: SSH Connection Drops
|
| 250 |
+
|
| 251 |
+
**Solution**: Use `screen` or `tmux` to keep processes running
|
| 252 |
+
```bash
|
| 253 |
+
# If you didn't use screen initially and got disconnected:
|
| 254 |
+
# Reconnect and check if the process is still running
|
| 255 |
+
ps aux | grep python
|
| 256 |
+
|
| 257 |
+
# If running, you can monitor the log files:
|
| 258 |
+
tail -f ~/.cache/nanochat/report/report.md
|
| 259 |
+
tail -f ~/nanochatAquaRat/training.log
|
| 260 |
+
```
|
| 261 |
+
|
| 262 |
+
## Cost Estimation
|
| 263 |
+
|
| 264 |
+
**8x H100 SXM5** pricing (as of this writing):
|
| 265 |
+
- ~$3.00/hour per GPU
|
| 266 |
+
- 8 GPUs = $24/hour
|
| 267 |
+
- Small model (4 hours) = ~$96
|
| 268 |
+
- d-20 model (8 hours) = ~$192
|
| 269 |
+
|
| 270 |
+
**Budget-friendly testing options:**
|
| 271 |
+
- 1x A10 (24GB): ~$0.60/hour - Good for testing pipeline
|
| 272 |
+
- 1x A6000 (48GB): ~$0.80/hour - Can run small model
|
| 273 |
+
- 2x A100 (40GB): ~$2.20/hour - Can run d-20 with reduced batch size
|
| 274 |
+
|
| 275 |
+
## Quick Reference Commands
|
| 276 |
+
|
| 277 |
+
```bash
|
| 278 |
+
# SSH to instance
|
| 279 |
+
ssh ubuntu@<INSTANCE_IP>
|
| 280 |
+
|
| 281 |
+
# Start training in screen
|
| 282 |
+
screen -S training
|
| 283 |
+
bash run_aquarat_small.sh
|
| 284 |
+
|
| 285 |
+
# Detach from screen
|
| 286 |
+
Ctrl+A then D
|
| 287 |
+
|
| 288 |
+
# Reattach to screen
|
| 289 |
+
screen -r training
|
| 290 |
+
|
| 291 |
+
# Monitor W&B
|
| 292 |
+
Open: https://wandb.ai
|
| 293 |
+
|
| 294 |
+
# View live report
|
| 295 |
+
tail -f ~/.cache/nanochat/report/report.md
|
| 296 |
+
|
| 297 |
+
# Check GPU usage
|
| 298 |
+
nvidia-smi
|
| 299 |
+
|
| 300 |
+
# Terminate instance (via dashboard)
|
| 301 |
+
https://cloud.lambdalabs.com/instances
|
| 302 |
+
```
|
| 303 |
+
|
| 304 |
+
## Support
|
| 305 |
+
|
| 306 |
+
- **Lambda Labs Support**: https://lambdalabs.com/support
|
| 307 |
+
- **W&B Support**: https://docs.wandb.ai
|
| 308 |
+
- **nanochat Issues**: https://github.com/HarleyCoops/nanochatAquaRat/issues
|
| 309 |
+
|
| 310 |
+
## Next Steps
|
| 311 |
+
|
| 312 |
+
After your model is trained:
|
| 313 |
+
1. Download checkpoints for inference
|
| 314 |
+
2. Use the web interface: `python -m scripts.chat_web`
|
| 315 |
+
3. Test via CLI: `python -m scripts.chat_cli`
|
| 316 |
+
4. Share your results on W&B
|
| 317 |
+
5. Fine-tune on additional datasets if desired
|
QUICKSTART.md
ADDED
|
@@ -0,0 +1,288 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Lambda Labs Training Quickstart
|
| 2 |
+
|
| 3 |
+
This project provides two ways to run model training on Lambda Labs:
|
| 4 |
+
|
| 5 |
+
1. **Automated API Script** (Recommended) - Fully automated deployment
|
| 6 |
+
2. **Manual Setup** - Step-by-step web dashboard approach
|
| 7 |
+
|
| 8 |
+
## Prerequisites
|
| 9 |
+
|
| 10 |
+
Both methods require:
|
| 11 |
+
- Lambda Labs account with API key
|
| 12 |
+
- SSH key added to Lambda Labs
|
| 13 |
+
- Weights & Biases account with API key
|
| 14 |
+
- Sufficient credits (~$24/hour for 8x H100)
|
| 15 |
+
|
| 16 |
+
## Method 1: Automated API Script (Recommended)
|
| 17 |
+
|
| 18 |
+
### Setup
|
| 19 |
+
|
| 20 |
+
1. **Set environment variables:**
|
| 21 |
+
|
| 22 |
+
```bash
|
| 23 |
+
export LAMBDA_API_KEY='your-lambda-api-key'
|
| 24 |
+
export WANDB_API_KEY='your-wandb-api-key'
|
| 25 |
+
```
|
| 26 |
+
|
| 27 |
+
Get your Lambda API key from: https://cloud.lambdalabs.com/api-keys
|
| 28 |
+
Get your W&B API key from: https://wandb.ai/authorize
|
| 29 |
+
|
| 30 |
+
2. **Install dependencies:**
|
| 31 |
+
|
| 32 |
+
```bash
|
| 33 |
+
pip install lambda-cloud-client
|
| 34 |
+
```
|
| 35 |
+
|
| 36 |
+
### Usage
|
| 37 |
+
|
| 38 |
+
**Check available instances:**
|
| 39 |
+
|
| 40 |
+
```bash
|
| 41 |
+
python launch_lambda.py --list-types
|
| 42 |
+
```
|
| 43 |
+
|
| 44 |
+
**Launch and start training:**
|
| 45 |
+
|
| 46 |
+
```bash
|
| 47 |
+
# Launch 8x H100 instance (recommended for d-20 model)
|
| 48 |
+
python launch_lambda.py --instance-type gpu_8x_h100_sxm5 --region us-west-1
|
| 49 |
+
|
| 50 |
+
# Launch smaller instance for testing (depth-8 model)
|
| 51 |
+
python launch_lambda.py --instance-type gpu_1x_a100 --region us-west-1
|
| 52 |
+
```
|
| 53 |
+
|
| 54 |
+
**Just launch without deploying:**
|
| 55 |
+
|
| 56 |
+
```bash
|
| 57 |
+
python launch_lambda.py --instance-type gpu_8x_h100_sxm5 --no-deploy
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
The script will:
|
| 61 |
+
1. ✓ Launch the instance
|
| 62 |
+
2. ✓ Wait for it to be ready
|
| 63 |
+
3. ✓ Deploy the code
|
| 64 |
+
4. ✓ Start training in a screen session
|
| 65 |
+
5. ✓ Provide connection details
|
| 66 |
+
|
| 67 |
+
### Monitor Training
|
| 68 |
+
|
| 69 |
+
After launching, SSH to the instance:
|
| 70 |
+
|
| 71 |
+
```bash
|
| 72 |
+
ssh ubuntu@<INSTANCE_IP>
|
| 73 |
+
```
|
| 74 |
+
|
| 75 |
+
Then attach to the screen session:
|
| 76 |
+
|
| 77 |
+
```bash
|
| 78 |
+
screen -r training
|
| 79 |
+
```
|
| 80 |
+
|
| 81 |
+
Or view logs:
|
| 82 |
+
|
| 83 |
+
```bash
|
| 84 |
+
tail -f ~/nanochatAquaRat/training.log
|
| 85 |
+
```
|
| 86 |
+
|
| 87 |
+
## Method 2: Manual Setup
|
| 88 |
+
|
| 89 |
+
For detailed step-by-step instructions, see [LAMBDA_MANUAL_SETUP.md](LAMBDA_MANUAL_SETUP.md)
|
| 90 |
+
|
| 91 |
+
**Quick summary:**
|
| 92 |
+
1. Go to https://cloud.lambdalabs.com/instances
|
| 93 |
+
2. Launch instance manually
|
| 94 |
+
3. SSH to instance
|
| 95 |
+
4. Clone repo and set up .env
|
| 96 |
+
5. Run `bash run_aquarat_small.sh`
|
| 97 |
+
|
| 98 |
+
## Training Configuration
|
| 99 |
+
|
| 100 |
+
The `run_aquarat_small.sh` script trains a **depth-8 (smaller) model** which takes approximately **3-4 hours** on 8x H100.
|
| 101 |
+
|
| 102 |
+
### What Gets Trained:
|
| 103 |
+
|
| 104 |
+
1. **Base Model** (depth-8, ~60M params)
|
| 105 |
+
- Pretrained on limited corpus (24 shards)
|
| 106 |
+
- Faster iteration for testing
|
| 107 |
+
|
| 108 |
+
2. **Mid-Training**
|
| 109 |
+
- Conversation format adaptation
|
| 110 |
+
- Tool use capabilities
|
| 111 |
+
|
| 112 |
+
3. **Supervised Fine-Tuning (SFT)**
|
| 113 |
+
- Fine-tuned on AQuA-RAT dataset
|
| 114 |
+
- Multiple-choice math reasoning
|
| 115 |
+
|
| 116 |
+
4. **Reinforcement Learning (RL)**
|
| 117 |
+
- GRPO-style RL on AQuA-RAT
|
| 118 |
+
- KL divergence tracking
|
| 119 |
+
- Letter-choice logit margin analysis
|
| 120 |
+
- Attention mechanism logging
|
| 121 |
+
|
| 122 |
+
### W&B Metrics Logged:
|
| 123 |
+
|
| 124 |
+
- `rl/acc` - Answer accuracy
|
| 125 |
+
- `rl/mean_reward` - Average reward
|
| 126 |
+
- `rl/kl_letter_mean` - Policy drift (letter-level)
|
| 127 |
+
- `rl/kl_sequence_mean` - Policy drift (sequence-level)
|
| 128 |
+
- `rl/letter_margin_mean` - Confidence in answers
|
| 129 |
+
- `attn/entropy_mean` - Attention patterns
|
| 130 |
+
|
| 131 |
+
## Model Sizes Available
|
| 132 |
+
|
| 133 |
+
You can modify `run_aquarat_small.sh` to change the model depth:
|
| 134 |
+
|
| 135 |
+
| Depth | Params | Training Time | Recommended Instance |
|
| 136 |
+
|-------|--------|---------------|---------------------|
|
| 137 |
+
| 8 | ~60M | 3-4 hours | 1x A100 / 2x A100 |
|
| 138 |
+
| 12 | ~180M | 4-5 hours | 4x A100 |
|
| 139 |
+
| 20 | ~561M | 6-8 hours | 8x H100 |
|
| 140 |
+
| 26 | ~1.1B | 10-12 hours | 8x H100 |
|
| 141 |
+
|
| 142 |
+
To change depth, edit the `--depth` parameter in `run_aquarat_small.sh`:
|
| 143 |
+
|
| 144 |
+
```bash
|
| 145 |
+
torchrun --standalone --nproc_per_node=8 -m scripts.base_train -- --depth=20 --run=$WANDB_RUN
|
| 146 |
+
```
|
| 147 |
+
|
| 148 |
+
## Cost Estimates
|
| 149 |
+
|
| 150 |
+
Based on Lambda Labs pricing:
|
| 151 |
+
|
| 152 |
+
| Instance Type | GPUs | Cost/Hour | Small (4h) | d-20 (8h) |
|
| 153 |
+
|------------------|--------------|-----------|------------|-----------|
|
| 154 |
+
| gpu_8x_h100_sxm5 | 8x H100 80GB | ~$24.00 | ~$96 | ~$192 |
|
| 155 |
+
| gpu_4x_a100 | 4x A100 40GB | ~$8.80 | ~$35 | ~$70 |
|
| 156 |
+
| gpu_2x_a100 | 2x A100 40GB | ~$4.40 | ~$18 | ~$35 |
|
| 157 |
+
| gpu_1x_a100 | 1x A100 40GB | ~$2.20 | ~$9 | ~$18 |
|
| 158 |
+
|
| 159 |
+
## Monitoring Options
|
| 160 |
+
|
| 161 |
+
### 1. SSH + Screen
|
| 162 |
+
|
| 163 |
+
```bash
|
| 164 |
+
ssh ubuntu@<INSTANCE_IP>
|
| 165 |
+
screen -r training
|
| 166 |
+
# Ctrl+A then D to detach
|
| 167 |
+
```
|
| 168 |
+
|
| 169 |
+
### 2. Weights & Biases
|
| 170 |
+
|
| 171 |
+
Dashboard: https://wandb.ai
|
| 172 |
+
|
| 173 |
+
Real-time metrics, attention heatmaps, sample completions
|
| 174 |
+
|
| 175 |
+
### 3. Log Files
|
| 176 |
+
|
| 177 |
+
```bash
|
| 178 |
+
# Training log
|
| 179 |
+
tail -f ~/nanochatAquaRat/training.log
|
| 180 |
+
|
| 181 |
+
# Progress report
|
| 182 |
+
tail -f ~/.cache/nanochat/report/report.md
|
| 183 |
+
```
|
| 184 |
+
|
| 185 |
+
## After Training
|
| 186 |
+
|
| 187 |
+
### Download Checkpoints
|
| 188 |
+
|
| 189 |
+
From your local machine:
|
| 190 |
+
|
| 191 |
+
```bash
|
| 192 |
+
scp -r ubuntu@<INSTANCE_IP>:~/.cache/nanochat/checkpoints ./checkpoints/
|
| 193 |
+
```
|
| 194 |
+
|
| 195 |
+
### Run Inference
|
| 196 |
+
|
| 197 |
+
On the Lambda instance:
|
| 198 |
+
|
| 199 |
+
```bash
|
| 200 |
+
# Web interface
|
| 201 |
+
python -m scripts.chat_web
|
| 202 |
+
|
| 203 |
+
# CLI interface
|
| 204 |
+
python -m scripts.chat_cli -p "What is 25 * 37?"
|
| 205 |
+
|
| 206 |
+
# Evaluate on test set
|
| 207 |
+
python -m scripts.chat_eval -- -i rl -a AQUA
|
| 208 |
+
```
|
| 209 |
+
|
| 210 |
+
### Don't Forget to Terminate!
|
| 211 |
+
|
| 212 |
+
**Via Dashboard:**
|
| 213 |
+
https://cloud.lambdalabs.com/instances → Terminate
|
| 214 |
+
|
| 215 |
+
**Via CLI:**
|
| 216 |
+
```bash
|
| 217 |
+
sudo shutdown -h now
# NOTE: shutting down only powers off the VM — Lambda Labs keeps billing
# until you terminate the instance from the dashboard or API.
|
| 218 |
+
```
|
| 219 |
+
|
| 220 |
+
## Troubleshooting
|
| 221 |
+
|
| 222 |
+
### Issue: API Key Not Working
|
| 223 |
+
|
| 224 |
+
```bash
|
| 225 |
+
# Verify keys are set
|
| 226 |
+
echo $LAMBDA_API_KEY
|
| 227 |
+
echo $WANDB_API_KEY
|
| 228 |
+
|
| 229 |
+
# Re-export if needed
|
| 230 |
+
export LAMBDA_API_KEY='your-key'
|
| 231 |
+
export WANDB_API_KEY='your-key'
|
| 232 |
+
```
|
| 233 |
+
|
| 234 |
+
### Issue: No Available Instances
|
| 235 |
+
|
| 236 |
+
Lambda Labs instances can be in high demand. Try:
|
| 237 |
+
- Different regions (`--region us-east-1`)
|
| 238 |
+
- Smaller instance types (`gpu_1x_a100`)
|
| 239 |
+
- Check availability: `python launch_lambda.py --list-types`
|
| 240 |
+
|
| 241 |
+
### Issue: Out of Memory
|
| 242 |
+
|
| 243 |
+
Edit `run_aquarat_small.sh` and reduce batch size:
|
| 244 |
+
|
| 245 |
+
```bash
|
| 246 |
+
# Add to torchrun commands:
|
| 247 |
+
--device_batch_size=2
|
| 248 |
+
```
|
| 249 |
+
|
| 250 |
+
### Issue: Training Stuck
|
| 251 |
+
|
| 252 |
+
Check GPU utilization:
|
| 253 |
+
|
| 254 |
+
```bash
|
| 255 |
+
nvidia-smi
|
| 256 |
+
```
|
| 257 |
+
|
| 258 |
+
If GPUs are idle, check for errors:
|
| 259 |
+
|
| 260 |
+
```bash
|
| 261 |
+
tail -100 ~/nanochatAquaRat/training.log
|
| 262 |
+
```
|
| 263 |
+
|
| 264 |
+
## Files in This Repository
|
| 265 |
+
|
| 266 |
+
- `launch_lambda.py` - Automated Lambda Labs launcher
|
| 267 |
+
- `run_aquarat_small.sh` - Training script (depth-8 model)
|
| 268 |
+
- `LAMBDA_MANUAL_SETUP.md` - Detailed manual setup guide
|
| 269 |
+
- `QUICKSTART.md` - This file
|
| 270 |
+
- `.env.template` - Environment variable template
|
| 271 |
+
|
| 272 |
+
## Support
|
| 273 |
+
|
| 274 |
+
- **Lambda Labs**: https://lambdalabs.com/support
|
| 275 |
+
- **Weights & Biases**: https://docs.wandb.ai
|
| 276 |
+
- **Project Issues**: https://github.com/HarleyCoops/nanochatAquaRat/issues
|
| 277 |
+
|
| 278 |
+
## Next Steps
|
| 279 |
+
|
| 280 |
+
1. ✓ Set up Lambda Labs account and API key
|
| 281 |
+
2. ✓ Set up Weights & Biases account
|
| 282 |
+
3. ✓ Choose your method (API script or manual)
|
| 283 |
+
4. ✓ Launch instance and start training
|
| 284 |
+
5. ✓ Monitor via W&B dashboard
|
| 285 |
+
6. ✓ Download checkpoints when complete
|
| 286 |
+
7. ✓ Terminate instance to stop charges
|
| 287 |
+
|
| 288 |
+
Happy training!
|
README.md
CHANGED
|
@@ -13,440 +13,440 @@ tags:
|
|
| 13 |
---
|
| 14 |
|
| 15 |
<div align="center">
|
| 16 |
-
|
| 17 |
-

|
| 18 |
-
|
| 19 |
-
# nanochatAquaRat
|
| 20 |
-
|
| 21 |
-
**Training Language Models with Reinforcement Learning on Mathematical Reasoning**
|
| 22 |
-
|
| 23 |
-
[](https://github.com/HarleyCoops/nanochatAquaRat)
|
| 24 |
-
[](LICENSE)
|
| 25 |
-
[](https://www.python.org/downloads/)
|
| 26 |
-
|
| 27 |
-
A modified version of [nanochat](https://github.com/karpathy/nanochat) trained with reinforcement learning on the [DeepMind AQuA-RAT dataset](https://huggingface.co/datasets/deepmind/aqua_rat) for algebraic reasoning and multiple-choice problem solving.
|
| 28 |
-
|
| 29 |
-
[Quick Start](#quick-start) • [Dataset](#dataset-structure) • [Modifications](#modifications-from-base-nanochat) • [Training](#training-pipeline) • [Results](#results)
|
| 30 |
-
|
| 31 |
-
</div>
|
| 32 |
-
|
| 33 |
-
---
|
| 34 |
-
|
| 35 |
-
## Table of Contents
|
| 36 |
-
|
| 37 |
-
- [Overview](#overview)
|
| 38 |
-
- [The Base: nanochat Framework](#the-base-nanochat-framework)
|
| 39 |
-
- [Dataset Structure](#dataset-structure)
|
| 40 |
-
- [Modifications from Base nanochat](#modifications-from-base-nanochat)
|
| 41 |
-
- [Training Pipeline](#training-pipeline)
|
| 42 |
-
- [Quick Start](#quick-start)
|
| 43 |
-
- [File Structure](#file-structure)
|
| 44 |
-
- [Monitoring & Visualization](#monitoring--visualization)
|
| 45 |
-
- [Results](#results)
|
| 46 |
-
|
| 47 |
-
---
|
| 48 |
-
|
| 49 |
-
## Overview
|
| 50 |
-
|
| 51 |
-
This project adapts the **nanochat** training framework (originally designed for GSM8K numerical reasoning) to work with **AQuA-RAT** (Algebra Question Answering with Rationales), a dataset of ~97,000 algebraic word problems with multiple-choice answers (A-E) and natural language solution rationales.
|
| 52 |
-
|
| 53 |
-
### Why This Matters
|
| 54 |
-
|
| 55 |
-
- **Domain Transfer**: Demonstrates how to adapt a mathematical reasoning pipeline from free-form numeric answers to multiple-choice format
|
| 56 |
-
- **RL on Math**: Implements GRPO-style reinforcement learning with reward shaping for categorical outputs
|
| 57 |
-
- **Mechanistic Interpretability**: Integrates attention analysis during training to understand model reasoning patterns
|
| 58 |
-
- **Production-Ready**: Includes automated Lambda Labs and Hyperbolic Labs deployment helpers for cloud GPU training
|
| 59 |
-
|
| 60 |
-
### Key Results
|
| 61 |
-
|
| 62 |
-
| Model | Parameters | Training Time | AQuA-RAT Dev Accuracy |
|
| 63 |
-
|-------|------------|---------------|----------------------|
|
| 64 |
-
| depth-8 | ~60M | 3-4 hours | 30-50% |
|
| 65 |
-
| depth-20 | ~561M | 6-8 hours | 40-60% |
|
| 66 |
-
|
| 67 |
-
---
|
| 68 |
-
|
| 69 |
-
## The Base: nanochat Framework
|
| 70 |
-
|
| 71 |
-
**nanochat** is a minimalist yet complete pipeline for training transformer language models from scratch, created by Andrej Karpathy. It implements:
|
| 72 |
-
|
| 73 |
-
- **Custom tokenizer**: BPE tokenizer written in Rust for performance
|
| 74 |
-
- **Training stages**: Pretraining → Mid-training → SFT → RL
|
| 75 |
-
- **Evaluation suite**: CORE benchmarks and task-specific metrics
|
| 76 |
-
- **Optimizations**: Memory-efficient training, gradient accumulation, distributed training
|
| 77 |
-
|
| 78 |
-
**Original focus**: Training on GSM8K (Grade School Math 8K) with free-form numeric answers.
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
---
|
| 82 |
-
|
| 83 |
-
## Dataset Structure
|
| 84 |
-
|
| 85 |
-
### AQuA-RAT Format
|
| 86 |
-
|
| 87 |
-
The [DeepMind AQuA-RAT dataset](https://github.com/deepmind/AQuA) contains algebraic reasoning problems in JSON format:
|
| 88 |
-
|
| 89 |
-
```json
|
| 90 |
-
{
|
| 91 |
-
"question": "A person is traveling at 20 km/hr and reached his destiny in 2.5 hr then find the distance?",
|
| 92 |
-
"options": [
|
| 93 |
-
"A) 53 km",
|
| 94 |
-
"B) 55 km",
|
| 95 |
-
"C) 52 km",
|
| 96 |
-
"D) 60 km",
|
| 97 |
-
"E) 50 km"
|
| 98 |
-
],
|
| 99 |
-
"rationale": "The distance that the person traveled = 20 * 2.5 = 50 km. Answer: E",
|
| 100 |
-
"correct": "E"
|
| 101 |
-
}
|
| 102 |
-
```
|
| 103 |
-
|
| 104 |
-
**Dataset splits**:
|
| 105 |
-
- Training: 97,467 problems
|
| 106 |
-
- Development: 254 problems
|
| 107 |
-
- Test: 254 problems
|
| 108 |
-
|
| 109 |
-
**Key characteristics**:
|
| 110 |
-
- Multiple-choice (A-E) format
|
| 111 |
-
- Algebraic word problems
|
| 112 |
-
- Natural language rationales
|
| 113 |
-
- Topics: arithmetic, algebra, geometry, probability
|
| 114 |
-
|
| 115 |
-
### Comparison: GSM8K vs AQuA-RAT
|
| 116 |
-
|
| 117 |
-
| Aspect | GSM8K (Original) | AQuA-RAT (This Project) |
|
| 118 |
-
|--------|------------------|-------------------------|
|
| 119 |
-
| **Format** | Free-form numeric | Multiple choice (A-E) |
|
| 120 |
-
| **Answer** | Single number | Letter choice |
|
| 121 |
-
| **Size** | 8,500 problems | 97,975 problems |
|
| 122 |
-
| **Difficulty** | Elementary school | High school algebra |
|
| 123 |
-
| **Rationale** | Step-by-step | Natural language |
|
| 124 |
-
| **Evaluation** | Exact match on number | Categorical accuracy |
|
| 125 |
-
|
| 126 |
-
---
|
| 127 |
-
|
| 128 |
-
## Modifications from Base nanochat
|
| 129 |
-
|
| 130 |
-
To adapt nanochat from GSM8K to AQuA-RAT, we modified the following components:
|
| 131 |
-
|
| 132 |
-
### 1. Dataset Loader (`scripts/prepare_aqua.py`)
|
| 133 |
-
|
| 134 |
-
**Created new file** to download and format AQuA-RAT:
|
| 135 |
-
|
| 136 |
-
```python
|
| 137 |
-
# New file: scripts/prepare_aqua.py
|
| 138 |
-
### 1. Dataset Preparation (`scripts/prepare_aqua.py`)
|
| 139 |
-
|
| 140 |
-
- Uses `datasets.load_dataset("deepmind/aqua_rat")` and optionally caps split sizes.
|
| 141 |
-
- Emits JSONL files (`train.jsonl`, `validation.jsonl`, `test.jsonl`) compatible with
|
| 142 |
-
the conversation schema used throughout nanochat.
|
| 143 |
-
- Defaults to `~/.cache/nanochat/aqua`, but accepts `--output_dir` overrides so
|
| 144 |
-
launchers can bundle their own artifact.
|
| 145 |
-
|
| 146 |
-
```python
|
| 147 |
-
def format_example(row):
|
| 148 |
-
options = row["options"]
|
| 149 |
-
assistant_content = [
|
| 150 |
-
{"type": "text", "text": row["rationale"].strip()},
|
| 151 |
-
{"type": "text", "text": f"Answer: {row['correct'].strip().upper()}"},
|
| 152 |
-
]
|
| 153 |
-
return {
|
| 154 |
-
"messages": [
|
| 155 |
-
{"role": "user", "content": _render_user_prompt(row["question"], options)},
|
| 156 |
-
{"role": "assistant", "content": assistant_content},
|
| 157 |
-
],
|
| 158 |
-
"letters": letters,
|
| 159 |
-
"answer_letter": correct,
|
| 160 |
-
}
|
| 161 |
-
```
|
| 162 |
-
|
| 163 |
-
### 2. Task Module (`tasks/aqua.py`)
|
| 164 |
-
|
| 165 |
-
- Accepts optional `data_dir` (or `AQUA_DATA_DIR` / `NANOCHAT_AQUA_DIR`) so the task
|
| 166 |
-
can read the cached JSONL; otherwise falls back to Hugging Face.
|
| 167 |
-
- Provides `_render_user_prompt` to format the question/options using the common
|
| 168 |
-
multiple-choice helper and `_extract_letter` to score completions.
|
| 169 |
-
- Returns conversations whose assistant messages include both the rationale and a
|
| 170 |
-
final `Answer: <LETTER>` line for SFT, while `evaluate()` only cares about the letter.
|
| 171 |
-
|
| 172 |
-
```python
|
| 173 |
-
def _extract_letter(text, default=None):
|
| 174 |
-
answer_match = re.search(r"answer\s*[:\-]\s*([A-E])", text, flags=re.IGNORECASE)
|
| 175 |
-
if answer_match:
|
| 176 |
-
return answer_match.group(1).upper()
|
| 177 |
-
match = LETTER_RE.search(text)
|
| 178 |
-
return match.group(1).upper() if match else default
|
| 179 |
-
```
|
| 180 |
-
|
| 181 |
-
**Key differences from GSM8K**:
|
| 182 |
-
- Numeric extraction → Letter extraction
|
| 183 |
-
- Free-form answer → Fixed choices A-E
|
| 184 |
-
- Exact number match → Categorical match
|
| 185 |
-
|
| 186 |
-
### 3. RL Training (`scripts/chat_rl.py`)
|
| 187 |
-
|
| 188 |
-
**Modified** to support both GSM8K and AQuA-RAT:
|
| 189 |
-
|
| 190 |
-
Key updates:
|
| 191 |
-
|
| 192 |
-
- `train_task` / `val_task` now instantiate `AQUA(...)` instead of `GSM8K(...)`.
|
| 193 |
-
- Rewards reuse the task's `evaluate()` helper so any completion containing
|
| 194 |
-
“Answer: X” (or the first bare letter) is scored correctly.
|
| 195 |
-
- The validation helper became `run_aqua_eval`, still reporting pass@k accuracy
|
| 196 |
-
across sampled completions.
|
| 197 |
-
- CLI overrides remain the same because the script continues to rely on the
|
| 198 |
-
nanochat configurator (`--run`, `--temperature`, `--max_new_tokens`, …).
|
| 199 |
-
|
| 200 |
-
### 4. Evaluation (`scripts/chat_eval.py`)
|
| 201 |
-
|
| 202 |
-
- Registered `'AQUA'` in the task registry so `-a AQUA` just works.
|
| 203 |
-
- Added a 20% random-guess baseline when aggregating the ChatCORE metric.
|
| 204 |
-
- The categorical evaluation path reuses `run_categorical_eval`, clamping logits
|
| 205 |
-
to the available letters before scoring.
|
| 206 |
-
|
| 207 |
-
### 5. Training Script (`run_aquarat_small.sh`)
|
| 208 |
-
|
| 209 |
-
**What changed vs upstream nanochat**:
|
| 210 |
-
|
| 211 |
-
```bash
|
| 212 |
-
# (Optional) Cache the dataset locally as JSONL
|
| 213 |
-
python -m scripts.prepare_aqua --output_dir "$NANOCHAT_BASE_DIR/aqua"
|
| 214 |
-
|
| 215 |
-
# Mid-training now samples from the AQuA mixture
|
| 216 |
-
torchrun -m scripts.mid_train -- --run=demo --num_iterations=200
|
| 217 |
-
|
| 218 |
-
# SFT stage emphasises AQuA problems
|
| 219 |
-
torchrun -m scripts.sft_train -- --run=demo --aqua_train_examples=20000
|
| 220 |
-
|
| 221 |
-
# RL fine-tuning rewards the correct letter on AQuA-RAT
|
| 222 |
-
torchrun -m scripts.chat_rl -- --run=demo --temperature=0.7 --max_new_tokens=64
|
| 223 |
-
```
|
| 224 |
-
|
| 225 |
-
- **`tasks/aqua.py`** loads AQuA-RAT either from Hugging Face or the cached JSONL
|
| 226 |
-
splits, formats questions as conversations, and scores completions by letter.
|
| 227 |
-
- **`scripts/mid_train.py`** extends the original Reasoning+Chat mixture with a
|
| 228 |
-
50k slice of AQuA so the model sees multiple-choice algebra earlier.
|
| 229 |
-
- **`scripts/chat_sft.py`** replaces the GSM8K component with AQuA, keeping ARC,
|
| 230 |
-
SmolTalk, and identity prompts for general chat coverage.
|
| 231 |
-
- **`scripts/chat_rl.py`** retools the GRPO loop to sample, reward, and evaluate
|
| 232 |
-
AQuA answers (categorical accuracy instead of GSM8K free-form math).
|
| 233 |
-
- **`scripts/chat_eval.py`** registers the new AQuA task so `chat_eval` can report
|
| 234 |
-
categorical accuracy alongside ARC/MMLU/GSM8K/HumanEval.
|
| 235 |
-
|
| 236 |
-
---
|
| 237 |
-
|
| 238 |
-
## Training Pipeline
|
| 239 |
-
|
| 240 |
-
### Stage 1: Base Pretraining (50-60% of time)
|
| 241 |
-
|
| 242 |
-
**What happens**: Model learns language from scratch on FineWeb corpus
|
| 243 |
-
|
| 244 |
-
```bash
|
| 245 |
-
torchrun --nproc_per_node=8 -m scripts.base_train -- --depth=8
|
| 246 |
-
```
|
| 247 |
-
|
| 248 |
-
**Duration**: 1.5-2 hours on 8x H100
|
| 249 |
-
**Output**: Base checkpoint with general language understanding
|
| 250 |
-
**Metrics**: Validation loss, CORE benchmark scores
|
| 251 |
-
|
| 252 |
-
### Stage 2: Mid-Training (12-15% of time)
|
| 253 |
-
|
| 254 |
-
**What happens**: Teach conversation format and special tokens
|
| 255 |
-
|
| 256 |
-
```bash
|
| 257 |
-
torchrun --nproc_per_node=8 -m scripts.mid_train
|
| 258 |
-
```
|
| 259 |
-
|
| 260 |
-
**Duration**: 30 minutes
|
| 261 |
-
**Output**: Conversational checkpoint
|
| 262 |
-
**Metrics**: Format adherence, tool use capability
|
| 263 |
-
|
| 264 |
-
### Stage 3: Supervised Fine-Tuning (12-15% of time)
|
| 265 |
-
|
| 266 |
-
**What happens**: Fine-tune on AQuA-RAT with ground-truth solutions
|
| 267 |
-
|
| 268 |
-
```bash
|
| 269 |
-
torchrun --nproc_per_node=8 -m scripts.sft_train -- \
|
| 270 |
-
--aqua_train_examples=20000 \
|
| 271 |
-
--aqua_val_examples=254
|
| 272 |
-
```
|
| 273 |
-
|
| 274 |
-
**Duration**: 30 minutes
|
| 275 |
-
**Output**: AQuA-tuned checkpoint
|
| 276 |
-
**Metrics**: Dev set accuracy (categorical)
|
| 277 |
-
|
| 278 |
-
### Stage 4: Reinforcement Learning (12-15% of time)
|
| 279 |
-
|
| 280 |
-
**What happens**: Policy gradient learning with GRPO algorithm
|
| 281 |
-
|
| 282 |
-
```bash
|
| 283 |
-
torchrun --nproc_per_node=1 -m scripts.chat_rl -- \
|
| 284 |
-
--temperature=0.7 \
|
| 285 |
-
--max_new_tokens=64
|
| 286 |
-
```
|
| 287 |
-
|
| 288 |
-
**Duration**: 30 minutes
|
| 289 |
-
**Algorithm**: Group Relative Policy Optimization (GRPO)
|
| 290 |
-
**Reward**: +1.0 for correct letter, +0.1 for valid letter format
|
| 291 |
-
**Output**: RL-optimized checkpoint
|
| 292 |
-
|
| 293 |
-
**Logged metrics**:
|
| 294 |
-
- `rl/acc` - Accuracy on training samples
|
| 295 |
-
- `rl/mean_reward` - Average reward per generation
|
| 296 |
-
- `rl/kl_letter_mean` - KL divergence at decision point
|
| 297 |
-
- `rl/kl_sequence_mean` - Full sequence KL
|
| 298 |
-
- `rl/letter_margin_mean` - Confidence (logit gap)
|
| 299 |
-
- `attn/entropy_mean` - Attention mechanism patterns
|
| 300 |
-
|
| 301 |
-
---
|
| 302 |
-
|
| 303 |
-
## Quick Start
|
| 304 |
-
|
| 305 |
-
### Repo Setup & Rust Toolchain
|
| 306 |
-
|
| 307 |
-
- Clone with submodules so the `rustbpe` tokenizer sources are present:
|
| 308 |
-
```bash
|
| 309 |
-
git clone --recurse-submodules https://github.com/HarleyCoops/nanochatAquaRat.git
|
| 310 |
-
```
|
| 311 |
-
For existing clones run `git submodule update --init --recursive` before building.
|
| 312 |
-
- Install Rust (needed for the tokenizer build). On Linux/macOS follow [https://rustup.rs](https://rustup.rs). On Windows, after installing rustup, ensure the toolchain is MSVC x86\_64 and the cargo bin directory is on `PATH`:
|
| 313 |
-
```powershell
|
| 314 |
-
$env:Path += ";$env:USERPROFILE\.cargo\bin"
|
| 315 |
-
setx PATH "$env:Path"
|
| 316 |
-
setx CARGO_HOME "$env:USERPROFILE\.cargo"
|
| 317 |
-
setx RUSTUP_HOME "$env:USERPROFILE\.rustup"
|
| 318 |
-
rustup set default-host x86_64-pc-windows-msvc
|
| 319 |
-
rustup default stable-x86_64-pc-windows-msvc
|
| 320 |
-
cargo --version
|
| 321 |
-
rustup --version
|
| 322 |
-
```
|
| 323 |
-
- Build the tokenizer once per machine:
|
| 324 |
-
```bash
|
| 325 |
-
uv run maturin develop
|
| 326 |
-
```
|
| 327 |
-
|
| 328 |
-
### Option 1: Lambda Labs Cloud (Automated)
|
| 329 |
-
|
| 330 |
-
Use the automation helper for one-command deployment:
|
| 331 |
-
|
| 332 |
-
```bash
|
| 333 |
-
# Set credentials
|
| 334 |
-
export LAMBDA_API_KEY='your-lambda-api-key'
|
| 335 |
-
export WANDB_API_KEY='your-wandb-api-key'
|
| 336 |
-
|
| 337 |
-
# Launch with auto-start
|
| 338 |
-
python scripts/launch_lambda_training.py \
|
| 339 |
-
--ssh-key-name your_lambda_ssh_key \
|
| 340 |
-
--instance-type gpu_8x_h100_sxm5 \
|
| 341 |
-
--region us-west-1 \
|
| 342 |
-
--auto-start \
|
| 343 |
-
--inject-env WANDB_API_KEY
|
| 344 |
-
```
|
| 345 |
-
|
| 346 |
-
The script provisions the instance, clones this repository, sets up environment variables, and starts training in a tmux session.
|
| 347 |
-
|
| 348 |
-
**Monitor training**:
|
| 349 |
-
```bash
|
| 350 |
-
# SSH to instance
|
| 351 |
-
ssh ubuntu@<INSTANCE_IP>
|
| 352 |
-
|
| 353 |
-
# Attach to tmux session
|
| 354 |
-
tmux attach -t nanochat-train
|
| 355 |
-
|
| 356 |
-
# Or view logs
|
| 357 |
-
tail -f ~/nanochatAquaRat/training.log
|
| 358 |
-
```
|
| 359 |
-
|
| 360 |
-
### Option 2: Hyperbolic Labs Cloud (Automated)
|
| 361 |
-
|
| 362 |
-
Spin up on-demand GPUs via Hyperbolic's marketplace API:
|
| 363 |
-
|
| 364 |
-
```bash
|
| 365 |
-
# Set credentials
|
| 366 |
-
export HYPERBOLIC_API_KEY='your-hyperbolic-api-key'
|
| 367 |
-
export WANDB_API_KEY='your-wandb-api-key'
|
| 368 |
-
|
| 369 |
-
# Launch with auto-start
|
| 370 |
-
python scripts/launch_hyperbolic_training.py \
|
| 371 |
-
--gpu-count 1 \
|
| 372 |
-
--region us-east \
|
| 373 |
-
--auto-start \
|
| 374 |
-
--inject-env WANDB_API_KEY
|
| 375 |
-
```
|
| 376 |
-
|
| 377 |
-
The launcher discovers an available node (respecting `--region`, `--supplier`, or `--max-price` filters), provisions it, copies your `.env`, and optionally starts training in tmux. Use `--list` to inspect available marketplace inventory without launching.
|
| 378 |
-
|
| 379 |
-
### Option 3: Lambda Labs Cloud (Manual)
|
| 380 |
-
|
| 381 |
-
For step-by-step control, see [LAMBDA_MANUAL_SETUP.md](LAMBDA_MANUAL_SETUP.md).
|
| 382 |
-
|
| 383 |
-
**Quick summary**:
|
| 384 |
-
1. Launch instance at https://cloud.lambdalabs.com/instances
|
| 385 |
-
2. SSH to instance: `ssh ubuntu@<IP>`
|
| 386 |
-
3. Clone repo: `git clone <repo-url> && cd nanochatAquaRat`
|
| 387 |
-
4. Set up credentials: `echo "WANDB_API_KEY=..." > .env`
|
| 388 |
-
5. Run training: `bash run_aquarat_small.sh`
|
| 389 |
-
|
| 390 |
-
### Option 4: Hyperbolic VM (Manual)
|
| 391 |
-
|
| 392 |
-
For marketplace nodes without automation access, follow this lightweight bootstrap:
|
| 393 |
-
|
| 394 |
-
1. Provision a GPU VM from the Hyperbolic console and copy the SSH command (including `-p <port>` and username).
|
| 395 |
-
2. SSH in and install prerequisites:
|
| 396 |
-
```bash
|
| 397 |
-
sudo apt-get update
|
| 398 |
-
sudo apt-get install -y git curl unzip build-essential python3 python3-venv tmux
|
| 399 |
-
git clone https://github.com/HarleyCoops/nanochatAquaRat.git
|
| 400 |
-
cd nanochatAquaRat
|
| 401 |
-
```
|
| 402 |
-
3. Create `.env` with the required keys (WANDB, GCS bucket, AQUA path) and upload your GCP service-account JSON to the VM, e.g. `scp -P <port> C:\path\to\credentials.json user@<ip>:/home/user/gcp-sa.json`.
|
| 403 |
-
4. Install tooling and build the tokenizer:
|
| 404 |
-
```bash
|
| 405 |
-
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 406 |
-
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
| 407 |
-
source "$HOME/.cargo/env"
|
| 408 |
-
export PATH="$HOME/.local/bin:$PATH"
|
| 409 |
-
uv venv && uv sync --extra gpu
|
| 410 |
-
source .venv/bin/activate
|
| 411 |
-
uv run maturin develop
|
| 412 |
-
uv run python -m scripts.tok_train
|
| 413 |
-
```
|
| 414 |
-
5. Install the Google Cloud SDK, authenticate, and stage the cached AQuA splits (or regenerate them):
|
| 415 |
-
```bash
|
| 416 |
-
curl -sSL https://sdk.cloud.google.com | bash
|
| 417 |
-
source "$HOME/.bashrc"
|
| 418 |
-
gcloud auth login --no-launch-browser
|
| 419 |
-
gcloud config set project <your-project-id>
|
| 420 |
-
gcloud storage cp gs://nanochat-aquarat-datasets/datasets/aqua/aqua_cache.zip .
|
| 421 |
-
unzip -o aqua_cache.zip -d ~/aqua_cache
|
| 422 |
-
export AQUA_DATA_DIR=$HOME/aqua_cache
|
| 423 |
-
```
|
| 424 |
-
6. Fetch the identity conversation bundle (required for SFT) and the evaluation bundle once so CORE metrics don’t fail:
|
| 425 |
-
```bash
|
| 426 |
-
cd ~/.cache/nanochat
|
| 427 |
-
curl -L -o identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
|
| 428 |
-
curl -L -o eval_bundle.zip https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip
|
| 429 |
-
unzip -q eval_bundle.zip && rm eval_bundle.zip
|
| 430 |
-
cd ~/nanochatAquaRat
|
| 431 |
-
```
|
| 432 |
-
7. Launch the desired script, e.g. `CUDA_VISIBLE_DEVICES=0 bash run_aquarat_lite.sh` or the full `run_aquarat_small.sh`.
|
| 433 |
-
8. Monitor training via tmux/W&B and terminate the VM from Hyperbolic when the run finishes to stop billing.
|
| 434 |
-
|
| 435 |
-
### Alternative: Simplified Launcher Script
|
| 436 |
-
|
| 437 |
-
A simplified launcher is also available:
|
| 438 |
-
|
| 439 |
-
```bash
|
| 440 |
-
export LAMBDA_API_KEY='your-key'
|
| 441 |
-
export WANDB_API_KEY='your-key'
|
| 442 |
-
|
| 443 |
-
python launch_lambda.py \
|
| 444 |
-
--instance-type gpu_8x_h100_sxm5 \
|
| 445 |
-
--region us-west-1
|
| 446 |
-
```
|
| 447 |
-
|
| 448 |
-
See [QUICKSTART.md](QUICKSTART.md) for details.
|
| 449 |
-
|
| 450 |
### Option 5: Local/Custom Setup
|
| 451 |
|
| 452 |
```bash
|
|
@@ -464,175 +464,191 @@ bash run_aquarat_small.sh
|
|
| 464 |
- 40GB+ GPU memory per GPU
|
| 465 |
- ~100GB disk space
|
| 466 |
|
| 467 |
-
|
| 468 |
-
|
| 469 |
-
## File Structure
|
| 470 |
|
| 471 |
-
|
| 472 |
-
nanochatAquaRat/
|
| 473 |
-
├── nanochat/… # Vendored upstream nanochat package
|
| 474 |
-
├── scripts/
|
| 475 |
-
│ ├── base_train.py # Base pretraining stage
|
| 476 |
-
│ ├── mid_train.py # Mid-training (now includes AQuA)
|
| 477 |
-
│ ├── chat_sft.py # Chat SFT pipeline
|
| 478 |
-
│ ├── sft_train.py # Shim so `-m scripts.sft_train` still works
|
| 479 |
-
│ ├── chat_rl.py # Reinforcement learning on AQuA-RAT
|
| 480 |
-
│ ├── chat_eval.py # Evaluation harness (adds AQuA task)
|
| 481 |
-
│ ├── prepare_aqua.py # AQuA-RAT JSONL exporter
|
| 482 |
-
│ ├── launch_lambda_training.py # Lambda Labs automation
|
| 483 |
-
│ ├── launch_hyperbolic_training.py # Hyperbolic Labs automation
|
| 484 |
-
│ └── upload_to_gcs.sh # Artifact helper
|
| 485 |
-
├── tasks/
|
| 486 |
-
│ ├── aqua.py # AQuA-RAT task implementation
|
| 487 |
-
│ ├── arc.py / gsm8k.py / mmlu.py # Other reasoning tasks
|
| 488 |
-
│ └── …
|
| 489 |
-
├── run_aquarat_small.sh # End-to-end orchestration
|
| 490 |
-
├── pyproject.toml / uv.lock # Environment definitions
|
| 491 |
-
└── README.md
|
| 492 |
-
```
|
| 493 |
-
### Summary of Code Changes
|
| 494 |
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
| `launch_lambda.py` / `scripts/launch_lambda_training.py` | EXISTING | Lambda Labs support retained |
|
| 507 |
|
| 508 |
---
|
| 509 |
|
| 510 |
-
##
|
| 511 |
-
|
| 512 |
-
All metrics stream to [Weights & Biases](https://wandb.ai) in real-time:
|
| 513 |
-
|
| 514 |
-
**Training Metrics**:
|
| 515 |
-
- Loss curves (pretraining, SFT, RL)
|
| 516 |
-
- Learning rate schedules
|
| 517 |
-
- Gradient norms
|
| 518 |
-
|
| 519 |
-
**RL Metrics**:
|
| 520 |
-
- Policy performance (accuracy, rewards)
|
| 521 |
-
- KL divergence from initial policy
|
| 522 |
-
- Letter-choice distributions (A-E)
|
| 523 |
-
- Confidence margins
|
| 524 |
-
|
| 525 |
-
**Interpretability**:
|
| 526 |
-
- Attention heatmaps per layer
|
| 527 |
-
- Entropy evolution across training
|
| 528 |
-
- Token-level attention weights
|
| 529 |
|
| 530 |
-
Example W&B dashboard:
|
| 531 |
-
```
|
| 532 |
-
rl/acc ━━━━━━━━━━ 0.45
|
| 533 |
-
rl/kl_letter_mean ━━━━━━━━━━ 0.12
|
| 534 |
-
rl/letter_margin_mean ━━━━━━━━━━ 2.34
|
| 535 |
-
attn/entropy_mean ━━━━━━━━━━ 3.21
|
| 536 |
```
|
| 537 |
-
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
#
|
| 541 |
-
|
| 542 |
-
#
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
-
|
| 582 |
-
|
| 583 |
-
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
-
|
| 587 |
-
-
|
| 588 |
-
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
-
|
| 592 |
-
-
|
| 593 |
-
-
|
| 594 |
-
|
| 595 |
-
|
| 596 |
-
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
-
|
| 602 |
-
|
| 603 |
-
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
| 608 |
-
|
| 609 |
-
|
| 610 |
-
|
| 611 |
-
|
| 612 |
-
|
| 613 |
-
|
| 614 |
-
|
| 615 |
-
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
-
|
| 624 |
-
|
| 625 |
-
|
| 626 |
-
|
| 627 |
-
-
|
| 628 |
-
-
|
| 629 |
-
-
|
| 630 |
-
-
|
| 631 |
-
|
| 632 |
-
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
-
|
| 638 |
-
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 13 |
---
|
| 14 |
|
| 15 |
<div align="center">
|
| 16 |
+
|
| 17 |
+

|
| 18 |
+
|
| 19 |
+
# nanochatAquaRat
|
| 20 |
+
|
| 21 |
+
**Training Language Models with Reinforcement Learning on Mathematical Reasoning**
|
| 22 |
+
|
| 23 |
+
[](https://github.com/HarleyCoops/nanochatAquaRat)
|
| 24 |
+
[](LICENSE)
|
| 25 |
+
[](https://www.python.org/downloads/)
|
| 26 |
+
|
| 27 |
+
A modified version of [nanochat](https://github.com/karpathy/nanochat) trained with reinforcement learning on the [DeepMind AQuA-RAT dataset](https://huggingface.co/datasets/deepmind/aqua_rat) for algebraic reasoning and multiple-choice problem solving.
|
| 28 |
+
|
| 29 |
+
[Quick Start](#quick-start) • [Dataset](#dataset-structure) • [Modifications](#modifications-from-base-nanochat) • [Training](#training-pipeline) • [Results](#results)
|
| 30 |
+
|
| 31 |
+
</div>
|
| 32 |
+
|
| 33 |
+
---
|
| 34 |
+
|
| 35 |
+
## Table of Contents
|
| 36 |
+
|
| 37 |
+
- [Overview](#overview)
|
| 38 |
+
- [The Base: nanochat Framework](#the-base-nanochat-framework)
|
| 39 |
+
- [Dataset Structure](#dataset-structure)
|
| 40 |
+
- [Modifications from Base nanochat](#modifications-from-base-nanochat)
|
| 41 |
+
- [Training Pipeline](#training-pipeline)
|
| 42 |
+
- [Quick Start](#quick-start)
|
| 43 |
+
- [File Structure](#file-structure)
|
| 44 |
+
- [Monitoring & Visualization](#monitoring--visualization)
|
| 45 |
+
- [Results](#results)
|
| 46 |
+
|
| 47 |
+
---
|
| 48 |
+
|
| 49 |
+
## Overview
|
| 50 |
+
|
| 51 |
+
This project adapts the **nanochat** training framework (originally designed for GSM8K numerical reasoning) to work with **AQuA-RAT** (Algebra Question Answering with Rationales), a dataset of ~97,000 algebraic word problems with multiple-choice answers (A-E) and natural language solution rationales.
|
| 52 |
+
|
| 53 |
+
### Why This Matters
|
| 54 |
+
|
| 55 |
+
- **Domain Transfer**: Demonstrates how to adapt a mathematical reasoning pipeline from free-form numeric answers to multiple-choice format
|
| 56 |
+
- **RL on Math**: Implements GRPO-style reinforcement learning with reward shaping for categorical outputs
|
| 57 |
+
- **Mechanistic Interpretability**: Integrates attention analysis during training to understand model reasoning patterns
|
| 58 |
+
- **Production-Ready**: Includes automated Lambda Labs and Hyperbolic Labs deployment helpers for cloud GPU training
|
| 59 |
+
|
| 60 |
+
### Key Results
|
| 61 |
+
|
| 62 |
+
| Model | Parameters | Training Time | AQuA-RAT Dev Accuracy |
|
| 63 |
+
|-------|------------|---------------|----------------------|
|
| 64 |
+
| depth-8 | ~60M | 3-4 hours | 30-50% |
|
| 65 |
+
| depth-20 | ~561M | 6-8 hours | 40-60% |
|
| 66 |
+
|
| 67 |
+
---
|
| 68 |
+
|
| 69 |
+
## The Base: nanochat Framework
|
| 70 |
+
|
| 71 |
+
**nanochat** is a minimalist yet complete pipeline for training transformer language models from scratch, created by Andrej Karpathy. It implements:
|
| 72 |
+
|
| 73 |
+
- **Custom tokenizer**: BPE tokenizer written in Rust for performance
|
| 74 |
+
- **Training stages**: Pretraining → Mid-training → SFT → RL
|
| 75 |
+
- **Evaluation suite**: CORE benchmarks and task-specific metrics
|
| 76 |
+
- **Optimizations**: Memory-efficient training, gradient accumulation, distributed training
|
| 77 |
+
|
| 78 |
+
**Original focus**: Training on GSM8K (Grade School Math 8K) with free-form numeric answers.
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
---
|
| 82 |
+
|
| 83 |
+
## Dataset Structure
|
| 84 |
+
|
| 85 |
+
### AQuA-RAT Format
|
| 86 |
+
|
| 87 |
+
The [DeepMind AQuA-RAT dataset](https://github.com/deepmind/AQuA) contains algebraic reasoning problems in JSON format:
|
| 88 |
+
|
| 89 |
+
```json
|
| 90 |
+
{
|
| 91 |
+
"question": "A person is traveling at 20 km/hr and reached his destiny in 2.5 hr then find the distance?",
|
| 92 |
+
"options": [
|
| 93 |
+
"A) 53 km",
|
| 94 |
+
"B) 55 km",
|
| 95 |
+
"C) 52 km",
|
| 96 |
+
"D) 60 km",
|
| 97 |
+
"E) 50 km"
|
| 98 |
+
],
|
| 99 |
+
"rationale": "The distance that the person traveled = 20 * 2.5 = 50 km. Answer: E",
|
| 100 |
+
"correct": "E"
|
| 101 |
+
}
|
| 102 |
+
```
|
| 103 |
+
|
| 104 |
+
**Dataset splits**:
|
| 105 |
+
- Training: 97,467 problems
|
| 106 |
+
- Development: 254 problems
|
| 107 |
+
- Test: 254 problems
|
| 108 |
+
|
| 109 |
+
**Key characteristics**:
|
| 110 |
+
- Multiple-choice (A-E) format
|
| 111 |
+
- Algebraic word problems
|
| 112 |
+
- Natural language rationales
|
| 113 |
+
- Topics: arithmetic, algebra, geometry, probability
|
| 114 |
+
|
| 115 |
+
### Comparison: GSM8K vs AQuA-RAT
|
| 116 |
+
|
| 117 |
+
| Aspect | GSM8K (Original) | AQuA-RAT (This Project) |
|
| 118 |
+
|--------|------------------|-------------------------|
|
| 119 |
+
| **Format** | Free-form numeric | Multiple choice (A-E) |
|
| 120 |
+
| **Answer** | Single number | Letter choice |
|
| 121 |
+
| **Size** | 8,500 problems | 97,700 problems |
|
| 122 |
+
| **Difficulty** | Elementary school | High school algebra |
|
| 123 |
+
| **Rationale** | Step-by-step | Natural language |
|
| 124 |
+
| **Evaluation** | Exact match on number | Categorical accuracy |
|
| 125 |
+
|
| 126 |
+
---
|
| 127 |
+
|
| 128 |
+
## Modifications from Base nanochat
|
| 129 |
+
|
| 130 |
+
To adapt nanochat from GSM8K to AQuA-RAT, we modified the following components:
|
| 131 |
+
|
| 132 |
+
### 1. Dataset Loader (`scripts/prepare_aqua.py`)
|
| 133 |
+
|
| 134 |
+
**Created new file** to download and format AQuA-RAT:
|
| 135 |
+
|
| 136 |
+
```python
|
| 137 |
+
# New file: scripts/prepare_aqua.py
|
| 138 |
+
### 1. Dataset Preparation (`scripts/prepare_aqua.py`)
|
| 139 |
+
|
| 140 |
+
- Uses `datasets.load_dataset("deepmind/aqua_rat")` and optionally caps split sizes.
|
| 141 |
+
- Emits JSONL files (`train.jsonl`, `validation.jsonl`, `test.jsonl`) compatible with
|
| 142 |
+
the conversation schema used throughout nanochat.
|
| 143 |
+
- Defaults to `~/.cache/nanochat/aqua`, but accepts `--output_dir` overrides so
|
| 144 |
+
launchers can bundle their own artifact.
|
| 145 |
+
|
| 146 |
+
```python
|
| 147 |
+
def format_example(row):
|
| 148 |
+
options = row["options"]
|
| 149 |
+
assistant_content = [
|
| 150 |
+
{"type": "text", "text": row["rationale"].strip()},
|
| 151 |
+
{"type": "text", "text": f"Answer: {row['correct'].strip().upper()}"},
|
| 152 |
+
]
|
| 153 |
+
return {
|
| 154 |
+
"messages": [
|
| 155 |
+
{"role": "user", "content": _render_user_prompt(row["question"], options)},
|
| 156 |
+
{"role": "assistant", "content": assistant_content},
|
| 157 |
+
],
|
| 158 |
+
"letters": letters,
|
| 159 |
+
"answer_letter": correct,
|
| 160 |
+
}
|
| 161 |
+
```
|
| 162 |
+
|
| 163 |
+
### 2. Task Module (`tasks/aqua.py`)
|
| 164 |
+
|
| 165 |
+
- Accepts optional `data_dir` (or `AQUA_DATA_DIR` / `NANOCHAT_AQUA_DIR`) so the task
|
| 166 |
+
can read the cached JSONL; otherwise falls back to Hugging Face.
|
| 167 |
+
- Provides `_render_user_prompt` to format the question/options using the common
|
| 168 |
+
multiple-choice helper and `_extract_letter` to score completions.
|
| 169 |
+
- Returns conversations whose assistant messages include both the rationale and a
|
| 170 |
+
final `Answer: <LETTER>` line for SFT, while `evaluate()` only cares about the letter.
|
| 171 |
+
|
| 172 |
+
```python
|
| 173 |
+
def _extract_letter(text, default=None):
|
| 174 |
+
answer_match = re.search(r"answer\s*[:\-]\s*([A-E])", text, flags=re.IGNORECASE)
|
| 175 |
+
if answer_match:
|
| 176 |
+
return answer_match.group(1).upper()
|
| 177 |
+
match = LETTER_RE.search(text)
|
| 178 |
+
return match.group(1).upper() if match else default
|
| 179 |
+
```
|
| 180 |
+
|
| 181 |
+
**Key differences from GSM8K**:
|
| 182 |
+
- Numeric extraction → Letter extraction
|
| 183 |
+
- Free-form answer → Fixed choices A-E
|
| 184 |
+
- Exact number match → Categorical match
|
| 185 |
+
|
| 186 |
+
### 3. RL Training (`scripts/chat_rl.py`)
|
| 187 |
+
|
| 188 |
+
**Modified** to support both GSM8K and AQuA-RAT:
|
| 189 |
+
|
| 190 |
+
Key updates:
|
| 191 |
+
|
| 192 |
+
- `train_task` / `val_task` now instantiate `AQUA(...)` instead of `GSM8K(...)`.
|
| 193 |
+
- Rewards reuse the task's `evaluate()` helper so any completion containing
|
| 194 |
+
“Answer: X” (or the first bare letter) is scored correctly.
|
| 195 |
+
- The validation helper became `run_aqua_eval`, still reporting pass@k accuracy
|
| 196 |
+
across sampled completions.
|
| 197 |
+
- CLI overrides remain the same because the script continues to rely on the
|
| 198 |
+
nanochat configurator (`--run`, `--temperature`, `--max_new_tokens`, …).
|
| 199 |
+
|
| 200 |
+
### 4. Evaluation (`scripts/chat_eval.py`)
|
| 201 |
+
|
| 202 |
+
- Registered `'AQUA'` in the task registry so `-a AQUA` just works.
|
| 203 |
+
- Added a 20% random-guess baseline when aggregating the ChatCORE metric.
|
| 204 |
+
- The categorical evaluation path reuses `run_categorical_eval`, clamping logits
|
| 205 |
+
to the available letters before scoring.
|
| 206 |
+
|
| 207 |
+
### 5. Training Script (`run_aquarat_small.sh`)
|
| 208 |
+
|
| 209 |
+
**What changed vs upstream nanochat**:
|
| 210 |
+
|
| 211 |
+
```bash
|
| 212 |
+
# (Optional) Cache the dataset locally as JSONL
|
| 213 |
+
python -m scripts.prepare_aqua --output_dir "$NANOCHAT_BASE_DIR/aqua"
|
| 214 |
+
|
| 215 |
+
# Mid-training now samples from the AQuA mixture
|
| 216 |
+
torchrun -m scripts.mid_train -- --run=demo --num_iterations=200
|
| 217 |
+
|
| 218 |
+
# SFT stage emphasises AQuA problems
|
| 219 |
+
torchrun -m scripts.sft_train -- --run=demo --aqua_train_examples=20000
|
| 220 |
+
|
| 221 |
+
# RL fine-tuning rewards the correct letter on AQuA-RAT
|
| 222 |
+
torchrun -m scripts.chat_rl -- --run=demo --temperature=0.7 --max_new_tokens=64
|
| 223 |
+
```
|
| 224 |
+
|
| 225 |
+
- **`tasks/aqua.py`** loads AQuA-RAT either from Hugging Face or the cached JSONL
|
| 226 |
+
splits, formats questions as conversations, and scores completions by letter.
|
| 227 |
+
- **`scripts/mid_train.py`** extends the original Reasoning+Chat mixture with a
|
| 228 |
+
50k slice of AQuA so the model sees multiple-choice algebra earlier.
|
| 229 |
+
- **`scripts/chat_sft.py`** replaces the GSM8K component with AQuA, keeping ARC,
|
| 230 |
+
SmolTalk, and identity prompts for general chat coverage.
|
| 231 |
+
- **`scripts/chat_rl.py`** retools the GRPO loop to sample, reward, and evaluate
|
| 232 |
+
AQuA answers (categorical accuracy instead of GSM8K free-form math).
|
| 233 |
+
- **`scripts/chat_eval.py`** registers the new AQuA task so `chat_eval` can report
|
| 234 |
+
categorical accuracy alongside ARC/MMLU/GSM8K/HumanEval.
|
| 235 |
+
|
| 236 |
+
---
|
| 237 |
+
|
| 238 |
+
## Training Pipeline
|
| 239 |
+
|
| 240 |
+
### Stage 1: Base Pretraining (50-60% of time)
|
| 241 |
+
|
| 242 |
+
**What happens**: Model learns language from scratch on FineWeb corpus
|
| 243 |
+
|
| 244 |
+
```bash
|
| 245 |
+
torchrun --nproc_per_node=8 -m scripts.base_train -- --depth=8
|
| 246 |
+
```
|
| 247 |
+
|
| 248 |
+
**Duration**: 1.5-2 hours on 8x H100
|
| 249 |
+
**Output**: Base checkpoint with general language understanding
|
| 250 |
+
**Metrics**: Validation loss, CORE benchmark scores
|
| 251 |
+
|
| 252 |
+
### Stage 2: Mid-Training (12-15% of time)
|
| 253 |
+
|
| 254 |
+
**What happens**: Teach conversation format and special tokens
|
| 255 |
+
|
| 256 |
+
```bash
|
| 257 |
+
torchrun --nproc_per_node=8 -m scripts.mid_train
|
| 258 |
+
```
|
| 259 |
+
|
| 260 |
+
**Duration**: 30 minutes
|
| 261 |
+
**Output**: Conversational checkpoint
|
| 262 |
+
**Metrics**: Format adherence, tool use capability
|
| 263 |
+
|
| 264 |
+
### Stage 3: Supervised Fine-Tuning (12-15% of time)
|
| 265 |
+
|
| 266 |
+
**What happens**: Fine-tune on AQuA-RAT with ground-truth solutions
|
| 267 |
+
|
| 268 |
+
```bash
|
| 269 |
+
torchrun --nproc_per_node=8 -m scripts.sft_train -- \
|
| 270 |
+
--aqua_train_examples=20000 \
|
| 271 |
+
--aqua_val_examples=254
|
| 272 |
+
```
|
| 273 |
+
|
| 274 |
+
**Duration**: 30 minutes
|
| 275 |
+
**Output**: AQuA-tuned checkpoint
|
| 276 |
+
**Metrics**: Dev set accuracy (categorical)
|
| 277 |
+
|
| 278 |
+
### Stage 4: Reinforcement Learning (12-15% of time)
|
| 279 |
+
|
| 280 |
+
**What happens**: Policy gradient learning with GRPO algorithm
|
| 281 |
+
|
| 282 |
+
```bash
|
| 283 |
+
torchrun --nproc_per_node=1 -m scripts.chat_rl -- \
|
| 284 |
+
--temperature=0.7 \
|
| 285 |
+
--max_new_tokens=64
|
| 286 |
+
```
|
| 287 |
+
|
| 288 |
+
**Duration**: 30 minutes
|
| 289 |
+
**Algorithm**: Group Relative Policy Optimization (GRPO)
|
| 290 |
+
**Reward**: +1.0 for correct letter, +0.1 for valid letter format
|
| 291 |
+
**Output**: RL-optimized checkpoint
|
| 292 |
+
|
| 293 |
+
**Logged metrics**:
|
| 294 |
+
- `rl/acc` - Accuracy on training samples
|
| 295 |
+
- `rl/mean_reward` - Average reward per generation
|
| 296 |
+
- `rl/kl_letter_mean` - KL divergence at decision point
|
| 297 |
+
- `rl/kl_sequence_mean` - Full sequence KL
|
| 298 |
+
- `rl/letter_margin_mean` - Confidence (logit gap)
|
| 299 |
+
- `attn/entropy_mean` - Attention mechanism patterns
|
| 300 |
+
|
| 301 |
+
---
|
| 302 |
+
|
| 303 |
+
## Quick Start
|
| 304 |
+
|
| 305 |
+
### Repo Setup & Rust Toolchain
|
| 306 |
+
|
| 307 |
+
- Clone with submodules so the `rustbpe` tokenizer sources are present:
|
| 308 |
+
```bash
|
| 309 |
+
git clone --recurse-submodules https://github.com/HarleyCoops/nanochatAquaRat.git
|
| 310 |
+
```
|
| 311 |
+
For existing clones run `git submodule update --init --recursive` before building.
|
| 312 |
+
- Install Rust (needed for the tokenizer build). On Linux/macOS follow [https://rustup.rs](https://rustup.rs). On Windows, after installing rustup, ensure the toolchain is MSVC x86\_64 and the cargo bin directory is on `PATH`:
|
| 313 |
+
```powershell
|
| 314 |
+
$env:Path += ";$env:USERPROFILE\.cargo\bin"
|
| 315 |
+
setx PATH "$env:Path"
|
| 316 |
+
setx CARGO_HOME "$env:USERPROFILE\.cargo"
|
| 317 |
+
setx RUSTUP_HOME "$env:USERPROFILE\.rustup"
|
| 318 |
+
rustup set default-host x86_64-pc-windows-msvc
|
| 319 |
+
rustup default stable-x86_64-pc-windows-msvc
|
| 320 |
+
cargo --version
|
| 321 |
+
rustup --version
|
| 322 |
+
```
|
| 323 |
+
- Build the tokenizer once per machine:
|
| 324 |
+
```bash
|
| 325 |
+
uv run maturin develop
|
| 326 |
+
```
|
| 327 |
+
|
| 328 |
+
### Option 1: Lambda Labs Cloud (Automated)
|
| 329 |
+
|
| 330 |
+
Use the automation helper for one-command deployment:
|
| 331 |
+
|
| 332 |
+
```bash
|
| 333 |
+
# Set credentials
|
| 334 |
+
export LAMBDA_API_KEY='your-lambda-api-key'
|
| 335 |
+
export WANDB_API_KEY='your-wandb-api-key'
|
| 336 |
+
|
| 337 |
+
# Launch with auto-start
|
| 338 |
+
python scripts/launch_lambda_training.py \
|
| 339 |
+
--ssh-key-name your_lambda_ssh_key \
|
| 340 |
+
--instance-type gpu_8x_h100_sxm5 \
|
| 341 |
+
--region us-west-1 \
|
| 342 |
+
--auto-start \
|
| 343 |
+
--inject-env WANDB_API_KEY
|
| 344 |
+
```
|
| 345 |
+
|
| 346 |
+
The script provisions the instance, clones this repository, sets up environment variables, and starts training in a tmux session.
|
| 347 |
+
|
| 348 |
+
**Monitor training**:
|
| 349 |
+
```bash
|
| 350 |
+
# SSH to instance
|
| 351 |
+
ssh ubuntu@<INSTANCE_IP>
|
| 352 |
+
|
| 353 |
+
# Attach to tmux session
|
| 354 |
+
tmux attach -t nanochat-train
|
| 355 |
+
|
| 356 |
+
# Or view logs
|
| 357 |
+
tail -f ~/nanochatAquaRat/training.log
|
| 358 |
+
```
|
| 359 |
+
|
| 360 |
+
### Option 2: Hyperbolic Labs Cloud (Automated)
|
| 361 |
+
|
| 362 |
+
Spin up on-demand GPUs via Hyperbolic's marketplace API:
|
| 363 |
+
|
| 364 |
+
```bash
|
| 365 |
+
# Set credentials
|
| 366 |
+
export HYPERBOLIC_API_KEY='your-hyperbolic-api-key'
|
| 367 |
+
export WANDB_API_KEY='your-wandb-api-key'
|
| 368 |
+
|
| 369 |
+
# Launch with auto-start
|
| 370 |
+
python scripts/launch_hyperbolic_training.py \
|
| 371 |
+
--gpu-count 1 \
|
| 372 |
+
--region us-east \
|
| 373 |
+
--auto-start \
|
| 374 |
+
--inject-env WANDB_API_KEY
|
| 375 |
+
```
|
| 376 |
+
|
| 377 |
+
The launcher discovers an available node (respecting `--region`, `--supplier`, or `--max-price` filters), provisions it, copies your `.env`, and optionally starts training in tmux. Use `--list` to inspect available marketplace inventory without launching.
|
| 378 |
+
|
| 379 |
+
### Option 3: Lambda Labs Cloud (Manual)
|
| 380 |
+
|
| 381 |
+
For step-by-step control, see [LAMBDA_MANUAL_SETUP.md](LAMBDA_MANUAL_SETUP.md).
|
| 382 |
+
|
| 383 |
+
**Quick summary**:
|
| 384 |
+
1. Launch instance at https://cloud.lambdalabs.com/instances
|
| 385 |
+
2. SSH to instance: `ssh ubuntu@<IP>`
|
| 386 |
+
3. Clone repo: `git clone <repo-url> && cd nanochatAquaRat`
|
| 387 |
+
4. Set up credentials: `echo "WANDB_API_KEY=..." > .env`
|
| 388 |
+
5. Run training: `bash run_aquarat_small.sh`
|
| 389 |
+
|
| 390 |
+
### Option 4: Hyperbolic VM (Manual)
|
| 391 |
+
|
| 392 |
+
For marketplace nodes without automation access, follow this lightweight bootstrap:
|
| 393 |
+
|
| 394 |
+
1. Provision a GPU VM from the Hyperbolic console and copy the SSH command (including `-p <port>` and username).
|
| 395 |
+
2. SSH in and install prerequisites:
|
| 396 |
+
```bash
|
| 397 |
+
sudo apt-get update
|
| 398 |
+
sudo apt-get install -y git curl unzip build-essential python3 python3-venv tmux
|
| 399 |
+
git clone https://github.com/HarleyCoops/nanochatAquaRat.git
|
| 400 |
+
cd nanochatAquaRat
|
| 401 |
+
```
|
| 402 |
+
3. Create `.env` with the required keys (WANDB, GCS bucket, AQUA path) and upload your GCP service-account JSON to the VM, e.g. `scp -P <port> C:\path\to\credentials.json user@<ip>:/home/user/gcp-sa.json`.
|
| 403 |
+
4. Install tooling and build the tokenizer:
|
| 404 |
+
```bash
|
| 405 |
+
curl -LsSf https://astral.sh/uv/install.sh | sh
|
| 406 |
+
curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain stable
|
| 407 |
+
source "$HOME/.cargo/env"
|
| 408 |
+
export PATH="$HOME/.local/bin:$PATH"
|
| 409 |
+
uv venv && uv sync --extra gpu
|
| 410 |
+
source .venv/bin/activate
|
| 411 |
+
uv run maturin develop
|
| 412 |
+
uv run python -m scripts.tok_train
|
| 413 |
+
```
|
| 414 |
+
5. Install the Google Cloud SDK, authenticate, and stage the cached AQuA splits (or regenerate them):
|
| 415 |
+
```bash
|
| 416 |
+
curl -sSL https://sdk.cloud.google.com | bash
|
| 417 |
+
source "$HOME/.bashrc"
|
| 418 |
+
gcloud auth login --no-launch-browser
|
| 419 |
+
gcloud config set project <your-project-id>
|
| 420 |
+
gcloud storage cp gs://nanochat-aquarat-datasets/datasets/aqua/aqua_cache.zip .
|
| 421 |
+
unzip -o aqua_cache.zip -d ~/aqua_cache
|
| 422 |
+
export AQUA_DATA_DIR=$HOME/aqua_cache
|
| 423 |
+
```
|
| 424 |
+
6. Fetch the identity conversation bundle (required for SFT) and the evaluation bundle once so CORE metrics don’t fail:
|
| 425 |
+
```bash
|
| 426 |
+
cd ~/.cache/nanochat
|
| 427 |
+
curl -L -o identity_conversations.jsonl https://karpathy-public.s3.us-west-2.amazonaws.com/identity_conversations.jsonl
|
| 428 |
+
curl -L -o eval_bundle.zip https://karpathy-public.s3.us-west-2.amazonaws.com/eval_bundle.zip
|
| 429 |
+
unzip -q eval_bundle.zip && rm eval_bundle.zip
|
| 430 |
+
cd ~/nanochatAquaRat
|
| 431 |
+
```
|
| 432 |
+
7. Launch the desired script, e.g. `CUDA_VISIBLE_DEVICES=0 bash run_aquarat_lite.sh` or the full `run_aquarat_small.sh`.
|
| 433 |
+
8. Monitor training via tmux/W&B and terminate the VM from Hyperbolic when the run finishes to stop billing.
|
| 434 |
+
|
| 435 |
+
### Option 4: Alternative Launcher Script
|
| 436 |
+
|
| 437 |
+
A simplified launcher is also available:
|
| 438 |
+
|
| 439 |
+
```bash
|
| 440 |
+
export LAMBDA_API_KEY='your-key'
|
| 441 |
+
export WANDB_API_KEY='your-key'
|
| 442 |
+
|
| 443 |
+
python launch_lambda.py \
|
| 444 |
+
--instance-type gpu_8x_h100_sxm5 \
|
| 445 |
+
--region us-west-1
|
| 446 |
+
```
|
| 447 |
+
|
| 448 |
+
See [QUICKSTART.md](QUICKSTART.md) for details.
|
| 449 |
+
|
| 450 |
### Option 5: Local/Custom Setup
|
| 451 |
|
| 452 |
```bash
|
|
|
|
| 464 |
- 40GB+ GPU memory per GPU
|
| 465 |
- ~100GB disk space
|
| 466 |
|
| 467 |
+
## Hugging Face Sync
|
|
|
|
|
|
|
| 468 |
|
| 469 |
+
Keep the GitHub docs mirrored with the Hugging Face model card:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
|
| 471 |
+
1. Edit `README.md` (and any linked docs) as usual.
|
| 472 |
+
2. Stage the release payload locally:
|
| 473 |
+
```bash
|
| 474 |
+
uv run python -m scripts.sync_hf_repo --no-push
|
| 475 |
+
```
|
| 476 |
+
This copies every README dependency into `hf_release/`. The script warns if a referenced file such as `LICENSE` is missing.
|
| 477 |
+
3. Push the staged contents to Hugging Face once you are satisfied:
|
| 478 |
+
```bash
|
| 479 |
+
uv run python -m scripts.sync_hf_repo --repo-id HarleyCooper/nanochatAquaRat
|
| 480 |
+
```
|
| 481 |
+
The command requires prior `huggingface-cli login` (or an `HF_TOKEN` env var). Use `--dry-run` to review operations without copying or uploading.
|
|
|
|
| 482 |
|
| 483 |
---
|
| 484 |
|
| 485 |
+
## File Structure
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 486 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
```
|
| 488 |
+
nanochatAquaRat/
|
| 489 |
+
├── nanochat/… # Vendored upstream nanochat package
|
| 490 |
+
├── scripts/
|
| 491 |
+
│ ├── base_train.py # Base pretraining stage
|
| 492 |
+
│ ├── mid_train.py # Mid-training (now includes AQuA)
|
| 493 |
+
│ ├── chat_sft.py # Chat SFT pipeline
|
| 494 |
+
│ ├── sft_train.py # Shim so `-m scripts.sft_train` still works
|
| 495 |
+
│ ├── chat_rl.py # Reinforcement learning on AQuA-RAT
|
| 496 |
+
│ ├── chat_eval.py # Evaluation harness (adds AQuA task)
|
| 497 |
+
│ ├── prepare_aqua.py # AQuA-RAT JSONL exporter
|
| 498 |
+
│ ├── launch_lambda_training.py # Lambda Labs automation
|
| 499 |
+
│ ├── launch_hyperbolic_training.py # Hyperbolic Labs automation
|
| 500 |
+
│ └── upload_to_gcs.sh # Artifact helper
|
| 501 |
+
├── tasks/
|
| 502 |
+
│ ├── aqua.py # AQuA-RAT task implementation
|
| 503 |
+
│ ├── arc.py / gsm8k.py / mmlu.py # Other reasoning tasks
|
| 504 |
+
│ └── …
|
| 505 |
+
├── run_aquarat_small.sh # End-to-end orchestration
|
| 506 |
+
├── pyproject.toml / uv.lock # Environment definitions
|
| 507 |
+
└── README.md
|
| 508 |
+
```
|
| 509 |
+
### Summary of Code Changes
|
| 510 |
+
|
| 511 |
+
| File | Type | Description |
|
| 512 |
+
|------|------|-------------|
|
| 513 |
+
| `tasks/aqua.py` | NEW | Conversation + evaluation wrapper for AQuA-RAT |
|
| 514 |
+
| `scripts/prepare_aqua.py` | NEW | Materializes train/validation/test JSONL splits for offline use |
|
| 515 |
+
| `scripts/mid_train.py` | MODIFIED | Adds AQuA to the mid-training mixture |
|
| 516 |
+
| `scripts/chat_sft.py` | MODIFIED | SFT mixture now includes AQuA controls |
|
| 517 |
+
| `scripts/sft_train.py` | NEW | Thin compatibility shim around `chat_sft` |
|
| 518 |
+
| `scripts/chat_rl.py` | MODIFIED | RL loop retargeted from GSM8K to AQuA-RAT |
|
| 519 |
+
| `scripts/chat_eval.py` | MODIFIED | Registers AQuA for categorical evaluation |
|
| 520 |
+
| `run_aquarat_small.sh` | MODIFIED | Pipeline glue aligned with AQuA staging |
|
| 521 |
+
| `scripts/launch_hyperbolic_training.py` | NEW | Hyperbolic Labs automation helper |
|
| 522 |
+
| `launch_lambda.py` / `scripts/launch_lambda_training.py` | EXISTING | Lambda Labs support retained |
|
| 523 |
+
|
| 524 |
+
---
|
| 525 |
+
|
| 526 |
+
## Monitoring & Visualization
|
| 527 |
+
|
| 528 |
+
All metrics stream to [Weights & Biases](https://wandb.ai) in real-time:
|
| 529 |
+
|
| 530 |
+
**Training Metrics**:
|
| 531 |
+
- Loss curves (pretraining, SFT, RL)
|
| 532 |
+
- Learning rate schedules
|
| 533 |
+
- Gradient norms
|
| 534 |
+
|
| 535 |
+
**RL Metrics**:
|
| 536 |
+
- Policy performance (accuracy, rewards)
|
| 537 |
+
- KL divergence from initial policy
|
| 538 |
+
- Letter-choice distributions (A-E)
|
| 539 |
+
- Confidence margins
|
| 540 |
+
|
| 541 |
+
**Interpretability**:
|
| 542 |
+
- Attention heatmaps per layer
|
| 543 |
+
- Entropy evolution across training
|
| 544 |
+
- Token-level attention weights
|
| 545 |
+
|
| 546 |
+
Example W&B dashboard:
|
| 547 |
+
```
|
| 548 |
+
rl/acc ━━━━━━━━━━ 0.45
|
| 549 |
+
rl/kl_letter_mean ━━━━━━━━━━ 0.12
|
| 550 |
+
rl/letter_margin_mean ━━━━━━━━━━ 2.34
|
| 551 |
+
attn/entropy_mean ━━━━━━━━━━ 3.21
|
| 552 |
+
```
|
| 553 |
+
|
| 554 |
+
---
|
| 555 |
+
|
| 556 |
+
## Results
|
| 557 |
+
|
| 558 |
+
### Model Configurations
|
| 559 |
+
|
| 560 |
+
| Depth | Parameters | Training Time | Best Instance Type | Estimated Cost |
|
| 561 |
+
|-------|------------|---------------|-------------------|----------------|
|
| 562 |
+
| 8 | ~60M | 3-4 hours | 1-2x A100 | ~$18-35 |
|
| 563 |
+
| 12 | ~180M | 4-5 hours | 4x A100 | ~$35-45 |
|
| 564 |
+
| 20 | ~561M | 6-8 hours | 8x H100 | ~$144-192 |
|
| 565 |
+
| 26 | ~1.1B | 10-12 hours | 8x H100 | ~$240-288 |
|
| 566 |
+
|
| 567 |
+
To change model depth, edit the `--depth` parameter in `run_aquarat_small.sh`.
|
| 568 |
+
|
| 569 |
+
### Expected Performance
|
| 570 |
+
|
| 571 |
+
**After SFT** (before RL):
|
| 572 |
+
- Dev accuracy: 20-30% (depth-8), 30-40% (depth-20)
|
| 573 |
+
- Basic problem-solving capability
|
| 574 |
+
- Some format errors (invalid letters)
|
| 575 |
+
|
| 576 |
+
**After RL**:
|
| 577 |
+
- Dev accuracy: 30-50% (depth-8), 40-60% (depth-20)
|
| 578 |
+
- Improved reasoning coherence
|
| 579 |
+
- Better multiple-choice selection confidence
|
| 580 |
+
- Reduced format errors
|
| 581 |
+
- Stable attention patterns
|
| 582 |
+
|
| 583 |
+
### Cost Management
|
| 584 |
+
|
| 585 |
+
Lambda Labs pricing (8x H100 SXM5 @ ~$24/hour):
|
| 586 |
+
|
| 587 |
+
| Model | Training Time | Total Cost |
|
| 588 |
+
|-------|---------------|------------|
|
| 589 |
+
| depth-8 (60M) | 3-4 hours | ~$96 |
|
| 590 |
+
| depth-20 (561M) | 6-8 hours | ~$192 |
|
| 591 |
+
|
| 592 |
+
Budget options:
|
| 593 |
+
- Test pipeline: 1x A10 @ $0.60/hr
|
| 594 |
+
- Small model: 2x A100 @ $4.40/hr
|
| 595 |
+
- Production: 8x H100 @ $24/hr
|
| 596 |
+
|
| 597 |
+
---
|
| 598 |
+
|
| 599 |
+
## Important Notes
|
| 600 |
+
|
| 601 |
+
### For Lambda Labs Users
|
| 602 |
+
- **Always terminate instances** after training to avoid charges
|
| 603 |
+
- Monitor spending in the Lambda Labs dashboard
|
| 604 |
+
- Check instance availability before launching (high demand periods)
|
| 605 |
+
|
| 606 |
+
### Known Limitations
|
| 607 |
+
- RL on AQuA-RAT is experimental; results may vary
|
| 608 |
+
- Attention logging adds ~5-10% overhead
|
| 609 |
+
- KL computation can be expensive with large batch sizes
|
| 610 |
+
- Smaller models (<100M params) may struggle with complex reasoning
|
| 611 |
+
|
| 612 |
+
---
|
| 613 |
+
|
| 614 |
+
## Documentation
|
| 615 |
+
|
| 616 |
+
- **[scripts/launch_lambda_training.py](scripts/launch_lambda_training.py)** - Full-featured automation
|
| 617 |
+
- **[scripts/launch_hyperbolic_training.py](scripts/launch_hyperbolic_training.py)** - Hyperbolic marketplace automation
|
| 618 |
+
- **[launch_lambda.py](launch_lambda.py)** - Simplified launcher
|
| 619 |
+
- **[QUICKSTART.md](QUICKSTART.md)** - Fast track guide
|
| 620 |
+
- **[LAMBDA_MANUAL_SETUP.md](LAMBDA_MANUAL_SETUP.md)** - Manual setup walkthrough
|
| 621 |
+
- **[GCS_UPLOAD_GUIDE.md](GCS_UPLOAD_GUIDE.md)** - Upload weights to Google Cloud Storage
|
| 622 |
+
- **[.env.template](.env.template)** - Environment configuration
|
| 623 |
+
|
| 624 |
+
---
|
| 625 |
+
|
| 626 |
+
## Contributing
|
| 627 |
+
|
| 628 |
+
This project is based on the nanochat framework. For issues specific to:
|
| 629 |
+
- **AQuA-RAT training**: Open an issue in this repository
|
| 630 |
+
- **Base nanochat framework**: Refer to the upstream nanochat project
|
| 631 |
+
- **Lambda Labs deployment**: See documentation above
|
| 632 |
+
|
| 633 |
+
---
|
| 634 |
+
|
| 635 |
+
## License
|
| 636 |
+
|
| 637 |
+
This project inherits the license from the base nanochat project.
|
| 638 |
+
|
| 639 |
+
---
|
| 640 |
+
|
| 641 |
+
## Acknowledgments
|
| 642 |
+
|
| 643 |
+
- **Andrej Karpathy** - nanochat framework
|
| 644 |
+
- **DeepMind** - AQuA-RAT dataset and mechanistic interpretability tools
|
| 645 |
+
- **Lambda Labs** - Cloud GPU infrastructure
|
| 646 |
+
- **Weights & Biases** - Experiment tracking and visualization
|
| 647 |
+
|
| 648 |
+
---
|
| 649 |
+
|
| 650 |
+
## Support
|
| 651 |
+
|
| 652 |
+
- **Lambda Labs Support**: https://lambdalabs.com/support
|
| 653 |
+
- **Weights & Biases Docs**: https://docs.wandb.ai
|
| 654 |
+
- **Project Issues**: https://github.com/HarleyCoops/nanochatAquaRat/issues
|
aquarat2.png
ADDED
|
Git LFS Details
|
launch_lambda.py
ADDED
|
@@ -0,0 +1,296 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Lambda Labs GPU Instance Launcher for AQuA-RAT Training
|
| 4 |
+
|
| 5 |
+
This script automates launching an 8x H100 GPU instance on Lambda Labs
|
| 6 |
+
and deploying the nanochatAquaRat training pipeline.
|
| 7 |
+
|
| 8 |
+
Prerequisites:
|
| 9 |
+
1. Lambda Labs API key (set as LAMBDA_API_KEY environment variable)
|
| 10 |
+
2. Your SSH public key added to Lambda Labs account
|
| 11 |
+
3. W&B API key for logging (set as WANDB_API_KEY environment variable)
|
| 12 |
+
|
| 13 |
+
Usage:
|
| 14 |
+
python launch_lambda.py --instance-type gpu_8x_h100_sxm5 --region us-west-1
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
import os
|
| 18 |
+
import sys
|
| 19 |
+
import time
|
| 20 |
+
import argparse
|
| 21 |
+
import subprocess
|
| 22 |
+
from pathlib import Path
|
| 23 |
+
|
| 24 |
+
try:
|
| 25 |
+
import lambda_cloud_client
|
| 26 |
+
from lambda_cloud_client.rest import ApiException
|
| 27 |
+
except ImportError:
|
| 28 |
+
print("Installing lambda-cloud-client...")
|
| 29 |
+
subprocess.check_call([sys.executable, "-m", "pip", "install", "lambda-cloud-client"])
|
| 30 |
+
import lambda_cloud_client
|
| 31 |
+
from lambda_cloud_client.rest import ApiException
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def check_env_vars():
    """Verify the credentials needed for a launch are present in the environment.

    Prints guidance and exits the process with status 1 when either
    LAMBDA_API_KEY or WANDB_API_KEY is missing; returns None otherwise.
    """
    required_vars = {
        'LAMBDA_API_KEY': 'Lambda Labs API key',
        'WANDB_API_KEY': 'Weights & Biases API key'
    }

    # Collect a human-readable bullet for every variable that is absent.
    missing = [
        f" - {var} ({description})"
        for var, description in required_vars.items()
        if not os.getenv(var)
    ]

    if missing:
        print("ERROR: Missing required environment variables:")
        print("\n".join(missing))
        print("\nSet them with:")
        print(" export LAMBDA_API_KEY='your-lambda-api-key'")
        print(" export WANDB_API_KEY='your-wandb-api-key'")
        sys.exit(1)
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def get_api_client():
    """Build a Lambda Cloud API client authenticated via LAMBDA_API_KEY."""
    configuration = lambda_cloud_client.Configuration(
        host="https://cloud.lambdalabs.com/api/v1",
        access_token=os.getenv('LAMBDA_API_KEY'),
    )
    return lambda_cloud_client.ApiClient(configuration)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def list_available_instance_types(api_client):
    """Print every instance type that currently has capacity and return the data.

    Args:
        api_client: A configured lambda_cloud_client.ApiClient.

    Returns:
        The instance-type mapping from the API response.
    """
    api_instance = lambda_cloud_client.DefaultApi(api_client)

    # Keep the try body minimal: only the API call can raise ApiException.
    try:
        response = api_instance.instance_types()
    except ApiException as e:
        print(f"Error fetching instance types: {e}")
        sys.exit(1)

    print("\nAvailable Instance Types:")
    print("-" * 80)

    for type_name, details in response.data.items():
        itype = details.instance_type
        # Skip types with no capacity anywhere.
        if not itype.regions_with_capacity_available:
            continue
        print(f"\n{type_name}:")
        print(f" GPUs: {itype.specs.gpus}")
        print(f" GPU Memory: {itype.specs.memory_gbs} GB")
        print(f" Price: ${itype.specs.price_cents_per_hour / 100}/hour")
        print(f" Available regions: {', '.join(itype.regions_with_capacity_available)}")

    return response.data
|
| 86 |
+
|
| 87 |
+
|
| 88 |
+
def launch_instance(api_client, instance_type, region, name="nanochat-aquarat-training"):
    """Launch one Lambda Labs GPU instance and return its instance ID.

    All SSH keys registered on the account are attached so the operator can
    log in with whichever key they hold locally. Exits the process on any
    API error or when the launch returns no instance ID.
    """
    api_instance = lambda_cloud_client.DefaultApi(api_client)

    # Fetch the account's SSH keys; the instance is unreachable without one.
    try:
        ssh_keys_response = api_instance.list_ssh_keys()
    except ApiException as e:
        print(f"Error fetching SSH keys: {e}")
        sys.exit(1)

    if not ssh_keys_response.data:
        print("ERROR: No SSH keys found in your Lambda Labs account.")
        print("Please add an SSH key at: https://cloud.lambdalabs.com/ssh-keys")
        sys.exit(1)

    ssh_key_names = [key.name for key in ssh_keys_response.data]
    print(f"Using SSH keys: {', '.join(ssh_key_names)}")

    launch_request = lambda_cloud_client.LaunchInstanceRequest(
        region_name=region,
        instance_type_name=instance_type,
        ssh_key_names=ssh_key_names,
        name=name,
        quantity=1,
    )

    print(f"\nLaunching {instance_type} instance in {region}...")

    try:
        response = api_instance.launch_instance(launch_request)
    except ApiException as e:
        print(f"Error launching instance: {e}")
        sys.exit(1)

    # Guard clause: the API succeeded but returned no ID.
    if not (response.data and response.data.instance_ids):
        print("ERROR: Instance launch failed")
        sys.exit(1)

    instance_id = response.data.instance_ids[0]
    print(f"✓ Instance launched successfully!")
    print(f" Instance ID: {instance_id}")
    return instance_id
|
| 132 |
+
|
| 133 |
+
|
| 134 |
+
def wait_for_instance(api_client, instance_id, timeout=300):
    """Poll the instance every 10s until it reports "active", then return it.

    Exits the process if the instance does not become active within
    `timeout` seconds. API errors during polling are logged and retried.
    """
    api_instance = lambda_cloud_client.DefaultApi(api_client)

    print("\nWaiting for instance to be ready...")
    deadline = time.time() + timeout

    while time.time() < deadline:
        try:
            instance = api_instance.get_instance(instance_id).data
        except ApiException as e:
            # Transient API error: report and retry on the next poll.
            print(f"Error checking instance status: {e}")
            time.sleep(10)
            continue

        if instance.status == "active":
            print(f"✓ Instance is ready!")
            print(f" IP Address: {instance.ip}")
            print(f" SSH Command: ssh ubuntu@{instance.ip}")
            return instance

        print(f" Status: {instance.status}... waiting")
        time.sleep(10)

    print("ERROR: Timeout waiting for instance to be ready")
    sys.exit(1)
|
| 161 |
+
|
| 162 |
+
|
| 163 |
+
def generate_startup_script():
    """Build the bash bootstrap script executed on the freshly launched instance.

    The script clones the repository (if needed), writes the W&B credentials
    into a `.env` file inside the repo, and starts the training run inside a
    detached `screen` session.

    Returns:
        str: The complete bash script text.

    Bug fixes versus the previous version:
      * `.env` was written into `/home/ubuntu/nanochatAquaRat` *before* the
        repository was cloned, which failed under `set -euo pipefail` on a
        fresh instance because the directory did not exist yet. The clone now
        happens first.
      * `${WANDB_ENTITY:-}` sat inside a quoted heredoc (`<< 'EOF'`), so it
        was never expanded and the literal text landed in `.env`. The entity
        is now resolved locally in Python.
    """
    wandb_key = os.getenv('WANDB_API_KEY')
    wandb_entity = os.getenv('WANDB_ENTITY', '')

    script = f"""#!/bin/bash
set -euo pipefail

# Clone repository if not exists (must exist before .env can be written into it)
cd /home/ubuntu
if [ ! -d "nanochatAquaRat" ]; then
    git clone https://github.com/HarleyCoops/nanochatAquaRat.git
fi

cd nanochatAquaRat

# Create .env file with credentials
cat > .env << 'EOF'
WANDB_API_KEY={wandb_key}
WANDB_PROJECT=nanochat-aquarat
WANDB_ENTITY={wandb_entity}
EOF

# Make script executable
chmod +x run_aquarat_small.sh

# Run training in screen session
screen -dmS training bash -c './run_aquarat_small.sh 2>&1 | tee training.log'

echo "Training started in screen session 'training'"
echo "To attach: screen -r training"
echo "To detach: Ctrl+A then D"
echo "To view log: tail -f training.log"
"""

    return script
|
| 198 |
+
|
| 199 |
+
|
| 200 |
+
def deploy_and_run(instance_ip):
    """Copy the bootstrap script to the instance over SCP and execute it via SSH.

    Raises:
        subprocess.CalledProcessError: if either the scp or the ssh step fails.
    """
    print("\nDeploying code and starting training...")

    # Materialize the generated bootstrap script at a local temp path.
    script_path = Path("/tmp/lambda_startup.sh")
    script_path.write_text(generate_startup_script())

    print(" Copying startup script...")
    scp_cmd = [
        "scp", "-o", "StrictHostKeyChecking=no",
        str(script_path),
        f"ubuntu@{instance_ip}:/tmp/startup.sh",
    ]
    subprocess.run(scp_cmd, check=True)

    print(" Starting training...")
    ssh_cmd = [
        "ssh", "-o", "StrictHostKeyChecking=no",
        f"ubuntu@{instance_ip}",
        "bash /tmp/startup.sh",
    ]
    subprocess.run(ssh_cmd, check=True)

    print("\n" + "=" * 80)
    print("✓ Training deployment complete!")
    print("=" * 80)
    print("\nTo monitor your training:")
    print(f" 1. SSH: ssh ubuntu@{instance_ip}")
    print(f" 2. Attach to screen: screen -r training")
    print(f" 3. View log: tail -f ~/nanochatAquaRat/training.log")
    print(f" 4. W&B Dashboard: https://wandb.ai")
    print("\nTo detach from screen: Ctrl+A then D")
    print("\nRemember to terminate the instance when done to avoid charges!")
|
| 236 |
+
|
| 237 |
+
|
| 238 |
+
def main():
    """CLI entry point: launch a Lambda Labs instance and start AQuA-RAT training.

    Parses command-line options, validates credentials, launches the requested
    instance, waits for it to come up, and (unless --no-deploy is given)
    pushes the bootstrap script and starts the training run.
    """
    parser = argparse.ArgumentParser(description="Launch Lambda Labs instance for AQuA-RAT training")
    parser.add_argument("--instance-type", default="gpu_8x_h100_sxm5",
                        help="Instance type (default: gpu_8x_h100_sxm5)")
    parser.add_argument("--region", default="us-west-1",
                        help="Region to launch in (default: us-west-1)")
    parser.add_argument("--name", default="nanochat-aquarat-training",
                        help="Instance name (default: nanochat-aquarat-training)")
    parser.add_argument("--list-types", action="store_true",
                        help="List available instance types and exit")
    parser.add_argument("--no-deploy", action="store_true",
                        help="Launch instance but don't deploy code")

    args = parser.parse_args()

    print("=" * 80)
    print("Lambda Labs GPU Instance Launcher for AQuA-RAT Training")
    print("=" * 80)

    # Fail fast if API credentials are missing.
    check_env_vars()

    api_client = get_api_client()

    if args.list_types:
        list_available_instance_types(api_client)
        return

    instance_id = launch_instance(api_client, args.instance_type, args.region, args.name)

    instance = wait_for_instance(api_client, instance_id)

    if not args.no_deploy:
        time.sleep(5)  # Give SSH a moment to be fully ready
        try:
            deploy_and_run(instance.ip)
        except subprocess.CalledProcessError as e:
            # Deployment is best-effort: the instance is already running, so
            # tell the user how to finish the setup manually instead of dying.
            print(f"\nWarning: Deployment encountered an error: {e}")
            print(f"You can manually SSH to the instance and run the training:")
            print(f" ssh ubuntu@{instance.ip}")
            print(f" cd nanochatAquaRat && bash run_aquarat_small.sh")

    print("\n" + "=" * 80)
    print("Instance Information")
    print("=" * 80)
    print(f"Instance ID: {instance_id}")
    print(f"IP Address: {instance.ip}")
    print(f"Status: {instance.status}")
    # BUG FIX: the previous message advertised a `--terminate` flag that this
    # script never implemented; direct users to the dashboard instead.
    print("\nTo terminate this instance:")
    print(" Visit https://cloud.lambdalabs.com/instances and terminate it there.")


if __name__ == "__main__":
    main()
|
scripts/launch_hyperbolic_training.py
ADDED
|
@@ -0,0 +1,701 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Automation helper to launch a Hyperbolic Labs marketplace instance and kick off the
|
| 4 |
+
nanochat AQuA-RAT training run.
|
| 5 |
+
|
| 6 |
+
The workflow mirrors `launch_lambda_training.py`, but uses Hyperbolic's REST API.
|
| 7 |
+
|
| 8 |
+
Example:
|
| 9 |
+
|
| 10 |
+
python scripts/launch_hyperbolic_training.py \\
|
| 11 |
+
--gpu-count 1 \\
|
| 12 |
+
--region us-east \\
|
| 13 |
+
--max-price 4.5 \\
|
| 14 |
+
--auto-start \\
|
| 15 |
+
--inject-env WANDB_API_KEY
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import argparse
|
| 21 |
+
import json
|
| 22 |
+
import os
|
| 23 |
+
import shlex
|
| 24 |
+
import subprocess
|
| 25 |
+
import sys
|
| 26 |
+
import tempfile
|
| 27 |
+
import textwrap
|
| 28 |
+
import time
|
| 29 |
+
from pathlib import Path
|
| 30 |
+
from typing import Any, Dict, Iterable, List, Optional, Sequence, Tuple
|
| 31 |
+
|
| 32 |
+
import requests
|
| 33 |
+
|
| 34 |
+
API_BASE = "https://api.hyperbolic.xyz"
|
| 35 |
+
MARKETPLACE_BASE = f"{API_BASE}/v1/marketplace"
|
| 36 |
+
READY_STATUSES = {
|
| 37 |
+
"ready",
|
| 38 |
+
"running",
|
| 39 |
+
"instance_running",
|
| 40 |
+
"node_ready",
|
| 41 |
+
"active",
|
| 42 |
+
"online",
|
| 43 |
+
}
|
| 44 |
+
|
| 45 |
+
|
| 46 |
+
def log(msg: str) -> None:
    """Print an informational message to stdout with a [info] prefix."""
    print(f"[info] {msg}")


def warn(msg: str) -> None:
    """Print a warning message to stderr with a [warn] prefix."""
    print(f"[warn] {msg}", file=sys.stderr)


def error(msg: str) -> None:
    """Print an error message to stderr with a [error] prefix."""
    print(f"[error] {msg}", file=sys.stderr)


def shell_quote(value: str) -> str:
    """Return *value* quoted for safe interpolation into a shell command line."""
    return shlex.quote(value)
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
def collect_env_pairs(cli_pairs: Sequence[str], inject_names: Sequence[str]) -> List[Tuple[str, str]]:
    """Merge KEY=VALUE pairs with env vars pulled from the local environment.

    Explicit ``--env`` entries are processed first, then ``--inject-env``
    names are looked up locally; a later entry overrides an earlier one for
    the same key.

    Raises:
        ValueError: on a malformed KEY=VALUE entry, an empty key name, or an
            injected name that is not set in the local environment.
    """
    merged: Dict[str, str] = {}

    for entry in cli_pairs:
        if "=" not in entry:
            raise ValueError(f"--env expects KEY=VALUE entries, got '{entry}'")
        # Split on the first '=' only, so values may themselves contain '='.
        name, _, value = entry.partition("=")
        name = name.strip()
        if not name:
            raise ValueError(f"Environment key is empty in '{entry}'")
        merged[name] = value

    for name in inject_names:
        if not name:
            raise ValueError("Encountered empty --inject-env name")
        if name not in os.environ:
            raise ValueError(f"--inject-env requested '{name}' but it is not set locally")
        merged[name] = os.environ[name]

    return list(merged.items())
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def build_bootstrap_script(
    repo_dir: str,
    run_script: str,
    branch: str,
    repo_url: Optional[str],
    env_file_remote: str,
    auto_start: bool,
    tmux_session: str,
) -> str:
    """Compose the bash script executed on the instance to prepare the training run.

    The script defines a few shell variables, clones/updates the repository
    (when a URL is given), installs the uploaded env file as `.env`, and —
    when `auto_start` is enabled — launches the run script inside tmux,
    falling back to nohup when tmux is not installed.
    """
    header = [
        "#!/usr/bin/env bash",
        "set -euxo pipefail",
        f'REPO_DIR="$HOME/{repo_dir}"',
        f'RUN_SCRIPT="{run_script}"',
        f'ENV_FILE="{env_file_remote}"',
        f'AUTO_START="{1 if auto_start else 0}"',
    ]

    if repo_url:
        checkout = [
            f"REPO_URL={shlex.quote(repo_url)}",
            'if [ ! -d "$REPO_DIR/.git" ]; then',
            ' rm -rf "$REPO_DIR"',
            ' git clone "$REPO_URL" "$REPO_DIR"',
            "fi",
            'cd "$REPO_DIR"',
            "git fetch --all --prune",
            f"git switch {shlex.quote(branch)}",
            "git pull --ff-only || true",
        ]
    else:
        checkout = [
            'mkdir -p "$REPO_DIR"',
            'cd "$REPO_DIR"',
        ]

    setup = [
        'if [ -f "$ENV_FILE" ]; then',
        ' cp "$ENV_FILE" .env',
        "fi",
        'if [ -f "$RUN_SCRIPT" ]; then',
        ' chmod +x "$RUN_SCRIPT"',
        "else",
        ' echo "Run script $RUN_SCRIPT not found; auto-start will be skipped." >&2',
        ' AUTO_START="0"',
        "fi",
    ]

    launch: List[str] = []
    if auto_start:
        via_tmux = (
            f'tmux new -d -s {shlex.quote(tmux_session)} '
            '"cd \\"$REPO_DIR\\" && bash \\"$RUN_SCRIPT\\""'
        )
        via_nohup = (
            'nohup bash -lc "cd \\"$REPO_DIR\\" && bash \\"$RUN_SCRIPT\\"" '
            '> "$HOME/nanochat-train.log" 2>&1 &'
        )
        launch = [
            'if [ "$AUTO_START" = "1" ]; then',
            " if command -v tmux >/dev/null 2>&1; then",
            f" {via_tmux}",
            " else",
            f" {via_nohup}",
            " fi",
            "fi",
        ]

    return "\n".join(header + checkout + setup + launch) + "\n"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
class HyperbolicClient:
    """Thin wrapper around the Hyperbolic Labs marketplace REST API."""

    def __init__(self, api_key: Optional[str]):
        """Store the API key, rejecting a missing or empty value up front."""
        if not api_key:
            raise ValueError("Hyperbolic API key is required. Pass --api-key or set HYPERBOLIC_API_KEY.")
        self.api_key = api_key

    def _headers(self, with_auth: bool = True) -> Dict[str, str]:
        """Build request headers, optionally including the bearer token."""
        headers = {"Content-Type": "application/json"}
        if with_auth and self.api_key:
            headers["Authorization"] = f"Bearer {self.api_key}"
        return headers

    def list_marketplace(self) -> List[Dict[str, Any]]:
        """Return all marketplace nodes (this endpoint needs no auth).

        The response envelope varies across API versions, so several shapes
        are tolerated: {"instances": [...]}, {"nodes"/"data": [...]}, or a
        bare list.
        """
        response = requests.post(
            MARKETPLACE_BASE,
            headers=self._headers(with_auth=False),
            json={"filters": {}},
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        # BUG FIX: the previous code called payload.get(...) before checking
        # whether payload was a list, so a bare-list response raised
        # AttributeError instead of reaching the list-handling branch.
        if isinstance(payload, dict):
            instances = payload.get("instances")
            if instances is None:
                instances = payload.get("nodes") or payload.get("data") or []
        elif isinstance(payload, list):
            instances = payload
        else:
            instances = []
        return instances

    def create_instance(
        self,
        cluster_name: str,
        node_name: str,
        gpu_count: int,
        image: Optional[Dict[str, Any]] = None,
    ) -> Dict[str, Any]:
        """Rent `gpu_count` GPUs on the given cluster/node and return the raw response."""
        payload: Dict[str, Any] = {
            "cluster_name": cluster_name,
            "node_name": node_name,
            "gpu_count": gpu_count,
        }
        if image:
            payload["image"] = image
        response = requests.post(
            f"{MARKETPLACE_BASE}/instances/create",
            headers=self._headers(),
            json=payload,
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    def list_instances(self) -> List[Dict[str, Any]]:
        """Return the account's current instances, tolerating envelope variations."""
        response = requests.get(
            f"{MARKETPLACE_BASE}/instances",
            headers=self._headers(),
            timeout=30,
        )
        response.raise_for_status()
        payload = response.json()
        if isinstance(payload, dict):
            return payload.get("instances") or payload.get("data") or []
        if isinstance(payload, list):
            return payload
        return []

    def terminate_instance(self, instance_id: str) -> Dict[str, Any]:
        """Terminate the instance with the given ID and return the raw response."""
        response = requests.post(
            f"{MARKETPLACE_BASE}/instances/terminate",
            headers=self._headers(),
            json={"id": instance_id},
            timeout=30,
        )
        response.raise_for_status()
        return response.json()

    def get_balance(self) -> Optional[float]:
        """Return the current account balance, or None when it cannot be fetched."""
        try:
            response = requests.get(
                f"{API_BASE}/billing/get_current_balance",
                headers=self._headers(),
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
            if isinstance(data, dict):
                # BUG FIX: the old `or`-chain skipped a legitimate zero
                # balance (0 is falsy); check each key for presence instead.
                for key in ("balance", "amount", "credits"):
                    value = data.get(key)
                    if value is not None:
                        return float(value)
        except (requests.HTTPError, ValueError, TypeError):
            pass
        warn("Unable to fetch current account balance.")
        return None
|
| 256 |
+
|
| 257 |
+
|
| 258 |
+
def summarize_node(node: Dict[str, Any]) -> str:
    """Render a one-line human-readable summary of a marketplace node."""
    # Describe each GPU as "MODEL (RAM GB)" when both fields are present.
    hardware = node.get("hardware") or {}
    descriptions = []
    for gpu in hardware.get("gpus") or []:
        model = gpu.get("model")
        ram = gpu.get("ram") or gpu.get("memory") or gpu.get("vram")
        if model and ram:
            descriptions.append(f"{model} ({ram} GB)")
        elif model:
            descriptions.append(model)
    gpu_text = ", ".join(descriptions) if descriptions else "Unknown GPUs"

    amount = ((node.get("pricing") or {}).get("price") or {}).get("amount")
    price_str = f"${amount:.2f}/hr" if isinstance(amount, (int, float)) else "n/a"
    region = ((node.get("location") or {}).get("region")) or "unknown region"
    cluster = node.get("cluster_name") or "unknown cluster"
    available = (node.get("gpus_total") or 0) - (node.get("gpus_reserved") or 0)
    supplier = node.get("supplier_id") or "unknown supplier"

    return (
        f"{node.get('id', '<unknown>')} | {cluster} | {region} | "
        f"{available}/{node.get('gpus_total', '?')} GPUs free | "
        f"{gpu_text} | {price_str} | supplier: {supplier}"
    )
|
| 283 |
+
|
| 284 |
+
|
| 285 |
+
def filter_nodes(
    nodes: Iterable[Dict[str, Any]],
    gpu_count: int,
    region: Optional[str],
    supplier: Optional[str],
    max_price: Optional[float],
) -> List[Dict[str, Any]]:
    """Select nodes with enough free GPUs that match the region, supplier and
    price constraints, sorted cheapest-first.

    Region and supplier matching is case-insensitive substring matching; a
    node with no numeric price always passes the max-price filter.
    """
    want_region = region.lower() if region else None
    want_supplier = supplier.lower() if supplier else None

    matches: List[Dict[str, Any]] = []
    for node in nodes:
        free = (node.get("gpus_total") or 0) - (node.get("gpus_reserved") or 0)
        if free < gpu_count:
            continue

        if want_region:
            node_region = ((node.get("location") or {}).get("region") or "").lower()
            if want_region not in node_region:
                continue

        if want_supplier:
            node_supplier = (node.get("supplier_id") or "").lower()
            if want_supplier not in node_supplier:
                continue

        amount = ((node.get("pricing") or {}).get("price") or {}).get("amount")
        if max_price is not None and isinstance(amount, (int, float)) and amount > max_price:
            continue

        matches.append(node)

    # Unpriced nodes sort last.
    matches.sort(
        key=lambda n: ((n.get("pricing") or {}).get("price") or {}).get("amount", float("inf"))
    )
    return matches
|
| 324 |
+
|
| 325 |
+
|
| 326 |
+
def extract_instance_id(payload: Dict[str, Any], before_ids: Sequence[str], client: HyperbolicClient) -> str:
    """Pull the new instance's ID out of a create-instance response.

    Tries the usual id keys at the top level first, then inside a nested
    "instance"/"data" object; failing that, diffs the account's current
    instance list against `before_ids` to find the newcomer.

    Raises:
        RuntimeError: when no ID can be determined either way.
    """
    id_keys = ("id", "instance_id", "instanceId")

    def _ids_from(obj: Any) -> List[str]:
        # Collect non-empty string values for any recognized id key, in order.
        if not isinstance(obj, dict):
            return []
        return [obj[k] for k in id_keys if isinstance(obj.get(k), str) and obj.get(k)]

    found = _ids_from(payload) + _ids_from(payload.get("instance") or payload.get("data"))
    if found:
        return found[0]

    # Fall back to diffing current instances.
    time.sleep(3)
    fresh = {str(inst.get("id")) for inst in client.list_instances() if inst.get("id")}
    new_ids = fresh.difference(before_ids)
    if new_ids:
        return new_ids.pop()
    raise RuntimeError("Unable to determine instance ID from API response.")
|
| 349 |
+
|
| 350 |
+
|
| 351 |
+
def extract_ip(instance: Dict[str, Any]) -> Optional[str]:
    """Return the first usable IP address found in the instance record, or None.

    Checks top-level keys first, then the nested "network" object, then each
    entry of "ip_addresses" — preserving that priority order.
    """
    net = instance.get("network") or {}
    ordered = [
        instance.get("public_ip"),
        instance.get("ip_address"),
        instance.get("ip"),
        instance.get("ipv4"),
        net.get("public_ip"),
        net.get("ip"),
        net.get("ipv4"),
    ]

    for entry in instance.get("ip_addresses") or []:
        ordered += [
            entry.get("public_ip"),
            entry.get("ip"),
            entry.get("ipv4"),
            entry.get("address"),
        ]

    return next((c for c in ordered if isinstance(c, str) and c), None)
|
| 377 |
+
|
| 378 |
+
|
| 379 |
+
def extract_ssh_port(instance: Dict[str, Any]) -> int:
    """Return the SSH port advertised by the instance record, defaulting to 22.

    Accepts either an int or an all-digit string; checks the top-level
    "ssh_port", then network.ssh_port, then ssh.port, in that order.
    """
    net = instance.get("network") or {}
    for candidate in (
        instance.get("ssh_port"),
        net.get("ssh_port"),
        (instance.get("ssh") or {}).get("port"),
    ):
        if isinstance(candidate, int):
            return candidate
        if isinstance(candidate, str) and candidate.isdigit():
            return int(candidate)
    return 22
|
| 393 |
+
|
| 394 |
+
|
| 395 |
+
def extract_status(instance: Dict[str, Any]) -> str:
    """Return the first string-valued status field, or "" when none is present."""
    return next(
        (
            instance[key]
            for key in ("status", "instance_status", "state")
            if isinstance(instance.get(key), str)
        ),
        "",
    )
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
def wait_for_instance(
    client: HyperbolicClient,
    instance_id: str,
    poll_seconds: int,
    max_wait_minutes: int,
) -> Dict[str, Any]:
    """Poll the API until *instance_id* reaches a ready status with an IP.

    Returns the matching instance record.  Raises TimeoutError when the
    instance is not ready within *max_wait_minutes*.
    """
    log(f"Waiting for instance {instance_id} to become ready...")
    give_up_at = time.time() + max_wait_minutes * 60
    while time.time() < give_up_at:
        for record in client.list_instances():
            # The API is inconsistent about the ID field name, so match on all.
            known_ids = {
                str(record.get(field))
                for field in ("id", "instance_id", "instanceId")
            }
            if instance_id not in known_ids:
                continue

            state = extract_status(record).lower()
            address = extract_ip(record)
            if state in READY_STATUSES and address:
                log(f"Instance is ready: status={state}, ip={address}")
                return record

            log(f"  status={state or '<unknown>'}; waiting for ready state...")
        time.sleep(poll_seconds)

    raise TimeoutError(f"Timed out waiting for instance {instance_id} to become ready.")
|
| 432 |
+
|
| 433 |
+
|
| 434 |
+
def build_env_content(pairs: Sequence[Tuple[str, str]]) -> str:
    """Render KEY=VALUE pairs as dotenv-style text (trailing newline when non-empty)."""
    if not pairs:
        return ""
    rendered = [f"{name}={val}" for name, val in pairs]
    return "\n".join(rendered) + "\n"
|
| 436 |
+
|
| 437 |
+
|
| 438 |
+
def scp(
    local_path: Path,
    remote_path: str,
    ssh_user: str,
    host: str,
    port: int,
    ssh_key: Optional[str],
) -> None:
    """Copy *local_path* to *remote_path* on the host via scp.

    Raises subprocess.CalledProcessError when the transfer fails.
    """
    argv = ["scp", "-o", "StrictHostKeyChecking=no"]
    if ssh_key:
        argv += ["-i", ssh_key]
    if port != 22:
        # scp (unlike ssh) takes the port as capital -P.
        argv += ["-P", str(port)]
    argv += [str(local_path), f"{ssh_user}@{host}:{remote_path}"]
    subprocess.run(argv, check=True)
|
| 453 |
+
|
| 454 |
+
|
| 455 |
+
def ssh_command(
    ssh_user: str,
    host: str,
    port: int,
    ssh_key: Optional[str],
    *command: str,
) -> None:
    """Run *command* on the remote host via ssh.

    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    argv = ["ssh", "-o", "StrictHostKeyChecking=no"]
    if ssh_key:
        argv += ["-i", ssh_key]
    if port != 22:
        argv += ["-p", str(port)]
    argv.append(f"{ssh_user}@{host}")
    if command:
        # NOTE(review): pieces are joined verbatim into one remote command
        # string; callers must pre-quote anything with shell metacharacters.
        argv.append(" ".join(command))
    subprocess.run(argv, check=True)
|
| 471 |
+
|
| 472 |
+
|
| 473 |
+
def deploy_to_instance(
    instance: Dict[str, Any],
    bootstrap_script: str,
    env_pairs: Sequence[Tuple[str, str]],
    env_file_remote: str,
    ssh_user: str,
    ssh_key: Optional[str],
) -> None:
    """Upload the bootstrap script (and optional env file), then run it remotely.

    Raises RuntimeError when the instance has no public IP yet, and
    subprocess.CalledProcessError when any scp/ssh step fails.
    """
    host = extract_ip(instance)
    if not host:
        raise RuntimeError("Instance does not report a public IP address yet.")
    port = extract_ssh_port(instance)
    log(f"Deploying bootstrap assets to {ssh_user}@{host}:{port} ...")

    with tempfile.TemporaryDirectory() as workdir:
        staging = Path(workdir)

        # Stage and push the bootstrap script to a fixed remote path.
        script_file = staging / "bootstrap.sh"
        script_file.write_text(bootstrap_script)
        scp(script_file, "/tmp/nanochat_bootstrap.sh", ssh_user, host, port, ssh_key)

        if env_pairs:
            env_file = staging / "nanochat.env"
            env_file.write_text(build_env_content(env_pairs))
            remote_parent = str(Path(env_file_remote).parent)
            ssh_command(ssh_user, host, port, ssh_key, f"mkdir -p {shell_quote(remote_parent)}")
            scp(env_file, env_file_remote, ssh_user, host, port, ssh_key)

        log("Executing remote bootstrap script...")
        ssh_command(ssh_user, host, port, ssh_key, "bash /tmp/nanochat_bootstrap.sh")
|
| 501 |
+
|
| 502 |
+
|
| 503 |
+
def parse_args() -> argparse.Namespace:
    """Define and parse the CLI for the Hyperbolic launch helper."""
    ap = argparse.ArgumentParser(description="Launch Hyperbolic Labs instance for AQuA-RAT training")

    # Marketplace / node selection
    ap.add_argument("--api-key", default=os.environ.get("HYPERBOLIC_API_KEY"), help="Hyperbolic API key")
    ap.add_argument("--gpu-count", type=int, default=1, help="Number of GPUs to request (default: 1)")
    ap.add_argument("--region", help="Preferred region substring (case-insensitive)")
    ap.add_argument("--supplier", help="Preferred supplier substring (case-insensitive)")
    ap.add_argument("--max-price", type=float, help="Maximum hourly price in USD")
    ap.add_argument("--node-name", help="Specify node name explicitly")
    ap.add_argument("--cluster-name", help="Cluster name when using --node-name")
    ap.add_argument("--list", action="store_true", help="List available marketplace nodes and exit")

    # Repository / run configuration
    ap.add_argument("--repo-url", help="Repository URL to clone (defaults to git remote origin)")
    ap.add_argument("--branch", default="main", help="Branch to checkout on the instance (default: main)")
    ap.add_argument("--run-script", default="run_aquarat_small.sh", help="Script to execute on the instance")
    ap.add_argument("--repo-dir", default="nanochatAquaRat", help="Directory name for the repo on the instance")

    # Deployment behaviour
    ap.add_argument("--auto-start", action="store_true", help="Automatically run the training script")
    ap.add_argument("--tmux-session", default="training", help="tmux session name when auto-start is enabled")
    ap.add_argument("--ssh-user", default="ubuntu", help="SSH username for the instance (default: ubuntu)")
    ap.add_argument("--ssh-key", help="Path to SSH private key for scp/ssh")
    ap.add_argument("--no-deploy", action="store_true", help="Skip deployment after the instance is ready")

    # Remote environment file
    ap.add_argument("--env", action="append", default=[], help="Environment variable in KEY=VALUE form")
    ap.add_argument("--inject-env", action="append", default=[], help="Environment variable name to copy from local env")

    # Polling
    ap.add_argument("--poll-seconds", type=int, default=20, help="Polling interval while waiting (default: 20)")
    ap.add_argument("--max-wait-minutes", type=int, default=25, help="Maximum minutes to wait for ready state")

    return ap.parse_args()
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
def guess_repo_url() -> Optional[str]:
    """Return the git `origin` remote URL of the current repo, or None."""
    try:
        proc = subprocess.run(
            ["git", "config", "--get", "remote.origin.url"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # No git binary, or not inside a repo with an origin remote.
        return None
    remote = proc.stdout.strip()
    return remote if remote else None
|
| 546 |
+
|
| 547 |
+
|
| 548 |
+
def main() -> int:
    """Drive the full launch workflow: select a node, create the instance,
    wait for it to become ready, and (unless disabled) deploy and start
    the training run.  Returns a process exit code (0 on success).
    """
    args = parse_args()

    # Validate KEY=VALUE / injected env vars before touching the API.
    try:
        env_pairs = collect_env_pairs(args.env, args.inject_env)
    except ValueError as exc:
        error(str(exc))
        return 1

    if not args.api_key:
        error("Hyperbolic API key not provided. Use --api-key or set HYPERBOLIC_API_KEY.")
        return 1

    client = HyperbolicClient(args.api_key)

    try:
        nodes = client.list_marketplace()
    except requests.HTTPError as exc:
        error(f"Failed to list marketplace nodes: {exc}")
        return 1

    # --list short-circuits: print the catalog and exit.
    if args.list:
        log("Available marketplace nodes:")
        for node in nodes:
            print(summarize_node(node))
        return 0

    selected_node: Optional[Dict[str, Any]] = None

    # Node selection: explicit --node-name, or first match of the filters.
    if args.node_name:
        for node in nodes:
            if node.get("id") == args.node_name:
                selected_node = node
                break
        if not selected_node:
            error(f"Node '{args.node_name}' not found in marketplace list.")
            return 1
        if not args.cluster_name:
            args.cluster_name = selected_node.get("cluster_name")
    else:
        filtered_nodes = filter_nodes(nodes, args.gpu_count, args.region, args.supplier, args.max_price)
        if not filtered_nodes:
            error("No marketplace nodes match the specified constraints.")
            return 1
        selected_node = filtered_nodes[0]
        log("Selected node:")
        print("  " + summarize_node(selected_node))
        # Record the chosen node back onto args so later steps use it.
        args.cluster_name = selected_node.get("cluster_name")
        args.node_name = selected_node.get("id")

    if not args.cluster_name:
        error("Cluster name is required; unable to determine cluster for the selected node.")
        return 1

    # Repo URL is optional: without it the bootstrap reuses any repo on disk.
    repo_url = args.repo_url or guess_repo_url()
    if repo_url:
        log(f"Using repository: {repo_url}")
    else:
        warn("Could not determine repository URL. Auto-start will clone existing repo on instance if present.")

    balance = client.get_balance()
    if balance is not None:
        log(f"Current Hyperbolic balance: ${balance:.2f}")

    # Snapshot existing instance IDs so the new one can be found by diffing.
    before_instances = client.list_instances()
    before_ids = {str(inst.get("id")) for inst in before_instances if inst.get("id")}
    log(f"Launching instance on cluster '{args.cluster_name}' node '{args.node_name}' "
        f"with {args.gpu_count} GPU(s)...")

    try:
        create_response = client.create_instance(
            cluster_name=args.cluster_name,
            node_name=args.node_name,
            gpu_count=args.gpu_count,
        )
    except requests.HTTPError as exc:
        error(f"Failed to launch instance: {exc}")
        # Best effort: surface the API's error body when available.
        try:
            warn(f"Response payload: {exc.response.text}")  # type: ignore[attr-defined]
        except Exception:
            pass
        return 1

    instance_id = extract_instance_id(create_response, before_ids, client)
    log(f"Instance request acknowledged with id={instance_id}")

    try:
        instance = wait_for_instance(
            client=client,
            instance_id=instance_id,
            poll_seconds=args.poll_seconds,
            max_wait_minutes=args.max_wait_minutes,
        )
    except TimeoutError as exc:
        error(str(exc))
        return 1

    ip = extract_ip(instance)
    port = extract_ssh_port(instance)
    ssh_user = args.ssh_user

    # Print a ready-made SSH command for the user.
    log("Instance ready. Connection details:")
    if ip:
        ssh_parts = ["ssh", "-o", "StrictHostKeyChecking=no"]
        if args.ssh_key:
            ssh_parts.extend(["-i", args.ssh_key])
        if port != 22:
            ssh_parts.extend(["-p", str(port)])
        ssh_parts.append(f"{ssh_user}@{ip}")
        print("  SSH:", " ".join(ssh_parts))
    else:
        warn("Instance IP not available; SSH command cannot be constructed.")

    if args.no_deploy:
        log("Skipping deployment (--no-deploy supplied).")
        return 0

    # Build the remote bootstrap assets and push them over scp/ssh.
    env_file_remote = f"/home/{ssh_user}/nanochat_aquarat.env"
    bootstrap_script = build_bootstrap_script(
        repo_dir=args.repo_dir,
        run_script=args.run_script,
        branch=args.branch,
        repo_url=repo_url,
        env_file_remote=env_file_remote,
        auto_start=args.auto_start,
        tmux_session=args.tmux_session,
    )

    try:
        deploy_to_instance(
            instance=instance,
            bootstrap_script=bootstrap_script,
            env_pairs=env_pairs,
            env_file_remote=env_file_remote,
            ssh_user=ssh_user,
            ssh_key=args.ssh_key,
        )
    except subprocess.CalledProcessError as exc:
        # Deployment failure is not fatal to the instance; tell the user how to recover.
        error(f"Deployment failed: {exc}")
        warn("You can manually SSH to the instance and run the training script.")
        return 1

    log("Deployment complete.")
    if args.auto_start:
        log("Training should now be running on the instance.")
    else:
        log("Auto-start disabled; after SSHing in, run the configured script manually.")

    return 0
|
| 697 |
+
|
| 698 |
+
|
| 699 |
+
if __name__ == "__main__":
    # Script entry point: propagate main()'s exit code to the shell.
    raise SystemExit(main())
|
| 701 |
+
|
scripts/launch_lambda_training.py
ADDED
|
@@ -0,0 +1,535 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
Automation helper to launch a Lambda Labs instance and kick off the nanochat AQuA-RAT run.
|
| 4 |
+
|
| 5 |
+
Usage example:
|
| 6 |
+
|
| 7 |
+
python scripts/launch_lambda_training.py \
|
| 8 |
+
--ssh-key-name my-key \
|
| 9 |
+
--region us-west-1 \
|
| 10 |
+
--instance-type gpu_1x_a10 \
|
| 11 |
+
--repo-url https://github.com/your-org/nanochatAquaRat.git \
|
| 12 |
+
--auto-start \
|
| 13 |
+
--inject-env WANDB_API_KEY
|
| 14 |
+
|
| 15 |
+
By default the script will create cloud-init user-data that installs basic tooling,
|
| 16 |
+
clones the repository, copies an `.env` file when provided, and (optionally) runs
|
| 17 |
+
`run_aquarat_small.sh` inside a detached tmux session. The Lambda Cloud API key is
|
| 18 |
+
read from the `LAMBDA_API_KEY` environment variable or `--api-key`.
|
| 19 |
+
"""
|
| 20 |
+
|
| 21 |
+
from __future__ import annotations
|
| 22 |
+
|
| 23 |
+
import argparse
|
| 24 |
+
import json
|
| 25 |
+
import os
|
| 26 |
+
import shlex
|
| 27 |
+
import subprocess
|
| 28 |
+
import sys
|
| 29 |
+
import textwrap
|
| 30 |
+
import time
|
| 31 |
+
from collections import OrderedDict
|
| 32 |
+
from typing import Dict, Iterable, List, Optional, Sequence, Tuple
|
| 33 |
+
|
| 34 |
+
import requests
|
| 35 |
+
|
| 36 |
+
API_BASE = "https://cloud.lambda.ai/api/v1"
|
| 37 |
+
DEFAULT_PACKAGES = ["git", "curl", "tmux", "build-essential"]
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def log(msg: str) -> None:
    """Write an informational line, prefixed with [info], to stdout."""
    sys.stdout.write(f"[info] {msg}\n")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def warn(msg: str) -> None:
    """Write a warning line, prefixed with [warn], to stderr."""
    sys.stderr.write(f"[warn] {msg}\n")
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def error(msg: str) -> None:
    """Write an error line, prefixed with [error], to stderr."""
    sys.stderr.write(f"[error] {msg}\n")
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def shell_quote(value: str) -> str:
    """Return a shell-escaped string for safe embedding inside scripts.

    Thin named wrapper over shlex.quote, kept for readability at call sites.
    """
    return shlex.quote(value)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def guess_repo_url() -> Optional[str]:
    """Attempt to infer the git remote URL for the current repository."""
    try:
        result = subprocess.run(
            ["git", "config", "--get", "remote.origin.url"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.CalledProcessError, FileNotFoundError):
        # git missing, or no origin remote configured here.
        return None

    origin = result.stdout.strip()
    return origin or None
|
| 71 |
+
|
| 72 |
+
|
| 73 |
+
def collect_env_pairs(cli_pairs: Sequence[str], inject_names: Sequence[str]) -> List[Tuple[str, str]]:
    """Merge CLI KEY=VALUE entries with variables copied from the local environment.

    Later occurrences of the same key win (injected variables override CLI
    pairs of the same name).  Raises ValueError on malformed input or when
    an injected variable is not set locally.
    """
    merged: "OrderedDict[str, str]" = OrderedDict()

    for entry in cli_pairs:
        if "=" not in entry:
            raise ValueError(f"--env expects KEY=VALUE entries, got '{entry}'")
        name, _, value = entry.partition("=")
        name = name.strip()
        if not name:
            raise ValueError(f"Environment key is empty in '{entry}'")
        merged[name] = value

    for var in inject_names:
        if not var:
            raise ValueError("Encountered empty --inject-env name")
        if var not in os.environ:
            raise ValueError(f"--inject-env requested '{var}' but it is not set locally")
        merged[var] = os.environ[var]

    return list(merged.items())
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def build_bootstrap_script(
    repo_dir: str,
    run_script: str,
    branch: str,
    repo_url: Optional[str],
    env_file_remote: str,
    auto_start: bool,
    tmux_session: str,
) -> str:
    """Compose the bash script executed on the instance to prepare the training run."""
    # Header: strict mode plus the variables the rest of the script reads.
    script: List[str] = [
        "#!/usr/bin/env bash",
        "set -euxo pipefail",
        f'REPO_DIR="$HOME/{repo_dir}"',
        f'RUN_SCRIPT="{run_script}"',
        f'ENV_FILE="{env_file_remote}"',
        f'AUTO_START="{1 if auto_start else 0}"',
    ]

    if repo_url:
        # Clone on first boot, then fast-forward the requested branch.
        script.append(f"REPO_URL={shell_quote(repo_url)}")
        script += [
            'if [ ! -d "$REPO_DIR/.git" ]; then',
            '  rm -rf "$REPO_DIR"',
            '  git clone "$REPO_URL" "$REPO_DIR"',
            "fi",
            'cd "$REPO_DIR"',
            "git fetch --all --prune",
            f"git switch {shell_quote(branch)}",
            "git pull --ff-only || true",
        ]
    else:
        # No URL known: just make sure the working directory exists.
        script += [
            'mkdir -p "$REPO_DIR"',
            'cd "$REPO_DIR"',
        ]

    # Copy the uploaded env file into place and sanity-check the run script.
    script += [
        'if [ -f "$ENV_FILE" ]; then',
        '  cp "$ENV_FILE" .env',
        "fi",
        'if [ -f "$RUN_SCRIPT" ]; then',
        '  chmod +x "$RUN_SCRIPT"',
        "else",
        '  echo "Run script $RUN_SCRIPT not found; auto-start will be skipped." >&2',
        '  AUTO_START="0"',
        "fi",
    ]

    if auto_start:
        # Prefer tmux (keeps an attachable session); fall back to nohup.
        launch_tmux = (
            f'tmux new -d -s {shell_quote(tmux_session)} '
            '"cd \\"$REPO_DIR\\" && bash \\"$RUN_SCRIPT\\""'
        )
        launch_nohup = (
            'nohup bash -lc "cd \\"$REPO_DIR\\" && bash \\"$RUN_SCRIPT\\"" '
            '> "$HOME/nanochat-train.log" 2>&1 &'
        )
        script += [
            'if [ "$AUTO_START" = "1" ]; then',
            "  if command -v tmux >/dev/null 2>&1; then",
            f"    {launch_tmux}",
            "  else",
            f"    {launch_nohup}",
            "  fi",
            "fi",
        ]

    return "\n".join(script) + "\n"
|
| 176 |
+
|
| 177 |
+
|
| 178 |
+
def build_user_data(
    packages: Sequence[str],
    bootstrap_script: str,
    env_pairs: Sequence[Tuple[str, str]],
    env_file_remote: str,
) -> str:
    """Render cloud-init user-data with package installs, env file, and bootstrap script."""
    out: List[str] = ["#cloud-config", "package_update: true", "package_upgrade: false"]

    if packages:
        out.append("packages:")
        out.extend(f"  - {pkg}" for pkg in packages)

    out.append("write_files:")
    if env_pairs:
        # NOTE(review): values are embedded verbatim; a value containing a
        # newline would break the YAML block scalar — confirm callers never
        # pass multi-line values.
        dotenv = "\n".join(f"{key}={value}" for key, value in env_pairs) + "\n"
        out += [
            f"  - path: {env_file_remote}",
            "    owner: ubuntu:ubuntu",
            "    permissions: '0640'",
            "    content: |",
            textwrap.indent(dotenv, "      "),
        ]

    # The bootstrap script is always written and made executable.
    out += [
        "  - path: /home/ubuntu/bootstrap_nanochat.sh",
        "    owner: ubuntu:ubuntu",
        "    permissions: '0755'",
        "    content: |",
        textwrap.indent(bootstrap_script, "      "),
    ]

    # Run the bootstrap as the ubuntu user once cloud-init finishes writing files.
    out += [
        "runcmd:",
        "  - \"su - ubuntu -c '/home/ubuntu/bootstrap_nanochat.sh'\"",
    ]

    return "\n".join(out) + "\n"
|
| 223 |
+
|
| 224 |
+
|
| 225 |
+
class LambdaClient:
    """Minimal wrapper around the Lambda Cloud REST API."""

    def __init__(self, api_key: str) -> None:
        """Create an authenticated session; raises ValueError when no key is given."""
        if not api_key:
            raise ValueError("Lambda Cloud API key not provided")

        self.session = requests.Session()
        # All Lambda Cloud endpoints take a bearer token and speak JSON.
        self.session.headers.update(
            {
                "Authorization": f"Bearer {api_key}",
                "Accept": "application/json",
                "Content-Type": "application/json",
            }
        )

    def launch_instances(self, payload: Dict[str, object]) -> List[str]:
        """POST a launch request and return the list of new instance IDs."""
        response = self.session.post(
            f"{API_BASE}/instance-operations/launch",
            data=json.dumps(payload),
            timeout=60,
        )
        if response.status_code >= 400:
            # Surface the raw response body; Lambda puts the error detail there.
            raise requests.HTTPError(response.text, response=response)
        data = response.json()
        instance_ids = data["data"]["instance_ids"]
        return instance_ids

    def get_instance(self, instance_id: str) -> Optional[Dict[str, object]]:
        """Fetch one instance record; returns None when the ID is unknown (404)."""
        response = self.session.get(
            f"{API_BASE}/instances/{instance_id}",
            timeout=30,
        )
        if response.status_code == 404:
            return None
        if response.status_code >= 400:
            raise requests.HTTPError(response.text, response=response)
        return response.json()["data"]

    def wait_for_instance(
        self,
        instance_id: str,
        poll_seconds: int,
        max_wait_minutes: int,
    ) -> Dict[str, object]:
        """Poll until the instance is 'active' and return its record.

        Raises RuntimeError when the instance reaches a terminal status
        (terminated/terminating/preempted) and TimeoutError when it is not
        active within *max_wait_minutes*.
        """
        deadline = time.time() + max_wait_minutes * 60
        last_status = "unknown"
        while time.time() < deadline:
            instance = self.get_instance(instance_id)
            if not instance:
                # The record can lag right after launch; keep polling.
                time.sleep(poll_seconds)
                continue

            status = str(instance.get("status", "unknown"))
            if status != last_status:
                # Only log transitions to keep the output readable.
                log(f"Instance {instance_id} status: {status}")
                last_status = status

            if status == "active":
                return instance
            if status in {"terminated", "terminating", "preempted"}:
                raise RuntimeError(f"Instance {instance_id} entered terminal status '{status}'")
            if status == "unhealthy":
                warn(f"Instance {instance_id} reported unhealthy; continuing to poll")

            time.sleep(poll_seconds)

        raise TimeoutError(f"Timed out waiting for instance {instance_id} to become active")
|
| 293 |
+
|
| 294 |
+
|
| 295 |
+
def parse_args(argv: Optional[Sequence[str]] = None) -> argparse.Namespace:
    """Build and evaluate the CLI for the Lambda launch helper.

    *argv* defaults to sys.argv when None (standard argparse behaviour).
    """
    parser = argparse.ArgumentParser(
        description="Launch a Lambda Labs instance and prepare the nanochat training run."
    )
    add = parser.add_argument

    # Credentials and instance placement
    add("--api-key", default=os.getenv("LAMBDA_API_KEY"), help="Lambda Cloud API key (default: read from LAMBDA_API_KEY).")
    add("--region", default="us-west-1", help="Lambda Cloud region name.")
    add("--instance-type", default="gpu_1x_a10", help="Instance type to launch (see Lambda Cloud docs for the catalog).")
    add("--ssh-key-name", required=True, help="Name of the SSH key already registered with Lambda Cloud.")
    add("--quantity", type=int, default=1, help="Number of instances to launch (auto-start only supports 1).")
    add("--name", help="Friendly name to assign to the instance(s).")

    # Repository and run configuration
    add("--repo-url", help="Git URL for the nanochat repository (default: auto-detect from current repo).")
    add("--branch", default="main", help="Git branch to checkout on the instance.")
    add("--repo-dir", default="nanochatAquaRat", help="Directory name to clone the repository into on the instance.")
    add("--run-script", default="run_aquarat_small.sh", help="Relative path to the training launch script inside the repo.")
    add("--tmux-session", default="nanochat-train", help="tmux session name used when --auto-start is active.")
    add("--auto-start", action="store_true", help="Kick off the training script automatically after provisioning completes.")

    # Remote environment file
    # NOTE(review): argparse appends into the shared default list across
    # repeated parses — harmless here since we parse once per process.
    add("--env", action="append", default=[], help="Additional KEY=VALUE pairs to write into the remote .env file (repeatable).")
    add("--inject-env", action="append", default=[], help="Names of local environment variables whose values should populate the remote .env.")
    add("--env-file-name", default=".env.lambda", help="Filename (relative to /home/ubuntu) for the generated environment file.")

    # Image selection (mutually exclusive, validated in main)
    add("--image-id", help="Optional image ID to use instead of the default Lambda Stack image.")
    add("--image-family", help="Optional image family name to use instead of the default image.")

    # Waiting / user-data behaviour
    add("--max-wait-minutes", type=int, default=25, help="Maximum minutes to wait for the instance to become active.")
    add("--poll-seconds", type=int, default=20, help="Polling interval while waiting for the instance to become active.")
    add("--skip-wait", action="store_true", help="Exit after requesting the launch without waiting for active status.")
    add("--no-user-data", action="store_true", help="Skip sending user-data; instance boots with the stock image configuration.")
    add("--print-user-data", action="store_true", help="Print the generated user-data to stdout before launching.")

    return parser.parse_args(argv)
|
| 401 |
+
|
| 402 |
+
|
| 403 |
+
def _validation_problem(args) -> Optional[str]:
    """Return an error message for invalid CLI argument combinations, or None."""
    if args.image_id and args.image_family:
        return "Provide only one of --image-id or --image-family."
    if args.auto_start and args.quantity != 1:
        return "--auto-start currently supports a single instance (set --quantity=1)."
    if not args.api_key:
        return "Lambda Cloud API key not provided. Use --api-key or set LAMBDA_API_KEY."
    return None


def _resolve_repo_url(args) -> Optional[str]:
    """Return the repo URL from args, or try to auto-detect it (logging the outcome)."""
    repo_url = args.repo_url
    if not repo_url:
        repo_url = guess_repo_url()
        if repo_url:
            log(f"Discovered repository URL: {repo_url}")
        else:
            warn(
                "Could not auto-detect repository URL; "
                "pass --repo-url if you want the instance to clone the repo automatically."
            )
    return repo_url


def _build_payload(args, user_data: Optional[str]) -> Dict[str, object]:
    """Assemble the Lambda Cloud launch request payload from CLI args."""
    payload: Dict[str, object] = {
        "region_name": args.region,
        "instance_type_name": args.instance_type,
        "ssh_key_names": [args.ssh_key_name],
    }
    if args.quantity:
        payload["quantity"] = args.quantity
    if args.name:
        payload["name"] = args.name
    if user_data:
        payload["user_data"] = user_data
    # --image-id and --image-family are mutually exclusive (validated earlier).
    if args.image_id:
        payload["image"] = {"id": args.image_id}
    elif args.image_family:
        payload["image"] = {"family": args.image_family}
    return payload


def _wait_for_instances(client, instance_ids, args) -> Optional[List[Dict[str, object]]]:
    """Poll each requested instance until active; return instance records or None on failure."""
    instances: List[Dict[str, object]] = []
    for instance_id in instance_ids:
        try:
            instance = client.wait_for_instance(
                instance_id=instance_id,
                poll_seconds=args.poll_seconds,
                max_wait_minutes=args.max_wait_minutes,
            )
        except (RuntimeError, TimeoutError, requests.HTTPError) as exc:
            error(f"Failed while waiting for instance {instance_id}: {exc}")
            return None
        instances.append(instance)
    return instances


def _report_instances(instances, args) -> None:
    """Log each active instance's IP, an SSH hint, and next-step instructions."""
    for instance in instances:
        ip = instance.get("ip") or "<pending>"
        name = instance.get("name") or instance.get("id")
        log(f"Instance {name} is active with public IP {ip}")
        if ip and ip != "<pending>":
            log(
                f"SSH command: ssh -i /path/to/key.pem ubuntu@{ip}"
            )

    if args.auto_start:
        log(
            "Auto-start enabled. Training is running inside tmux; attach with "
            f"`ssh ...` then `tmux attach -t {args.tmux_session}`."
        )
    else:
        log(
            "Auto-start disabled. After SSH'ing in, run "
            f"`cd ~/{args.repo_dir} && bash {args.run_script}`."
        )


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Launch one or more Lambda Cloud instances configured for training.

    Validates CLI arguments, builds the cloud-init user-data (bootstrap script
    plus remote .env file), requests the launch through the Lambda API, and --
    unless --skip-wait is given -- waits for each instance to become active and
    prints connection instructions.

    Args:
        argv: Optional argument vector; defaults to sys.argv[1:] via parse_args.

    Returns:
        Process exit code: 0 on success, 1 on validation or API failure.
    """
    args = parse_args(argv)

    problem = _validation_problem(args)
    if problem:
        error(problem)
        return 1

    repo_url = _resolve_repo_url(args)

    try:
        env_pairs = collect_env_pairs(args.env, args.inject_env)
    except ValueError as exc:
        # collect_env_pairs raises ValueError for malformed KEY=VALUE input.
        error(str(exc))
        return 1

    # Remote path where the generated environment file will be written.
    env_file_remote = f"/home/ubuntu/{args.env_file_name}"

    bootstrap_script = build_bootstrap_script(
        repo_dir=args.repo_dir,
        run_script=args.run_script,
        branch=args.branch,
        repo_url=repo_url,
        env_file_remote=env_file_remote,
        auto_start=args.auto_start,
        tmux_session=args.tmux_session,
    )

    user_data: Optional[str] = None
    if not args.no_user_data:
        user_data = build_user_data(
            packages=DEFAULT_PACKAGES,
            bootstrap_script=bootstrap_script,
            env_pairs=env_pairs,
            env_file_remote=env_file_remote,
        )
        if args.print_user_data:
            print(user_data)
    else:
        if args.print_user_data:
            print("# user-data disabled (--no-user-data)")

    payload = _build_payload(args, user_data)

    client = LambdaClient(args.api_key)

    log(
        "Requesting instance launch "
        f"(region={args.region}, type={args.instance_type}, quantity={args.quantity})"
    )

    try:
        instance_ids = client.launch_instances(payload)
    except requests.HTTPError as exc:
        error(f"Instance launch failed: {exc}")
        if exc.response is not None:
            warn(f"Response content: {exc.response.text}")
        return 1

    log(f"Requested instance IDs: {', '.join(instance_ids)}")

    if args.skip_wait:
        log("Skipping wait (--skip-wait supplied).")
        return 0

    instances = _wait_for_instances(client, instance_ids, args)
    if instances is None:
        return 1

    _report_instances(instances, args)

    return 0
|
| 532 |
+
|
| 533 |
+
|
| 534 |
+
if __name__ == "__main__":
    # Script entry point: propagate main()'s exit code to the shell.
    raise SystemExit(main())
|