From 06282c731467e8da594d76059220766c39f596d6 Mon Sep 17 00:00:00 2001 From: hc <1328308360@qq.com> Date: Tue, 31 Mar 2026 17:41:36 +0800 Subject: [PATCH] feat(skills): add pytorch-patterns skill --- .opencode/skills/pytorch-patterns/SKILL.md | 528 +++++++++++++++++++++ 1 file changed, 528 insertions(+) create mode 100644 .opencode/skills/pytorch-patterns/SKILL.md diff --git a/.opencode/skills/pytorch-patterns/SKILL.md b/.opencode/skills/pytorch-patterns/SKILL.md new file mode 100644 index 0000000..a25a0db --- /dev/null +++ b/.opencode/skills/pytorch-patterns/SKILL.md @@ -0,0 +1,528 @@ +--- +name: pytorch-patterns +description: Use when writing PyTorch code to follow best practices and common patterns +--- + +# PyTorch Patterns Skill + +## Overview + +This skill provides best practices and common patterns for writing PyTorch code. Use this when implementing neural networks, training loops, data loading, and related deep learning infrastructure. + +**Announce:** "I'm using the pytorch-patterns skill for best practice code." + +--- + +## Model Definition + +### Basic nn.Module Pattern + +```python +from typing import NamedTuple +import torch +import torch.nn as nn +import torch.nn.functional as F + + +class ModelConfig: + """Configuration for the model.""" + def __init__( + self, + input_dim: int = 768, + hidden_dim: int = 512, + output_dim: int = 10, + dropout: float = 0.1, + num_layers: int = 2, + ): + self.input_dim = input_dim + self.hidden_dim = hidden_dim + self.output_dim = output_dim + self.dropout = dropout + self.num_layers = num_layers + + +class ModelOutput(NamedTuple): + """Typed output container for model forward pass.""" + logits: torch.Tensor + hidden_states: torch.Tensor + attention_weights: torch.Tensor | None = None + + +class MyModel(nn.Module): + """Example model following best practices.""" + + def __init__(self, config: ModelConfig): + super().__init__() + self.config = config + + # Define layers + self.input_proj = nn.Linear(config.input_dim, config.hidden_dim) + self.layers = nn.ModuleList([ + nn.Linear(config.hidden_dim, config.hidden_dim) + for _ in range(config.num_layers) + ]) + self.output_proj = nn.Linear(config.hidden_dim, config.output_dim) + self.dropout = nn.Dropout(config.dropout) + self.layer_norm = nn.LayerNorm(config.hidden_dim) + + # Initialize weights + self._init_weights() + + def _init_weights(self): + """Initialize model weights with appropriate schemes.""" + for module in self.modules(): + if isinstance(module, nn.Linear): + nn.init.xavier_uniform_(module.weight) + if module.bias is not None: + nn.init.zeros_(module.bias) + elif isinstance(module, nn.LayerNorm): + nn.init.ones_(module.weight) + nn.init.zeros_(module.bias) + + @property + def device(self) -> torch.device: + """Get the device of model parameters.""" + return next(self.parameters()).device + + def forward(self, x: torch.Tensor) -> ModelOutput: + """Forward pass with typed output.""" + # Input projection + hidden = self.input_proj(x) + hidden = self.layer_norm(hidden) + hidden = F.gelu(hidden) + + # Process through layers + for layer in self.layers: + residual = hidden + hidden = layer(hidden) + hidden = self.dropout(hidden) + hidden = F.gelu(hidden) + hidden = hidden + residual # Residual connection + + # Output projection + logits = self.output_proj(hidden) + + return ModelOutput( + logits=logits, + hidden_states=hidden, + attention_weights=None, + ) +``` + +--- + +## Device Management + +### Device Property Pattern + +```python +class DeviceAwareModule(nn.Module): + """Module with device awareness.""" + + @property + def device(self) -> torch.device: + """Infer device from model parameters.""" + return next(self.parameters()).device + + @property + def dtype(self) -> torch.dtype: + """Infer dtype from model parameters.""" + return next(self.parameters()).dtype +``` + +### Training Script Device Setup + +```python +def get_device() -> torch.device: + """Get the best available device.""" + if torch.cuda.is_available(): + device = torch.device("cuda") + print(f"Using CUDA: {torch.cuda.get_device_name(0)}") + elif torch.backends.mps.is_available(): + device = torch.device("mps") + print("Using Apple MPS") + else: + device = torch.device("cpu") + print("Using CPU") + return device + + +# Usage +device = get_device() +model = MyModel(config).to(device) +``` + +--- + +## Training Loop + +### Standard Training Epoch + +```python +def train_epoch( + model: nn.Module, + dataloader: torch.utils.data.DataLoader, + optimizer: torch.optim.Optimizer, + criterion: nn.Module, + device: torch.device, + max_grad_norm: float | None = 1.0, +) -> dict[str, float]: + """Train for one epoch.""" + model.train() + total_loss = 0.0 + num_batches = 0 + + for batch_idx, (inputs, targets) in enumerate(dataloader): + # Move to device + inputs = inputs.to(device) + targets = targets.to(device) + + # Forward pass + optimizer.zero_grad() + outputs = model(inputs) + loss = criterion(outputs.logits, targets) + + # Backward pass + loss.backward() + + # Gradient clipping (optional but recommended) + if max_grad_norm is not None: + torch.nn.utils.clip_grad_norm_(model.parameters(), max_grad_norm) + + # Update weights + optimizer.step() + + # Accumulate metrics (use .item() to prevent memory leak) + total_loss += loss.item() + num_batches += 1 + + return { + "train_loss": total_loss / num_batches, + } +``` + +### Evaluation Function + +```python +@torch.no_grad() +def evaluate( + model: nn.Module, + dataloader: torch.utils.data.DataLoader, + criterion: nn.Module, + device: torch.device, +) -> dict[str, float]: + """Evaluate the model.""" + model.eval() + total_loss = 0.0 + correct = 0 + total = 0 + + for inputs, targets in dataloader: + inputs = inputs.to(device) + targets = targets.to(device) + + outputs = model(inputs) + loss = criterion(outputs.logits, targets) + + total_loss += loss.item() + + # Calculate accuracy + predictions = outputs.logits.argmax(dim=-1) + correct += (predictions == targets).sum().item() + total += targets.size(0) + + return { + "eval_loss": total_loss / len(dataloader), + "accuracy": correct / total, + } +``` + +--- + +## Data Loading + +### Custom Dataset Pattern + +```python +from torch.utils.data import Dataset, DataLoader + + +class CustomDataset(Dataset): + """Example custom dataset.""" + + def __init__(self, data: list, transform=None): + self.data = data + self.transform = transform + + def __len__(self) -> int: + return len(self.data) + + def __getitem__(self, idx: int) -> tuple[torch.Tensor, torch.Tensor]: + item = self.data[idx] + features = torch.tensor(item["features"], dtype=torch.float32) + label = torch.tensor(item["label"], dtype=torch.long) + + if self.transform: + features = self.transform(features) + + return features, label +``` + +### DataLoader Helper + +```python +def get_dataloader( + dataset: Dataset, + batch_size: int = 32, + shuffle: bool = True, + num_workers: int = 4, + pin_memory: bool = True, + drop_last: bool = False, +) -> DataLoader: + """Create a DataLoader with sensible defaults.""" + # Adjust for Windows/macOS compatibility + if num_workers > 0: + import platform + if platform.system() == "Windows": + num_workers = 0 # Windows has issues with multiprocessing + + return DataLoader( + dataset, + batch_size=batch_size, + shuffle=shuffle, + num_workers=num_workers, + pin_memory=pin_memory and torch.cuda.is_available(), + drop_last=drop_last, + persistent_workers=num_workers > 0, + ) +``` + +--- + +## Checkpointing + +### Save Checkpoint + +```python +def save_checkpoint( + model: nn.Module, + optimizer: torch.optim.Optimizer, + epoch: int, + loss: float, + path: str, + scheduler: torch.optim.lr_scheduler._LRScheduler | None = None, + **kwargs, +) -> None: + """Save a training checkpoint.""" + checkpoint = { + "epoch": epoch, + "model_state_dict": model.state_dict(), + "optimizer_state_dict": optimizer.state_dict(), + "loss": loss, + } + + if scheduler is not None: + checkpoint["scheduler_state_dict"] = scheduler.state_dict() + + # Add any additional data + checkpoint.update(kwargs) + + torch.save(checkpoint, path) + print(f"Checkpoint saved to {path}") +``` + +### Load Checkpoint + +```python +def load_checkpoint( + path: str, + model: nn.Module, + optimizer: torch.optim.Optimizer | None = None, + scheduler: torch.optim.lr_scheduler._LRScheduler | None = None, + device: torch.device | None = None, +) -> dict: + """Load a training checkpoint.""" + # Use weights_only=True for security (prevents arbitrary code execution) + checkpoint = torch.load(path, map_location=device, weights_only=True) + + model.load_state_dict(checkpoint["model_state_dict"]) + + if optimizer is not None and "optimizer_state_dict" in checkpoint: + optimizer.load_state_dict(checkpoint["optimizer_state_dict"]) + + if scheduler is not None and "scheduler_state_dict" in checkpoint: + scheduler.load_state_dict(checkpoint["scheduler_state_dict"]) + + print(f"Loaded checkpoint from {path} (epoch {checkpoint.get('epoch', 'unknown')})") + return checkpoint +``` + +--- + +## Reproducibility + +### Set Seed Function + +```python +import random +import numpy as np +import torch + + +def set_seed(seed: int = 42, deterministic: bool = True) -> None: + """Set random seeds for reproducibility.""" + random.seed(seed) + np.random.seed(seed) + torch.manual_seed(seed) + + if torch.cuda.is_available(): + torch.cuda.manual_seed(seed) + torch.cuda.manual_seed_all(seed) # For multi-GPU + + if deterministic: + # Makes operations deterministic but may reduce performance + torch.backends.cudnn.deterministic = True + torch.backends.cudnn.benchmark = False + else: + # Better performance but non-deterministic + torch.backends.cudnn.deterministic = False + torch.backends.cudnn.benchmark = True + + +# Usage at start of training script +set_seed(42) +``` + +--- + +## Common Gotchas + +### 1. In-Place Operations Breaking Autograd + +```python +# BAD - In-place operation can break autograd graph +def bad_forward(x): + x += 1 # In-place modification + return x * 2 + +# GOOD - Create new tensor +def good_forward(x): + x = x + 1 # Creates new tensor + return x * 2 +``` + +### 2. Memory Leaks from Not Detaching + +```python +# BAD - Keeps computation graph in memory +losses = [] +for batch in dataloader: + loss = model(batch) + losses.append(loss) # Holds entire graph! + +# GOOD - Detach with .item() for scalars +losses = [] +for batch in dataloader: + loss = model(batch) + losses.append(loss.item()) # Just the number, no graph + +# GOOD - Detach for non-scalar tensors +features = [] +for batch in dataloader: + feat = model.encode(batch) + features.append(feat.detach().cpu()) # Detached, moved to CPU +``` + +### 3. Mixed Precision Training + +```python +from torch.cuda.amp import autocast, GradScaler + + +def train_with_amp( + model: nn.Module, + dataloader: DataLoader, + optimizer: torch.optim.Optimizer, + criterion: nn.Module, + device: torch.device, +) -> float: + """Training with Automatic Mixed Precision.""" + model.train() + scaler = GradScaler() + total_loss = 0.0 + + for inputs, targets in dataloader: + inputs = inputs.to(device) + targets = targets.to(device) + + optimizer.zero_grad() + + # Forward pass with autocast + with autocast(): + outputs = model(inputs) + loss = criterion(outputs.logits, targets) + + # Backward pass with scaled gradients + scaler.scale(loss).backward() + + # Unscale before clipping + scaler.unscale_(optimizer) + torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0) + + # Step with scaler + scaler.step(optimizer) + scaler.update() + + total_loss += loss.item() + + return total_loss / len(dataloader) +``` + +### 4. Forgetting model.eval() and model.train() + +```python +# BAD - Dropout/BatchNorm behave incorrectly +def evaluate_bad(model, dataloader): + # model.train() is still active! + for batch in dataloader: + output = model(batch) + +# GOOD - Always set mode explicitly +def evaluate_good(model, dataloader): + model.eval() # Disable dropout, use running stats for BatchNorm + with torch.no_grad(): + for batch in dataloader: + output = model(batch) + model.train() # Restore training mode if continuing +``` + +### 5. Proper Tensor Creation on Device + +```python +# BAD - Creates on CPU then moves (slow) +tensor = torch.zeros(100, 100).to(device) + +# GOOD - Create directly on device +tensor = torch.zeros(100, 100, device=device) + +# GOOD - Create with same device/dtype as reference +tensor = torch.zeros_like(reference_tensor) +tensor = torch.empty(100, 100, device=model.device, dtype=model.dtype) +``` + +--- + +## Quick Reference Checklist + +- [ ] Use `nn.Module` with proper `__init__` and `forward` +- [ ] Initialize weights with `_init_weights` method +- [ ] Use `@property device` for device inference +- [ ] Always use `.item()` when logging scalar losses +- [ ] Use `@torch.no_grad()` decorator for evaluation +- [ ] Call `model.train()` and `model.eval()` explicitly +- [ ] Use `weights_only=True` when loading checkpoints +- [ ] Set seeds for reproducibility at script start +- [ ] Avoid in-place operations in forward pass +- [ ] Use mixed precision for faster training on GPU +- [ ] Clip gradients to prevent exploding gradients +- [ ] Use `pin_memory=True` with CUDA for faster data transfer