# =============================================================================
# Co-MADDPG Wireless Resource Allocation — Default Configuration
# =============================================================================
# All hyperparameters follow the paper's specifications for semantic-aware
# cooperative multi-agent resource allocation in OFDMA systems.
# =============================================================================

env:
  # ---- OFDMA system parameters ----
  # N: total number of OFDM subcarriers.
  num_subcarriers: 64
  # B: total system bandwidth, in Hz.
  bandwidth: 10.0e+6
  # Δf: subcarrier spacing, in Hz. Equals B/N (10 MHz / 64 = 156.25 kHz).
  # The value is written out by hand here, so revisit it whenever
  # bandwidth or num_subcarriers changes.
  subcarrier_spacing: 156250.0
  # P_max: maximum transmit power per user, in W (linear scale).
  max_power: 1.0
  # N0: noise power spectral density, in dBm/Hz (log scale, unlike max_power).
  noise_psd: -174
  # f_c: carrier frequency, in GHz (note: not Hz like bandwidth).
  carrier_freq: 3.5

  # ---- Cell geometry ----
  # d_min: smallest BS-user distance, in m.
  min_distance: 50
  # d_max: largest BS-user distance, in m.
  max_distance: 500

  # ---- User population ----
  # K_s: number of semantic communication users.
  num_semantic_users: 3
  # K_b: number of traditional bit-rate users.
  num_traditional_users: 3

  # ---- QoS constraints ----
  # R_min: minimum rate required by traditional users, in bps.
  min_rate_req: 5.0e+5

  # ---- Semantic compression ratio bounds ----
  # ρ_max: upper bound on the compression ratio; 1.0 means no compression.
  rho_max: 1.0
  # ρ_min: lower bound on the compression ratio.
  rho_min: 0.05

  # ---- QoE weighting factors (w1 + w2 = 1.0) ----
  # w1: weight on the semantic similarity (SSIM) term.
  w1: 0.7
  # w2: weight on the compression efficiency term.
  w2: 0.3

training:
  # ---- Episode budget ----
  # Total number of training episodes.
  max_episodes: 5000
  # Upper bound on the number of steps in one episode.
  max_steps: 200

  # ---- Experience replay ----
  # Mini-batch size used for each gradient update.
  batch_size: 256
  # Capacity of the replay buffer.
  buffer_capacity: 100000

  # ---- Optimizer step sizes ----
  # Learning rate of the actor network.
  actor_lr: 1.0e-4
  # Learning rate of the critic network.
  critic_lr: 3.0e-4

  # ---- Return discounting and target tracking ----
  # γ: discount factor.
  gamma: 0.95
  # τ: soft target update rate.
  tau: 0.01

  # ---- Ornstein-Uhlenbeck exploration noise ----
  # Noise standard deviation at the start of training.
  ou_sigma_init: 0.2
  # Smallest noise standard deviation allowed.
  # NOTE(review): presumably sigma decays from ou_sigma_init toward this
  # floor; the schedule is not defined in this file - confirm in the trainer.
  ou_sigma_min: 0.01
  # θ: mean-reversion rate of the OU process.
  ou_theta: 0.15

  # ---- Cooperative mechanism ----
  # β: scaling factor applied to the cooperation benefit.
  beta: 5.0
  # Q-value threshold at which the cooperation mode switches.
  q_threshold: 0.6
  # Target network update interval, counted in episodes.
  update_interval: 5

  # ---- Reproducibility ----
  # Random seed.
  seed: 42

network:
  # Hidden layer dimensions of the actor network.
  actor_hidden:
    - 256
    - 256
    - 128

  # Hidden layer dimensions of the critic network.
  critic_hidden:
    - 512
    - 512
    - 256

reward:
  # ---- Cooperative mode: the three weights sum to 1.0 ----
  # α_self: weight on the agent's own reward.
  coop_self: 0.5
  # α_other: weight on the other agents' reward.
  coop_other: 0.3
  # α_sys: weight on the system-level reward.
  coop_sys: 0.2

  # ---- Competitive mode: the two weights sum to 1.0 ----
  # There is no "other agents" weight in this mode.
  # α_self: weight on the agent's own reward.
  comp_self: 0.8
  # α_sys: weight on the system-level reward.
  comp_sys: 0.2