---
# =============================================================================
# Co-MADDPG Wireless Resource Allocation — Default Configuration
# =============================================================================
# All hyperparameters follow the paper's specifications for semantic-aware
# cooperative multi-agent resource allocation in OFDMA systems.
#
# Top-level sections: env, training, network, reward.
#
# NOTE: values in scientific notation keep an explicit decimal point and a
# signed exponent (e.g. 1.0e-4). YAML 1.1 loaders such as PyYAML only resolve
# that form to a float; a bare `1e-4` would be loaded as a string.
# =============================================================================

env:
  # OFDMA system parameters
  num_subcarriers: 64           # N: total number of OFDM subcarriers
  bandwidth: 10.0e+6            # B: total system bandwidth (Hz)
  # Equals bandwidth / num_subcarriers for the values above (10.0e+6 / 64).
  # NOTE(review): presumably must be updated by hand if B or N changes.
  subcarrier_spacing: 156250.0  # Δf: subcarrier spacing (Hz), B/N
  max_power: 1.0                # P_max: maximum transmit power per user (W)
  noise_psd: -174               # N0: noise power spectral density (dBm/Hz)
  carrier_freq: 3.5             # f_c: carrier frequency (GHz)

  # Cell geometry
  min_distance: 50   # d_min: minimum BS-user distance (m)
  max_distance: 500  # d_max: maximum BS-user distance (m)

  # User configuration
  num_semantic_users: 3     # K_s: number of semantic communication users
  num_traditional_users: 3  # K_b: number of traditional bit-rate users

  # QoS constraints
  # R_min: minimum rate requirement for traditional users (bps)
  min_rate_req: 5.0e+5

  # Semantic compression ratio bounds
  rho_max: 1.0   # ρ_max: maximum compression ratio (no compression)
  rho_min: 0.05  # ρ_min: minimum compression ratio

  # QoE weighting factors (w1 + w2 = 1.0 with these defaults)
  w1: 0.7  # w1: weight for semantic similarity (SSIM)
  w2: 0.3  # w2: weight for compression efficiency

training:
  # Episode configuration
  max_episodes: 5000  # total training episodes
  max_steps: 200      # maximum steps per episode

  # Replay buffer and sampling
  batch_size: 256          # mini-batch size for gradient updates
  buffer_capacity: 100000  # replay buffer capacity

  # Learning rates
  actor_lr: 1.0e-4   # actor network learning rate
  critic_lr: 3.0e-4  # critic network learning rate

  # Discount and soft-update
  gamma: 0.95  # discount factor γ
  tau: 0.01    # soft target update rate τ

  # Ornstein-Uhlenbeck exploration noise
  ou_sigma_init: 0.2  # initial noise standard deviation
  ou_sigma_min: 0.01  # minimum noise standard deviation
  ou_theta: 0.15      # OU mean-reversion rate θ

  # Cooperative mechanism parameters
  beta: 5.0         # β: cooperation benefit scaling factor
  q_threshold: 0.6  # Q-value threshold for cooperation mode switch
  # NOTE(review): both `tau` (soft update) and `update_interval` are set;
  # confirm in the trainer how the two interact.
  update_interval: 5  # target network update interval (episodes)

  # Reproducibility
  seed: 42

network:
  # Actor network hidden layer dimensions
  actor_hidden: [256, 256, 128]

  # Critic network hidden layer dimensions
  critic_hidden: [512, 512, 256]

reward:
  # Cooperative mode reward weights (0.5 + 0.3 + 0.2 = 1.0)
  coop_self: 0.5   # α_self: weight on own reward (cooperative)
  coop_other: 0.3  # α_other: weight on other agents' reward
  coop_sys: 0.2    # α_sys: weight on system-level reward

  # Competitive mode reward weights (0.8 + 0.2 = 1.0)
  comp_self: 0.8  # α_self: weight on own reward (competitive)
  comp_sys: 0.2   # α_sys: weight on system-level reward