# =============================================================================
# Co-MADDPG Wireless Resource Allocation — Default Configuration
# =============================================================================
# All hyperparameters follow the paper's specifications for semantic-aware
# cooperative multi-agent resource allocation in OFDMA systems.
# =============================================================================

env:
  # Wireless environment settings: OFDMA link parameters, cell geometry,
  # user mix, QoS constraint, compression bounds and QoE weights.
  #
  # Scientific-notation values keep a decimal point and a signed exponent
  # (e.g. 10.0e+6) so that YAML 1.1 loaders such as PyYAML resolve them as
  # floats rather than strings.

  # OFDMA system parameters
  num_subcarriers: 64           # N: total number of OFDM subcarriers
  bandwidth: 10.0e+6            # B: total system bandwidth (Hz)
  # Derived value: 10.0e+6 / 64 = 156250. Update it together with
  # bandwidth and num_subcarriers.
  subcarrier_spacing: 156250.0  # Δf: subcarrier spacing (Hz), B/N
  max_power: 1.0                # P_max: maximum transmit power per user (W)
  noise_psd: -174               # N0: noise power spectral density (dBm/Hz)
  carrier_freq: 3.5             # f_c: carrier frequency (GHz)

  # Cell geometry
  min_distance: 50              # d_min: minimum BS-user distance (m)
  max_distance: 500             # d_max: maximum BS-user distance (m)

  # User configuration
  num_semantic_users: 3         # K_s: number of semantic communication users
  num_traditional_users: 3      # K_b: number of traditional bit-rate users

  # QoS constraints
  # R_min: minimum rate requirement for traditional users (bps)
  min_rate_req: 5.0e+5

  # Semantic compression ratio bounds
  rho_max: 1.0                  # ρ_max: maximum compression ratio (no compression)
  rho_min: 0.05                 # ρ_min: minimum compression ratio

  # QoE weighting factors (the defaults sum to 1.0)
  # NOTE(review): presumably w1 + w2 must stay equal to 1 — confirm against
  # the QoE definition before changing one weight alone.
  w1: 0.7                       # w1: weight for semantic similarity (SSIM)
  w2: 0.3                       # w2: weight for compression efficiency

training:
  # Training-loop settings: episode limits, replay buffer, learning rates,
  # discounting, exploration noise and the cooperative-mode parameters.
  #
  # Learning rates keep a decimal point and a signed exponent (e.g. 1.0e-4)
  # so that YAML 1.1 loaders such as PyYAML resolve them as floats.

  # Episode configuration
  max_episodes: 5000       # total training episodes
  max_steps: 200           # maximum steps per episode

  # Replay buffer and sampling
  batch_size: 256          # mini-batch size for gradient updates
  buffer_capacity: 100000  # replay buffer capacity

  # Learning rates
  actor_lr: 1.0e-4         # actor network learning rate
  critic_lr: 3.0e-4        # critic network learning rate

  # Discount and soft-update
  gamma: 0.95              # discount factor γ
  tau: 0.01                # soft target update rate τ

  # Ornstein-Uhlenbeck exploration noise
  ou_sigma_init: 0.2       # initial noise standard deviation
  ou_sigma_min: 0.01       # minimum noise standard deviation
  ou_theta: 0.15           # OU mean-reversion rate θ

  # Cooperative mechanism parameters
  beta: 5.0                # β: cooperation benefit scaling factor
  q_threshold: 0.6         # Q-value threshold for cooperation mode switch
  update_interval: 5       # target network update interval (episodes)

  # Reproducibility
  # NOTE(review): the nesting of `seed` under `training` is reconstructed
  # from the section layout — confirm against the code that reads this file.
  seed: 42

network:
  # Hidden-layer widths for the actor and critic networks, one list entry
  # per hidden layer. Short flow-style lists are kept because each is a
  # small leaf value.

  # Actor network hidden layer dimensions
  actor_hidden: [256, 256, 128]

  # Critic network hidden layer dimensions
  critic_hidden: [512, 512, 256]

reward:
  # Reward-mixing weights for the two operating modes. With the defaults,
  # each mode's weights sum to 1.0 (0.5 + 0.3 + 0.2 and 0.8 + 0.2).
  # NOTE(review): presumably each set must keep summing to 1 — confirm
  # against the reward definition before changing one weight alone.

  # Cooperative mode reward weights
  coop_self: 0.5   # α_self: weight on own reward (cooperative)
  coop_other: 0.3  # α_other: weight on other agents' reward
  coop_sys: 0.2    # α_sys: weight on system-level reward

  # Competitive mode reward weights
  comp_self: 0.8   # α_self: weight on own reward (competitive)
  comp_sys: 0.2    # α_sys: weight on system-level reward