import numpy as np

"""
Baseline: EqualAllocation
=========================

Purpose (lower bound):
- This baseline represents a simple heuristic approach with no learning
  involved. It serves as a lower bound for performance comparison, showing
  the system behavior under a naive, fixed resource allocation strategy.

Difference from Co-MADDPG:
1. Learning: no learning vs. deep reinforcement learning.
2. Action selection: always fixed at [0.5, 0.5, 0.5] for all resource
   parameters (subcarrier fraction, power, m_param).

Contribution:
- Contributes to performance baseline tables as the "Random/Fixed"
  comparison point.
"""


class DummyBuffer:
    """Dummy replay buffer that satisfies train.py's push/len interface."""

    def push(self, *args):
        """Discard the transition — no learning is performed."""
        pass

    def __len__(self):
        """Always report 0 samples so training code never draws a batch."""
        return 0


class EqualAllocation:
    """EqualAllocation algorithm implementation (fixed-action heuristic)."""

    def __init__(self, config):
        """Store the configuration and attach a no-op replay buffer.

        Args:
            config: dict-like configuration; only the optional 'reward'
                sub-dict is read (in compute_rewards).
        """
        self.config = config
        self.replay_buffer = DummyBuffer()

    def select_action(self, obs_s, obs_b, explore=True):
        """Return the fixed action [0.5, 0.5, 0.5] for both agents.

        Args:
            obs_s: observation of agent S (ignored).
            obs_b: observation of agent B (ignored).
            explore: accepted for interface compatibility (ignored).

        Returns:
            Tuple of two float32 arrays, one per agent, each [0.5, 0.5, 0.5].
        """
        action = np.array([0.5, 0.5, 0.5], dtype=np.float32)
        return action, action.copy()

    def compute_rewards(self, qoe_s, qoe_b, qoe_sys):
        """Compute rewards using a fixed lambda=0.5 for monitoring consistency.

        Each agent's reward is a balanced (lam-weighted) combination of a
        cooperative term (own + other's + system QoE) and a competitive term
        (own + system QoE). Weights come from config['reward'] with the
        defaults shown below.

        Args:
            qoe_s: QoE of agent S.
            qoe_b: QoE of agent B.
            qoe_sys: system-level QoE.

        Returns:
            (r_s, r_b, lam): per-agent rewards and the fixed lambda used.
        """
        lam = 0.5
        rew_cfg = self.config.get('reward', {})
        coop_self = rew_cfg.get('coop_self', 0.5)
        coop_other = rew_cfg.get('coop_other', 0.3)
        coop_sys = rew_cfg.get('coop_sys', 0.2)
        comp_self = rew_cfg.get('comp_self', 0.8)
        comp_sys = rew_cfg.get('comp_sys', 0.2)

        # Reward for agent S: blend of cooperative and competitive terms.
        r_coop_s = coop_self * qoe_s + coop_other * qoe_b + coop_sys * qoe_sys
        r_comp_s = comp_self * qoe_s + comp_sys * qoe_sys
        r_s = lam * r_coop_s + (1 - lam) * r_comp_s

        # Reward for agent B: symmetric to S with roles swapped.
        r_coop_b = coop_self * qoe_b + coop_other * qoe_s + coop_sys * qoe_sys
        r_comp_b = comp_self * qoe_b + comp_sys * qoe_sys
        r_b = lam * r_coop_b + (1 - lam) * r_comp_b

        return r_s, r_b, lam

    def update(self):
        """No update performed in this heuristic baseline; returns None."""
        return None

    def save(self, path):
        """No state to save."""
        pass

    def load(self, path):
        """No state to load."""
        pass