import numpy as np
"""
Baseline: EqualAllocation (等额分配基线)
=====================================

Purpose (lower bound):
- This baseline represents a simple heuristic approach with no learning involved.
- It serves as a lower bound for performance comparison, showing the system
  behavior under a naive, fixed resource allocation strategy.
- 目的(性能下限):该基线代表了一种不涉及学习的简单启发式方法。它作为性能对比的下限,展示了在朴素的固定资源分配策略下系统的表现。

Difference from Co-MADDPG:
1. Learning: No learning vs Deep Reinforcement Learning.
2. Action Selection: Always fixed at [0.5, 0.5, 0.5] for all resource parameters
   (subcarrier fraction, power, m_param).
3. 与 Co-MADDPG 的区别:
   - 学习机制:无学习 vs 深度强化学习。
   - 动作选择:所有资源参数(子载波比例、功率、m 参数)始终固定为 [0.5, 0.5, 0.5]。

Contribution:
- Contributes to performance baseline tables as the "Random/Fixed" comparison point.
- 贡献:作为“随机/固定”对比点,用于性能基准表。
"""
class DummyBuffer:
    """
    No-op replay buffer satisfying the push/__len__ interface expected by train.py.

    The heuristic baseline never learns, so transitions are discarded and the
    reported size is always zero, which keeps any sample-count gate closed.
    """

    def push(self, *transition):
        """Discard the transition — nothing is ever stored."""
        pass

    def __len__(self):
        """Report zero stored samples so no training update is triggered."""
        return 0
class EqualAllocation:
    """
    EqualAllocation baseline: fixed, learning-free resource allocation.

    Always emits the action [0.5, 0.5, 0.5] for both agents and performs no
    parameter updates; rewards are still computed (with fixed λ = 0.5) so the
    same monitoring pipeline as the learning agents can be reused.
    """

    def __init__(self, config):
        """Store the config and attach a dummy buffer for interface parity.

        Args:
            config: dict-like experiment configuration; only the optional
                'reward' sub-dict is read (in compute_rewards).
        """
        self.config = config
        # Satisfies train.py's push/len expectations without storing anything.
        self.replay_buffer = DummyBuffer()

    def select_action(self, obs_s, obs_b, explore=True):
        """Return the fixed action [0.5, 0.5, 0.5] for both agents.

        Observations and the explore flag are accepted for interface
        compatibility but ignored.

        Returns:
            Tuple of two float32 arrays of shape (3,), each filled with 0.5.
        """
        action_s = np.full(3, 0.5, dtype=np.float32)
        action_b = np.full(3, 0.5, dtype=np.float32)
        return action_s, action_b

    def compute_rewards(self, qoe_s, qoe_b, qoe_sys):
        """Blend cooperative and competitive reward terms with fixed λ = 0.5.

        reward = λ * (w_self*own + w_other*other + w_sys*sys)
               + (1-λ) * (c_self*own + c_sys*sys)

        Args:
            qoe_s: QoE of agent S.
            qoe_b: QoE of agent B.
            qoe_sys: system-level QoE shared by both agents.

        Returns:
            (r_s, r_b, lam) — per-agent rewards and the λ used.
        """
        lam = 0.5
        weights = self.config.get('reward', {})
        w_self = weights.get('coop_self', 0.5)
        w_other = weights.get('coop_other', 0.3)
        w_sys = weights.get('coop_sys', 0.2)
        c_self = weights.get('comp_self', 0.8)
        c_sys = weights.get('comp_sys', 0.2)

        def blend(own, other):
            # Same formula for S and B; only "own" and "other" swap roles.
            cooperative = w_self * own + w_other * other + w_sys * qoe_sys
            competitive = c_self * own + c_sys * qoe_sys
            return lam * cooperative + (1 - lam) * competitive

        return blend(qoe_s, qoe_b), blend(qoe_b, qoe_s), lam

    def update(self):
        """No learning in the heuristic baseline; always returns None."""
        return None

    def save(self, path):
        """No state to save."""
        pass

    def load(self, path):
        """No state to load."""
        pass