SemanticCommunication/code/baselines/equal_alloc.py

102 lines
3.7 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import numpy as np
"""
Baseline: EqualAllocation (等额分配基线)
=====================================
Purpose (lower bound):
- This baseline represents a simple heuristic approach with no learning involved.
- It serves as a lower bound for performance comparison, showing the system behavior under a naive, fixed resource allocation strategy.
- 目的(性能下限):该基线代表了一种不涉及学习的简单启发式方法。它作为性能对比的下限,展示了在朴素的固定资源分配策略下系统的表现。
Difference from Co-MADDPG:
1. Learning: No learning vs Deep Reinforcement Learning.
2. Action Selection: Always fixed at [0.5, 0.5, 0.5] for all resource parameters (subcarrier fraction, power, m_param).
3. 与 Co-MADDPG 的区别:
- 学习机制:无学习 vs 深度强化学习。
- 动作选择:所有资源参数(子载波比例、功率、m 参数)始终固定为 [0.5, 0.5, 0.5]。
Contribution:
- Contributes to performance baseline tables as the "Random/Fixed" comparison point.
- 贡献:作为“随机/固定”对比点,用于性能基准表。
"""
class DummyBuffer:
    """No-op replay buffer satisfying the push/len interface expected by train.py.

    The heuristic baseline never learns, so transitions are discarded on
    arrival and the buffer always reports itself empty.
    """

    def push(self, *args):
        """Discard the pushed transition; nothing is stored."""
        return None

    def __len__(self):
        """Report zero samples so the training loop never triggers an update."""
        return 0
class EqualAllocation:
    """Fixed equal-allocation baseline (no learning).

    Every resource parameter (subcarrier fraction, power, m_param) is pinned
    at 0.5 for both agents, providing a naive lower-bound comparison point
    against the learned Co-MADDPG policy.
    """

    def __init__(self, config):
        # Keep the config for reward weights; the dummy buffer only exists
        # so train.py's push/len calls are harmless no-ops.
        self.config = config
        self.replay_buffer = DummyBuffer()

    def select_action(self, obs_s, obs_b, explore=True):
        """Return the constant action [0.5, 0.5, 0.5] for each agent.

        Observations and the `explore` flag are accepted only for interface
        compatibility with the learned agents; they are ignored.
        """
        fixed = np.full(3, 0.5, dtype=np.float32)
        return fixed, fixed.copy()

    def compute_rewards(self, qoe_s, qoe_b, qoe_sys):
        """Blend cooperative and competitive reward terms at a fixed λ=0.5.

        The fixed λ keeps the monitored reward scale consistent with the
        learned baselines. Weights come from config['reward'] with the same
        defaults used elsewhere.

        Returns:
            (r_s, r_b, lam): per-agent rewards and the λ used.
        """
        lam = 0.5
        weights = self.config.get('reward', {})
        w_self = weights.get('coop_self', 0.5)
        w_other = weights.get('coop_other', 0.3)
        w_sys = weights.get('coop_sys', 0.2)
        c_self = weights.get('comp_self', 0.8)
        c_sys = weights.get('comp_sys', 0.2)

        def _blend(own, other):
            # Cooperative term mixes own, peer, and system QoE; the
            # competitive term weighs own QoE against the system only.
            coop = w_self * own + w_other * other + w_sys * qoe_sys
            comp = c_self * own + c_sys * qoe_sys
            return lam * coop + (1 - lam) * comp

        return _blend(qoe_s, qoe_b), _blend(qoe_b, qoe_s), lam

    def update(self):
        """Heuristic baseline: nothing to train."""
        return None

    def save(self, path):
        """No learned state exists, so saving is a no-op."""
        pass

    def load(self, path):
        """No learned state exists, so loading is a no-op."""
        pass