"""
Co-MADDPG Evaluation Metrics | Co-MADDPG 评估指标

This module provides the core performance metrics and reward calculation logic for
the cooperative-competitive multi-agent reinforcement learning (Co-MADDPG) framework
in hybrid semantic-traditional wireless resource allocation.

本模块为混合语义-传统无线资源分配中的协作-竞争多智能体强化学习(Co-MADDPG)框架
提供核心性能指标和奖励计算逻辑。

Key Metrics:
- Jain's Fairness Index / Jain 公平性指数
- Rate Satisfaction Ratio / 速率满足率
- System-level QoE / 系统级体验质量
- Dynamic Cooperation Weight (λ) / 动态协作权重 (λ)
- Mixed Reward Mechanism / 混合奖励机制

Reference:
- "Dynamic Cooperative-Competitive Multi-Agent Reinforcement Learning for
  Resource Allocation in Semantic-Traditional Hybrid Wireless Networks"
"""

import numpy as np

def jain_fairness(values) -> float:
    """Compute Jain's fairness index over a set of allocations.

    Formula: J = (Σ x_i)² / (n · Σ x_i²)

    Parameters
    ----------
    values : array_like
        Resource allocation or performance metrics (e.g., rates, QoE).

    Returns
    -------
    float
        Fairness index in range [1/n, 1.0]; 0.0 when the input is empty
        or every value is zero (the index is undefined in those cases).
    """
    arr = np.asarray(values, dtype=np.float64)
    # Empty input: the index is undefined, report 0.0 by convention.
    if len(arr) == 0:
        return 0.0
    # Denominator n · Σ x_i²; the dot product of the vector with itself
    # is the sum of squares.
    denom = len(arr) * np.dot(arr, arr)
    # All-zero allocations would divide by zero — also report 0.0.
    if denom == 0:
        return 0.0
    return float(np.sum(arr) ** 2 / denom)


def rate_satisfaction(rates, r_req: float) -> float:
    """Return the fraction of users whose rate meets the minimum requirement.

    Parameters
    ----------
    rates : array_like
        Per-user achievable rates.
    r_req : float
        Minimum rate requirement threshold (R_req).

    Returns
    -------
    float
        Fraction in [0, 1]. An empty input is vacuously satisfied (1.0).
    """
    arr = np.asarray(rates)
    # No users -> the requirement is vacuously met.
    if len(arr) == 0:
        return 1.0
    # Count users at or above the threshold, then normalize.
    satisfied = np.count_nonzero(arr >= r_req)
    return float(satisfied / len(arr))


def compute_system_qoe(qoe_list) -> float:
    """Compute system-level QoE as the mean of per-user QoE values.

    Parameters
    ----------
    qoe_list : array_like
        QoE values for all active users.

    Returns
    -------
    float
        Mean system QoE; 0.0 when there are no active users.
    """
    # No active users -> define system QoE as zero.
    if len(qoe_list) == 0:
        return 0.0
    # Arithmetic mean: total QoE divided by user count.
    total = float(np.sum(qoe_list))
    return total / len(qoe_list)


def compute_lambda(qoe_sys: float, beta: float = 5.0,
                   q_th: float = 0.6) -> float:
    """Compute the dynamic cooperation weight λ using a sigmoid function.

    λ(t) = 1 / (1 + exp(-β · (QoE_sys - Q_th)))

    Parameters
    ----------
    qoe_sys : float
        Current system-level QoE.
    beta : float
        Steepness of the switching transition (β).
    q_th : float
        QoE threshold for cooperative behavior (Q_th).

    Returns
    -------
    float
        λ value in [0, 1], representing the degree of cooperation.
    """
    z = beta * (qoe_sys - q_th)
    # Numerically stable sigmoid: always exponentiate a non-positive
    # argument so np.exp never overflows. The naive form exp(-z) raises a
    # RuntimeWarning (overflow) when z is strongly negative, even though
    # the limit value 0.0 is correct.
    if z >= 0.0:
        return float(1.0 / (1.0 + np.exp(-z)))
    e = np.exp(z)
    return float(e / (1.0 + e))


def compute_mixed_reward(qoe_self: float, qoe_other: float,
                         qoe_sys: float, lambda_val: float,
                         coop_w=(0.5, 0.3, 0.2),
                         comp_w=(0.8, 0.2)) -> float:
    """Compute the dynamically mixed cooperative-competitive reward.

    r = λ · r_coop + (1-λ) · r_comp

    Parameters
    ----------
    qoe_self : float
        Individual QoE of the agent.
    qoe_other : float
        Mean QoE of other agents in the same cell.
    qoe_sys : float
        Overall system QoE.
    lambda_val : float
        Dynamic cooperation weight (λ).
    coop_w : tuple
        Weights for the cooperative reward (self, others, system).
    comp_w : tuple
        Weights for the competitive reward (self, system).

    Returns
    -------
    float
        The final mixed reward value.
    """
    # Cooperative term: weighted blend of own, neighbors', and system QoE —
    # emphasizes global performance.
    r_coop = (coop_w[0] * qoe_self
              + coop_w[1] * qoe_other
              + coop_w[2] * qoe_sys)

    # Competitive term: mostly the agent's own QoE, with a small system share.
    r_comp = comp_w[0] * qoe_self + comp_w[1] * qoe_sys

    # λ interpolates linearly between the two regimes.
    mixed = lambda_val * r_coop + (1.0 - lambda_val) * r_comp
    return float(mixed)


def moving_average(values, window: int = 50) -> np.ndarray:
    """Smooth a time series with a simple moving average (for plotting).

    Parameters
    ----------
    values : array_like
        Input time series data.
    window : int
        Smoothing window size.

    Returns
    -------
    np.ndarray
        Smoothed series of length ``len(values) - window + 1``, or the
        input (as float64 array) unchanged when it is shorter than the
        window.
    """
    series = np.asarray(values, dtype=np.float64)
    # Too short to smooth — return the series as-is.
    if len(series) < window:
        return series
    # Uniform averaging kernel; 'valid' keeps only fully-covered windows.
    kernel = np.full(window, 1.0 / window)
    return np.convolve(series, kernel, mode='valid')