"""
Co-MADDPG Evaluation Metrics | Co-MADDPG 评估指标

This module provides the core performance metrics and reward calculation logic for 
the cooperative-competitive multi-agent reinforcement learning (Co-MADDPG) framework 
in hybrid semantic-traditional wireless resource allocation.

本模块为混合语义-传统无线资源分配中的协作-竞争多智能体强化学习（Co-MADDPG）框架
提供核心性能指标和奖励计算逻辑。

Key Metrics:
- Jain's Fairness Index / Jain 公平性指数
- Rate Satisfaction Ratio / 速率满足率
- System-level QoE / 系统级体验质量
- Dynamic Cooperation Weight (λ) / 动态协作权重 (λ)
- Mixed Reward Mechanism / 混合奖励机制

Reference:
- "Dynamic Cooperative-Competitive Multi-Agent Reinforcement Learning for 
   Resource Allocation in Semantic-Traditional Hybrid Wireless Networks"
"""

import numpy as np


def jain_fairness(values) -> float:
    """
    Compute Jain's fairness index.

    Formula: J = (sum x_i)^2 / (n * sum x_i^2), where n is the total number
    of elements in ``values``.

    The input is flattened before the index is computed, so scalars and
    multi-dimensional arrays are handled: every element counts as one
    allocation.

    Parameters
    ----------
    values : array_like
        Resource allocation or performance metrics (e.g., rates, QoE).

    Returns
    -------
    float
        Fairness index. For non-negative inputs with at least one non-zero
        entry the result lies in [1/n, 1.0]. Returns 0.0 when the input is
        empty or all values are zero.
    """
    # Flatten so that n is the number of elements, not the size of the first
    # axis (len() would under-count for 2-D input and fail for scalars).
    values = np.asarray(values, dtype=np.float64).ravel()
    n = values.size
    if n == 0:
        return 0.0
    sum_sq = np.sum(values ** 2)
    # All-zero input: the index is undefined (0/0), report 0.0 by convention.
    if sum_sq == 0:
        return 0.0
    return float(np.sum(values) ** 2 / (n * sum_sq))


def rate_satisfaction(rates, r_req: float) -> float:
    """
    Return the share of users whose rate meets the minimum requirement.

    A user is counted as satisfied when its rate is greater than or equal
    to ``r_req``.

    Parameters
    ----------
    rates : array_like
        Per-user achievable rates.
    r_req : float
        Minimum rate requirement threshold (R_req).

    Returns
    -------
    float
        Fraction in [0, 1]. An empty ``rates`` input yields 1.0.
    """
    per_user = np.asarray(rates)
    if len(per_user) > 0:
        # Boolean mask of satisfied users; its mean is the satisfied fraction.
        satisfied = per_user >= r_req
        return float(np.mean(satisfied))
    # No users: nothing is violated, so report full satisfaction.
    return 1.0


def compute_system_qoe(qoe_list) -> float:
    """
    Return the system-level QoE, defined as the mean of per-user QoE values.

    Parameters
    ----------
    qoe_list : array_like
        QoE values for all active users. Must support ``len()``.

    Returns
    -------
    float
        Arithmetic mean of ``qoe_list``, or 0.0 when it is empty.
    """
    has_users = len(qoe_list) != 0
    # Guard the empty case so np.mean is never called on an empty input.
    return float(np.mean(qoe_list)) if has_users else 0.0


def compute_lambda(qoe_sys: float, beta: float = 5.0,
                   q_th: float = 0.6) -> float:
    """
    Compute the dynamic cooperation weight lambda with a sigmoid function.

    lambda(t) = 1 / (1 + exp(-beta * (QoE_sys - Q_th)))

    The sigmoid is evaluated in a numerically stable form so that inputs far
    from the threshold saturate to 0.0 or 1.0 without triggering a
    floating-point overflow in ``exp``.

    Parameters
    ----------
    qoe_sys : float
        Current system-level QoE.
    beta : float
        Steepness of the switching transition (beta).
    q_th : float
        QoE threshold for cooperative behavior (Q_th).

    Returns
    -------
    float
        Lambda value in [0, 1], representing the degree of cooperation.
        Equals 0.5 exactly when ``qoe_sys == q_th``.
    """
    z = beta * (qoe_sys - q_th)
    if z >= 0:
        # exp(-z) <= 1 here, so it cannot overflow.
        return float(1.0 / (1.0 + np.exp(-z)))
    # For negative z use the equivalent form exp(z) / (1 + exp(z));
    # exp(z) < 1, which avoids the overflow of exp(-z) for very negative z.
    exp_z = np.exp(z)
    return float(exp_z / (1.0 + exp_z))


def compute_mixed_reward(qoe_self: float, qoe_other: float,
                         qoe_sys: float, lambda_val: float,
                         coop_w=(0.5, 0.3, 0.2),
                         comp_w=(0.8, 0.2)) -> float:
    """
    Blend the cooperative and competitive rewards using the weight lambda.

    r = lambda * r_coop + (1 - lambda) * r_comp

    where
        r_coop = coop_w[0] * qoe_self + coop_w[1] * qoe_other + coop_w[2] * qoe_sys
        r_comp = comp_w[0] * qoe_self + comp_w[1] * qoe_sys

    Parameters
    ----------
    qoe_self : float
        Individual QoE of the agent.
    qoe_other : float
        Mean QoE of other agents in the same cell.
    qoe_sys : float
        Overall system QoE.
    lambda_val : float
        Dynamic cooperation weight (lambda).
    coop_w : tuple
        Weights for the cooperative reward, ordered (self, others, system).
    comp_w : tuple
        Weights for the competitive reward, ordered (self, system).

    Returns
    -------
    float
        The final mixed reward value.
    """
    # Cooperative term: weighted mix of own, peers' and system-wide QoE.
    cooperative = (coop_w[0] * qoe_self
                   + coop_w[1] * qoe_other
                   + coop_w[2] * qoe_sys)
    # Competitive term: only own and system-wide QoE contribute.
    competitive = comp_w[0] * qoe_self + comp_w[1] * qoe_sys

    coop_share = lambda_val * cooperative
    comp_share = (1.0 - lambda_val) * competitive
    return float(coop_share + comp_share)


def moving_average(values, window: int = 50) -> np.ndarray:
    """
    Smooth a series with a simple moving average (used for plotting).

    Parameters
    ----------
    values : array_like
        Input time series data.
    window : int
        Smoothing window size.

    Returns
    -------
    np.ndarray
        The smoothed series, of length ``len(values) - window + 1``. When
        the series is shorter than ``window`` it is returned unsmoothed as
        a float64 array.
    """
    series = np.asarray(values, dtype=np.float64)
    if len(series) >= window:
        # Uniform kernel: every sample in the window has weight 1/window.
        kernel = np.ones(window) / window
        return np.convolve(series, kernel, mode='valid')
    # Not enough samples to fill a single window; hand back the raw series.
    return series