"""
无线资源分配环境 / Main Gym-like environment for wireless resource allocation.

该模块实现了一个用于语义和传统用户共存系统的无线资源分配环境。
它通过 Gym 风格的 reset/step 接口，处理子载波分配、功率控制和压缩率优化。
This module implements a wireless resource allocation environment for systems 
with coexisting semantic and traditional users. It handles subcarrier allocation, 
power control, and compression ratio optimization via a Gym-like reset/step interface.

作者/Author: Sisyphus-Junior
日期/Date: 2026-02-28
论文引用/Paper Reference: Co-MADDPG based Resource Allocation for Semantic Communication
依赖/Dependencies: numpy, envs.channel_model, envs.semantic_module
"""

import numpy as np
from envs.channel_model import ChannelModel
from envs.semantic_module import SemanticModule

class WirelessEnv:
    """
    Wireless environment in which semantic and traditional users coexist.

    The environment keeps the per-user channel state, applies the joint action
    of two agents (one controlling the semantic user group, one controlling the
    traditional user group) and evaluates per-user and system-wide QoE.

    Rows of the channel matrix are ordered traditional-first: user indices
    ``[0, K_b)`` are traditional users and ``[K_b, K)`` are semantic users.

    Parameters
    ----------
    config : dict
        Configuration dictionary containing 'env' and 'training' sections.
        The full dictionary is also passed to ``ChannelModel`` and
        ``SemanticModule``.
    """

    # Weight of the newest sample in the exponential moving average of the
    # per-group QoE that is exposed to the agents through the observation.
    QOE_SMOOTHING = 0.1

    def __init__(self, config):
        # Split the configuration into its environment and training sections.
        env_config = config['env']
        train_config = config['training']

        # Core system parameters.
        self.N = env_config['num_subcarriers']  # number of subcarriers
        self.K_s = env_config['num_semantic_users']  # number of semantic users
        self.K_b = env_config['num_traditional_users']  # number of traditional users
        self.K = self.K_s + self.K_b  # total number of users

        # Physical layer parameters.
        self.P_max = env_config['max_power']  # maximum total transmit power
        self.R_req = env_config['min_rate_req']  # minimum rate required by traditional users
        self.delta_f = env_config['subcarrier_spacing']  # subcarrier spacing
        self.rho_min = env_config['rho_min']  # minimum compression ratio
        self.rho_max = env_config['rho_max']  # maximum compression ratio
        self.w1 = env_config['w1']  # semantic QoE weight 1
        self.w2 = env_config['w2']  # semantic QoE weight 2

        # Bounds of the interval user distances are drawn from on reset.
        self.min_d = env_config.get('min_distance', 50.0)
        self.max_d = env_config.get('max_distance', 500.0)

        # Episode length control.
        self.max_steps = train_config['max_steps']
        self.step_count = 0

        # Helper models; both receive the complete configuration.
        self.channel_model = ChannelModel(config)
        self.semantic_module = SemanticModule(config)

        # Placeholder state; reset() draws the actual episode values.
        self.distances = np.zeros(self.K)  # user distances
        self.channel_gains = np.zeros((self.K, self.N), dtype=complex)  # complex channel gains
        self.content_sensitivity = 0.5  # content sensitivity (semantic observation feature)
        self.business_priority = 0.5  # business priority (traditional observation feature)
        self.load_s = 0.5  # semantic traffic load
        self.load_b = 0.5  # traditional traffic load
        self.alloc_s = 0.0  # fraction of subcarriers held by the semantic group
        self.alloc_b = 0.0  # fraction of subcarriers held by the traditional group
        self.qoe_avg_s = 0.0  # rolling average of semantic QoE
        self.qoe_avg_b = 0.0  # rolling average of traditional QoE

    @property
    def obs_dim(self):
        """Observation dimension: one entry per subcarrier (N) plus 4 extra features."""
        return self.N + 4

    @property
    def act_dim(self):
        """Action dimension: subcarrier fraction, power fraction, compression ratio."""
        return 3

    def reset(self):
        """
        Reset the environment state for a new episode.

        Draws new user distances, a new channel realization and new values for
        the scalar observation features, and clears the step counter, the
        allocation fractions and the rolling QoE averages.

        Returns
        -------
        tuple
            (semantic_observation, traditional_observation).
        """
        # Random user distances in [min_distance, max_distance].
        self.distances = np.random.uniform(self.min_d, self.max_d, size=self.K)

        # New channel realization; stored as the K x N complex gain matrix (Eq. 6).
        self.channel_gains = self.channel_model.generate_channel(self.distances, self.N)
        self.step_count = 0

        # Randomize the scalar features that are part of the observations.
        self.content_sensitivity = np.random.uniform(0.3, 0.8)
        self.business_priority = np.random.uniform(0.3, 0.8)
        self.load_s = np.random.uniform(0.2, 0.8)
        self.load_b = np.random.uniform(0.2, 0.8)

        # Clear allocation fractions and rolling QoE averages.
        self.alloc_s = 0.0
        self.alloc_b = 0.0
        self.qoe_avg_s = 0.0
        self.qoe_avg_b = 0.0

        obs_s = self._get_observation('semantic')
        obs_b = self._get_observation('traditional')
        return obs_s, obs_b

    @staticmethod
    def _clip_unit(value):
        """Return ``value`` as a float limited to the interval [0, 1]."""
        return min(max(float(value), 0.0), 1.0)

    def _group_channel_power(self, start, stop):
        """
        Return the mean channel power per subcarrier for user rows [start, stop).

        The power of a gain ``h`` is ``|h|**2``; the mean is taken over the
        users of the group. An empty group yields a zero vector of length N.
        """
        if stop <= start:
            return np.zeros(self.N)
        return np.mean(np.abs(self.channel_gains[start:stop]) ** 2, axis=0)

    def _get_observation(self, agent_type):
        """
        Construct the observation vector of one agent.

        The vector consists of the group's mean channel power per subcarrier,
        scaled by its maximum, followed by four scalar features: rolling
        average QoE, content sensitivity (semantic) or business priority
        (traditional), allocated subcarrier fraction and traffic load.

        Parameters
        ----------
        agent_type : str
            'semantic' selects the semantic group; any other value selects
            the traditional group.

        Returns
        -------
        np.ndarray
            float32 vector of length ``obs_dim``.
        """
        if agent_type == 'semantic':
            channel_power = self._group_channel_power(self.K_b, self.K)
            extras = [self.qoe_avg_s, self.content_sensitivity, self.alloc_s, self.load_s]
        else:
            channel_power = self._group_channel_power(0, self.K_b)
            extras = [self.qoe_avg_b, self.business_priority, self.alloc_b, self.load_b]

        # Scale by the strongest subcarrier; the epsilon avoids 0/0 when the
        # channel power is all zero.
        channel_norm = channel_power / (np.max(channel_power) + 1e-10)
        return np.concatenate([channel_norm, extras]).astype(np.float32)

    def step(self, action_s, action_b):
        """
        Execute a single environment step.

        Every action component that is used is clipped to [0, 1] before it is
        decoded, so out-of-range values (for example from exploration noise)
        cannot produce negative transmit power or a compression ratio outside
        ``[rho_min, rho_max]``.

        Parameters
        ----------
        action_s : np.ndarray
            Semantic agent action [sub_fraction, power_fraction, compression_ratio].
        action_b : np.ndarray
            Traditional agent action [sub_fraction, power_fraction, redundant_param].
            The third component is not read.

        Returns
        -------
        tuple
            (obs_s, obs_b, reward_s, reward_b, done, info). The rewards are the
            mean QoE of the semantic and of the traditional group; ``done`` is
            True once ``max_steps`` steps have been taken since the last reset.
        """
        self.step_count += 1

        # 1. Decode actions.
        frac_sub_s = self._clip_unit(action_s[0])
        frac_pow_s = self._clip_unit(action_s[1])
        frac_rho = self._clip_unit(action_s[2])
        frac_sub_b = self._clip_unit(action_b[0])
        frac_pow_b = self._clip_unit(action_b[1])

        # Requested subcarrier counts, at least one per group.
        n_sub_s = max(1, int(round(frac_sub_s * self.N)))
        n_sub_b = max(1, int(round(frac_sub_b * self.N)))

        # If the requests exceed N, scale the semantic share proportionally and
        # give the rest to the traditional group.
        total_sub = n_sub_s + n_sub_b
        if total_sub > self.N:
            n_sub_s = int(round(n_sub_s * self.N / total_sub))
            n_sub_b = self.N - n_sub_s

        # Requested group powers, rescaled proportionally if they exceed P_max.
        p_s = frac_pow_s * self.P_max
        p_b = frac_pow_b * self.P_max
        total_p = p_s + p_b
        if total_p > self.P_max:
            p_s = p_s * self.P_max / total_p
            p_b = p_b * self.P_max / total_p

        # Map the compression component linearly onto [rho_min, rho_max].
        rho = frac_rho * (self.rho_max - self.rho_min) + self.rho_min

        # 2. Allocate subcarriers greedily by mean channel power of each group.
        sem_channel = self._group_channel_power(self.K_b, self.K)
        trad_channel = self._group_channel_power(0, self.K_b)

        # The semantic group picks its strongest subcarriers first.
        sem_subs = np.argsort(-sem_channel)[:n_sub_s]
        # The traditional group chooses among what is left.
        remaining = np.setdiff1d(np.arange(self.N), sem_subs)

        if len(remaining) >= n_sub_b:
            best_idx = np.argsort(-trad_channel[remaining])[:n_sub_b]
            trad_subs = remaining[best_idx]
        else:
            trad_subs = remaining
            n_sub_b = len(trad_subs)

        # 3. Power allocation: each group's power is split equally over the
        #    subcarriers of that group.
        noise_power = self.channel_model.noise_power

        alloc_matrix = np.zeros((self.K, self.N))
        power_matrix = np.zeros((self.K, self.N))

        # Hand out the semantic subcarriers to the K_s users round-robin.
        for i, k in enumerate(range(self.K_b, self.K)):
            user_subs = sem_subs[i::max(1, self.K_s)]
            if len(user_subs) > 0:
                alloc_matrix[k, user_subs] = 1
                power_matrix[k, user_subs] = p_s / max(n_sub_s, 1)

        # Hand out the traditional subcarriers to the K_b users round-robin.
        for i, k in enumerate(range(0, self.K_b)):
            user_subs = trad_subs[i::max(1, self.K_b)]
            if len(user_subs) > 0:
                alloc_matrix[k, user_subs] = 1
                power_matrix[k, user_subs] = p_b / max(n_sub_b, 1)

        # 4. SNR per user and subcarrier, delegated to the channel model (Eq. 8).
        snr_matrix = self.channel_model.compute_snr(self.channel_gains, power_matrix, noise_power)

        # 5. QoE per user; traditional users come first in qoe_list.
        qoe_list = []
        rates = []
        ssim_values = []

        # Traditional users: QoE_b = min(R_k / R_req, 1) with
        # R_k = sum over the user's subcarriers of delta_f * log2(1 + snr).
        for k in range(self.K_b):
            user_subs = np.where(alloc_matrix[k] > 0)[0]
            if len(user_subs) == 0:
                rate_k = 0.0
            else:
                rate_k = np.sum(self.delta_f * np.log2(1 + snr_matrix[k, user_subs]))
            rates.append(rate_k)
            qoe_list.append(min(rate_k / self.R_req, 1.0))

        # Semantic users: similarity from the mean SNR over the user's
        # subcarriers and rho; a user without subcarriers gets similarity 0.
        for k in range(self.K_b, self.K):
            user_subs = np.where(alloc_matrix[k] > 0)[0]
            if len(user_subs) == 0:
                ssim_k = 0.0
            else:
                avg_snr = np.mean(snr_matrix[k, user_subs])
                ssim_k = self.semantic_module.compute_ssim(avg_snr, rho)
            ssim_values.append(float(ssim_k))
            qoe_k = self.semantic_module.compute_semantic_qoe(ssim_k, rho, self.w1, self.w2, self.rho_max)
            qoe_list.append(qoe_k)

        # 6. Mean QoE of the whole system and of each group.
        qoe_sys = np.mean(qoe_list) if len(qoe_list) > 0 else 0.0
        qoe_s = np.mean(qoe_list[self.K_b:]) if self.K_s > 0 else 0.0
        qoe_b = np.mean(qoe_list[:self.K_b]) if self.K_b > 0 else 0.0

        # Update the rolling averages and the allocation fractions that feed
        # the next observation.
        smoothing = self.QOE_SMOOTHING
        self.qoe_avg_s = smoothing * qoe_s + (1 - smoothing) * self.qoe_avg_s
        self.qoe_avg_b = smoothing * qoe_b + (1 - smoothing) * self.qoe_avg_b
        self.alloc_s = n_sub_s / self.N
        self.alloc_b = n_sub_b / self.N

        # 7. Draw a new channel realization for the next step (block fading).
        self.channel_gains = self.channel_model.generate_channel(self.distances, self.N)

        # 8. Build the outputs.
        obs_s = self._get_observation('semantic')
        obs_b = self._get_observation('traditional')
        done = (self.step_count >= self.max_steps)

        # Fraction of traditional users whose rate meets R_req; 1.0 when there
        # are no traditional users.
        if rates:
            rate_satisfaction = float(np.mean([r >= self.R_req for r in rates]))
        else:
            rate_satisfaction = 1.0

        info = {
            'qoe_semantic': qoe_s,
            'qoe_traditional': qoe_b,
            'qoe_sys': qoe_sys,
            'qoe_list': qoe_list,
            'rates': rates,
            'ssim_values': ssim_values,
            'rate_satisfaction': rate_satisfaction,
            'rho': rho,
            'n_sub_s': n_sub_s,
            'n_sub_b': n_sub_b,
        }

        # Each agent is rewarded with the mean QoE of its own group.
        return obs_s, obs_b, qoe_s, qoe_b, done, info