62 lines
2.8 KiB
Python
62 lines
2.8 KiB
Python
"""
|
|
Actor Network for Wireless Resource Allocation / 无线资源分配中的 Actor 网络
|
|
|
|
This file defines the Actor network architecture for the Co-MADDPG project.
|
|
The Actor maps local observations to deterministic resource allocation actions.
|
|
本文档定义了 Co-MADDPG 项目中的 Actor 网络架构。
|
|
Actor 网络将局部观测值映射到确定性的资源分配动作。
|
|
|
|
Network Architecture / 网络架构:
|
|
FC(obs_dim \u2192 256 \u2192 256 \u2192 128 \u2192 act_dim)
|
|
Output Mapping / 输出映射: (Tanh + 1) / 2 \u2208 [0, 1]
|
|
Reference / 参考文献: Section 3.2.1 Actor-Critic Structure in the project paper.
|
|
"""
|
|
import torch
|
|
import torch.nn as nn
|
|
|
|
class Actor(nn.Module):
    """Deterministic policy network mapping observations to actions in [0, 1].

    Architecture: FC(obs_dim -> h0 -> h1 -> h2 -> act_dim) with a ReLU after
    each of the three hidden layers. With the default ``hidden_sizes`` this is
    FC(obs_dim -> 256 -> 256 -> 128 -> act_dim). The output of the last linear
    layer is squashed with ``(tanh(x) + 1) / 2`` so every action component
    lies in [0, 1].

    Paper reference: Section 3.2.1 - Policy Network implementation.

    Args:
        obs_dim (int): Dimension of the observation space.
        act_dim (int): Dimension of the action space.
        hidden_sizes (sequence of int): Sizes of the three hidden layers
            (default: (256, 256, 128)).

    Raises:
        ValueError: If ``hidden_sizes`` does not contain exactly 3 entries.
    """

    def __init__(self, obs_dim, act_dim, hidden_sizes=(256, 256, 128)):
        super().__init__()

        # The model design fixes the depth at exactly 3 hidden layers.
        # Raise explicitly instead of using `assert`, because assertions are
        # stripped when Python runs with -O and the check would vanish.
        if len(hidden_sizes) != 3:
            raise ValueError("Actor requires exactly 3 hidden layer sizes")

        first, second, third = hidden_sizes

        # Feed-forward stack. The layer order (and therefore the state_dict
        # keys net.0, net.2, net.4, net.6) must stay stable so that saved
        # checkpoints keep loading.
        self.net = nn.Sequential(
            nn.Linear(obs_dim, first),
            nn.ReLU(),
            nn.Linear(first, second),
            nn.ReLU(),
            nn.Linear(second, third),
            nn.ReLU(),
            nn.Linear(third, act_dim),
        )

    def forward(self, obs):
        """Compute actions for the given observations.

        Args:
            obs (torch.Tensor): Local observation tensor whose last dimension
                is ``obs_dim``.

        Returns:
            torch.Tensor: Actions in the [0, 1] range; same leading dimensions
            as ``obs`` with a last dimension of ``act_dim``.
        """
        raw = self.net(obs)
        # tanh yields values in [-1, 1]; shift and scale them into [0, 1].
        return (torch.tanh(raw) + 1.0) / 2.0
|