commit 8b0eadc876d9f31ed6e6181fc4c677ad2687c991 Author: hc Date: Wed Dec 31 19:58:09 2025 +0800 首次提交 diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..01dd004 --- /dev/null +++ b/.gitignore @@ -0,0 +1,50 @@ +# .gitignore - MND-IA 项目 + +# 配置文件(包含敏感信息) +config.json + +# Python 缓存 +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python + +# 虚拟环境 +venv/ +env/ +ENV/ +.venv + +# 数据文件 +data/ +*.csv +*.json.bak +*.db + +# 日志文件 +*.log +logs/ + +# IDE 配置 +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# Jupyter Notebook +.ipynb_checkpoints + +# 系统文件 +.DS_Store +Thumbs.db + +# 测试覆盖率 +.coverage +htmlcov/ + +# 构建产物 +build/ +dist/ +*.egg-info/ diff --git a/CONFIG_GUIDE.md b/CONFIG_GUIDE.md new file mode 100644 index 0000000..1672f27 --- /dev/null +++ b/CONFIG_GUIDE.md @@ -0,0 +1,137 @@ +# Configuration Example +# 配置文件使用说明 + +这是 MND-IA 系统的配置文件模板。 + +## 配置项说明 + +### LLM 配置 +```json +"llm": { + "model_name": "gpt-4", // 模型名称(支持 gpt-4, gpt-3.5-turbo, glm-4, claude-3.5-sonnet 等) + "api_base": "https://api.openai.com/v1", // API 地址 + "api_key": "your-api-key-here", // API Key(必填) + "temperature": 0.7, // 生成温度(0-1) + "max_tokens": 2000, // 最大生成长度 + "timeout": 60 // 超时时间(秒) +} +``` + +### 系统配置 +```json +"system": { + "data_dir": "data", // 数据存储目录 + "cache_dir": "data/news_cache", // 新闻缓存目录 + "log_level": "INFO" // 日志级别 +} +``` + +### 爬虫配置 +```json +"crawler": { + "max_retries": 3, // 最大重试次数 + "retry_delay": 5, // 重试间隔(秒) + "cache_days": 7 // 缓存保留天数 +} +``` + +### 交易配置 +```json +"trading": { + "max_position_ratio": 0.2, // 单个资产最大仓位比例 + "risk_free_rate": 0.02 // 无风险利率 +} +``` + +## 使用不同的 LLM 提供商 + +### OpenAI +```json +"llm": { + "model_name": "gpt-4", + "api_base": "https://api.openai.com/v1", + "api_key": "sk-xxxxxxxxxxxx" +} +``` + +### 智谱 GLM +```json +"llm": { + "model_name": "glm-4", + "api_base": "https://open.bigmodel.cn/api/paas/v4", + "api_key": "xxxxxxxxxxxx.xxxxxxxxxxxx" +} +``` + +### Claude (Anthropic) +```json +"llm": { + "model_name": 
"claude-3-5-sonnet-20241022", + "api_base": "https://api.anthropic.com/v1", + "api_key": "sk-ant-xxxxxxxxxxxx" +} +``` + +### 本地部署(Ollama) +```json +"llm": { + "model_name": "qwen2.5:14b", + "api_base": "http://localhost:11434/v1", + "api_key": "ollama" +} +``` + +## 代码中使用配置 + +### 方式 1:直接访问配置属性 +```python +from core.config import config + +# 获取模型名称 +model = config.model_name + +# 获取 API Key +api_key = config.api_key + +# 获取完整 LLM 配置 +llm_config = config.llm_config +``` + +### 方式 2:使用统一的 LLM 调用接口 +```python +from core.config import llm_call + +# 直接调用 LLM +response = llm_call( + messages=[ + {"role": "system", "content": "你是 A 股分析师"}, + {"role": "user", "content": "分析这条新闻..."} + ], + temperature=0.5 # 可以覆盖配置文件中的默认值 +) +``` + +### 方式 3:获取配置好的客户端 +```python +from core.config import get_llm_client + +client = get_llm_client() +if client: + response = client.chat.completions.create(...) +``` + +## 环境变量支持(TODO) + +未来版本将支持环境变量覆盖: +```bash +export MND_IA_API_KEY="sk-xxxxxxxxxxxx" +export MND_IA_MODEL_NAME="gpt-4" +``` + +## 安全提示 + +⚠️ **重要**: +1. 不要将包含真实 API Key 的 `config.json` 提交到 Git +2. 建议将 `config.json` 添加到 `.gitignore` +3. 团队协作时,提供 `config.example.json` 模板 +4. 生产环境使用环境变量或密钥管理服务 diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..20935b5 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,149 @@ +# 🚀 MND-IA 快速开始指南 + +## 安装依赖 + +```bash +pip install -r requirements.txt +``` + +**核心依赖说明**: +- `akshare`: A股行情数据获取 +- `cloudscraper`: 绕过 Cloudflare 防护的爬虫工具 +- `beautifulsoup4`: HTML 解析 +- `pandas`, `numpy`: 数据处理 + +## 运行测试 + +### 1. 测试核心模块(World Book) + +```bash +python core/world_book.py +``` + +### 2. 测试 Agent Skills + +```bash +# 测试量化模块 +python skills/quant.py + +# 测试情报分析 +python skills/analyst.py + +# 测试策略师 +python skills/strategist.py + +# 测试基金经理 +python skills/pm.py + +# 测试审计员 +python skills/auditor.py + +# 测试新闻联播爬虫 +python skills/crawler.py +``` + +### 3. 
运行完整系统 + +```bash +# 测试模式(使用模拟数据) +python main.py --mode test --no-crawler + +# 测试模式(使用爬虫抓取新闻联播) +python main.py --mode test + +# 每日批处理模式(自动爬取新闻) +python main.py --mode daily --capital 1000000 + +# 盘后审计模式 +python main.py --mode audit +``` + +## 系统架构 + +``` +MND-IA/ +├── core/ # 共享内核 +│ ├── world_book.py # WorldBook 核心类 +│ ├── asset_map.json # 资产映射表 +│ └── macro_matrix.json # 宏观因子矩阵 +├── skills/ # Agent 技能群 +│ ├── analyst.py # Skill A: 情报分析师 +│ ├── quant.py # Skill B: 量化研究员 +│ ├── strategist.py # Skill C: 宏观策略师 +│ ├── pm.py # Skill D: 基金经理 +│ ├── auditor.py # Skill E: 复盘审计员 +│ └── crawler.py # Skill F: 新闻联播爬虫 +├── data/ # 数据存储 +├── logs/ # 日志文件 +└── main.py # 主调度程序 +``` + +## 数据流示意 + +``` +[新闻联播] ──→ Crawler (Skill F) ──→ 新闻文本 + ↓ + Analyst ──→ Narrative_JSON + ↓ +[行情] ──→ Quant ──→ Market_Data ──→ Strategist ──→ WorldBook + ↓ + PM ──→ Trust Index ──→ Orders + ↑ + Auditor (复盘) +``` + +## 核心概念 + +### Trust Index 公式 + +``` +TrustIndex = (Score_narrative × 0.6 + Score_flow × 0.4) - Penalty + +一票否决: +if Narrative > 80 and Flow < 30: + Penalty = 100 +``` + +### 叙事生命周期 + +1. **incubation (潜伏期)**: 新闻刚出现,市场未反应 +2. **fermentation (发酵期)**: 政策出台,市场开始关注 +3. **realization (兑现期)**: Level A 重大利好,资金涌入 +4. **decay (衰退期)**: 热度下降,逐步退出 + +## 下一步 + +1. ~~配置新闻数据源(可接入财联社、东方财富等 API)~~ ✅ 已实现新闻联播爬虫 +2. 配置 LLM(如 GLM-4 或 Claude-3.5)用于 Analyst 增强 +3. 实现实盘交易接口(如东方财富、同花顺) +4. 部署定时任务(每日 08:30 自动执行) +5. 添加更多新闻源(如财联社快讯、部委官网等) + +## 常见问题 + +**Q: akshare 获取数据失败?** +A: 确保网络连接正常,某些 API 有频率限制。 + +**Q: 爬虫无法获取新闻联播?** +A: +1. 检查网站是否可访问(可能被墙或维护) +2. 查看 `data/news_cache/` 目录下的缓存文件 +3. 手动测试:`python skills/crawler.py` +4. 
新闻联播通常 20:00 后更新,提前爬取会返回空 + +**Q: cloudscraper 安装失败?** +A: +```bash +pip install --upgrade pip +pip install cloudscraper requests[security] pyparsing +``` + +**Q: 如何添加新的 ETF?** +A: 编辑 `core/asset_map.json`,在对应板块添加 ETF 代码。 + +**Q: 如何调整 Trust Index 公式权重?** +A: 修改 `skills/pm.py` 中的 `calculate_trust_index()` 方法。 + +## 许可证 + +MIT License diff --git a/core/__init__.py b/core/__init__.py new file mode 100644 index 0000000..5b4f151 --- /dev/null +++ b/core/__init__.py @@ -0,0 +1,16 @@ +""" +MND-IA Core Module +================== +共享内核 - 系统数据中心和逻辑基石 +""" + +from .world_book import WorldBook, Narrative, MacroCycle, create_narrative_id + +__all__ = [ + 'WorldBook', + 'Narrative', + 'MacroCycle', + 'create_narrative_id' +] + +__version__ = '1.0.0' diff --git a/core/asset_map.json b/core/asset_map.json new file mode 100644 index 0000000..c8c8eab --- /dev/null +++ b/core/asset_map.json @@ -0,0 +1,155 @@ +{ + "_meta": { + "version": "4.0.0", + "description": "Agent专用资产映射表。包含资产代码、关键词触发器及宏观因子敏感度矩阵。", + "instruction": "当计算Trust Index时,使用sensitivity中的数值作为基础权重(x),结合Narrative强度(y)进行最终打分。" + }, + + "assets": { + "core_A500": { + "name": "中证A500", + "etfs": ["512050", "512020", "563360"], + "keywords": ["核心资产", "大盘", "指数增强", "国家队", "长线资金"], + "sensitivity": { + "liquidity_easing": 0.6, + "policy_support": 0.8, + "cpi_rebound": 0.5, + "risk_on": 0.9 + } + }, + "tech_hardware": { + "name": "半导体与硬科技", + "etfs": ["512480", "588000"], + "keywords": ["芯片", "光刻机", "集成电路", "卡脖子", "科创板"], + "sensitivity": { + "interest_rate_down": 0.9, + "policy_tech_self_reliance": 1.0, + "geopolitics_tension": 0.7, + "risk_on": 1.0 + } + }, + "tech_software": { + "name": "软件与数字经济", + "etfs": ["515230", "159869"], + "keywords": ["信创", "国产软件", "操作系统", "数据要素", "AI应用", "网络安全"], + "sensitivity": { + "policy_digital_economy": 1.0, + "govt_spending": 0.8, + "risk_on": 0.9, + "interest_rate_down": 0.7 + } + }, + "tech_infra": { + "name": "5G与新基建", + "etfs": ["515050", "515880"], + "keywords": ["5G", "6G", "光模块", 
"通信基站", "算力底座", "东数西算"], + "sensitivity": { + "policy_new_infra": 1.0, + "tech_cycle_up": 0.8, + "export_growth": 0.6 + } + }, + "finance_broker": { + "name": "券商/牛市旗手", + "etfs": ["512880", "512900"], + "keywords": ["成交量", "资本市场", "印花税", "T+0", "金融强国"], + "sensitivity": { + "market_volume_spike": 1.0, + "policy_capital_market": 1.0, + "liquidity_easing": 0.8 + } + }, + "finance_bank": { + "name": "银行/高股息", + "etfs": ["512800", "159887"], + "keywords": ["高股息", "中特估", "红利", "避险", "存款利率"], + "sensitivity": { + "interest_rate_down": -0.5, + "risk_off": 0.8, + "policy_soe_reform": 0.7 + } + }, + "new_energy_vehicle": { + "name": "新能源车与电池", + "etfs": ["515030", "159840"], + "keywords": ["电动车", "锂电池", "宁德时代", "汽车下乡", "碳中和"], + "sensitivity": { + "consumption_stimulus": 0.8, + "export_growth": 0.9, + "oil_price_up": 0.5 + } + }, + "consumption_staples": { + "name": "主要消费(白酒)", + "etfs": ["512690"], + "keywords": ["白酒", "内需", "CPI", "春节", "高端消费"], + "sensitivity": { + "cpi_rebound": 0.9, + "liquidity_easing": 0.6, + "foreign_inflow": 0.8 + } + }, + "healthcare": { + "name": "医药医疗", + "etfs": ["512010", "159898"], + "keywords": ["创新药", "医疗器械", "老龄化", "医保", "CXO"], + "sensitivity": { + "interest_rate_down": 0.8, + "policy_public_health": 0.7, + "demographic_trend": 0.5 + } + }, + "cross_border_tech": { + "name": "跨境科技(T+0)", + "etfs": ["513330", "513300", "513500"], + "keywords": ["港股", "美股", "纳斯达克", "中概股", "平台经济"], + "sensitivity": { + "fed_rate_cut": 0.9, + "currency_rmb_depreciation": 0.8, + "policy_platform_economy": 0.7 + } + }, + "agriculture": { + "name": "农业与粮食安全", + "etfs": ["159825"], + "keywords": ["粮食安全", "一号文件", "猪周期", "种业", "食品通胀"], + "sensitivity": { + "cpi_rebound": 0.8, + "policy_food_security": 1.0, + "geopolitics_tension": 0.6 + } + }, + "resources_gold": { + "name": "资源与黄金", + "etfs": ["518880", "512400"], + "keywords": ["黄金", "有色", "通胀", "避险", "美元下跌"], + "sensitivity": { + "dollar_index_down": 0.9, + "geopolitics_tension": 1.0, + 
"inflation_expectations": 0.8 + } + }, + "defense_low_altitude": { + "name": "军工与低空", + "etfs": ["512660"], + "keywords": ["低空经济", "国防", "军费", "无人机", "地缘"], + "sensitivity": { + "policy_low_altitude": 1.0, + "geopolitics_tension": 0.9, + "govt_spending": 0.7 + } + } + }, + + "macro_factors_definition": { + "liquidity_easing": "央行降准降息,或市场资金利率下行", + "interest_rate_down": "无风险利率下行(利好成长股)", + "cpi_rebound": "通胀回升,消费复苏", + "risk_on": "市场情绪高涨,追逐高风险高收益", + "risk_off": "市场恐慌,资金流向避险资产", + "policy_tech_self_reliance": "政策强调自主可控、国产替代", + "geopolitics_tension": "外部冲突、制裁或地缘紧张", + "currency_rmb_depreciation": "人民币贬值(利好出口及跨境资产)", + "fed_rate_cut": "美联储降息(利好港股/美股/黄金)" + } +} \ No newline at end of file diff --git a/core/config.py b/core/config.py new file mode 100644 index 0000000..257218e --- /dev/null +++ b/core/config.py @@ -0,0 +1,219 @@ +""" +配置管理模块 +提供统一的配置加载和访问接口,支持大模型配置、系统配置等 +""" +import json +import os +from pathlib import Path +from typing import Dict, Any, Optional + + +class Config: + """配置管理类,单例模式""" + + _instance = None + _config_data: Dict[str, Any] = {} + + def __new__(cls): + if cls._instance is None: + cls._instance = super().__new__(cls) + return cls._instance + + def __init__(self): + if not self._config_data: + self.load_config() + + def load_config(self, config_path: Optional[str] = None): + """ + 加载配置文件 + + Args: + config_path: 配置文件路径,默认为项目根目录下的 config.json + """ + if config_path is None: + # 获取项目根目录(core 目录的上级目录) + project_root = Path(__file__).parent.parent + config_path = project_root / "config.json" + + if not os.path.exists(config_path): + raise FileNotFoundError(f"配置文件不存在: {config_path}") + + with open(config_path, 'r', encoding='utf-8') as f: + self._config_data = json.load(f) + + def get(self, key: str, default: Any = None) -> Any: + """ + 获取配置项(支持点号分隔的多级key) + + Args: + key: 配置项键,如 "llm.model_name" 或 "llm" + default: 默认值 + + Returns: + 配置值 + """ + keys = key.split('.') + value = self._config_data + + for k in keys: + if isinstance(value, dict): + 
value = value.get(k) + if value is None: + return default + else: + return default + + return value + + @property + def llm_config(self) -> Dict[str, Any]: + """获取大模型配置""" + return self.get('llm', {}) + + @property + def model_name(self) -> str: + """获取模型名称""" + return self.get('llm.model_name', 'gpt-4') + + @property + def api_base(self) -> str: + """获取API地址""" + return self.get('llm.api_base', 'https://api.openai.com/v1') + + @property + def api_key(self) -> str: + """获取API Key""" + return self.get('llm.api_key', '') + + @property + def temperature(self) -> float: + """获取温度参数""" + return self.get('llm.temperature', 0.7) + + @property + def max_tokens(self) -> int: + """获取最大token数""" + return self.get('llm.max_tokens', 2000) + + @property + def timeout(self) -> int: + """获取超时时间""" + return self.get('llm.timeout', 60) + + def update(self, key: str, value: Any): + """ + 更新配置项 + + Args: + key: 配置项键,支持点号分隔的多级key + value: 新值 + """ + keys = key.split('.') + config = self._config_data + + for k in keys[:-1]: + if k not in config: + config[k] = {} + config = config[k] + + config[keys[-1]] = value + + def save_config(self, config_path: Optional[str] = None): + """ + 保存配置到文件 + + Args: + config_path: 配置文件路径,默认为项目根目录下的 config.json + """ + if config_path is None: + project_root = Path(__file__).parent.parent + config_path = project_root / "config.json" + + with open(config_path, 'w', encoding='utf-8') as f: + json.dump(self._config_data, f, ensure_ascii=False, indent=2) + + +# 全局配置实例 +config = Config() + + +def get_llm_client(): + """ + 获取配置好的LLM客户端(OpenAI SDK) + + Returns: + OpenAI客户端实例 + """ + try: + from openai import OpenAI + + client = OpenAI( + api_key=config.api_key, + base_url=config.api_base, + timeout=config.timeout + ) + return client + except ImportError: + print("警告: openai 库未安装,无法创建LLM客户端") + return None + except Exception as e: + print(f"创建LLM客户端失败: {e}") + return None + + +def llm_call(messages: list, **kwargs) -> Optional[str]: + """ + 统一的LLM调用接口 + + Args: + 
messages: 消息列表,格式为 [{"role": "user", "content": "..."}] + **kwargs: 其他参数(会覆盖配置文件中的默认值) + + Returns: + LLM返回的文本内容,失败返回None + """ + client = get_llm_client() + if client is None: + return None + + try: + # 合并配置和传入的参数 + params = { + 'model': config.model_name, + 'temperature': config.temperature, + 'max_tokens': config.max_tokens, + } + params.update(kwargs) + + response = client.chat.completions.create( + messages=messages, + **params + ) + return response.choices[0].message.content + except Exception as e: + print(f"LLM调用失败: {e}") + return None + + +if __name__ == "__main__": + # 测试配置加载 + print("=== 配置管理模块测试 ===") + print(f"模型名称: {config.model_name}") + print(f"API地址: {config.api_base}") + print(f"API Key: {config.api_key[:10]}..." if config.api_key else "API Key: 未配置") + print(f"温度: {config.temperature}") + print(f"最大tokens: {config.max_tokens}") + + print("\n完整LLM配置:") + print(json.dumps(config.llm_config, indent=2, ensure_ascii=False)) + + # 测试配置更新 + print("\n测试配置更新...") + config.update('llm.temperature', 0.5) + print(f"更新后的温度: {config.temperature}") + print() + # 测试获取不存在的配置 + print(f"\n获取不存在的配置: {config.get('nonexistent.key', 'default_value')}") + # 测试调用 + response = llm_call([{"role": "user", "content": "你好,用100字介绍一下你自己。"}]) + print(f"\nLLM调用结果:\n{response}") diff --git a/core/macro_matrix.json b/core/macro_matrix.json new file mode 100644 index 0000000..5ae95c7 --- /dev/null +++ b/core/macro_matrix.json @@ -0,0 +1,243 @@ +{ + "_comment": "宏观逻辑矩阵 - 定义宏观因子对各 ETF 板块的基础理论影响值", + "_version": "1.0.0", + "_last_updated": "2025-12-30", + "_scoring_rules": { + "range": "影响值范围: -10 (极度负面) 到 +10 (极度正面)", + "neutral": "0 表示无明显影响", + "interpretation": "数值越大表示正面影响越强,数值越小表示负面影响越强" + }, + + "macro_factors": { + "rate_cut": { + "description": "降准降息 - 货币宽松政策", + "impact": { + "tech_etf": 8, + "ai_etf": 8, + "semiconductor_etf": 7, + "new_energy_vehicle_etf": 7, + "solar_etf": 6, + "pharma_etf": 5, + "consumer_etf": 7, + "bank_etf": -5, + "insurance_etf": -4, + "broker_etf": 6,
+ "realestate_etf": 9, + "military_etf": 4, + "metal_etf": 5, + "coal_etf": 2 + } + }, + "rate_hike": { + "description": "加息收紧 - 货币紧缩政策", + "impact": { + "tech_etf": -7, + "ai_etf": -7, + "semiconductor_etf": -6, + "new_energy_vehicle_etf": -6, + "solar_etf": -5, + "pharma_etf": -3, + "consumer_etf": -6, + "bank_etf": 5, + "insurance_etf": 4, + "broker_etf": -4, + "realestate_etf": -9, + "military_etf": -2, + "metal_etf": -4, + "coal_etf": -1 + } + }, + "currency_depreciation": { + "description": "人民币贬值 - 有利出口型企业", + "impact": { + "tech_etf": 3, + "ai_etf": 1, + "semiconductor_etf": 2, + "new_energy_vehicle_etf": 5, + "solar_etf": 6, + "pharma_etf": 2, + "consumer_etf": -3, + "bank_etf": -2, + "insurance_etf": -1, + "broker_etf": -1, + "realestate_etf": -4, + "military_etf": 3, + "metal_etf": 4, + "coal_etf": 1 + } + }, + "currency_appreciation": { + "description": "人民币升值 - 有利进口型企业", + "impact": { + "tech_etf": -2, + "ai_etf": -1, + "semiconductor_etf": -1, + "new_energy_vehicle_etf": -4, + "solar_etf": -5, + "pharma_etf": -1, + "consumer_etf": 4, + "bank_etf": 3, + "insurance_etf": 2, + "broker_etf": 2, + "realestate_etf": 5, + "military_etf": -2, + "metal_etf": -3, + "coal_etf": -1 + } + }, + "fiscal_stimulus": { + "description": "财政刺激政策 - 基建、消费刺激", + "impact": { + "tech_etf": 6, + "ai_etf": 7, + "semiconductor_etf": 6, + "new_energy_vehicle_etf": 8, + "solar_etf": 7, + "pharma_etf": 4, + "consumer_etf": 9, + "bank_etf": 5, + "insurance_etf": 4, + "broker_etf": 7, + "realestate_etf": 8, + "military_etf": 6, + "metal_etf": 8, + "coal_etf": 6 + } + }, + "regulation_tightening": { + "description": "监管收紧 - 行业整顿", + "impact": { + "tech_etf": -6, + "ai_etf": -4, + "semiconductor_etf": -3, + "new_energy_vehicle_etf": -4, + "solar_etf": -3, + "pharma_etf": -7, + "consumer_etf": -5, + "bank_etf": -2, + "insurance_etf": -2, + "broker_etf": -5, + "realestate_etf": -8, + "military_etf": -1, + "metal_etf": -2, + "coal_etf": -3 + } + }, + "export_strong": { + "description": 
"出口数据强劲", + "impact": { + "tech_etf": 5, + "ai_etf": 3, + "semiconductor_etf": 4, + "new_energy_vehicle_etf": 7, + "solar_etf": 8, + "pharma_etf": 3, + "consumer_etf": 2, + "bank_etf": 3, + "insurance_etf": 2, + "broker_etf": 4, + "realestate_etf": 1, + "military_etf": 4, + "metal_etf": 6, + "coal_etf": 3 + } + }, + "pmi_expansion": { + "description": "PMI 扩张 (>50) - 制造业景气", + "impact": { + "tech_etf": 7, + "ai_etf": 6, + "semiconductor_etf": 7, + "new_energy_vehicle_etf": 8, + "solar_etf": 7, + "pharma_etf": 4, + "consumer_etf": 6, + "bank_etf": 5, + "insurance_etf": 4, + "broker_etf": 6, + "realestate_etf": 4, + "military_etf": 6, + "metal_etf": 8, + "coal_etf": 7 + } + }, + "oil_price_surge": { + "description": "原油价格大涨", + "impact": { + "tech_etf": -3, + "ai_etf": -2, + "semiconductor_etf": -2, + "new_energy_vehicle_etf": 5, + "solar_etf": 6, + "pharma_etf": -1, + "consumer_etf": -4, + "bank_etf": -1, + "insurance_etf": -1, + "broker_etf": 0, + "realestate_etf": -3, + "military_etf": -1, + "metal_etf": 3, + "coal_etf": 8 + } + }, + "us_tech_weakness": { + "description": "美国科技股大跌", + "impact": { + "tech_etf": -6, + "ai_etf": -7, + "semiconductor_etf": -8, + "new_energy_vehicle_etf": -4, + "solar_etf": -3, + "pharma_etf": -2, + "consumer_etf": -3, + "bank_etf": 2, + "insurance_etf": 1, + "broker_etf": -2, + "realestate_etf": 1, + "military_etf": 0, + "metal_etf": 0, + "coal_etf": 1 + } + } + }, + + "etf_mapping": { + "tech_etf": ["515980", "159813"], + "ai_etf": ["515980", "159813"], + "semiconductor_etf": ["512480", "159995"], + "new_energy_vehicle_etf": ["515030", "159806"], + "solar_etf": ["515790", "159857"], + "pharma_etf": ["512010", "159938"], + "consumer_etf": ["159928", "510630"], + "bank_etf": ["510230"], + "insurance_etf": ["512880"], + "broker_etf": ["512000"], + "realestate_etf": ["512200", "159707"], + "military_etf": ["512660", "512810"], + "metal_etf": ["512400", "159881"], + "coal_etf": ["515220", "159678"] + }, + + "composite_scenarios": { + 
"bull_market_start": { + "description": "牛市启动期 - 货币宽松 + 政策利好", + "factors": ["rate_cut", "fiscal_stimulus"], + "multiplier": 1.2 + }, + "bear_market_bottom": { + "description": "熊市底部 - 政策转向前", + "factors": ["rate_hike", "regulation_tightening"], + "multiplier": 0.8 + }, + "economic_recovery": { + "description": "经济复苏期", + "factors": ["pmi_expansion", "export_strong", "fiscal_stimulus"], + "multiplier": 1.15 + }, + "risk_off": { + "description": "风险规避模式", + "factors": ["us_tech_weakness", "currency_depreciation"], + "multiplier": 0.9 + } + } +} diff --git a/core/world_book.py b/core/world_book.py new file mode 100644 index 0000000..532155c --- /dev/null +++ b/core/world_book.py @@ -0,0 +1,400 @@ +""" +MND-IA Core: World Book (世界书) +==================================== +这是系统的数据中心和逻辑基石,存储宏观状态、活跃叙事对象和历史经验。 +所有 Agent 必须通过此模块访问和更新系统状态。 +""" + +import json +import os +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Any +from pathlib import Path +from copy import deepcopy + + +class Narrative: + """单个叙事对象 (Narrative Object)""" + + def __init__( + self, + id: str, + topic: str, + related_etfs: List[str], + lifecycle_stage: str = "incubation", + base_score: float = 50.0, + decay_factor: float = 0.95, + current_weight: float = None + ): + self.id = id + self.topic = topic + self.related_etfs = related_etfs + self.lifecycle_stage = lifecycle_stage # incubation | fermentation | realization | decay + self.base_score = base_score + self.decay_factor = decay_factor + self.current_weight = current_weight if current_weight is not None else base_score + self.last_updated = datetime.now().strftime("%Y-%m-%d") + + def decay(self) -> None: + """应用时间衰减""" + self.current_weight *= self.decay_factor + self.last_updated = datetime.now().strftime("%Y-%m-%d") + + def boost(self, new_score: float) -> None: + """新闻强化叙事权重""" + # 使用加权平均,新信息占40%,历史占60% + self.current_weight = self.current_weight * 0.6 + new_score * 0.4 + self.last_updated = 
datetime.now().strftime("%Y-%m-%d") + + def update_stage(self, new_stage: str) -> None: + """更新生命周期阶段""" + valid_stages = ["incubation", "fermentation", "realization", "decay"] + if new_stage in valid_stages: + self.lifecycle_stage = new_stage + self.last_updated = datetime.now().strftime("%Y-%m-%d") + + def to_dict(self) -> Dict[str, Any]: + """转换为字典格式""" + return { + "id": self.id, + "topic": self.topic, + "related_etfs": self.related_etfs, + "lifecycle_stage": self.lifecycle_stage, + "base_score": self.base_score, + "decay_factor": self.decay_factor, + "current_weight": round(self.current_weight, 2), + "last_updated": self.last_updated + } + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> 'Narrative': + """从字典创建对象""" + return cls( + id=data["id"], + topic=data["topic"], + related_etfs=data["related_etfs"], + lifecycle_stage=data.get("lifecycle_stage", "incubation"), + base_score=data.get("base_score", 50.0), + decay_factor=data.get("decay_factor", 0.95), + current_weight=data.get("current_weight") + ) + + +class MacroCycle: + """宏观周期状态""" + + def __init__( + self, + status: str = "neutral", # upward | downward | neutral + liquidity: str = "neutral", # loose | neutral | tight + policy_wind: str = "wait_and_see" # stimulus | regulation | wait_and_see + ): + self.status = status + self.liquidity = liquidity + self.policy_wind = policy_wind + + def to_dict(self) -> Dict[str, str]: + return { + "status": self.status, + "liquidity": self.liquidity, + "policy_wind": self.policy_wind + } + + @classmethod + def from_dict(cls, data: Dict[str, str]) -> 'MacroCycle': + return cls( + status=data.get("status", "neutral"), + liquidity=data.get("liquidity", "neutral"), + policy_wind=data.get("policy_wind", "wait_and_see") + ) + + +class WorldBook: + """ + MCP World Book - 系统核心数据中心 + ====================================== + 动态更新的状态管理器,存储: + 1. 宏观周期状态 + 2. 活跃叙事对象列表 + 3. 
宏观因子影响矩阵快照 + """ + + def __init__(self, data_dir: str = "data"): + self.data_dir = Path(data_dir) + self.data_dir.mkdir(exist_ok=True) + + self.world_book_path = self.data_dir / "world_book.json" + + # 初始化数据结构 + self.timestamp = datetime.now().isoformat() + self.macro_cycle = MacroCycle() + self.active_narratives: Dict[str, Narrative] = {} + self.static_matrix_snapshot: Dict[str, Dict[str, float]] = {} + + # 新增: 宏观因子向量 (用于向量点积计算) + self.macro_factor_vector: Dict[str, float] = {} + + # 尝试加载已有数据 + self.load() + + def load(self) -> bool: + """从磁盘加载 World Book""" + if not self.world_book_path.exists(): + print(f"[WorldBook] 未找到已有数据,初始化新的 World Book") + self._load_static_matrix() + return False + + try: + with open(self.world_book_path, 'r', encoding='utf-8') as f: + data = json.load(f) + + self.timestamp = data.get("timestamp", datetime.now().isoformat()) + self.macro_cycle = MacroCycle.from_dict(data.get("macro_cycle", {})) + + # 加载叙事对象 + self.active_narratives = {} + for narrative_data in data.get("active_narratives", []): + narrative = Narrative.from_dict(narrative_data) + self.active_narratives[narrative.id] = narrative + + # 加载矩阵快照 + self.static_matrix_snapshot = data.get("static_matrix_snapshot", {}) + + # 加载宏观因子向量 + self.macro_factor_vector = data.get("macro_factor_vector", {}) + + print(f"[WorldBook] 成功加载数据,包含 {len(self.active_narratives)} 个活跃叙事") + return True + + except Exception as e: + print(f"[WorldBook] 加载数据失败: {e}") + self._load_static_matrix() + return False + + def save(self) -> bool: + """保存 World Book 到磁盘""" + try: + self.timestamp = datetime.now().isoformat() + + data = { + "timestamp": self.timestamp, + "macro_cycle": self.macro_cycle.to_dict(), + "active_narratives": [ + narrative.to_dict() + for narrative in self.active_narratives.values() + ], + "static_matrix_snapshot": self.static_matrix_snapshot, + "macro_factor_vector": self.macro_factor_vector + } + + with open(self.world_book_path, 'w', encoding='utf-8') as f: + json.dump(data, f, 
ensure_ascii=False, indent=2) + + print(f"[WorldBook] 成功保存数据到 {self.world_book_path}") + return True + + except Exception as e: + print(f"[WorldBook] 保存数据失败: {e}") + return False + + def _load_static_matrix(self) -> None: + """加载静态宏观矩阵""" + matrix_path = Path("core") / "macro_matrix.json" + if matrix_path.exists(): + try: + with open(matrix_path, 'r', encoding='utf-8') as f: + self.static_matrix_snapshot = json.load(f) + print(f"[WorldBook] 加载宏观矩阵成功") + except Exception as e: + print(f"[WorldBook] 加载宏观矩阵失败: {e}") + self.static_matrix_snapshot = {} + + def add_narrative(self, narrative: Narrative) -> None: + """添加新叙事""" + self.active_narratives[narrative.id] = narrative + print(f"[WorldBook] 添加新叙事: {narrative.topic} (ID: {narrative.id})") + + def update_narrative(self, narrative_id: str, **kwargs) -> bool: + """更新叙事属性""" + if narrative_id not in self.active_narratives: + print(f"[WorldBook] 叙事不存在: {narrative_id}") + return False + + narrative = self.active_narratives[narrative_id] + + if 'new_score' in kwargs: + narrative.boost(kwargs['new_score']) + if 'stage' in kwargs: + narrative.update_stage(kwargs['stage']) + + return True + + def decay_all_narratives(self) -> None: + """对所有叙事应用时间衰减""" + for narrative in self.active_narratives.values(): + narrative.decay() + print(f"[WorldBook] 对 {len(self.active_narratives)} 个叙事应用了时间衰减") + + def remove_weak_narratives(self, threshold: float = 10.0) -> List[str]: + """移除权重过低的叙事""" + to_remove = [ + nid for nid, narrative in self.active_narratives.items() + if narrative.current_weight < threshold + ] + + for nid in to_remove: + topic = self.active_narratives[nid].topic + del self.active_narratives[nid] + print(f"[WorldBook] 移除衰退叙事: {topic} (ID: {nid})") + + return to_remove + + def get_narrative_by_topic(self, topic: str) -> Optional[Narrative]: + """根据主题获取叙事""" + for narrative in self.active_narratives.values(): + if narrative.topic == topic: + return narrative + return None + + def get_narratives_by_etf(self, etf_code: str) 
-> List[Narrative]: + """获取与特定 ETF 相关的所有叙事""" + return [ + narrative for narrative in self.active_narratives.values() + if etf_code in narrative.related_etfs + ] + + def update_macro_cycle(self, **kwargs) -> None: + """更新宏观周期状态""" + if 'status' in kwargs: + self.macro_cycle.status = kwargs['status'] + if 'liquidity' in kwargs: + self.macro_cycle.liquidity = kwargs['liquidity'] + if 'policy_wind' in kwargs: + self.macro_cycle.policy_wind = kwargs['policy_wind'] + + print(f"[WorldBook] 宏观周期更新: {self.macro_cycle.to_dict()}") + + def update_macro_factor_vector(self, factor_updates: Dict[str, float]) -> None: + """ + 更新宏观因子向量 + + Args: + factor_updates: 宏观因子及其强度,例如: + { + "interest_rate_down": 1.0, # 强烈降息预期 + "geopolitics_tension": 0.5, # 中等地缘紧张 + "policy_digital_economy": 1.0 # 数字经济政策头条 + } + """ + self.macro_factor_vector.update(factor_updates) + print(f"[WorldBook] 宏观因子向量更新: {self.macro_factor_vector}") + + def get_macro_factor_value(self, factor_name: str) -> float: + """获取特定宏观因子的当前值""" + return self.macro_factor_vector.get(factor_name, 0.0) + + def clear_macro_factor_vector(self) -> None: + """清空宏观因子向量(通常在每日开盘前重置)""" + self.macro_factor_vector = {} + print("[WorldBook] 宏观因子向量已清空") + + def get_top_narratives(self, top_n: int = 5) -> List[Narrative]: + """获取权重最高的 N 个叙事""" + sorted_narratives = sorted( + self.active_narratives.values(), + key=lambda n: n.current_weight, + reverse=True + ) + return sorted_narratives[:top_n] + + def export_snapshot(self) -> Dict[str, Any]: + """导出完整快照(供其他 Agent 读取)""" + return { + "timestamp": self.timestamp, + "macro_cycle": self.macro_cycle.to_dict(), + "macro_factor_vector": self.macro_factor_vector, + "active_narratives": [n.to_dict() for n in self.active_narratives.values()], + "static_matrix_snapshot": self.static_matrix_snapshot, + "summary": { + "total_narratives": len(self.active_narratives), + "top_3_topics": [n.topic for n in self.get_top_narratives(3)] + } + } + + def __repr__(self) -> str: + return ( + f"" + ) + + +# 
==================== 工具函数 ==================== + +def create_narrative_id(topic: str, date: Optional[str] = None) -> str: + """生成叙事 ID""" + if date is None: + date = datetime.now().strftime("%Y%m") + + # 简化主题名称作为 ID 的一部分 + topic_slug = topic.replace(" ", "_").replace(",", "_").replace("、", "_") + return f"narrative_{topic_slug}_{date}" + + +if __name__ == "__main__": + # 测试代码 + print("=" * 50) + print("World Book 核心模块测试") + print("=" * 50) + + # 创建 World Book 实例 + wb = WorldBook(data_dir="data") + + # 添加测试叙事 + narrative1 = Narrative( + id=create_narrative_id("低空经济"), + topic="低空经济", + related_etfs=["512980", "159969"], + lifecycle_stage="fermentation", + base_score=85.0, + decay_factor=0.95 + ) + + narrative2 = Narrative( + id=create_narrative_id("AI算力"), + topic="AI算力", + related_etfs=["515980", "159813"], + lifecycle_stage="realization", + base_score=92.0, + decay_factor=0.93 + ) + + wb.add_narrative(narrative1) + wb.add_narrative(narrative2) + + # 更新宏观周期 + wb.update_macro_cycle( + status="upward", + liquidity="loose", + policy_wind="stimulus" + ) + + # 保存数据 + wb.save() + + # 测试导出快照 + snapshot = wb.export_snapshot() + print("\n当前状态快照:") + print(json.dumps(snapshot, ensure_ascii=False, indent=2)) + + # 测试衰减 + print("\n应用时间衰减...") + wb.decay_all_narratives() + + # 测试获取 Top 叙事 + top = wb.get_top_narratives(2) + print(f"\nTop 2 叙事:") + for n in top: + print(f" - {n.topic}: {n.current_weight:.2f}") + + print("\n✅ World Book 核心模块测试完成") diff --git a/docs/crawler_guide.md b/docs/crawler_guide.md new file mode 100644 index 0000000..21a4ade --- /dev/null +++ b/docs/crawler_guide.md @@ -0,0 +1,303 @@ +# Skill F: 新闻联播爬虫使用指南 + +## 功能说明 + +Skill F (Crawler) 使用 **cloudscraper** + **BeautifulSoup** 自动抓取新闻联播官方网站的文字稿,并转换为符合 Skill A (Analyst) 要求的格式。 + +## 技术特点 + +- **绕过防护**: 使用 cloudscraper 自动处理 Cloudflare 等反爬机制 +- **稳定可靠**: 同步爬取,无需处理异步复杂性 +- **精准解析**: 基于实际网站 DOM 结构(.content-section) + +## 数据源 + +- **网站**: https://cn.govopendata.com/xinwenlianbo/ +- **更新频率**: 每日 19:00 后更新 +- 
**数据格式**: 新闻标题 + 正文内容 + +## 安装依赖 + +```bash +pip install cloudscraper beautifulsoup4 +``` + +## 基础使用 + +### 1. 命令行测试 + +```bash +# 测试爬虫功能 +python skills/crawler.py +``` + +### 2. Python 代码调用 + +```python +from skills.crawler import XinwenLianboCrawler + +# 创建爬虫实例 +crawler = XinwenLianboCrawler() + +# 爬取今日新闻 +news_list = crawler.crawl_sync() + +# 爬取指定日期 +news_list = crawler.crawl_sync(date="20251229") + +# 输出格式: +# [ +# {"title": "新闻标题1", "text": "新闻正文1..."}, +# {"title": "新闻标题2", "text": "新闻正文2..."}, +# ... +# ] +``` + +### 3. 批量爬取 + +```python +from skills.crawler import XinwenLianboCrawler + +crawler = XinwenLianboCrawler() + +# 爬取一段时间范围的新闻 +results = crawler.crawl_date_range( + start_date="20251220", + end_date="20251229" +) + +# results 是一个字典: {date: news_list} +for date, news_list in results.items(): + print(f"{date}: {len(news_list)} 条新闻") +``` + +## 与 Skill A 集成 + +```python +from skills.crawler import XinwenLianboCrawler +from skills.analyst import NewsAnalyzer + +# 1. 爬取新闻 +crawler = XinwenLianboCrawler() +news_list = crawler.crawl_sync() + +# 2. 分析新闻 +analyzer = NewsAnalyzer() +analysis_results = analyzer.batch_analyze(news_list) + +# 3. 生成叙事 JSON +narrative_json = analyzer.generate_narrative_json(analysis_results) + +print(f"识别到 {len(narrative_json['narratives'])} 个投资叙事") +``` + +## 在主流程中使用 + +```bash +# 方式1: 使用爬虫(默认) +python main.py --mode daily + +# 方式2: 不使用爬虫(使用测试数据) +python main.py --mode daily --no-crawler +``` + +## 数据缓存 + +爬虫会自动缓存抓取的新闻到 `data/news_cache/` 目录: + +``` +data/news_cache/ +├── xinwenlianbo_20251229.json +├── xinwenlianbo_20251230.json +└── ... 
+``` + +当网络失败时,系统会自动尝试从缓存读取。 + +## 批量爬取历史数据 + +```python +from skills.crawler import XinwenLianboCrawler + +crawler = XinwenLianboCrawler() + +# 爬取一段时间范围的新闻 +results = crawler.crawl_date_range( + start_date="20251220", + end_date="20251229" +) + +# results 是一个字典: {date: news_list} +for date, news_list in results.items(): + print(f"{date}: {len(news_list)} 条新闻") +``` + +## 自定义解析逻辑 + +如果网站结构发生变化,可以修改 `_parse_content()` 方法: + +```python +def _parse_content(self, markdown: str, html: str) -> List[Dict[str, str]]: + """ + 自定义解析逻辑 + + Args: + markdown: Crawl4AI 提取的 Markdown + html: 原始 HTML + + Returns: + [{"title": "", "text": ""}] + """ + # 在这里添加你的解析逻辑 + pass +``` + +## 故障排查 + +### 问题1: 爬虫返回空列表 + +**可能原因**: +- 网站结构变化 +- 当天新闻尚未更新(新闻联播通常 19:30 播出,网站 20:00 后更新) +- 网络连接问题 +- 被 Cloudflare 拦截(虽然使用了 cloudscraper,但仍可能发生) + +**解决方法**: +```bash +# 检查爬虫输出 +python skills/crawler.py + +# 查看缓存文件 +cat data/news_cache/xinwenlianbo_*.json + +# 检查网站是否可访问 +curl https://cn.govopendata.com/xinwenlianbo/20251229/ +``` + +### 问题2: cloudscraper 安装失败 + +**解决方法**: +```bash +# 升级 pip +pip install --upgrade pip + +# 单独安装 cloudscraper +pip install cloudscraper --upgrade + +# 如果仍然失败,安装依赖 +pip install requests[security] pyparsing +``` + +### 问题3: 解析结果不准确 + +**解决方法**: +编辑 `skills/crawler.py`,检查 `_parse_content()` 方法中的 CSS 选择器是否与网站实际结构匹配: + +```python +# 当前使用的选择器 +sections = soup.select('.content-section') +title_tag = section.select_one('.content-heading') +content_tag = section.select_one('.content-body') +``` + +可以手动访问网站,使用浏览器开发者工具(F12)查看实际的 DOM 结构。 + +## 性能优化 + +### 1. 使用缓存 + +爬虫会自动缓存结果,避免重复请求: + +```python +# 首次爬取(从网站) +news_list = crawler.crawl_sync(date="20251229") + +# 再次爬取(从缓存,如果网络失败) +news_list = crawler.crawl_sync(date="20251229") # 自动读取缓存 +``` + +### 2. 
批量爬取时添加延迟 + +```python +crawler = XinwenLianboCrawler() + +# crawl_date_range 已内置 2 秒延迟 +results = crawler.crawl_date_range("20251220", "20251229") +``` + +## 定时任务配置 + +### Linux/Mac (crontab) + +```bash +# 编辑 crontab +crontab -e + +# 添加定时任务(每天 20:30 执行) +30 20 * * * cd /path/to/MND-IA && python skills/crawler.py >> logs/crawler.log 2>&1 +``` + +### Windows (任务计划程序) + +1. 打开"任务计划程序" +2. 创建基本任务 +3. 触发器:每天 20:30 +4. 操作:启动程序 `python.exe` +5. 参数:`E:\02projects\MND-IA\skills\crawler.py` + +## API 参考 + +### XinwenLianboCrawler + +#### `__init__(base_url)` +- 初始化爬虫 +- `base_url`: 新闻联播网站基础 URL +- 自动创建 cloudscraper 会话 + +#### `crawl_sync(date=None)` +- 同步爬取接口(推荐使用) +- `date`: 日期字符串 (YYYYMMDD),默认今天 +- 返回: `List[Dict[str, str]]` + +#### `crawl_xinwenlianbo(date=None)` +- 爬取核心方法 +- `date`: 日期字符串 +- 返回: `List[Dict[str, str]]` + +#### `crawl_date_range(start_date, end_date)` +- 批量爬取日期范围 +- `start_date`: 开始日期 (YYYYMMDD) +- `end_date`: 结束日期 (YYYYMMDD) +- 返回: `Dict[str, List[Dict]]` +- 自动添加 2 秒延迟避免请求过快 + +## 扩展建议 + +### 1. 添加更多新闻源 + +可以参考 Skill F 的实现,添加其他新闻源爬虫: + +- 财联社快讯 +- 人民日报 +- 发改委官网 +- 各部委新闻发布 + +### 2. 使用 LLM 增强提取 + +```python +# 配置 LLM 客户端 +from openai import OpenAI + +client = OpenAI(api_key="your-api-key") + +# 使用 LLM 辅助提取(需要实现相关代码) +news_list = await crawler.crawl_xinwenlianbo(use_llm=True) +``` + +### 3. 集成到实时监控 + +监听新闻联播更新,实时触发分析流程。 + +## 许可证 + +MIT License diff --git a/main.py b/main.py new file mode 100644 index 0000000..49ff3fc --- /dev/null +++ b/main.py @@ -0,0 +1,417 @@ +""" +MND-IA Main Orchestrator +========================= +主调度程序 - 协调所有 Agent Skills 完成每日投资决策流程 + +执行流程: +1. Ingest (数据摄取): Analyst 读取新闻, Quant 读取行情 +2. Cognition (认知更新): Strategist 更新 WorldBook +3. Decision (决策生成): PM 计算 Trust Index 并生成订单 +4. Execution (执行输出): 输出交易指令 +5. 
Audit (复盘审计): Auditor 评估昨日预测准确性 +""" + +import sys +from pathlib import Path +from datetime import datetime +import json +import argparse + +# 导入核心模块 +from core.world_book import WorldBook + +# 导入 Agent Skills +from skills.analyst import NewsAnalyzer +from skills.quant import QuantAnalyzer, get_all_etf_codes_from_asset_map +from skills.strategist import MacroStrategist +from skills.pm import PortfolioManager, save_orders_to_file +from skills.auditor import PerformanceAuditor +from skills.crawler import XinwenLianboCrawler + + +class MNDAgent: + """MND-IA 主智能体""" + + def __init__( + self, + data_dir: str = "data", + total_capital: float = 1000000.0, + llm_client=None, + use_llm: bool = True + ): + """ + Args: + data_dir: 数据目录 + total_capital: 总资金(元) + llm_client: LLM 客户端(可选,用于 Analyst 增强) + use_llm: 是否使用 LLM 进行智能分析(默认 True) + """ + self.data_dir = Path(data_dir) + self.data_dir.mkdir(exist_ok=True) + self.use_llm = use_llm + + # 初始化核心 + print("=" * 60) + print("🚀 MND-IA 系统启动") + print(f" 模式: {'LLM 智能分析' if use_llm else '规则引擎(降级模式)'}") + print("=" * 60) + + self.world_book = WorldBook(data_dir=data_dir) + + # 初始化 Agent Skills(传递 use_llm 参数) + self.analyst = NewsAnalyzer(llm_client=llm_client, use_llm=use_llm) + self.quant = QuantAnalyzer() + self.strategist = MacroStrategist(self.world_book, use_llm=use_llm) + self.pm = PortfolioManager(self.world_book, total_capital=total_capital, use_llm=use_llm) + self.auditor = PerformanceAuditor(data_dir=data_dir) + self.crawler = XinwenLianboCrawler() + + print("✅ 所有 Agent Skills 初始化完成\n") + + def daily_batch_workflow( + self, + news_list: list = None, + etf_codes: list = None, + use_crawler: bool = True + ) -> dict: + """ + 每日批处理流程(08:30 AM 执行) + + Args: + news_list: 新闻列表(如为 None 则使用爬虫或测试数据) + etf_codes: ETF 代码列表(如为 None 则使用全部) + use_crawler: 是否使用爬虫抓取新闻联播(默认 True) + + Returns: + 执行结果汇总 + """ + print(f"📅 开始执行每日批处理流程") + print(f"⏰ 执行时间: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}") + print("=" * 60) + + # ========== Phase 1: 
数据摄取 (Ingest) ========== + print("\n【Phase 1: 数据摄取】") + print("-" * 60) + + # 1.1 获取新闻数据 + if news_list is None: + if use_crawler: + print("🕷️ 使用爬虫抓取新闻联播...") + news_list = self.crawler.crawl_sync() + + if not news_list: + print("⚠️ 爬虫未获取到数据,使用测试数据") + news_list = self._get_test_news() + else: + print("⚠️ 未提供新闻数据,使用测试数据") + news_list = self._get_test_news() + + print(f"📰 新闻分析: {len(news_list)} 条") + news_analysis = self.analyst.batch_analyze(news_list) + narrative_json = self.analyst.generate_narrative_json(news_analysis) + + # 1.2 获取行情数据 + if etf_codes is None: + etf_codes = get_all_etf_codes_from_asset_map() + print(f"📊 行情获取: {len(etf_codes)} 个 ETF") + + market_data = self.quant.batch_analyze(etf_codes) + market_report = self.quant.generate_market_report(market_data) + + # ========== Phase 2: 认知更新 (Cognition) ========== + print("\n【Phase 2: 认知更新】") + print("-" * 60) + + # 2.1 处理叙事 JSON + self.strategist.process_narrative_json(narrative_json) + + # 2.2 检测宏观周期变化 + cycle_change = self.strategist.detect_macro_cycle_change( + narrative_json, + market_report + ) + + if cycle_change: + self.strategist.apply_macro_cycle_change(cycle_change) + + # 2.3 每日维护 + maintenance_report = self.strategist.daily_maintenance() + + # 2.4 生成 WorldBook 快照 + wb_snapshot = self.strategist.generate_world_book_snapshot() + + # ========== Phase 3: 决策生成 (Decision) ========== + print("\n【Phase 3: 决策生成】") + print("-" * 60) + + # 3.1 计算 Trust Index + trust_results = self.pm.batch_calculate_trust_index(market_data) + + # 3.2 生成交易订单 + raw_orders = self.pm.generate_trade_orders(trust_results, market_data) + + # 3.3 风控检查 + final_orders = self.pm.apply_risk_control(raw_orders) + + # 3.4 生成投资组合报告 + portfolio_report = self.pm.generate_portfolio_report( + trust_results, + final_orders + ) + + # ========== Phase 4: 执行输出 (Execution) ========== + print("\n【Phase 4: 执行输出】") + print("-" * 60) + + if final_orders: + # 保存订单到文件 + timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") + 
save_orders_to_file(final_orders, f"orders_{timestamp}.json") + + print(f"📋 生成 {len(final_orders)} 条交易指令:") + for order in final_orders[:5]: # 显示前5条 + print(f" {order['action'].upper()} {order['code']}: " + f"{order['shares']}股 @ ¥{order['price']}") + else: + print("⚠️ 无符合条件的交易机会") + + # 保存 WorldBook + self.world_book.save() + + # ========== Phase 5: 结果汇总 ========== + execution_result = { + "timestamp": datetime.now().isoformat(), + "workflow": "daily_batch", + "phases": { + "ingest": { + "news_count": len(news_list), + "etf_count": len(etf_codes), + "narratives_identified": len(narrative_json['narratives']) + }, + "cognition": { + "active_narratives": len(self.world_book.active_narratives), + "macro_cycle": self.world_book.macro_cycle.to_dict(), + "cycle_changed": cycle_change is not None + }, + "decision": { + "trust_results": len(trust_results), + "orders_generated": len(final_orders), + "capital_utilization": portfolio_report['risk_control']['capital_utilization'] + } + }, + "top_opportunities": portfolio_report['top_opportunities'][:3], + "orders": final_orders + } + + # 保存执行记录 + self._save_execution_log(execution_result) + + print("\n" + "=" * 60) + print("✅ 每日批处理流程完成") + print("=" * 60) + + return execution_result + + def post_market_audit( + self, + yesterday_predictions: list = None + ) -> dict: + """ + 盘后复盘流程(16:00 PM 执行) + + Args: + yesterday_predictions: 昨日的预测记录 + + Returns: + 审计报告 + """ + print("\n📊 开始盘后复盘审计") + print("=" * 60) + + if yesterday_predictions is None: + print("⚠️ 无昨日预测数据,跳过审计") + return {"status": "skipped", "reason": "no_predictions"} + + # 获取实际表现数据(这里需要从行情数据计算) + actual_performances = self._fetch_actual_performances(yesterday_predictions) + + # 执行审计 + audit_summary = self.auditor.batch_audit( + yesterday_predictions, + actual_performances + ) + + # 生成报告 + report = self.auditor.generate_correction_report(audit_summary) + + # 保存报告 + report_path = self.data_dir / f"audit_report_{datetime.now().strftime('%Y%m%d')}.md" + with 
open(report_path, 'w', encoding='utf-8') as f: + f.write(report) + + print(f"\n📄 审计报告已保存: {report_path}") + print(f"准确率: {audit_summary['performance']['accuracy']:.1f}%") + + return audit_summary + + def intraday_sentinel( + self, + breaking_news: dict + ) -> dict: + """ + 盘中哨兵流程(09:30 - 15:00) + + 当检测到 Level A 重大新闻时触发 + + Args: + breaking_news: 快讯内容 + + Returns: + 应急响应结果 + """ + print("\n🚨 盘中哨兵触发") + print("=" * 60) + + # 分析快讯 + analysis = self.analyst.analyze_news( + breaking_news.get('text', ''), + breaking_news.get('title', '') + ) + + # 如果不是 Level A,忽略 + if analysis['level'] != 'A': + print("⚠️ 非 A 级新闻,不触发应急响应") + return {"triggered": False, "reason": "not_level_a"} + + print(f"⚠️ 检测到 Level A 重大新闻: {analysis['title']}") + print(f" 涉及板块: {', '.join(analysis['sectors'])}") + + # 立即更新 WorldBook + narrative_json = self.analyst.generate_narrative_json([analysis]) + self.strategist.process_narrative_json(narrative_json) + + # 检查宏观周期变化 + cycle_change = self.strategist.detect_macro_cycle_change(narrative_json) + if cycle_change: + self.strategist.apply_macro_cycle_change(cycle_change) + + # 重新评估持仓(如果有) + # 这里简化处理,实际应该重新计算 Trust Index + + self.world_book.save() + + return { + "triggered": True, + "news_level": "A", + "sectors_affected": analysis['sectors'], + "macro_cycle_changed": cycle_change is not None, + "timestamp": datetime.now().isoformat() + } + + def _get_test_news(self) -> list: + """获取测试新闻数据""" + return [ + { + "title": "工信部发布低空经济指导意见 2027年规模达1万亿", + "text": "工业和信息化部发布《低空经济高质量发展指导意见》,推动 eVTOL 产业化。" + }, + { + "title": "AI算力需求持续旺盛 多家公司业绩预增50%", + "text": "智算中心建设加速,算力租赁业务快速增长。" + } + ] + + def _fetch_actual_performances( + self, + predictions: list + ) -> dict: + """获取实际表现数据(简化版)""" + # 实际应该从 Quant 模块获取真实数据 + # 这里返回模拟数据 + performances = {} + for pred in predictions: + code = pred['code'] + performances[code] = { + "code": code, + "price_change": 0, # 需要实际计算 + "volume_change": 1.0, + "date": datetime.now().strftime("%Y-%m-%d") + } + return performances + + def 
_save_execution_log(self, result: dict) -> None: + """保存执行日志""" + log_path = self.data_dir / "execution_logs.json" + + logs = [] + if log_path.exists(): + with open(log_path, 'r', encoding='utf-8') as f: + logs = json.load(f) + + logs.append(result) + + # 只保留最近30天 + logs = logs[-30:] + + with open(log_path, 'w', encoding='utf-8') as f: + json.dump(logs, f, ensure_ascii=False, indent=2) + + +def main(): + """命令行入口""" + parser = argparse.ArgumentParser(description="MND-IA 投资智能体") + parser.add_argument( + '--mode', + choices=['daily', 'audit', 'test'], + default='test', + help='运行模式: daily(每日批处理), audit(盘后审计), test(测试)' + ) + parser.add_argument( + '--capital', + type=float, + default=1000000.0, + help='总资金(元)' + ) + parser.add_argument( + '--no-crawler', + action='store_true', + help='不使用爬虫,使用测试数据' + ) + parser.add_argument( + '--no-llm', + action='store_true', + help='不使用 LLM,使用规则引擎(降级模式)' + ) + + args = parser.parse_args() + + # 创建 Agent 实例 + agent = MNDAgent( + total_capital=args.capital, + use_llm=not args.no_llm + ) + + if args.mode == 'daily': + # 每日批处理 + result = agent.daily_batch_workflow(use_crawler=not args.no_crawler) + print(f"\n执行结果:") + print(json.dumps(result, ensure_ascii=False, indent=2)) + + elif args.mode == 'audit': + # 盘后审计(需要提供昨日数据) + print("盘后审计模式需要提供昨日预测数据") + + else: + # 测试模式 + print("\n🧪 测试模式:执行完整流程演示\n") + result = agent.daily_batch_workflow(use_crawler=not args.no_crawler) + + print("\n" + "=" * 60) + print("📈 Top 投资机会:") + print("=" * 60) + for opp in result['top_opportunities']: + print(f" {opp['code']}: Trust Index {opp['trust_index']:.1f} - {opp['verdict']}") + + +if __name__ == "__main__": + main() diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..eef2db5 --- /dev/null +++ b/readme.md @@ -0,0 +1,153 @@ + +# Macro-Narrative Driven ETF Investment Agent (MND-IA) + +## 1. 项目概述 (Project Overview) + +**MND-IA** 是一个基于 **“宏观叙事生命周期 + 资金流向交叉验证”** 的自动化 ETF 投资智能体系统。 + +### 核心哲学 (Core Philosophy) + +1. 
**叙事即信号 (Narrative as Signal):** A 股是政策市,新闻联播及部委政策是市场趋势的先行指标。 +2. **资金即真相 (Flow as Truth):** 只有资金实际流入的板块,叙事才具备可信度。 +3. **信任指数 (Trust Index):** 决策基于叙事热度与资金强度的共振程度。 +4. **生命周期管理 (Lifecycle):** 新闻不是孤立的脉冲,而是具有“潜伏-发酵-兑现-衰退”周期的持久对象。 + +--- + +## 2. 系统架构 (System Architecture) + +本系统采用 **Multi-Agent "Skills" 架构**,由一个不可拆分的**共享内核 (Shared Kernel)** 和五个独立职能的 **Agent Skills** 组成。 + +### 2.1 共享内核 (Shared Kernel) - *不可拆分* + +这是系统的数据中心和逻辑基石,所有 Agent 必须遵循此协议。 + +1. **MCP World Book (世界书):** 一个动态更新的 JSON 数据库,存储宏观状态、活跃叙事对象和历史经验。 +2. **Asset Map (资产映射表):** 连接自然语言(如“低空经济”)与金融代码(如 `512XXX`)的静态字典。 +3. **Macro Matrix (宏观逻辑矩阵):** 定义宏观因子(如利率、汇率)对各 ETF 板块的**基础理论影响值 ()**。 + +### 2.2 Agent Skills 群 (The Squad) + +| Agent ID | 名称 | 职能 (Skill) | 输入 | 输出 | +| --- | --- | --- | --- | --- | +| **Skill_A** | **情报分析师** (Analyst) | NLP 提取、新闻分级、叙事打分 | 新闻文本 (News) | `Narrative_JSON` | +| **Skill_B** | **量化研究员** (Quant) | 行情分析、资金流计算、异常检测 | 行情数据 (Akshare) | `Market_Data_JSON` | +| **Skill_C** | **宏观策略师** (Strategist) | 维护世界书、判断周期、更新叙事权重 | `Narrative_JSON` + 历史 WorldBook | 更新后的 `World_Book` | +| **Skill_D** | **基金经理** (PM) | 计算信任指数、风控、生成订单 | `World_Book` + `Market_Data_JSON` | `Trade_Orders` | +| **Skill_E** | **复盘审计员** (Auditor) | 绩效归因、偏差修正 | 历史预测 vs 实际走势 | `Correction_Report` | +| **Skill_F** | **新闻联播爬虫** (Crawler) | 抓取新闻联播文字稿 | 目标日期 | 新闻文本列表 | + +--- + +## 3. 
数据协议与 Schema (Data Protocols) + +**注意:AI 生成代码时必须严格遵守以下 JSON 结构。** + +### 3.1 World Book Schema (世界书结构) + +```json +{ + "timestamp": "2025-10-01T09:00:00", + "macro_cycle": { + "status": "upward", // upward(上行) | downward(下行) + "liquidity": "loose", // loose | neutral | tight + "policy_wind": "stimulus" // stimulus | regulation | wait_and_see + }, + "active_narratives": [ + { + "id": "narrative_low_altitude_economy_202510", + "topic": "低空经济", + "related_etfs": ["512XXX", "159XXX"], + "lifecycle_stage": "fermentation", // incubation | fermentation | realization | decay + "base_score": 85, // 初始新闻强度 + "decay_factor": 0.95, // 每日衰减系数 + "current_weight": 80.75, // 动态权重 + "last_updated": "2025-10-01" + } + ], + "static_matrix_snapshot": { + // 基础宏观因子影响表 ($x$) + "tech_etf": {"rate_cut": 8, "currency_depreciation": 2}, + "bank_etf": {"rate_cut": -5, "currency_depreciation": -1} + } +} + +``` + +### 3.2 Trust Index Formula (信任指数算法) + +Agent D (PM) 必须使用以下逻辑计算得分: + +* **Score_narrative:** 来自 World Book 中 `current_weight`。 +* **Score_flow:** 来自 Skill_B (Quant) 的资金净流入归一化评分。 +* **Penalty_divergence:** 当 `Narrative > 80` 但 `Flow < 30` 时,惩罚值设为 100 (一票否决)。 + +--- + +## 4. 业务流程 (Workflows) + +### 4.1 每日批处理流程 (Daily Batch) - *08:30 AM* + +1. **Ingest:** Skill_A 读取昨夜今晨新闻,Skill_B 读取昨日行情。 +2. **Cognition:** Skill_C 读取 Skill_A 的结果,更新 MCP World Book(增强或衰减叙事权重)。 +3. **Decision:** Skill_D 读取最新的 World Book 和 Skill_B 的资金数据,计算 Trust Index。 +4. **Execution:** Skill_D 输出具体的 `buy/sell` 指令列表。 + +### 4.2 盘中哨兵流程 (Intraday Sentinel) - *09:30 - 15:00* + +1. **Monitor:** 系统监听快讯接口(如财联社红色资讯)。 +2. **Trigger:** 若检测到 `Level A` 级新闻(如印花税、降准)。 +3. **Interrupt:** 唤醒 Skill_C 临时修改 World Book 状态。 +4. **Re-eval:** 触发 Skill_D 重新计算当日持仓建议。 + +### 4.3 盘后复盘流程 (Post-Market Feedback) - *16:00 PM* + +1. Skill_E 对比昨日的 `Trade_Orders` 和今日实际涨跌幅。 +2. 若偏差过大,Skill_E 向 MCP 写入一条 `correction_log`,调整 Skill_C 未来对同类新闻的打分权重。 + +--- + +## 5. 
项目结构 (Project Structure) + +建议 AI 按照以下目录结构生成代码: + +```text +/mnd_agent +├── /core # 共享内核 +│ ├── world_book.py # MCP 读写逻辑 +│ ├── asset_map.json # 资产映射字典 +│ └── macro_matrix.json # 静态因子表 ($x$) +├── /skills # Agent 能力群 +│ ├── analyst.py # Skill A: LLM Prompts for News +│ ├── quant.py # Skill B: Akshare Data Analysis +│ ├── strategist.py # Skill C: World Book Update Logic +│ ├── pm.py # Skill D: Trust Index & Trade Logic +│ ├── auditor.py # Skill E: Feedback Loop +│ └── crawler.py # Skill F: XinwenLianbo Crawler +├── /data # 本地存储 (Parquet/SQLite) +├── /logs # 运行日志 +├── main.py # 主调度入口 +└── README.md # 本文件 + +``` + +--- + +## 6. 技术栈约束 (Tech Stack Constraints) + +* **Language:** Python 3.10+ +* **LLM Interface:** OpenAI SDK (兼容 GLM-4/Claude-3.5) +* **Data Source:** `akshare` (A股行情), `crawl4ai` (新闻联播爬虫) +* **Data Process:** `pandas`, `numpy` +* **Storage:** 本地 JSON 文件模拟 MCP Server (MVP阶段) + +--- + +## 7. 给 AI 工具的构建指令 (Instructions for AI Builder) + +1. **Step 1:** 首先创建 `core/` 目录,定义好 `WorldBook` 类和 JSON 数据结构,这是所有 Skill 通信的基础。 +2. **Step 2:** 实现 `skills/quant.py`,确保能从 Akshare 获取 ETF 数据并计算“资金放量”指标。 +3. **Step 3:** 实现 `skills/analyst.py`,编写能够将新闻文本转化为 JSON 对象的 Prompt。 +4. **Step 4:** 实现 `skills/pm.py` 中的 **Trust Index** 算法,确保逻辑与上述公式一致。 +5. 
**Step 5:** 编写 `main.py` 将上述模块串联起来,完成每日一次的模拟运行。 + diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..d9c56eb --- /dev/null +++ b/requirements.txt @@ -0,0 +1,23 @@ +# MND-IA 项目依赖 + +# 核心数据处理 +numpy>=1.24.0 +pandas>=2.0.0 + +# 金融数据获取 +akshare>=1.12.0 + +# 网页爬虫 +cloudscraper>=1.2.0 +beautifulsoup4>=4.12.0 + +# LLM 接口(可选) +openai>=1.0.0 + +# 其他工具 +python-dateutil>=2.8.2 +requests>=2.31.0 + +# 开发工具(可选) +jupyter>=1.0.0 +matplotlib>=3.7.0 diff --git a/skills/__init__.py b/skills/__init__.py new file mode 100644 index 0000000..6218862 --- /dev/null +++ b/skills/__init__.py @@ -0,0 +1,21 @@ +""" +MND-IA Skills Module +==================== +Agent 技能群 +""" + +from .analyst import NewsAnalyzer +from .quant import QuantAnalyzer +from .strategist import MacroStrategist +from .pm import PortfolioManager +from .auditor import PerformanceAuditor +from .crawler import XinwenLianboCrawler + +__all__ = [ + 'NewsAnalyzer', + 'QuantAnalyzer', + 'MacroStrategist', + 'PortfolioManager', + 'PerformanceAuditor', + 'XinwenLianboCrawler' +] diff --git a/skills/analyst.py b/skills/analyst.py new file mode 100644 index 0000000..06743d2 --- /dev/null +++ b/skills/analyst.py @@ -0,0 +1,582 @@ +""" +Skill A: Analyst (情报分析师) +============================== +职能:NLP 提取、新闻分级、叙事打分 + +输入:新闻文本列表 +输出:Narrative_JSON (叙事对象、评分、生命周期阶段) + +设计原则: +- 核心分析逻辑必须通过 LLM 完成,而非简单关键词匹配 +- 关键词仅作为 LLM 分析的辅助信息和兜底方案 +- 支持 LLM 失败时的优雅降级 +""" + +import json +import re +from datetime import datetime +from typing import Dict, List, Optional, Tuple +from pathlib import Path +import os +import sys + +# 添加项目根目录到路径 +sys.path.insert(0, str(Path(__file__).parent.parent)) +from core.config import config, get_llm_client, llm_call + + +class NewsAnalyzer: + """ + 情报分析师 - 负责新闻文本分析和叙事提取 + + 核心能力: + 1. 使用 LLM 进行新闻语义分析(主要方法) + 2. 新闻分级(Level A/B/C) + 3. 叙事主题提取与关联 + 4. 热度评分与市场影响判断 + 5. 生命周期阶段推断 + + 分析流程: + 1. 首先尝试使用 LLM 进行深度分析 + 2. LLM 返回结构化的分析结果 + 3. 
如果 LLM 调用失败,降级到规则引擎 + """ + + # LLM 分析系统提示词 + SYSTEM_PROMPT = """你是专业的 A 股市场情报分析师,专门负责解读政策新闻和市场信息。 + +你的核心任务是: +1. 判断新闻的重要性级别 (A/B/C级) +2. 识别受影响的行业板块 +3. 评估新闻对市场的影响强度 +4. 判断叙事所处的生命周期阶段 + +评级标准: +- A级:央行/国务院级别的重大政策(如降准降息、印花税调整、重大改革),通常能引发市场大幅波动 +- B级:部委级别的行业政策(如工信部新规、发改委规划),对特定行业有显著影响 +- C级:一般性新闻、行业动态、公司公告,影响有限 + +生命周期阶段: +- incubation (潜伏期): 政策风向初现,尚未被市场充分认知 +- fermentation (发酵期): 市场开始关注,资金试探性流入 +- realization (兑现期): 政策落地,市场充分反应 +- decay (衰退期): 利好出尽,边际效应递减 + +请严格按照 JSON 格式返回分析结果。""" + + def __init__(self, llm_client=None, use_llm: bool = True): + """ + Args: + llm_client: OpenAI SDK 客户端(兼容 GLM/Claude),如果不提供则使用配置文件中的默认客户端 + use_llm: 是否使用 LLM 进行分析(默认 True) + """ + self.llm_client = llm_client or get_llm_client() + self.use_llm = use_llm and (self.llm_client is not None) + self.asset_map = self._load_asset_map() + + # 板块关键词映射 - 仅作为 LLM 的参考信息 + self.sector_keywords = {} + self.sector_names = {} # 资产ID到中文名的映射 + if self.asset_map: + for asset_id, asset_data in self.asset_map.get("assets", {}).items(): + self.sector_keywords[asset_id] = asset_data.get("keywords", []) + self.sector_names[asset_id] = asset_data.get("name", asset_id) + + if self.use_llm: + print("[Analyst] ✅ LLM 模式启用 - 将使用大模型进行智能分析") + else: + print("[Analyst] ⚠️ 降级模式 - 将使用规则引擎(关键词匹配)") + + def _load_asset_map(self) -> Optional[Dict]: + """加载资产映射表""" + asset_map_path = Path("core") / "asset_map.json" + if not asset_map_path.exists(): + print("[Analyst] 警告: 未找到 asset_map.json") + return None + + with open(asset_map_path, 'r', encoding='utf-8') as f: + return json.load(f) + + def _get_available_sectors_prompt(self) -> str: + """生成可用板块列表的提示信息""" + if not self.asset_map: + return "" + + sectors_info = [] + for asset_id, asset_data in self.asset_map.get("assets", {}).items(): + name = asset_data.get("name", asset_id) + keywords = asset_data.get("keywords", [])[:5] # 取前5个关键词 + sectors_info.append(f"- {asset_id} ({name}): {', '.join(keywords)}") + + return "\n".join(sectors_info) + + def 
_build_analysis_prompt(self, news_text: str, title: str) -> str: + """构建 LLM 分析的用户提示词""" + sectors_info = self._get_available_sectors_prompt() + + return f"""请分析以下新闻,并以 JSON 格式返回结果: + +【新闻标题】 +{title} + +【新闻正文】 +{news_text} + +【可用板块列表】 +{sectors_info} + +请返回如下格式的 JSON(不要包含其他文字): +{{ + "level": "A/B/C", + "level_reason": "判断级别的理由(50字内)", + "sectors": ["asset_id1", "asset_id2"], + "sector_reason": "选择这些板块的理由(50字内)", + "score": 0-100, + "score_reason": "评分依据(30字内)", + "lifecycle_stage": "incubation/fermentation/realization/decay", + "stage_reason": "生命周期判断理由(30字内)", + "sentiment": "positive/negative/neutral", + "key_signal": "新闻中最关键的投资信号(30字内)" +}}""" + + def analyze_news(self, news_text: str, title: str = "") -> Dict: + """ + 单条新闻分析(主要方法) + + 优先使用 LLM 进行智能分析,失败时降级到规则引擎 + + Args: + news_text: 新闻正文 + title: 新闻标题(可选) + + Returns: + 分析结果字典 + """ + if self.use_llm: + result = self._analyze_with_llm(news_text, title) + if result: + return result + print("[Analyst] ⚠️ LLM 分析失败,降级到规则引擎") + + # 降级到规则引擎 + return self._analyze_with_rules(news_text, title) + + def _analyze_with_llm(self, news_text: str, title: str) -> Optional[Dict]: + """ + 使用 LLM 进行深度新闻分析 + + Args: + news_text: 新闻正文 + title: 新闻标题 + + Returns: + 分析结果字典,失败返回 None + """ + full_text = f"{title}\n{news_text}" if title else news_text + + # 构建提示词 + user_prompt = self._build_analysis_prompt(news_text, title) + + try: + # 调用 LLM + llm_output = llm_call( + messages=[ + {"role": "system", "content": self.SYSTEM_PROMPT}, + {"role": "user", "content": user_prompt} + ], + temperature=0.3, + max_tokens=800 + ) + + if not llm_output: + return None + + # 清理并解析 JSON + llm_output = llm_output.strip() + # 移除可能的 markdown 代码块标记 + if llm_output.startswith("```"): + llm_output = re.sub(r'^```(?:json)?\s*', '', llm_output) + llm_output = re.sub(r'\s*```$', '', llm_output) + + llm_result = json.loads(llm_output) + + # 验证并规范化结果 + level = llm_result.get("level", "C").upper() + if level not in ["A", "B", "C"]: + level = "C" + + sectors = 
llm_result.get("sectors", []) + # 验证板块是否存在于 asset_map 中 + valid_sectors = [s for s in sectors if s in self.sector_keywords] + + score = float(llm_result.get("score", 50)) + score = max(0, min(100, score)) # 限制在 0-100 + + lifecycle_stage = llm_result.get("lifecycle_stage", "incubation") + if lifecycle_stage not in ["incubation", "fermentation", "realization", "decay"]: + lifecycle_stage = "incubation" + + # 关联 ETF + related_etfs = [] + if self.asset_map: + for sector in valid_sectors: + asset_data = self.asset_map.get("assets", {}).get(sector, {}) + related_etfs.extend(asset_data.get("etfs", [])) + related_etfs = list(set(related_etfs)) + + return { + "title": title, + "level": level, + "level_reason": llm_result.get("level_reason", ""), + "sectors": valid_sectors, + "sector_reason": llm_result.get("sector_reason", ""), + "score": round(score, 2), + "score_reason": llm_result.get("score_reason", ""), + "lifecycle_stage": lifecycle_stage, + "stage_reason": llm_result.get("stage_reason", ""), + "related_etfs": related_etfs, + "sentiment": llm_result.get("sentiment", "neutral"), + "key_signal": llm_result.get("key_signal", ""), + "timestamp": datetime.now().isoformat(), + "analysis_method": "llm", + "summary": f"{level}级 {len(valid_sectors)}板块 评分{score:.0f} [{llm_result.get('sentiment', 'neutral')}]" + } + + except json.JSONDecodeError as e: + print(f"[Analyst] LLM 返回格式解析失败: {e}") + return None + except Exception as e: + print(f"[Analyst] LLM 分析异常: {e}") + return None + + def _analyze_with_rules(self, news_text: str, title: str) -> Dict: + """ + 规则引擎分析(降级方案) + + 当 LLM 不可用时,使用关键词匹配进行基础分析 + + Args: + news_text: 新闻正文 + title: 新闻标题 + + Returns: + 分析结果字典 + """ + full_text = f"{title}\n{news_text}" if title else news_text + + # 1. 新闻分级(关键词匹配) + level = self._classify_level_by_keywords(full_text) + + # 2. 板块识别(关键词匹配) + sectors = self._extract_sectors_by_keywords(full_text) + + # 3. 评分计算 + score = self._calculate_score_by_rules(full_text, level, sectors) + + # 4. 
生命周期阶段 + stage = self._determine_stage_by_rules(level, score) + + # 5. 关联 ETF + related_etfs = [] + if self.asset_map: + for sector in sectors: + asset_data = self.asset_map.get("assets", {}).get(sector, {}) + related_etfs.extend(asset_data.get("etfs", [])) + related_etfs = list(set(related_etfs)) + + return { + "title": title, + "level": level, + "level_reason": "基于关键词匹配(降级模式)", + "sectors": sectors, + "sector_reason": "基于关键词匹配(降级模式)", + "score": score, + "score_reason": "规则引擎计算", + "lifecycle_stage": stage, + "stage_reason": "基于级别和评分推断", + "related_etfs": related_etfs, + "sentiment": "neutral", + "key_signal": "", + "timestamp": datetime.now().isoformat(), + "analysis_method": "rules", + "summary": f"{level}级 {len(sectors)}板块 评分{score} [降级模式]" + } + + # ==================== 规则引擎辅助方法(降级方案) ==================== + + # 新闻分级关键词库 + LEVEL_A_KEYWORDS = [ + "印花税", "降准", "降息", "降低存款准备金率", + "央行宣布", "国务院", "重磅", "历史性", "里程碑", + "全面降息", "万亿", "大规模刺激", "救市", "重大改革" + ] + + LEVEL_B_KEYWORDS = [ + "部委", "指导意见", "规划", "通知", + "发改委", "工信部", "证监会", "银保监会", + "新政", "改革", "试点", "专项", "行动计划" + ] + + def _classify_level_by_keywords(self, text: str) -> str: + """基于关键词的新闻分级""" + for keyword in self.LEVEL_A_KEYWORDS: + if keyword in text: + return "A" + for keyword in self.LEVEL_B_KEYWORDS: + if keyword in text: + return "B" + return "C" + + def _extract_sectors_by_keywords(self, text: str) -> List[str]: + """基于关键词的板块提取""" + matched_sectors = [] + for sector, keywords in self.sector_keywords.items(): + for keyword in keywords: + if keyword in text: + matched_sectors.append(sector) + break + return matched_sectors + + def _calculate_score_by_rules(self, text: str, level: str, sectors: List[str]) -> float: + """基于规则的评分计算""" + base_score = {"A": 85, "B": 65, "C": 45}.get(level, 45) + sector_bonus = min(len(sectors) * 5, 15) + + # 情绪词检测 + positive_keywords = ["利好", "刺激", "支持", "鼓励", "加快", "推动", "大力"] + emotion_bonus = 10 if any(kw in text for kw in positive_keywords) else 0 + + return 
min(base_score + sector_bonus + emotion_bonus, 100) + + def _determine_stage_by_rules(self, level: str, score: float) -> str: + """基于规则的生命周期阶段判断""" + if level == "A" and score >= 80: + return "realization" + elif level in ["A", "B"] and score >= 60: + return "fermentation" + else: + return "incubation" + + def batch_analyze( + self, + news_list: List[Dict[str, str]] + ) -> List[Dict]: + """ + 批量分析新闻 + + Args: + news_list: 新闻列表,每条包含 {"title": "", "text": ""} + + Returns: + 分析结果列表 + """ + results = [] + llm_count = 0 + rules_count = 0 + + mode = "LLM智能分析" if self.use_llm else "规则引擎(降级模式)" + print(f"[Analyst] 开始批量分析 {len(news_list)} 条新闻 | 模式: {mode}") + + for i, news in enumerate(news_list, 1): + title = news.get("title", "") + text = news.get("text", "") + + # 简化标题显示 + display_title = title[:25] + "..." if len(title) > 25 else title + + result = self.analyze_news(text, title) + results.append(result) + + # 统计分析方法使用情况 + if result.get("analysis_method") == "llm": + llm_count += 1 + method_icon = "🤖" + else: + rules_count += 1 + method_icon = "📏" + + print(f"[Analyst] {method_icon} ({i}/{len(news_list)}) {result['level']}级 | {result['score']:.0f}分 | {display_title}") + + print(f"[Analyst] ✅ 批量分析完成 | LLM: {llm_count}条, 规则: {rules_count}条") + return results + + def generate_narrative_json( + self, + analysis_results: List[Dict] + ) -> Dict: + """ + 生成标准化的 Narrative_JSON(供 Strategist 使用) + + 聚合逻辑: + 1. 按板块(sector)聚合所有相关新闻 + 2. 计算平均分和最高分 + 3. 汇总 LLM 分析的关键信号 + 4. 
确定最终的生命周期阶段 + + Args: + analysis_results: batch_analyze 的输出 + + Returns: + 标准化 JSON 格式 + """ + # 按板块聚合新闻 + sector_narratives = {} + + for result in analysis_results: + for sector in result['sectors']: + if sector not in sector_narratives: + sector_narratives[sector] = { + "topic": sector, + "topic_name": self.sector_names.get(sector, sector), + "news_count": 0, + "avg_score": 0, + "max_score": 0, + "level_a_count": 0, + "level_b_count": 0, + "related_etfs": set(), + "lifecycle_stage": "incubation", + "key_signals": [], # 汇总关键信号 + "sentiments": [], # 情绪统计 + "analysis_methods": {"llm": 0, "rules": 0} + } + + narrative = sector_narratives[sector] + narrative["news_count"] += 1 + narrative["avg_score"] += result['score'] + narrative["max_score"] = max(narrative["max_score"], result['score']) + narrative["related_etfs"].update(result['related_etfs']) + + # 统计新闻级别 + if result['level'] == "A": + narrative["level_a_count"] += 1 + narrative["lifecycle_stage"] = result['lifecycle_stage'] + elif result['level'] == "B": + narrative["level_b_count"] += 1 + if narrative["lifecycle_stage"] == "incubation": + narrative["lifecycle_stage"] = "fermentation" + + # 收集关键信号(来自 LLM 分析) + if result.get("key_signal"): + narrative["key_signals"].append(result["key_signal"]) + + # 收集情绪 + sentiment = result.get("sentiment", "neutral") + narrative["sentiments"].append(sentiment) + + # 统计分析方法 + method = result.get("analysis_method", "rules") + narrative["analysis_methods"][method] = narrative["analysis_methods"].get(method, 0) + 1 + + # 后处理:计算平均分、汇总情绪等 + for narrative in sector_narratives.values(): + narrative["avg_score"] = round( + narrative["avg_score"] / narrative["news_count"], 2 + ) + narrative["related_etfs"] = list(narrative["related_etfs"]) + + # 计算情绪倾向 + sentiments = narrative["sentiments"] + positive_count = sentiments.count("positive") + negative_count = sentiments.count("negative") + if positive_count > negative_count: + narrative["overall_sentiment"] = "positive" + elif 
negative_count > positive_count: + narrative["overall_sentiment"] = "negative" + else: + narrative["overall_sentiment"] = "neutral" + + # 去重关键信号 + narrative["key_signals"] = list(set(narrative["key_signals"]))[:5] + + # 清理临时字段 + del narrative["sentiments"] + + # 排序输出 + sorted_narratives = sorted( + sector_narratives.values(), + key=lambda x: x['max_score'], + reverse=True + ) + + return { + "timestamp": datetime.now().isoformat(), + "total_news": len(analysis_results), + "narratives": sorted_narratives + } + + +# ==================== 测试代码 ==================== + +if __name__ == "__main__": + print("=" * 60) + print("Skill A: Analyst 情报分析师测试") + print("=" * 60) + + # 创建分析器实例(默认启用 LLM) + analyst = NewsAnalyzer() + + # 测试新闻样本 + test_news = [ + { + "title": "央行宣布降准0.5个百分点 释放长期资金约1万亿", + "text": "中国人民银行决定于2025年1月5日降低金融机构存款准备金率0.5个百分点," + "此次降准为全面降准,除已执行5%存款准备金率的部分法人金融机构外," + "对其他金融机构普遍下调存款准备金率0.5个百分点,此次降准共计释放长期资金约1万亿元。" + }, + { + "title": "工信部发布低空经济发展指导意见 推动eVTOL产业化", + "text": "工业和信息化部发布《低空经济高质量发展指导意见》,提出到2027年," + "低空经济规模达到1万亿元,培育10家以上龙头企业。重点支持无人机、" + "电动垂直起降飞行器(eVTOL)等装备的研发和产业化。" + }, + { + "title": "AI算力需求持续旺盛 多家数据中心公司业绩预增", + "text": "随着人工智能应用的快速发展,算力需求呈现爆发式增长。多家上市公司发布业绩预告," + "预计2024年净利润增长50%以上。分析师认为,智算中心建设将成为未来3年的投资主线。" + } + ] + + # 批量分析 + print("\n") + results = analyst.batch_analyze(test_news) + + print("\n" + "=" * 60) + print("详细分析结果:") + print("=" * 60) + + for i, result in enumerate(results, 1): + print(f"\n【新闻 {i}】{result['title'][:50]}") + print(f" 📊 分析方法: {result.get('analysis_method', 'unknown').upper()}") + print(f" 📌 级别: {result['level']} | 评分: {result['score']}") + print(f" 🎯 板块: {', '.join(result['sectors']) if result['sectors'] else '未识别'}") + print(f" 🔄 周期阶段: {result['lifecycle_stage']}") + print(f" 📈 情绪: {result.get('sentiment', 'neutral')}") + if result.get('key_signal'): + print(f" 💡 关键信号: {result['key_signal']}") + if result.get('level_reason'): + print(f" 📝 级别理由: {result['level_reason']}") + print(f" 🏷️ 关联 ETF: {', 
'.join(result['related_etfs'][:4]) if result['related_etfs'] else '无'}...") + + # 生成叙事 JSON + print("\n" + "=" * 60) + print("聚合叙事 JSON:") + print("=" * 60) + + narrative_json = analyst.generate_narrative_json(results) + print(f"\n📰 总新闻数: {narrative_json['total_news']}") + print(f"🎯 识别叙事: {len(narrative_json['narratives'])} 个\n") + + for narrative in narrative_json['narratives']: + topic_name = narrative.get('topic_name', narrative['topic']) + level_info = f"A级×{narrative['level_a_count']}" if narrative['level_a_count'] > 0 else \ + f"B级×{narrative.get('level_b_count', 0)}" if narrative.get('level_b_count', 0) > 0 else "C级" + + print(f" 📍 {topic_name} ({narrative['topic']})") + print(f" 评分: {narrative['max_score']:.0f} (平均{narrative['avg_score']:.0f}) | " + f"新闻: {narrative['news_count']}条 | {level_info}") + print(f" 阶段: {narrative['lifecycle_stage']} | 情绪: {narrative.get('overall_sentiment', 'neutral')}") + if narrative.get('key_signals'): + print(f" 信号: {'; '.join(narrative['key_signals'][:2])}") + print() + + print("✅ Analyst 模块测试完成") diff --git a/skills/auditor.py b/skills/auditor.py new file mode 100644 index 0000000..ed5eaeb --- /dev/null +++ b/skills/auditor.py @@ -0,0 +1,475 @@ +""" +Skill E: Auditor (复盘审计员) +============================== +职能:绩效归因、偏差修正 + +输入:历史预测 vs 实际走势 +输出:Correction_Report(修正建议) +""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from core.config import config, llm_call +from typing import Dict, List, Optional +from datetime import datetime, timedelta +import json +import pandas as pd + + +class PerformanceAuditor: + """ + 复盘审计员 - 负责绩效归因和策略修正 + + 核心职责: + 1. 对比预测与实际表现 + 2. 分析偏差原因 + 3. 提出修正建议 + 4. 
记录经验教训 + """ + + def __init__(self, data_dir: str = "data"): + self.data_dir = Path(data_dir) + self.audit_log_path = self.data_dir / "audit_logs.json" + self.correction_history = self._load_correction_history() + + def _load_correction_history(self) -> List[Dict]: + """加载历史修正记录""" + if not self.audit_log_path.exists(): + return [] + + try: + with open(self.audit_log_path, 'r', encoding='utf-8') as f: + return json.load(f) + except Exception as e: + print(f"[Auditor] 加载修正历史失败: {e}") + return [] + + def _save_correction_history(self) -> None: + """保存修正记录""" + try: + with open(self.audit_log_path, 'w', encoding='utf-8') as f: + json.dump(self.correction_history, f, ensure_ascii=False, indent=2) + print(f"[Auditor] 修正记录已保存") + except Exception as e: + print(f"[Auditor] 保存失败: {e}") + + def audit_prediction( + self, + prediction: Dict, + actual_performance: Dict + ) -> Dict: + """ + 审计单个预测 + + Args: + prediction: 预测记录 + { + "code": "515980", + "trust_index": 75.0, + "narrative_score": 85.0, + "flow_score": 60.0, + "predicted_action": "buy", + "timestamp": "2025-12-29T08:30:00" + } + + actual_performance: 实际表现 + { + "code": "515980", + "price_change": -2.5, # 实际涨跌幅 % + "volume_change": 1.3, # 成交量变化倍数 + "date": "2025-12-29" + } + + Returns: + 审计结果 + """ + code = prediction['code'] + predicted_action = prediction['predicted_action'] + trust_index = prediction['trust_index'] + price_change = actual_performance['price_change'] + + # 判断预测是否正确 + if predicted_action == "buy": + is_correct = price_change > 0 + expected_direction = "上涨" + elif predicted_action == "sell": + is_correct = price_change < 0 + expected_direction = "下跌" + else: + is_correct = abs(price_change) < 2 + expected_direction = "震荡" + + # 计算偏差程度 + deviation = abs(price_change) if not is_correct else 0 + + # 分析原因 + reason = self._analyze_deviation_reason( + prediction, + actual_performance, + is_correct + ) + + # 生成修正建议 + correction = self._generate_correction( + prediction, + actual_performance, + is_correct, + 
reason + ) + + audit_result = { + "code": code, + "prediction": { + "action": predicted_action, + "trust_index": trust_index, + "expected": expected_direction + }, + "actual": { + "price_change": price_change, + "volume_change": actual_performance.get('volume_change', 1.0) + }, + "result": { + "is_correct": is_correct, + "deviation": round(deviation, 2), + "reason": reason + }, + "correction": correction, + "timestamp": datetime.now().isoformat() + } + + return audit_result + + def _analyze_deviation_reason( + self, + prediction: Dict, + actual: Dict, + is_correct: bool + ) -> str: + """分析偏差原因""" + if is_correct: + return "预测准确" + + narrative_score = prediction.get('narrative_score', 0) + flow_score = prediction.get('flow_score', 0) + price_change = actual['price_change'] + volume_change = actual.get('volume_change', 1.0) + + # 情况1:叙事高但资金未跟进 + if narrative_score > 70 and flow_score < 40: + return "叙事未兑现:新闻热度高但资金流入不足,市场观望情绪浓厚" + + # 情况2:资金放量但方向相反 + if volume_change > 1.5 and price_change < -2: + return "放量下跌:资金大量流出,可能遭遇利空或止盈盘涌出" + + # 情况3:黑天鹅事件 + if abs(price_change) > 5: + return "极端波动:可能受突发事件或外部市场剧烈波动影响" + + # 情况4:叙事过度衰减 + if narrative_score < 50 and prediction['predicted_action'] == "buy": + return "叙事衰减过快:市场关注度下降速度超预期" + + return "其他因素:需进一步分析宏观环境或板块轮动" + + def _generate_correction( + self, + prediction: Dict, + actual: Dict, + is_correct: bool, + reason: str + ) -> Dict: + """生成修正建议""" + if is_correct: + return { + "action": "none", + "message": "预测准确,保持当前策略" + } + + narrative_score = prediction.get('narrative_score', 0) + flow_score = prediction.get('flow_score', 0) + + corrections = [] + + # 修正1:提高资金流权重 + if "资金流入不足" in reason: + corrections.append({ + "target": "trust_index_formula", + "suggestion": "提高资金流权重至 0.5,降低叙事权重至 0.5", + "reason": "叙事未能转化为实际资金流动" + }) + + # 修正2:加强止损 + if "放量下跌" in reason: + corrections.append({ + "target": "risk_control", + "suggestion": "设置 -3% 硬止损线,出现放量下跌立即清仓", + "reason": "未能及时识别资金流出信号" + }) + + # 修正3:降低 Trust Index 阈值 + if 
narrative_score > 60 and flow_score > 50 and not is_correct: + corrections.append({ + "target": "min_trust_score", + "suggestion": "提高最低信任指数阈值至 65", + "reason": "当前阈值可能过于宽松,需要更高确定性" + }) + + # 修正4:叙事衰减系数调整 + if "衰减过快" in reason: + corrections.append({ + "target": "narrative_decay", + "suggestion": "对该板块叙事的衰减系数从 0.95 调整为 0.97", + "reason": "市场对该主题的持续关注度被低估" + }) + + return { + "action": "adjust" if corrections else "monitor", + "corrections": corrections, + "priority": "high" if len(corrections) >= 2 else "medium" + } + + def batch_audit( + self, + predictions: List[Dict], + actual_performances: Dict[str, Dict] + ) -> Dict: + """ + 批量审计 + + Args: + predictions: 预测列表 + actual_performances: 实际表现字典 {code: performance} + + Returns: + 汇总审计报告 + """ + print(f"[Auditor] 开始审计 {len(predictions)} 条预测...") + + audit_results = [] + correct_count = 0 + + for prediction in predictions: + code = prediction['code'] + + if code not in actual_performances: + print(f"[Auditor] 警告: 缺少 {code} 的实际数据") + continue + + result = self.audit_prediction( + prediction, + actual_performances[code] + ) + + audit_results.append(result) + + if result['result']['is_correct']: + correct_count += 1 + + # 计算准确率 + accuracy = correct_count / len(audit_results) * 100 if audit_results else 0 + + # 统计偏差原因 + reason_counts = {} + for result in audit_results: + reason = result['result']['reason'] + reason_counts[reason] = reason_counts.get(reason, 0) + 1 + + # 收集修正建议 + all_corrections = [] + for result in audit_results: + if result['correction']['action'] != "none": + all_corrections.extend(result['correction'].get('corrections', [])) + + # 生成汇总报告 + summary_report = { + "timestamp": datetime.now().isoformat(), + "audit_period": { + "start": min(r['timestamp'] for r in audit_results) if audit_results else None, + "end": max(r['timestamp'] for r in audit_results) if audit_results else None + }, + "performance": { + "total_predictions": len(audit_results), + "correct_predictions": correct_count, + 
"wrong_predictions": len(audit_results) - correct_count,
+                "accuracy": round(accuracy, 2)
+            },
+            "deviation_analysis": reason_counts,
+            "corrections_needed": len(all_corrections),
+            # FIX: the original comprehension re-bound `c` inside an any()
+            # that scanned the WHOLE list, so the filter was all-or-nothing:
+            # if any correction mentioned '止损' every correction was flagged
+            # high-priority, otherwise none was. Filter each item on its own.
+            "high_priority_corrections": [
+                c for c in all_corrections
+                if '止损' in c.get('suggestion', '')
+            ],
+            "detailed_results": audit_results
+        }
+
+        # Persist this audit run into the on-disk history log
+        self.correction_history.append(summary_report)
+        self._save_correction_history()
+
+        print(f"[Auditor] 审计完成,准确率: {accuracy:.1f}%")
+        return summary_report
+
+    def generate_correction_report(
+        self,
+        audit_summary: Dict
+    ) -> str:
+        """
+        Render a human-readable correction report.
+
+        Args:
+            audit_summary: output of batch_audit()
+
+        Returns:
+            Markdown-formatted report text
+        """
+        accuracy = audit_summary['performance']['accuracy']
+        total = audit_summary['performance']['total_predictions']
+        correct = audit_summary['performance']['correct_predictions']
+
+        report = f"""
+# 投资决策复盘审计报告
+
+**生成时间:** {datetime.now().strftime("%Y-%m-%d %H:%M:%S")}
+
+## 一、整体表现
+
+- **预测数量:** {total} 条
+- **正确预测:** {correct} 条
+- **错误预测:** {total - correct} 条
+- **准确率:** {accuracy:.1f}%
+
+## 二、偏差原因分析
+
+"""
+        for reason, count in audit_summary['deviation_analysis'].items():
+            report += f"- **{reason}:** {count} 次\n"
+
+        report += f"\n## 三、修正建议\n\n"
+        report += f"需要修正的参数: {audit_summary['corrections_needed']} 项\n\n"
+
+        # List the top high-priority corrections, capped at three below
+        high_priority = audit_summary.get('high_priority_corrections', [])
+        if high_priority:
+            report += "### 🚨 高优先级修正\n\n"
+            for i, correction in enumerate(high_priority[:3], 1):
+                report += f"{i}. 
**{correction['target']}**\n" + report += f" - 建议: {correction['suggestion']}\n" + report += f" - 原因: {correction['reason']}\n\n" + + report += "\n## 四、经验总结\n\n" + + if accuracy >= 70: + report += "✅ 当前策略表现良好,建议保持现有参数设置。\n" + elif accuracy >= 50: + report += "⚠️ 策略有效但存在改进空间,建议根据修正建议调整参数。\n" + else: + report += "❌ 策略表现不佳,需要重大调整或暂停交易,重新评估市场环境。\n" + + return report + + def get_historical_accuracy_trend(self) -> List[Dict]: + """获取历史准确率趋势""" + trend = [] + + for record in self.correction_history[-10:]: # 最近10次 + trend.append({ + "timestamp": record['timestamp'], + "accuracy": record['performance']['accuracy'], + "total_predictions": record['performance']['total_predictions'] + }) + + return trend + + +# ==================== 测试代码 ==================== + +if __name__ == "__main__": + print("=" * 50) + print("Skill E: Auditor 复盘审计员测试") + print("=" * 50) + + # 创建审计员实例 + auditor = PerformanceAuditor(data_dir="data") + + # 模拟昨日的预测记录 + mock_predictions = [ + { + "code": "515980", + "trust_index": 75.0, + "narrative_score": 85.0, + "flow_score": 60.0, + "predicted_action": "buy", + "timestamp": "2025-12-29T08:30:00" + }, + { + "code": "512480", + "trust_index": 45.0, + "narrative_score": 55.0, + "flow_score": 30.0, + "predicted_action": "hold", + "timestamp": "2025-12-29T08:30:00" + }, + { + "code": "159928", + "trust_index": 35.0, + "narrative_score": 70.0, + "flow_score": 20.0, + "predicted_action": "sell", + "timestamp": "2025-12-29T08:30:00" + } + ] + + # 模拟今日的实际表现 + mock_actual = { + "515980": { + "code": "515980", + "price_change": 3.2, # 上涨3.2% + "volume_change": 1.5, + "date": "2025-12-29" + }, + "512480": { + "code": "512480", + "price_change": -1.8, # 下跌1.8% + "volume_change": 0.9, + "date": "2025-12-29" + }, + "159928": { + "code": "159928", + "price_change": 0.5, # 微涨(预测失误) + "volume_change": 2.1, + "date": "2025-12-29" + } + } + + # 1. 批量审计 + print("\n1. 
批量审计:") + summary = auditor.batch_audit(mock_predictions, mock_actual) + + print(f"\n准确率: {summary['performance']['accuracy']:.1f}%") + print(f"正确: {summary['performance']['correct_predictions']}") + print(f"错误: {summary['performance']['wrong_predictions']}") + + # 2. 查看详细结果 + print("\n2. 详细审计结果:") + for result in summary['detailed_results']: + print(f"\n {result['code']}:") + print(f" - 预测: {result['prediction']['action']} (Trust={result['prediction']['trust_index']})") + print(f" - 实际: {result['actual']['price_change']:+.1f}%") + print(f" - 结果: {'✅ 正确' if result['result']['is_correct'] else '❌ 错误'}") + print(f" - 原因: {result['result']['reason']}") + + # 3. 生成修正报告 + print("\n3. 修正报告:") + report = auditor.generate_correction_report(summary) + print(report) + + # 4. 历史趋势 + print("\n4. 历史准确率趋势:") + trend = auditor.get_historical_accuracy_trend() + for record in trend: + print(f" {record['timestamp'][:10]}: {record['accuracy']:.1f}%") + + print("\n✅ Auditor 模块测试完成") diff --git a/skills/crawler.py b/skills/crawler.py new file mode 100644 index 0000000..a4765ef --- /dev/null +++ b/skills/crawler.py @@ -0,0 +1,298 @@ +""" +Skill F: Crawler (新闻联播爬虫) +================================ +职能:每日抓取新闻联播文字稿 + +数据源:https://cn.govopendata.com/xinwenlianbo/ +输出:符合 Skill A 要求的新闻列表 [{"title": "", "text": ""}] +""" + +import cloudscraper +from bs4 import BeautifulSoup +from datetime import datetime +from typing import List, Dict, Optional +from pathlib import Path +import json +import time + + +class XinwenLianboCrawler: + """ + 新闻联播爬虫 - 抓取每日新闻联播文字稿 + + 核心功能: + 1. 自动构建当日 URL + 2. 使用 cloudscraper 绕过 Cloudflare 防护 + 3. 解析新闻条目 + 4. 
输出符合 Analyst 要求的格式 + """ + + def __init__(self, base_url: str = "https://cn.govopendata.com/xinwenlianbo/"): + """ + Args: + base_url: 新闻联播网站基础 URL + """ + self.base_url = base_url + self.cache_dir = Path("data") / "news_cache" + self.cache_dir.mkdir(parents=True, exist_ok=True) + + # 创建 cloudscraper 会话 + self.scraper = cloudscraper.create_scraper( + browser={ + 'browser': 'chrome', + 'platform': 'windows', + 'desktop': True + } + ) + + def _build_url(self, date: Optional[str] = None) -> str: + """ + 构建目标 URL + + Args: + date: 日期字符串 (格式: YYYYMMDD),默认今天 + + Returns: + 完整 URL + """ + if date is None: + date = datetime.now().strftime("%Y%m%d") + + return f"{self.base_url}{date}/" + + def crawl_xinwenlianbo(self, date: Optional[str] = None) -> List[Dict[str, str]]: + """ + 爬取新闻联播 + + Args: + date: 日期字符串 (YYYYMMDD),默认今天 + + Returns: + 新闻列表 [{"title": "", "text": ""}] + """ + url = self._build_url(date) + display_date = date if date else datetime.now().strftime("%Y%m%d") + + print(f"[Crawler] 开始抓取 {display_date} 新闻联播") + print(f"[Crawler] URL: {url}") + + try: + # 使用 cloudscraper 发起请求 + response = self.scraper.get(url, timeout=30) + response.raise_for_status() + response.encoding = 'utf-8' + + # 解析 HTML + soup = BeautifulSoup(response.text, 'html.parser') + + # 提取新闻条目 + news_list = self._parse_content(soup) + + if not news_list: + print(f"[Crawler] 未找到新闻内容,可能网站结构变化或当日无更新") + print(f"[Crawler] 页面标题: {soup.title.string if soup.title else 'No Title'}") + + # 尝试从缓存读取 + cached = self._load_from_cache(display_date) + if cached: + print(f"[Crawler] 使用缓存数据 ({len(cached)} 条)") + return cached + + return [] + + print(f"[Crawler] 成功提取 {len(news_list)} 条新闻") + + # 缓存结果 + self._cache_news(news_list, display_date) + + return news_list + + except Exception as e: + print(f"[Crawler] 爬取失败: {e}") + + # 尝试从缓存读取 + cached = self._load_from_cache(display_date) + if cached: + print(f"[Crawler] 使用缓存数据 ({len(cached)} 条)") + return cached + + return [] + + def _parse_content(self, soup: 
BeautifulSoup) -> List[Dict[str, str]]: + """ + 解析网页内容,提取新闻条目 + + 网站结构: + - 容器: .content-section + - 标题: .content-heading + - 内容: .content-body + + Args: + soup: BeautifulSoup 对象 + + Returns: + 新闻列表 + """ + news_list = [] + + # 查找所有新闻板块 + sections = soup.select('.content-section') + + if not sections: + print("[Crawler] 未找到 .content-section 元素") + return [] + + for section in sections: + # 提取标题 + title_tag = section.select_one('.content-heading') + if not title_tag: + continue + + title = title_tag.get_text(strip=True) + + # 提取内容 + content_tag = section.select_one('.content-body') + if not content_tag: + continue + + # 尝试提取段落以保持格式 + paragraphs = content_tag.find_all('p') + if paragraphs: + # 多段落用换行分隔 + content = ' '.join([p.get_text(strip=True) for p in paragraphs]) + else: + content = content_tag.get_text(strip=True) + + # 过滤过短的内容 + if len(content) < 10: + continue + + news_list.append({ + "title": title, + "text": content + }) + + return news_list + + def _cache_news(self, news_list: List[Dict], date: str) -> None: + """缓存新闻到本地""" + cache_file = self.cache_dir / f"xinwenlianbo_{date}.json" + + try: + with open(cache_file, 'w', encoding='utf-8') as f: + json.dump({ + "date": date, + "timestamp": datetime.now().isoformat(), + "count": len(news_list), + "news": news_list + }, f, ensure_ascii=False, indent=2) + + print(f"[Crawler] 已缓存到 {cache_file}") + + except Exception as e: + print(f"[Crawler] 缓存失败: {e}") + + def _load_from_cache(self, date: str) -> Optional[List[Dict]]: + """从缓存加载新闻""" + cache_file = self.cache_dir / f"xinwenlianbo_{date}.json" + + if not cache_file.exists(): + return None + + try: + with open(cache_file, 'r', encoding='utf-8') as f: + data = json.load(f) + + return data.get('news', []) + + except Exception as e: + print(f"[Crawler] 读取缓存失败: {e}") + return None + + def crawl_sync( + self, + date: Optional[str] = None + ) -> List[Dict[str, str]]: + """ + 同步爬取接口(主要方法) + + Args: + date: 日期字符串 (YYYYMMDD) + + Returns: + 新闻列表 + """ + return 
self.crawl_xinwenlianbo(date) + + def crawl_date_range( + self, + start_date: str, + end_date: str + ) -> Dict[str, List[Dict]]: + """ + 批量爬取日期范围内的新闻 + + Args: + start_date: 开始日期 (YYYYMMDD) + end_date: 结束日期 (YYYYMMDD) + + Returns: + {date: news_list} 字典 + """ + from datetime import datetime, timedelta + + start = datetime.strptime(start_date, "%Y%m%d") + end = datetime.strptime(end_date, "%Y%m%d") + + results = {} + current = start + + while current <= end: + date_str = current.strftime("%Y%m%d") + print(f"\n[Crawler] 处理 {date_str}...") + + news_list = self.crawl_sync(date_str) + results[date_str] = news_list + + current += timedelta(days=1) + + # 礼貌延迟,避免频繁请求 + time.sleep(2) + + return results + + +# ==================== 测试代码 ==================== + +def test_sync(): + """同步测试(用于命令行直接运行)""" + print("=" * 50) + print("Skill F: Crawler 新闻联播爬虫测试") + print("=" * 50) + + crawler = XinwenLianboCrawler() + + # 同步爬取今日新闻 + print("\n测试 1: 爬取今日新闻") + news_list = crawler.crawl_sync() + + print(f"\n获取 {len(news_list)} 条新闻") + + if news_list: + print("\n前 3 条新闻预览:") + for i, news in enumerate(news_list[:3], 1): + print(f"\n[{i}] {news['title']}") + print(f" {news['text'][:150]}...") + + # 测试指定日期 + print("\n" + "=" * 50) + print("测试 2: 爬取指定日期 (20251229)") + specific_news = crawler.crawl_sync(date="20251229") + print(f"获取 {len(specific_news)} 条新闻") + + print("\n✅ Crawler 模块测试完成") + return news_list + + +if __name__ == "__main__": + test_sync() diff --git a/skills/pm.py b/skills/pm.py new file mode 100644 index 0000000..e045953 --- /dev/null +++ b/skills/pm.py @@ -0,0 +1,682 @@ +""" +Skill D: PM (基金经理) +======================== +职能:计算信任指数、风控、生成订单 + +输入:World_Book + Market_Data_JSON +输出:Trade_Orders (买卖指令列表) + +设计原则: +- 核心决策通过量化模型完成(向量点积) +- 使用 LLM 提供辅助分析和投资建议 +- 支持 LLM 失败时的优雅降级 +""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from core.world_book import WorldBook +from core.config import config, llm_call +from typing import Dict, 
List, Optional, Tuple +from datetime import datetime +import json +import re + + +class PortfolioManager: + """ + 基金经理 - 负责投资决策和订单生成 + + 核心职责: + 1. 计算 Trust Index(叙事 × 资金流 交叉验证) + 2. 使用 LLM 提供投资洞察和风险提示 + 3. 风险控制(仓位管理、止损) + 4. 生成具体的买卖订单 + """ + + # LLM 投资分析提示词 + INVESTMENT_ANALYSIS_PROMPT = """你是一位专业的 A 股 ETF 基金经理,擅长根据宏观环境和市场数据做出投资决策。 + +你的核心任务是: +1. 分析当前宏观环境对各板块的影响 +2. 评估投资机会的风险收益比 +3. 给出具体的投资建议和理由 +4. 提示潜在的风险因素 + +请严格按照 JSON 格式返回分析结果。""" + + def __init__( + self, + world_book: WorldBook, + total_capital: float = 1000000.0, # 默认100万 + max_position_pct: float = 0.15, # 单个 ETF 最大仓位15% + min_trust_score: float = 60.0, # 最低信任指数阈值 + use_llm: bool = True + ): + """ + Args: + world_book: WorldBook 实例 + total_capital: 总资金(元) + max_position_pct: 单个 ETF 最大仓位比例 + min_trust_score: 最低信任指数(低于此值不开仓) + use_llm: 是否使用 LLM 进行辅助分析 + """ + self.wb = world_book + self.total_capital = total_capital + self.max_position_pct = max_position_pct + self.min_trust_score = min_trust_score + self.use_llm = use_llm + + # 加载资产映射表 + self.asset_map = self._load_asset_map() + + # 当前持仓(简化版,实际应从数据库读取) + self.current_positions: Dict[str, Dict] = {} + + if self.use_llm: + print("[PM] ✅ LLM 辅助分析启用 - 将提供智能投资洞察") + else: + print("[PM] ⚠️ 纯量化模式 - 仅使用数学模型") + + def _load_asset_map(self) -> Dict: + """加载资产映射表""" + from pathlib import Path + asset_map_path = Path("core") / "asset_map.json" + if not asset_map_path.exists(): + print("[PM] 警告: 未找到 asset_map.json") + return {} + + with open(asset_map_path, 'r', encoding='utf-8') as f: + return json.load(f) + + def calculate_macro_sensitivity_score(self, asset_id: str) -> float: + """ + 使用向量点积计算资产的宏观敏感度得分 + + 这是新的核心计算方法,替代传统的手工评分。 + + 公式: + Score = Σ (macro_factor_value[i] × sensitivity[i]) + + 示例: + - 宏观环境向量:{"interest_rate_down": 1.0, "geopolitics_tension": 0.5, "policy_digital_economy": 1.0} + - 软件ETF敏感度:{"policy_digital_economy": 1.0, "interest_rate_down": 0.7} + - 计算:(1.0 × 1.0) + (1.0 × 0.7) + (0.5 × 0) = 1.7分 + + Args: + asset_id: 资产ID(例如 "tech_software") + + 
Returns: + 宏观敏感度得分(可能为负,表示利空) + """ + # 获取资产的敏感度矩阵 + asset_data = self.asset_map.get("assets", {}).get(asset_id, {}) + sensitivity_matrix = asset_data.get("sensitivity", {}) + + if not sensitivity_matrix: + print(f"[PM] 警告: {asset_id} 没有敏感度数据") + return 0.0 + + # 获取当前宏观因子向量 + macro_vector = self.wb.macro_factor_vector + + if not macro_vector: + print("[PM] 警告: 宏观因子向量为空,无法计算") + return 0.0 + + # 向量点积计算 + score = 0.0 + matched_factors = [] + + for factor_name, sensitivity_value in sensitivity_matrix.items(): + macro_value = macro_vector.get(factor_name, 0.0) + + if macro_value != 0: + contribution = macro_value * sensitivity_value + score += contribution + matched_factors.append(f"{factor_name}({macro_value}×{sensitivity_value}={contribution:.2f})") + + print(f"[PM] {asset_id} 宏观得分: {score:.2f} | 因子: {', '.join(matched_factors) if matched_factors else '无匹配'}") + + return round(score, 2) + + def calculate_trust_index( + self, + etf_code: str, + asset_id: str, + narrative_score: float, + flow_score: float + ) -> Dict: + """ + 计算 Trust Index(信任指数)- 新版本 + + 公式(升级版): + TrustIndex = (MacroScore × 50 + Narrative × 0.3 + Flow × 0.2) - Penalty + + 其中: + - MacroScore: 宏观敏感度得分(向量点积) + - Narrative: 叙事强度 (0-100) + - Flow: 资金流评分 (0-100) + + 一票否决规则: + - 如果 MacroScore < -1.0,Penalty = 100(宏观环境严重利空) + + Args: + etf_code: ETF 代码 + asset_id: 资产ID + narrative_score: 叙事评分 (0-100) + flow_score: 资金流评分 (0-100) + + Returns: + Trust Index 结果字典 + """ + # 1. 计算宏观敏感度得分 + macro_score = self.calculate_macro_sensitivity_score(asset_id) + + # 2. 归一化到 0-100 范围(假设 macro_score 在 -2 到 +3 之间) + normalized_macro = max(0, min(100, (macro_score + 2) / 5 * 100)) + + # 3. 加权计算基础得分 + base_score = normalized_macro * 0.5 + narrative_score * 0.3 + flow_score * 0.2 + + # 4. 
一票否决:宏观环境严重利空 + penalty = 0 + if macro_score < -1.0: + penalty = 100 + verdict = "reject" + else: + # 正常评判 + trust_index = base_score - penalty + + if trust_index >= self.min_trust_score: + verdict = "buy" + elif trust_index >= 40: + verdict = "hold" + else: + verdict = "sell" + + final_trust_index = max(0, base_score - penalty) + + return { + "code": etf_code, + "asset_id": asset_id, + "trust_index": round(final_trust_index, 2), + "macro_score": macro_score, + "normalized_macro": round(normalized_macro, 2), + "narrative_score": round(narrative_score, 2), + "flow_score": round(flow_score, 2), + "penalty": penalty, + "verdict": verdict, + "timestamp": datetime.now().isoformat() + } + + def batch_calculate_trust_index( + self, + market_data: Dict[str, Dict] + ) -> List[Dict]: + """ + 批量计算所有 ETF 的 Trust Index - 新版本 + + 逻辑: + 1. 遍历所有资产 + 2. 对每个资产下的 ETF 计算宏观敏感度得分 + 3. 结合叙事和资金流计算最终 Trust Index + + Args: + market_data: Quant.batch_analyze() 的输出 + + Returns: + Trust Index 列表,按评分排序 + """ + results = [] + + print(f"[PM] 计算 Trust Index(使用向量点积方法)...") + + # 遍历所有资产 + for asset_id, asset_data in self.asset_map.get("assets", {}).items(): + etf_list = asset_data.get("etfs", []) + + for etf_code in etf_list: + # 跳过没有市场数据的 ETF + if etf_code not in market_data: + continue + + # 获取叙事评分 + narratives = self.wb.get_narratives_by_etf(etf_code) + + if narratives: + # 取最高权重的叙事 + narrative_score = max(n.current_weight for n in narratives) + else: + narrative_score = 0 + + # 获取资金流评分 + flow_score = market_data[etf_code].get("flow_analysis", {}).get("flow_score", 0) + + # 计算 Trust Index(使用向量点积) + trust_result = self.calculate_trust_index( + etf_code, + asset_id, + narrative_score, + flow_score + ) + + results.append(trust_result) + + # 按 Trust Index 排序 + results.sort(key=lambda x: x['trust_index'], reverse=True) + + print(f"[PM] Trust Index 计算完成,共 {len(results)} 个标的") + + # 打印 Top 5 + print("\n[PM] Top 5 标的:") + for i, result in enumerate(results[:5], 1): + asset_name = 
self.asset_map.get("assets", {}).get(result['asset_id'], {}).get("name", "未知") + print(f" {i}. {result['code']} ({asset_name}) - TrustIndex: {result['trust_index']} " + f"(Macro: {result['macro_score']:.2f}, Narrative: {result['narrative_score']:.1f}, " + f"Flow: {result['flow_score']:.1f})") + + # 使用 LLM 提供投资洞察 + if self.use_llm and results: + self._provide_llm_insights(results[:10]) + + return results + + def _provide_llm_insights(self, top_results: List[Dict]) -> None: + """使用 LLM 提供投资洞察""" + try: + # 准备分析数据 + opportunities = [] + for r in top_results: + asset_name = self.asset_map.get("assets", {}).get(r['asset_id'], {}).get("name", r['asset_id']) + opportunities.append({ + "code": r['code'], + "name": asset_name, + "trust_index": r['trust_index'], + "macro_score": r['macro_score'], + "narrative_score": r['narrative_score'], + "flow_score": r['flow_score'], + "verdict": r['verdict'] + }) + + # 获取当前宏观状态 + macro_state = self.wb.macro_cycle.to_dict() + macro_factors = list(self.wb.macro_factor_vector.keys())[:5] + + prompt = f"""请分析以下 ETF 投资机会并给出简要建议: + +【当前宏观环境】 +- 市场周期: {macro_state['status']} +- 流动性: {macro_state['liquidity']} +- 政策风向: {macro_state['policy_wind']} +- 活跃因子: {', '.join(macro_factors) if macro_factors else '无'} + +【Top 投资机会】 +{json.dumps(opportunities, ensure_ascii=False, indent=2)} + +请以 JSON 格式返回: +{{ + "market_view": "当前市场整体观点(30字内)", + "top_pick": "最推荐的标的代码", + "top_pick_reason": "推荐理由(50字内)", + "risk_warning": "主要风险提示(30字内)", + "position_advice": "仓位建议(如:谨慎/标准/积极)" +}}""" + + llm_output = llm_call( + messages=[ + {"role": "system", "content": self.INVESTMENT_ANALYSIS_PROMPT}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + max_tokens=400 + ) + + if llm_output: + # 清理 JSON + llm_output = llm_output.strip() + if llm_output.startswith("```"): + llm_output = re.sub(r'^```(?:json)?\s*', '', llm_output) + llm_output = re.sub(r'\s*```$', '', llm_output) + + insights = json.loads(llm_output) + + print("\n" + "=" * 50) + print("🤖 AI 
投资洞察:") + print("=" * 50) + print(f"📊 市场观点: {insights.get('market_view', '')}") + print(f"⭐ 最优选择: {insights.get('top_pick', '')} - {insights.get('top_pick_reason', '')}") + print(f"⚠️ 风险提示: {insights.get('risk_warning', '')}") + print(f"💰 仓位建议: {insights.get('position_advice', '')}") + print("=" * 50) + + except Exception as e: + print(f"[PM] LLM 洞察生成失败: {e}") + + def generate_trade_orders( + self, + trust_results: List[Dict], + market_data: Dict[str, Dict] + ) -> List[Dict]: + """ + 生成交易订单 + + 策略: + 1. 买入信号:Trust Index >= 60 且 verdict = "buy" + 2. 卖出信号:verdict = "sell" 或 "reject" + 3. 仓位计算:Trust Index 越高,仓位越大 + + Args: + trust_results: batch_calculate_trust_index() 的输出 + market_data: 市场数据(用于获取价格) + + Returns: + 订单列表 + """ + orders = [] + + print(f"[PM] 生成交易订单...") + + # 可用资金(假设当前空仓) + available_capital = self._calculate_available_capital() + + for trust_result in trust_results: + etf_code = trust_result['code'] + verdict = trust_result['verdict'] + trust_index = trust_result['trust_index'] + + # 获取当前价格 + realtime_data = market_data.get(etf_code, {}).get("realtime") + if not realtime_data: + continue + + price = realtime_data['price'] + + # 判断操作 + if verdict == "buy" and trust_index >= self.min_trust_score: + order = self._create_buy_order( + etf_code, + trust_index, + price, + available_capital + ) + if order: + orders.append(order) + available_capital -= order['amount'] + + elif verdict in ["sell", "reject"] and etf_code in self.current_positions: + order = self._create_sell_order(etf_code, price) + if order: + orders.append(order) + + print(f"[PM] 生成 {len(orders)} 条订单") + return orders + + def _create_buy_order( + self, + etf_code: str, + trust_index: float, + price: float, + available_capital: float + ) -> Optional[Dict]: + """ + 创建买入订单 + + 仓位计算: + position_pct = min(trust_index / 100 * max_position_pct, max_position_pct) + """ + # 已持仓则跳过 + if etf_code in self.current_positions: + return None + + # 计算目标仓位 + position_pct = min( + (trust_index / 100) * 
self.max_position_pct, + self.max_position_pct + ) + + target_amount = self.total_capital * position_pct + + # 检查可用资金 + if target_amount > available_capital: + target_amount = available_capital * 0.8 # 保留20%缓冲 + + if target_amount < 1000: # 最小交易金额 + return None + + shares = int(target_amount / price / 100) * 100 # 向下取整到100股 + actual_amount = shares * price + + return { + "action": "buy", + "code": etf_code, + "price": price, + "shares": shares, + "amount": round(actual_amount, 2), + "position_pct": round(actual_amount / self.total_capital * 100, 2), + "trust_index": trust_index, + "reason": f"Trust Index {trust_index:.1f},叙事与资金共振", + "timestamp": datetime.now().isoformat() + } + + def _create_sell_order( + self, + etf_code: str, + price: float + ) -> Optional[Dict]: + """创建卖出订单""" + if etf_code not in self.current_positions: + return None + + position = self.current_positions[etf_code] + shares = position['shares'] + amount = shares * price + + return { + "action": "sell", + "code": etf_code, + "price": price, + "shares": shares, + "amount": round(amount, 2), + "reason": "Trust Index 下降或一票否决触发", + "timestamp": datetime.now().isoformat() + } + + def _calculate_available_capital(self) -> float: + """计算可用资金""" + # 简化版:假设当前空仓 + # 实际应该是:总资金 - 已用资金 + return self.total_capital + + def generate_portfolio_report( + self, + trust_results: List[Dict], + orders: List[Dict] + ) -> Dict: + """ + 生成投资组合报告 + + Args: + trust_results: Trust Index 结果 + orders: 交易订单 + + Returns: + 报告字典 + """ + # 统计 verdict 分布 + verdict_counts = {} + for result in trust_results: + verdict = result['verdict'] + verdict_counts[verdict] = verdict_counts.get(verdict, 0) + 1 + + # 统计订单金额 + total_buy_amount = sum( + order['amount'] for order in orders if order['action'] == 'buy' + ) + total_sell_amount = sum( + order['amount'] for order in orders if order['action'] == 'sell' + ) + + # Top 5 高信任度 ETF + top_5 = trust_results[:5] + + report = { + "timestamp": datetime.now().isoformat(), + "total_capital": 
self.total_capital, + "analysis": { + "total_etfs": len(trust_results), + "verdict_distribution": verdict_counts, + "avg_trust_index": round( + sum(r['trust_index'] for r in trust_results) / len(trust_results), 2 + ) if trust_results else 0 + }, + "orders": { + "total_orders": len(orders), + "buy_orders": len([o for o in orders if o['action'] == 'buy']), + "sell_orders": len([o for o in orders if o['action'] == 'sell']), + "total_buy_amount": round(total_buy_amount, 2), + "total_sell_amount": round(total_sell_amount, 2) + }, + "top_opportunities": [ + { + "code": r['code'], + "trust_index": r['trust_index'], + "verdict": r['verdict'] + } + for r in top_5 + ], + "risk_control": { + "capital_utilization": round(total_buy_amount / self.total_capital * 100, 2), + "max_single_position": self.max_position_pct * 100, + "min_trust_threshold": self.min_trust_score + } + } + + return report + + def apply_risk_control( + self, + orders: List[Dict] + ) -> List[Dict]: + """ + 风控检查 + + 规则: + 1. 单个 ETF 不超过最大仓位 + 2. 总仓位不超过90%(保留10%现金) + 3. 
同板块 ETF 合计不超过30% + + Args: + orders: 原始订单列表 + + Returns: + 风控后的订单列表 + """ + filtered_orders = [] + total_amount = 0 + + for order in orders: + if order['action'] == 'buy': + # 检查单仓位 + if order['position_pct'] > self.max_position_pct * 100: + print(f"[PM] 风控拒绝: {order['code']} 仓位过大") + continue + + # 检查总仓位 + new_total = total_amount + order['amount'] + if new_total / self.total_capital > 0.9: + print(f"[PM] 风控拒绝: {order['code']} 总仓位将超90%") + continue + + total_amount = new_total + + filtered_orders.append(order) + + print(f"[PM] 风控完成,通过 {len(filtered_orders)}/{len(orders)} 条订单") + return filtered_orders + + +# ==================== 工具函数 ==================== + +def save_orders_to_file(orders: List[Dict], filename: str = "trade_orders.json") -> None: + """保存订单到文件""" + output_path = Path("data") / filename + + with open(output_path, 'w', encoding='utf-8') as f: + json.dump({ + "timestamp": datetime.now().isoformat(), + "orders": orders + }, f, ensure_ascii=False, indent=2) + + print(f"[PM] 订单已保存到 {output_path}") + + +# ==================== 测试代码 ==================== + +if __name__ == "__main__": + print("=" * 50) + print("Skill D: PM 基金经理测试") + print("=" * 50) + + # 创建 WorldBook 和 PM + wb = WorldBook(data_dir="data") + pm = PortfolioManager(wb, total_capital=1000000) + + # 模拟数据:添加测试叙事 + from core.world_book import Narrative, create_narrative_id + + narrative1 = Narrative( + id=create_narrative_id("AI算力"), + topic="AI算力", + related_etfs=["515980"], + lifecycle_stage="realization", + base_score=88.0, + current_weight=88.0 + ) + wb.add_narrative(narrative1) + + # 模拟 Quant 输出的市场数据 + mock_market_data = { + "515980": { + "flow_analysis": {"flow_score": 72.5}, + "realtime": {"price": 1.25, "name": "AI算力ETF"} + }, + "512480": { + "flow_analysis": {"flow_score": 45.0}, + "realtime": {"price": 2.10, "name": "半导体ETF"} + } + } + + # 1. 批量计算 Trust Index + print("\n1. 
计算 Trust Index:") + trust_results = pm.batch_calculate_trust_index(mock_market_data) + + for result in trust_results: + print(f" {result['code']}: Trust={result['trust_index']}, " + f"叙事={result['narrative_score']}, 资金={result['flow_score']}, " + f"判定={result['verdict']}") + + # 2. 生成交易订单 + print("\n2. 生成交易订单:") + orders = pm.generate_trade_orders(trust_results, mock_market_data) + + for order in orders: + print(f" {order['action'].upper()} {order['code']}: " + f"{order['shares']}股 @ ¥{order['price']}, " + f"金额 ¥{order['amount']:,.0f}") + + # 3. 风控检查 + print("\n3. 风控检查:") + safe_orders = pm.apply_risk_control(orders) + + # 4. 生成投资组合报告 + print("\n4. 投资组合报告:") + report = pm.generate_portfolio_report(trust_results, safe_orders) + print(json.dumps(report, ensure_ascii=False, indent=2)) + + # 5. 保存订单 + if safe_orders: + save_orders_to_file(safe_orders) + + print("\n✅ PM 模块测试完成") diff --git a/skills/quant.py b/skills/quant.py new file mode 100644 index 0000000..157c7a4 --- /dev/null +++ b/skills/quant.py @@ -0,0 +1,406 @@ +""" +Skill B: Quant (量化研究员) +============================ +职能:行情分析、资金流计算、异常检测 + +输入:ETF 代码列表 +输出:Market_Data_JSON (资金流评分、技术指标、异常信号) +""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from core.config import config, llm_call +import akshare as ak +import pandas as pd +import numpy as np +from datetime import datetime, timedelta +from typing import Dict, List, Optional, Tuple +import warnings +warnings.filterwarnings('ignore') + + +class QuantAnalyzer: + """量化分析器 - 负责 ETF 行情数据获取和资金流分析""" + + def __init__(self): + self.cache = {} # 简单缓存机制 + + def get_etf_realtime_data(self, etf_code: str) -> Optional[Dict]: + """ + 获取 ETF 实时行情数据 + + Args: + etf_code: ETF 代码(如 "512980") + + Returns: + 实时行情字典 + """ + try: + # 使用 akshare 获取实时行情 + df = ak.fund_etf_spot_em() + + # 查找目标 ETF + etf_data = df[df['代码'] == etf_code] + + if etf_data.empty: + print(f"[Quant] 未找到 ETF: {etf_code}") + return None + + row = 
etf_data.iloc[0] + + return { + "code": etf_code, + "name": row['名称'], + "price": float(row['最新价']), + "change_pct": float(row['涨跌幅']), + "volume": float(row['成交量']), + "amount": float(row['成交额']), + "turnover_rate": float(row.get('换手率', 0)), + "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S") + } + + except Exception as e: + print(f"[Quant] 获取实时数据失败 {etf_code}: {e}") + return None + + def get_etf_historical_data( + self, + etf_code: str, + days: int = 20 + ) -> Optional[pd.DataFrame]: + """ + 获取 ETF 历史行情数据 + + Args: + etf_code: ETF 代码 + days: 获取天数(默认20个交易日) + + Returns: + 历史行情 DataFrame + """ + try: + # 计算日期范围 + end_date = datetime.now().strftime("%Y%m%d") + start_date = (datetime.now() - timedelta(days=days*2)).strftime("%Y%m%d") + + # 获取历史数据 + df = ak.fund_etf_hist_em( + symbol=etf_code, + period="daily", + start_date=start_date, + end_date=end_date, + adjust="qfq" + ) + + if df is None or df.empty: + print(f"[Quant] 无历史数据: {etf_code}") + return None + + # 重命名列 + df.rename(columns={ + '日期': 'date', + '开盘': 'open', + '收盘': 'close', + '最高': 'high', + '最低': 'low', + '成交量': 'volume', + '成交额': 'amount', + '涨跌幅': 'change_pct', + '换手率': 'turnover_rate' + }, inplace=True) + + return df.tail(days) + + except Exception as e: + print(f"[Quant] 获取历史数据失败 {etf_code}: {e}") + return None + + def calculate_fund_flow_score( + self, + etf_code: str, + window: int = 5 + ) -> Dict: + """ + 计算资金流向评分 + + 核心逻辑: + 1. 成交额放量 (amount_surge) + 2. 换手率提升 (turnover_boost) + 3. 价格与成交量背离检测 + + Args: + etf_code: ETF 代码 + window: 计算窗口(天数) + + Returns: + 资金流评分字典 + """ + df = self.get_etf_historical_data(etf_code, days=window*2) + + if df is None or len(df) < window: + return { + "code": etf_code, + "flow_score": 0, + "amount_surge": 0, + "turnover_boost": 0, + "divergence_signal": False, + "status": "insufficient_data" + } + + # 近期数据 vs 历史基准 + recent = df.tail(window) + baseline = df.head(window) + + # 1. 
成交额放量评分 (0-50) + recent_avg_amount = recent['amount'].mean() + baseline_avg_amount = baseline['amount'].mean() + + if baseline_avg_amount > 0: + amount_ratio = recent_avg_amount / baseline_avg_amount + amount_surge_score = min((amount_ratio - 1) * 50, 50) + else: + amount_surge_score = 0 + + # 2. 换手率提升评分 (0-30) + recent_avg_turnover = recent['turnover_rate'].mean() + baseline_avg_turnover = baseline['turnover_rate'].mean() + + if baseline_avg_turnover > 0: + turnover_ratio = recent_avg_turnover / baseline_avg_turnover + turnover_boost_score = min((turnover_ratio - 1) * 30, 30) + else: + turnover_boost_score = 0 + + # 3. 价格与成交量背离检测 (奖励分 +20) + recent_price_change = recent['close'].iloc[-1] / recent['close'].iloc[0] - 1 + recent_volume_change = recent['volume'].iloc[-1] / recent['volume'].iloc[0] - 1 + + # 价跌量增 = 可能见底信号 + divergence_signal = (recent_price_change < -0.02 and recent_volume_change > 0.3) + divergence_bonus = 20 if divergence_signal else 0 + + # 总分计算 (0-100) + total_score = max(0, min(100, + amount_surge_score + turnover_boost_score + divergence_bonus + )) + + return { + "code": etf_code, + "flow_score": round(total_score, 2), + "amount_surge": round(amount_surge_score, 2), + "turnover_boost": round(turnover_boost_score, 2), + "divergence_signal": divergence_signal, + "recent_avg_amount": round(recent_avg_amount / 1e8, 2), # 单位:亿 + "baseline_avg_amount": round(baseline_avg_amount / 1e8, 2), + "status": "success" + } + + def detect_anomaly( + self, + etf_code: str, + sigma: float = 2.0 + ) -> Dict: + """ + 异常检测 - 识别异常放量或暴涨暴跌 + + Args: + etf_code: ETF 代码 + sigma: 标准差倍数(默认2倍) + + Returns: + 异常信号字典 + """ + df = self.get_etf_historical_data(etf_code, days=30) + + if df is None or len(df) < 10: + return {"code": etf_code, "has_anomaly": False, "reason": "insufficient_data"} + + # 计算统计基准 + mean_amount = df['amount'].mean() + std_amount = df['amount'].std() + mean_change = df['change_pct'].mean() + std_change = df['change_pct'].std() + + # 最新数据 + latest = 
df.iloc[-1] + + anomaly_signals = [] + + # 成交额异常 + if latest['amount'] > mean_amount + sigma * std_amount: + anomaly_signals.append("volume_surge") + + # 涨跌幅异常 + if abs(latest['change_pct']) > abs(mean_change) + sigma * std_change: + if latest['change_pct'] > 0: + anomaly_signals.append("price_spike") + else: + anomaly_signals.append("price_crash") + + return { + "code": etf_code, + "has_anomaly": len(anomaly_signals) > 0, + "signals": anomaly_signals, + "latest_amount": round(latest['amount'] / 1e8, 2), + "latest_change_pct": round(latest['change_pct'], 2), + "threshold_amount": round((mean_amount + sigma * std_amount) / 1e8, 2), + "threshold_change": round(abs(mean_change) + sigma * std_change, 2) + } + + def batch_analyze( + self, + etf_codes: List[str] + ) -> Dict[str, Dict]: + """ + 批量分析 ETF + + Args: + etf_codes: ETF 代码列表 + + Returns: + 分析结果字典 {code: result} + """ + results = {} + + print(f"[Quant] 开始批量分析 {len(etf_codes)} 个 ETF...") + + for code in etf_codes: + print(f"[Quant] 分析 {code}...") + + # 资金流评分 + flow_result = self.calculate_fund_flow_score(code) + + # 异常检测 + anomaly_result = self.detect_anomaly(code) + + # 实时行情 + realtime_data = self.get_etf_realtime_data(code) + + results[code] = { + "flow_analysis": flow_result, + "anomaly_detection": anomaly_result, + "realtime": realtime_data, + "timestamp": datetime.now().isoformat() + } + + print(f"[Quant] 批量分析完成") + return results + + def generate_market_report( + self, + analysis_results: Dict[str, Dict] + ) -> Dict: + """ + 生成市场数据报告(供其他 Agent 使用) + + Args: + analysis_results: batch_analyze 的输出 + + Returns: + 标准化的 Market_Data_JSON + """ + report = { + "timestamp": datetime.now().isoformat(), + "total_analyzed": len(analysis_results), + "etf_scores": {}, + "top_flow_etfs": [], + "anomaly_alerts": [] + } + + # 提取评分 + for code, result in analysis_results.items(): + flow_score = result.get("flow_analysis", {}).get("flow_score", 0) + report["etf_scores"][code] = flow_score + + # 异常提醒 + if 
result.get("anomaly_detection", {}).get("has_anomaly"): + report["anomaly_alerts"].append({ + "code": code, + "signals": result["anomaly_detection"]["signals"] + }) + + # 排序 Top 资金流 ETF + sorted_etfs = sorted( + report["etf_scores"].items(), + key=lambda x: x[1], + reverse=True + ) + report["top_flow_etfs"] = [ + {"code": code, "score": score} + for code, score in sorted_etfs[:5] + ] + + return report + + +# ==================== 工具函数 ==================== + +def get_all_etf_codes_from_asset_map() -> List[str]: + """从 asset_map.json 提取所有 ETF 代码""" + import json + from pathlib import Path + + asset_map_path = Path("core") / "asset_map.json" + + if not asset_map_path.exists(): + print("[Quant] 未找到 asset_map.json") + return [] + + with open(asset_map_path, 'r', encoding='utf-8') as f: + asset_map = json.load(f) + + etf_codes = set() + for sector_data in asset_map.get("sectors", {}).values(): + etf_codes.update(sector_data.get("etfs", [])) + + return list(etf_codes) + + +if __name__ == "__main__": + print("=" * 50) + print("Skill B: Quant 量化研究员测试") + print("=" * 50) + + # 创建分析器实例 + quant = QuantAnalyzer() + + # 测试单个 ETF + test_code = "512480" # 半导体 ETF + print(f"\n测试 ETF: {test_code}") + + # 1. 获取实时数据 + print("\n1. 实时行情:") + realtime = quant.get_etf_realtime_data(test_code) + if realtime: + print(f" {realtime['name']}: ¥{realtime['price']}, 涨跌幅: {realtime['change_pct']}%") + + # 2. 资金流评分 + print("\n2. 资金流分析:") + flow = quant.calculate_fund_flow_score(test_code) + print(f" 资金流评分: {flow['flow_score']}") + print(f" 成交额放量: {flow['amount_surge']}") + print(f" 换手率提升: {flow['turnover_boost']}") + + # 3. 异常检测 + print("\n3. 异常检测:") + anomaly = quant.detect_anomaly(test_code) + print(f" 发现异常: {anomaly['has_anomaly']}") + if anomaly['has_anomaly']: + print(f" 异常信号: {anomaly['signals']}") + + # 4. 批量分析(测试少量 ETF) + print("\n4. 批量分析测试:") + test_codes = ["512480", "515980", "159928"] # 半导体、AI、消费 + batch_results = quant.batch_analyze(test_codes) + + # 5. 
生成报告 + report = quant.generate_market_report(batch_results) + print(f"\n5. 市场报告:") + print(f" 分析数量: {report['total_analyzed']}") + print(f" Top 资金流 ETF:") + for item in report['top_flow_etfs']: + print(f" {item['code']}: {item['score']:.2f}") + + print("\n✅ Quant 模块测试完成") diff --git a/skills/strategist.py b/skills/strategist.py new file mode 100644 index 0000000..f9b3fc3 --- /dev/null +++ b/skills/strategist.py @@ -0,0 +1,742 @@ +""" +Skill C: Strategist (宏观策略师) +================================= +职能:维护世界书、判断周期、更新叙事权重 + +输入:Narrative_JSON + 历史 WorldBook +输出:更新后的 World_Book + +设计原则: +- 使用 LLM 进行宏观周期判断和叙事生命周期分析 +- 结合规则引擎提供可解释的决策逻辑 +- 支持 LLM 失败时的优雅降级 +""" + +import sys +from pathlib import Path + +# 添加项目根目录到路径 +sys.path.insert(0, str(Path(__file__).parent.parent)) + +from core.world_book import WorldBook, Narrative, create_narrative_id +from core.config import config, llm_call +from typing import Dict, List, Optional +from datetime import datetime +import json + + +class MacroStrategist: + """ + 宏观策略师 - 负责 World Book 的智能维护 + + 核心职责: + 1. 接收 Analyst 的新闻分析结果 + 2. 使用 LLM 进行宏观周期和叙事生命周期判断 + 3. 智能更新或创建叙事对象 + 4. 应用时间衰减 + 5. 生成投资策略建议 + """ + + # LLM 系统提示词 + MACRO_ANALYSIS_PROMPT = """你是一位资深的 A 股宏观策略分析师,擅长根据政策新闻判断市场周期变化。 + +你的核心任务是: +1. 分析当前新闻叙事对宏观环境的影响 +2. 判断市场所处的周期阶段 +3. 识别关键的宏观驱动因子 +4. 
给出策略配置建议 + +宏观周期状态: +- upward(上行):政策利好、流动性充裕、风险偏好上升 +- downward(下行):政策收紧、流动性紧张、风险偏好下降 +- neutral(中性):政策观望、流动性平稳 + +流动性状态: +- loose(宽松):降准降息、央行投放 +- tight(紧张):加息、回笼资金 +- neutral(中性) + +政策风向: +- stimulus(刺激):财政扩张、产业扶持 +- regulation(监管):整顿规范、去杠杆 +- wait_and_see(观望) + +请严格按照 JSON 格式返回分析结果。""" + + def __init__(self, world_book: WorldBook, use_llm: bool = True): + """ + Args: + world_book: WorldBook 实例 + use_llm: 是否使用 LLM 进行分析(默认 True) + """ + self.wb = world_book + self.macro_matrix = self._load_macro_matrix() + self.use_llm = use_llm + + if self.use_llm: + print("[Strategist] ✅ LLM 模式启用 - 将使用大模型进行智能策略分析") + else: + print("[Strategist] ⚠️ 降级模式 - 将使用规则引擎") + + def _load_macro_matrix(self) -> Dict: + """加载宏观逻辑矩阵""" + matrix_path = Path("core") / "macro_matrix.json" + if not matrix_path.exists(): + print("[Strategist] 警告: 未找到 macro_matrix.json") + return {} + + with open(matrix_path, 'r', encoding='utf-8') as f: + return json.load(f) + + def process_narrative_json( + self, + narrative_json: Dict + ) -> None: + """ + 处理 Analyst 输出的 Narrative_JSON + + 逻辑: + 1. 遍历所有叙事 + 2. 如果是新叙事 → 创建 + 3. 如果已存在 → 强化(boost) + 4. 
更新生命周期阶段 + + Args: + narrative_json: Analyst.generate_narrative_json() 的输出 + """ + narratives = narrative_json.get('narratives', []) + print(f"[Strategist] 处理 {len(narratives)} 个叙事...") + + for narrative_data in narratives: + topic = narrative_data['topic'] + + # 检查是否已存在 + existing_narrative = self.wb.get_narrative_by_topic(topic) + + if existing_narrative: + # 已存在 → 强化 + self._boost_narrative(existing_narrative, narrative_data) + else: + # 新叙事 → 创建 + self._create_narrative(narrative_data) + + # 推导宏观因子向量 + self.infer_macro_factor_vector(narrative_json) + + print(f"[Strategist] 叙事处理完成,当前活跃叙事数: {len(self.wb.active_narratives)}") + + def _create_narrative(self, narrative_data: Dict) -> None: + """创建新叙事对象""" + topic = narrative_data['topic'] + + # 生成唯一 ID + narrative_id = create_narrative_id(topic) + + # 判断衰减系数(Level A 新闻衰减慢) + decay_factor = 0.93 if narrative_data['level_a_count'] > 0 else 0.95 + + # 创建 Narrative 对象 + narrative = Narrative( + id=narrative_id, + topic=topic, + related_etfs=narrative_data['related_etfs'], + lifecycle_stage=narrative_data['lifecycle_stage'], + base_score=narrative_data['max_score'], + decay_factor=decay_factor, + current_weight=narrative_data['avg_score'] + ) + + self.wb.add_narrative(narrative) + print(f"[Strategist] 创建新叙事: {topic} (评分: {narrative.current_weight})") + + def _boost_narrative( + self, + existing_narrative: Narrative, + narrative_data: Dict + ) -> None: + """强化已有叙事""" + topic = narrative_data['topic'] + new_score = narrative_data['avg_score'] + + # 使用 Narrative 的 boost 方法 + existing_narrative.boost(new_score) + + # 如果有 Level A 新闻,可能需要升级生命周期 + if narrative_data['level_a_count'] > 0: + if existing_narrative.lifecycle_stage == "fermentation": + existing_narrative.update_stage("realization") + + print(f"[Strategist] 强化叙事: {topic} → {existing_narrative.current_weight:.2f}") + + def daily_maintenance(self) -> Dict: + """ + 每日维护任务 + + 执行: + 1. 对所有叙事应用时间衰减 + 2. 移除衰退的叙事 + 3. 
检测生命周期阶段变化 + + Returns: + 维护报告 + """ + print("[Strategist] 执行每日维护...") + + # 1. 应用衰减 + self.wb.decay_all_narratives() + + # 2. 检测阶段降级 + for narrative in self.wb.active_narratives.values(): + self._check_stage_downgrade(narrative) + + # 3. 移除低权重叙事 + removed = self.wb.remove_weak_narratives(threshold=10.0) + + # 4. 生成报告 + report = { + "timestamp": datetime.now().isoformat(), + "total_narratives": len(self.wb.active_narratives), + "removed_narratives": len(removed), + "top_narratives": [ + {"topic": n.topic, "weight": n.current_weight} + for n in self.wb.get_top_narratives(5) + ] + } + + print(f"[Strategist] 维护完成,移除 {len(removed)} 个衰退叙事") + return report + + def _check_stage_downgrade(self, narrative: Narrative) -> None: + """检测叙事是否需要降级生命周期""" + current_stage = narrative.lifecycle_stage + weight = narrative.current_weight + + # 权重低于阈值 → 进入衰退期 + if weight < 30 and current_stage != "decay": + narrative.update_stage("decay") + print(f"[Strategist] 叙事进入衰退期: {narrative.topic}") + + # realization → fermentation + elif weight < 60 and current_stage == "realization": + narrative.update_stage("fermentation") + + def detect_macro_cycle_change( + self, + narrative_json: Dict, + market_data: Optional[Dict] = None + ) -> Optional[Dict]: + """ + 检测宏观周期变化(核心方法) + + 优先使用 LLM 进行智能分析,失败时降级到规则引擎 + + Args: + narrative_json: Analyst 输出 + market_data: Quant 输出(可选) + + Returns: + 宏观周期变化建议(如有) + """ + # 检查是否有 Level A 新闻 + level_a_count = sum( + n.get('level_a_count', 0) + for n in narrative_json.get('narratives', []) + ) + + if level_a_count == 0: + print("[Strategist] 无 Level A 新闻,跳过宏观周期检测") + return None + + print(f"[Strategist] 检测到 {level_a_count} 条 Level A 新闻,分析宏观周期...") + + if self.use_llm: + result = self._detect_cycle_with_llm(narrative_json, market_data) + if result: + return result + print("[Strategist] ⚠️ LLM 分析失败,降级到规则引擎") + + # 降级到规则引擎 + return self._detect_cycle_with_rules(narrative_json) + + def _detect_cycle_with_llm( + self, + narrative_json: Dict, + market_data: 
Optional[Dict] + ) -> Optional[Dict]: + """使用 LLM 分析宏观周期变化""" + # 构建分析上下文 + narratives_summary = [] + for n in narrative_json.get('narratives', []): + topic_name = n.get('topic_name', n.get('topic', '')) + narratives_summary.append({ + "topic": topic_name, + "score": n.get('max_score', 0), + "level_a": n.get('level_a_count', 0) > 0, + "sentiment": n.get('overall_sentiment', 'neutral'), + "signals": n.get('key_signals', [])[:2] + }) + + # 当前宏观状态 + current_macro = self.wb.macro_cycle.to_dict() + + prompt = f"""请分析以下市场叙事信息,判断宏观周期是否发生变化: + +【当前宏观状态】 +- 市场周期: {current_macro['status']} +- 流动性: {current_macro['liquidity']} +- 政策风向: {current_macro['policy_wind']} + +【最新叙事信息】 +{json.dumps(narratives_summary, ensure_ascii=False, indent=2)} + +请分析后以 JSON 格式返回: +{{ + "cycle_changed": true/false, + "new_status": "upward/downward/neutral", + "new_liquidity": "loose/tight/neutral", + "new_policy_wind": "stimulus/regulation/wait_and_see", + "reason": "变化原因(50字内)", + "confidence": 0-100, + "key_factors": ["驱动因子1", "驱动因子2"] +}} + +如果宏观环境未发生明显变化,cycle_changed 设为 false。""" + + try: + llm_output = llm_call( + messages=[ + {"role": "system", "content": self.MACRO_ANALYSIS_PROMPT}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + max_tokens=500 + ) + + if not llm_output: + return None + + # 清理并解析 JSON + import re + llm_output = llm_output.strip() + if llm_output.startswith("```"): + llm_output = re.sub(r'^```(?:json)?\s*', '', llm_output) + llm_output = re.sub(r'\s*```$', '', llm_output) + + result = json.loads(llm_output) + + if not result.get("cycle_changed", False): + print(f"[Strategist] 🤖 LLM 判断:宏观周期无明显变化") + return None + + # 验证字段 + valid_status = ["upward", "downward", "neutral"] + valid_liquidity = ["loose", "tight", "neutral"] + valid_policy = ["stimulus", "regulation", "wait_and_see"] + + new_status = result.get("new_status", "neutral") + new_liquidity = result.get("new_liquidity", "neutral") + new_policy = result.get("new_policy_wind", "wait_and_see") + + if 
new_status not in valid_status: + new_status = "neutral" + if new_liquidity not in valid_liquidity: + new_liquidity = "neutral" + if new_policy not in valid_policy: + new_policy = "wait_and_see" + + print(f"[Strategist] 🤖 LLM 判断:{result.get('reason', '')}") + print(f"[Strategist] 🤖 置信度: {result.get('confidence', 0)}%") + + return { + "status": new_status, + "liquidity": new_liquidity, + "policy_wind": new_policy, + "reason": result.get("reason", "LLM 分析结果"), + "key_factors": result.get("key_factors", []), + "analysis_method": "llm" + } + + except json.JSONDecodeError as e: + print(f"[Strategist] LLM 返回格式解析失败: {e}") + return None + except Exception as e: + print(f"[Strategist] LLM 分析异常: {e}") + return None + + def _detect_cycle_with_rules(self, narrative_json: Dict) -> Optional[Dict]: + """规则引擎检测宏观周期变化(降级方案)""" + # 检测关键词模式 + narratives_text = " ".join([ + n.get('topic', '') + " " + " ".join(n.get('key_signals', [])) + for n in narrative_json.get('narratives', []) + ]) + + # 货币宽松信号 + if any(kw in narratives_text for kw in ["降准", "降息", "宽松", "流动性"]): + return { + "status": "upward", + "liquidity": "loose", + "policy_wind": "stimulus", + "reason": "货币政策宽松,市场流动性改善", + "key_factors": ["降准/降息"], + "analysis_method": "rules" + } + + # 监管收紧信号 + if any(kw in narratives_text for kw in ["监管", "整顿", "规范", "收紧"]): + return { + "status": "downward", + "liquidity": "tight", + "policy_wind": "regulation", + "reason": "监管政策收紧,风险偏好下降", + "key_factors": ["监管收紧"], + "analysis_method": "rules" + } + + # 财政刺激信号 + if any(kw in narratives_text for kw in ["刺激", "万亿", "基建", "财政"]): + return { + "status": "upward", + "liquidity": "loose", + "policy_wind": "stimulus", + "reason": "财政政策积极,经济预期改善", + "key_factors": ["财政刺激"], + "analysis_method": "rules" + } + + return None + + def apply_macro_cycle_change(self, cycle_change: Dict) -> None: + """应用宏观周期变化""" + self.wb.update_macro_cycle( + status=cycle_change.get('status'), + liquidity=cycle_change.get('liquidity'), + 
policy_wind=cycle_change.get('policy_wind') + ) + + def infer_macro_factor_vector(self, narrative_json: Dict) -> Dict[str, float]: + """ + 根据新闻叙事推导宏观因子向量(核心方法) + + 优先使用 LLM 进行智能推导,失败时降级到规则引擎。 + + 这是核心方法,将新闻内容映射到宏观因子空间,供 PM 进行向量点积计算。 + + Args: + narrative_json: Analyst 输出的叙事 JSON + + Returns: + 宏观因子向量,例如: + { + "interest_rate_down": 1.0, + "policy_digital_economy": 0.8, + "geopolitics_tension": 0.5 + } + """ + print("[Strategist] 推导宏观因子向量...") + + if self.use_llm: + result = self._infer_factors_with_llm(narrative_json) + if result: + self.wb.update_macro_factor_vector(result) + return result + print("[Strategist] ⚠️ LLM 推导失败,降级到规则引擎") + + # 降级到规则引擎 + result = self._infer_factors_with_rules(narrative_json) + self.wb.update_macro_factor_vector(result) + return result + + def _infer_factors_with_llm(self, narrative_json: Dict) -> Optional[Dict[str, float]]: + """使用 LLM 推导宏观因子向量""" + # 准备叙事摘要 + narratives_summary = [] + for n in narrative_json.get('narratives', []): + narratives_summary.append({ + "topic": n.get('topic_name', n.get('topic', '')), + "score": n.get('max_score', 0), + "level_a": n.get('level_a_count', 0) > 0, + "sentiment": n.get('overall_sentiment', 'neutral'), + "key_signals": n.get('key_signals', []) + }) + + # 可用因子列表 + available_factors = [ + "interest_rate_down", "liquidity_easing", "cpi_rebound", + "risk_on", "risk_off", "policy_tech_self_reliance", + "policy_digital_economy", "policy_new_infra", "policy_capital_market", + "policy_soe_reform", "policy_low_altitude", "policy_food_security", + "policy_public_health", "policy_platform_economy", "consumption_stimulus", + "govt_spending", "geopolitics_tension", "currency_rmb_depreciation", + "export_growth", "fed_rate_cut", "dollar_index_down", + "inflation_expectations", "oil_price_up", "market_volume_spike", + "tech_cycle_up", "demographic_trend", "foreign_inflow" + ] + + prompt = f"""请根据以下市场叙事信息,推导当前活跃的宏观因子及其强度。 + +【叙事信息】 +{json.dumps(narratives_summary, ensure_ascii=False, indent=2)} + +【可用宏观因子】 +{', 
'.join(available_factors)} + +请分析新闻内容,判断哪些宏观因子被触发,并给出强度评分(0.0-1.0)。 +强度参考: +- 1.0: 政策明确、信号强烈(如央行降准公告) +- 0.7-0.9: 政策导向明确(如部委发文) +- 0.4-0.6: 信号中等(如行业动态) +- 0.1-0.3: 信号微弱(如市场传闻) + +以 JSON 格式返回(仅包含被触发的因子): +{{ + "factors": {{ + "factor_name": 0.8, + "factor_name2": 0.5 + }}, + "reasoning": "简要说明推导逻辑(50字内)" +}}""" + + try: + llm_output = llm_call( + messages=[ + {"role": "system", "content": "你是一位宏观经济分析师,擅长从新闻中提取宏观经济信号。"}, + {"role": "user", "content": prompt} + ], + temperature=0.3, + max_tokens=600 + ) + + if not llm_output: + return None + + # 清理并解析 JSON + import re + llm_output = llm_output.strip() + if llm_output.startswith("```"): + llm_output = re.sub(r'^```(?:json)?\s*', '', llm_output) + llm_output = re.sub(r'\s*```$', '', llm_output) + + result = json.loads(llm_output) + factors = result.get("factors", {}) + reasoning = result.get("reasoning", "") + + # 验证因子名称和强度 + valid_factors = {} + for factor_name, strength in factors.items(): + if factor_name in available_factors: + strength = float(strength) + strength = max(0.0, min(1.0, strength)) + valid_factors[factor_name] = round(strength, 2) + + if valid_factors: + print(f"[Strategist] 🤖 LLM 推导出 {len(valid_factors)} 个宏观因子") + print(f"[Strategist] 🤖 推导逻辑: {reasoning}") + for factor, strength in sorted(valid_factors.items(), key=lambda x: -x[1])[:5]: + print(f" 📊 {factor}: {strength}") + + return valid_factors + + except json.JSONDecodeError as e: + print(f"[Strategist] LLM 返回格式解析失败: {e}") + return None + except Exception as e: + print(f"[Strategist] LLM 推导异常: {e}") + return None + + def _infer_factors_with_rules(self, narrative_json: Dict) -> Dict[str, float]: + """规则引擎推导宏观因子向量(降级方案)""" + factor_vector: Dict[str, float] = {} + + # 宏观因子关键词映射表 + factor_keyword_map = { + "interest_rate_down": ["降息", "降准", "MLF下调", "LPR下调", "利率下行"], + "liquidity_easing": ["流动性", "宽松", "货币政策", "注入流动性"], + "cpi_rebound": ["CPI", "通胀", "物价上涨", "消费复苏"], + "risk_on": ["牛市", "突破", "成交量放大", "风险偏好"], + "risk_off": ["暴跌", "恐慌", "避险", "黑天鹅"], + 
"policy_tech_self_reliance": ["自主可控", "国产替代", "卡脖子", "芯片", "半导体"], + "policy_digital_economy": ["数字经济", "数据要素", "信创", "AI", "算力"], + "policy_new_infra": ["新基建", "5G", "6G", "东数西算", "算力底座"], + "policy_capital_market": ["资本市场", "印花税", "T+0", "金融强国"], + "policy_soe_reform": ["国企改革", "中特估", "红利"], + "policy_low_altitude": ["低空经济", "无人机", "eVTOL"], + "policy_food_security": ["粮食安全", "一号文件", "种业"], + "policy_public_health": ["医疗", "医保", "创新药", "老龄化"], + "consumption_stimulus": ["消费券", "汽车下乡", "家电补贴", "内需"], + "govt_spending": ["财政支出", "专项债", "万亿", "基建投资"], + "geopolitics_tension": ["地缘", "制裁", "冲突", "台海", "中美"], + "export_growth": ["出口", "外贸", "订单", "海外需求"], + "market_volume_spike": ["成交量", "放量", "天量"], + } + + # 收集所有文本 + all_text = "" + level_a_count = 0 + level_b_count = 0 + + for narrative in narrative_json.get('narratives', []): + all_text += f" {narrative.get('topic', '')} " + all_text += " ".join(narrative.get('key_signals', [])) + level_a_count += narrative.get('level_a_count', 0) + level_b_count += narrative.get('level_b_count', 0) + + # 扫描关键词 + for factor_name, keywords in factor_keyword_map.items(): + match_count = sum(1 for kw in keywords if kw in all_text) + + if match_count > 0: + if level_a_count > 0: + strength = min(1.0, 0.8 + match_count * 0.1) + elif level_b_count > 0: + strength = min(0.9, 0.5 + match_count * 0.1) + else: + strength = min(0.6, 0.3 + match_count * 0.1) + + factor_vector[factor_name] = round(strength, 2) + + print(f"[Strategist] 📏 规则引擎推导出 {len(factor_vector)} 个宏观因子") + return factor_vector + + def calculate_macro_impact_on_etf( + self, + etf_code: str, + macro_factor: str + ) -> float: + """ + 计算宏观因子对特定 ETF 的影响 + + Args: + etf_code: ETF 代码 + macro_factor: 宏观因子名称(如 "rate_cut") + + Returns: + 影响值 (-10 到 +10) + """ + # 从宏观矩阵查询 + factor_data = self.macro_matrix.get("macro_factors", {}).get(macro_factor, {}) + impact_map = factor_data.get("impact", {}) + + # 查找 ETF 对应的类别 + etf_mapping = self.macro_matrix.get("etf_mapping", {}) + + for 
etf_type, codes in etf_mapping.items(): + if etf_code in codes: + return impact_map.get(etf_type, 0) + + return 0 + + def generate_world_book_snapshot(self) -> Dict: + """生成 World Book 快照(供 PM 使用)""" + snapshot = self.wb.export_snapshot() + + # 添加策略师视角的元数据 + snapshot["strategist_view"] = { + "strong_narratives": [ + n.topic for n in self.wb.get_top_narratives(3) + ], + "macro_recommendation": self._get_macro_recommendation(), + "risk_level": self._assess_risk_level() + } + + return snapshot + + def _get_macro_recommendation(self) -> str: + """生成宏观建议""" + cycle = self.wb.macro_cycle + + if cycle.status == "upward" and cycle.liquidity == "loose": + return "aggressive" # 激进配置 + elif cycle.status == "downward" or cycle.policy_wind == "regulation": + return "defensive" # 防守配置 + else: + return "balanced" # 平衡配置 + + def _assess_risk_level(self) -> str: + """评估市场风险等级""" + # 基于叙事数量和宏观状态 + narrative_count = len(self.wb.active_narratives) + cycle = self.wb.macro_cycle + + if cycle.status == "downward" or narrative_count < 3: + return "high" + elif cycle.status == "upward" and narrative_count >= 5: + return "low" + else: + return "medium" + + +# ==================== 测试代码 ==================== + +if __name__ == "__main__": + print("=" * 50) + print("Skill C: Strategist 宏观策略师测试") + print("=" * 50) + + # 创建 WorldBook 和 Strategist + wb = WorldBook(data_dir="data") + strategist = MacroStrategist(wb) + + # 模拟 Analyst 输出 + mock_narrative_json = { + "timestamp": datetime.now().isoformat(), + "total_news": 3, + "narratives": [ + { + "topic": "低空经济", + "news_count": 2, + "avg_score": 82.0, + "max_score": 90.0, + "level_a_count": 0, + "related_etfs": ["512980", "159969"], + "lifecycle_stage": "fermentation" + }, + { + "topic": "AI算力", + "news_count": 1, + "avg_score": 75.0, + "max_score": 75.0, + "level_a_count": 0, + "related_etfs": ["515980", "159813"], + "lifecycle_stage": "fermentation" + } + ] + } + + # 1. 处理叙事 JSON + print("\n1. 
处理叙事 JSON:") + strategist.process_narrative_json(mock_narrative_json) + + # 2. 显示当前 World Book 状态 + print("\n2. 当前 World Book 状态:") + print(f" 活跃叙事数: {len(wb.active_narratives)}") + print(f" 宏观周期: {wb.macro_cycle.to_dict()}") + + # 3. 执行每日维护 + print("\n3. 执行每日维护:") + maintenance_report = strategist.daily_maintenance() + print(f" 维护报告: {json.dumps(maintenance_report, ensure_ascii=False, indent=2)}") + + # 4. 检测宏观周期变化(模拟降准新闻) + print("\n4. 检测宏观周期变化:") + mock_news_with_policy = { + "narratives": [ + {"topic": "央行降准", "level_a_count": 1, "avg_score": 95.0} + ] + } + + cycle_change = strategist.detect_macro_cycle_change(mock_news_with_policy) + if cycle_change: + print(f" 检测到周期变化: {cycle_change['reason']}") + strategist.apply_macro_cycle_change(cycle_change) + + # 5. 生成快照 + print("\n5. 生成 World Book 快照:") + snapshot = strategist.generate_world_book_snapshot() + print(f" 策略建议: {snapshot['strategist_view']['macro_recommendation']}") + print(f" 风险等级: {snapshot['strategist_view']['risk_level']}") + + # 6. 
保存 World Book + wb.save() + + print("\n✅ Strategist 模块测试完成") diff --git a/test_vector_calculation.py b/test_vector_calculation.py new file mode 100644 index 0000000..1d4797b --- /dev/null +++ b/test_vector_calculation.py @@ -0,0 +1,199 @@ +""" +测试向量点积计算逻辑 +====================== +验证 Strategist 推导的宏观因子向量与 PM 的敏感度矩阵计算是否正确 +""" + +import sys +from pathlib import Path +sys.path.insert(0, str(Path(__file__).parent)) + +from core.world_book import WorldBook +from skills.strategist import MacroStrategist +from skills.pm import PortfolioManager + + +def test_macro_vector_calculation(): + """测试宏观因子向量计算""" + print("=" * 60) + print("测试 1: 宏观因子向量推导") + print("=" * 60) + + # 创建 WorldBook + wb = WorldBook(data_dir="data/test") + + # 创建 Strategist + strategist = MacroStrategist(wb) + + # 模拟新闻分析结果 + narrative_json = { + "narratives": [ + { + "topic": "央行宣布全面降息,支持数字经济发展", + "level_a_count": 1, + "level_b_count": 0, + "avg_score": 85, + "max_score": 90, + "related_etfs": ["515230"], + "lifecycle_stage": "fermentation" + }, + { + "topic": "地缘紧张局势升级,避险情绪升温", + "level_a_count": 0, + "level_b_count": 1, + "avg_score": 70, + "max_score": 75, + "related_etfs": ["518880"], + "lifecycle_stage": "incubation" + } + ] + } + + # 推导宏观因子向量 + print("\n输入新闻:") + for n in narrative_json['narratives']: + print(f" - {n['topic']} (Level A: {n['level_a_count']}, Level B: {n['level_b_count']})") + + print("\n推导宏观因子向量:") + factor_vector = strategist.infer_macro_factor_vector(narrative_json) + + print(f"\n结果: {factor_vector}") + + return wb, factor_vector + + +def test_sensitivity_calculation(wb: WorldBook, factor_vector: dict): + """测试敏感度矩阵计算""" + print("\n" + "=" * 60) + print("测试 2: 向量点积计算") + print("=" * 60) + + # 创建 PM + pm = PortfolioManager(wb, total_capital=1000000) + + # 测试案例 1: 软件ETF + print("\n案例 1: 软件与数字经济 (tech_software)") + print("-" * 60) + macro_score_software = pm.calculate_macro_sensitivity_score("tech_software") + print(f"最终得分: {macro_score_software}") + + # 测试案例 2: 银行ETF + print("\n案例 
2: 银行/高股息 (finance_bank)") + print("-" * 60) + macro_score_bank = pm.calculate_macro_sensitivity_score("finance_bank") + print(f"最终得分: {macro_score_bank}") + + # 测试案例 3: 黄金ETF + print("\n案例 3: 资源与黄金 (resources_gold)") + print("-" * 60) + macro_score_gold = pm.calculate_macro_sensitivity_score("resources_gold") + print(f"最终得分: {macro_score_gold}") + + # 对比分析 + print("\n" + "=" * 60) + print("对比分析") + print("=" * 60) + print(f"软件ETF得分: {macro_score_software}") + print(f"银行ETF得分: {macro_score_bank}") + print(f"黄金ETF得分: {macro_score_gold}") + + print("\n结论:") + if macro_score_software > macro_score_bank: + print("✓ 软件ETF > 银行ETF (符合预期: 降息利好成长股,利空银行息差)") + + if macro_score_gold > 0: + print("✓ 黄金ETF得分为正 (符合预期: 地缘紧张利好黄金避险)") + + +def test_trust_index_calculation(wb: WorldBook): + """测试完整的 Trust Index 计算""" + print("\n" + "=" * 60) + print("测试 3: Trust Index 计算") + print("=" * 60) + + pm = PortfolioManager(wb, total_capital=1000000) + + # 模拟叙事评分和资金流评分 + test_cases = [ + { + "etf_code": "515230", + "asset_id": "tech_software", + "name": "软件ETF", + "narrative_score": 85, + "flow_score": 70 + }, + { + "etf_code": "512800", + "asset_id": "finance_bank", + "name": "银行ETF", + "narrative_score": 50, + "flow_score": 60 + }, + { + "etf_code": "518880", + "asset_id": "resources_gold", + "name": "黄金ETF", + "narrative_score": 70, + "flow_score": 55 + } + ] + + print("\n计算各标的 Trust Index:") + results = [] + + for case in test_cases: + print(f"\n{case['name']} ({case['etf_code']}):") + print(f" 叙事评分: {case['narrative_score']}") + print(f" 资金流评分: {case['flow_score']}") + + trust_result = pm.calculate_trust_index( + case['etf_code'], + case['asset_id'], + case['narrative_score'], + case['flow_score'] + ) + + results.append({**case, **trust_result}) + + print(f" → Trust Index: {trust_result['trust_index']} ({trust_result['verdict']})") + + # 排序 + results.sort(key=lambda x: x['trust_index'], reverse=True) + + print("\n" + "=" * 60) + print("最终排名") + print("=" * 60) + for i, result in 
enumerate(results, 1): + print(f"{i}. {result['name']} - Trust Index: {result['trust_index']} " + f"(Macro: {result['macro_score']:.2f}, Narrative: {result['narrative_score']}, " + f"Flow: {result['flow_score']}) → {result['verdict']}") + + +def main(): + """主测试函数""" + print("\n" + "🧪 " * 30) + print("MND-IA 向量点积计算测试") + print("🧪 " * 30) + + try: + # 测试 1: 宏观因子向量推导 + wb, factor_vector = test_macro_vector_calculation() + + # 测试 2: 敏感度矩阵点积计算 + test_sensitivity_calculation(wb, factor_vector) + + # 测试 3: 完整 Trust Index 计算 + test_trust_index_calculation(wb) + + print("\n" + "✅ " * 30) + print("所有测试完成!") + print("✅ " * 30) + + except Exception as e: + print(f"\n❌ 测试失败: {e}") + import traceback + traceback.print_exc() + + +if __name__ == "__main__": + main()