MND-IA/skills/quant.py

"""
Skill B: Quant (量化研究员)
============================
职能：行情分析、资金流计算、异常检测

输入：ETF 代码列表
输出：Market_Data_JSON (资金流评分、技术指标、异常信号)
"""

import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))

from core.config import config, llm_call
import akshare as ak
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Tuple
import warnings
warnings.filterwarnings('ignore')


class QuantAnalyzer:
    """量化分析器 - 负责 ETF 行情数据获取和资金流分析"""

    def __init__(self):
        self.cache = {}  # 简单缓存机制

    def get_etf_realtime_data(self, etf_code: str) -> Optional[Dict]:
        """
        获取 ETF 实时行情数据

        Args:
            etf_code: ETF 代码（如 "512980"）

        Returns:
            实时行情字典
        """
        try:
            # 使用 akshare 获取实时行情
            df = ak.fund_etf_spot_em()

            # 查找目标 ETF
            etf_data = df[df['代码'] == etf_code]

            if etf_data.empty:
                print(f"[Quant] 未找到 ETF: {etf_code}")
                return None

            row = etf_data.iloc[0]

            return {
                "code": etf_code,
                "name": row['名称'],
                "price": float(row['最新价']),
                "change_pct": float(row['涨跌幅']),
                "volume": float(row['成交量']),
                "amount": float(row['成交额']),
                "turnover_rate": float(row.get('换手率', 0)),
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S")
            }

        except Exception as e:
            print(f"[Quant] 获取实时数据失败 {etf_code}: {e}")
            return None

    def get_etf_historical_data(
        self,
        etf_code: str,
        days: int = 20
    ) -> Optional[pd.DataFrame]:
        """
        获取 ETF 历史行情数据

        Args:
            etf_code: ETF 代码
            days: 获取天数（默认20个交易日）

        Returns:
            历史行情 DataFrame
        """
        try:
            # 计算日期范围
            end_date = datetime.now().strftime("%Y%m%d")
            start_date = (datetime.now() - timedelta(days=days*2)).strftime("%Y%m%d")

            # 获取历史数据
            df = ak.fund_etf_hist_em(
                symbol=etf_code,
                period="daily",
                start_date=start_date,
                end_date=end_date,
                adjust="qfq"
            )

            if df is None or df.empty:
                print(f"[Quant] 无历史数据: {etf_code}")
                return None

            # 重命名列
            df.rename(columns={
                '日期': 'date',
                '开盘': 'open',
                '收盘': 'close',
                '最高': 'high',
                '最低': 'low',
                '成交量': 'volume',
                '成交额': 'amount',
                '涨跌幅': 'change_pct',
                '换手率': 'turnover_rate'
            }, inplace=True)

            return df.tail(days)

        except Exception as e:
            print(f"[Quant] 获取历史数据失败 {etf_code}: {e}")
            return None

    def calculate_fund_flow_score(
        self,
        etf_code: str,
        window: int = 5
    ) -> Dict:
        """
        计算资金流向评分

        核心逻辑：
        1. 成交额放量 (amount_surge)
        2. 换手率提升 (turnover_boost)
        3. 价格与成交量背离检测

        Args:
            etf_code: ETF 代码
            window: 计算窗口（天数）

        Returns:
            资金流评分字典
        """
        df = self.get_etf_historical_data(etf_code, days=window*2)

        if df is None or len(df) < window:
            return {
                "code": etf_code,
                "flow_score": 0,
                "amount_surge": 0,
                "turnover_boost": 0,
                "divergence_signal": False,
                "status": "insufficient_data"
            }

        # 近期数据 vs 历史基准
        recent = df.tail(window)
        baseline = df.head(window)

        # 1. 成交额放量评分 (0-50)
        recent_avg_amount = recent['amount'].mean()
        baseline_avg_amount = baseline['amount'].mean()

        if baseline_avg_amount > 0:
            amount_ratio = recent_avg_amount / baseline_avg_amount
            amount_surge_score = min((amount_ratio - 1) * 50, 50)
        else:
            amount_surge_score = 0

        # 2. 换手率提升评分 (0-30)
        recent_avg_turnover = recent['turnover_rate'].mean()
        baseline_avg_turnover = baseline['turnover_rate'].mean()

        if baseline_avg_turnover > 0:
            turnover_ratio = recent_avg_turnover / baseline_avg_turnover
            turnover_boost_score = min((turnover_ratio - 1) * 30, 30)
        else:
            turnover_boost_score = 0

        # 3. 价格与成交量背离检测 (奖励分 +20)
        recent_price_change = recent['close'].iloc[-1] / recent['close'].iloc[0] - 1
        recent_volume_change = recent['volume'].iloc[-1] / recent['volume'].iloc[0] - 1

        # 价跌量增 = 可能见底信号
        divergence_signal = (recent_price_change < -0.02 and recent_volume_change > 0.3)
        divergence_bonus = 20 if divergence_signal else 0

        # 总分计算 (0-100)
        total_score = max(0, min(100,
            amount_surge_score + turnover_boost_score + divergence_bonus
        ))

        return {
            "code": etf_code,
            "flow_score": round(total_score, 2),
            "amount_surge": round(amount_surge_score, 2),
            "turnover_boost": round(turnover_boost_score, 2),
            "divergence_signal": divergence_signal,
            "recent_avg_amount": round(recent_avg_amount / 1e8, 2),  # 单位：亿
            "baseline_avg_amount": round(baseline_avg_amount / 1e8, 2),
            "status": "success"
        }

    def detect_anomaly(
        self,
        etf_code: str,
        sigma: float = 2.0
    ) -> Dict:
        """
        异常检测 - 识别异常放量或暴涨暴跌

        Args:
            etf_code: ETF 代码
            sigma: 标准差倍数（默认2倍）

        Returns:
            异常信号字典
        """
        df = self.get_etf_historical_data(etf_code, days=30)

        if df is None or len(df) < 10:
            return {"code": etf_code, "has_anomaly": False, "reason": "insufficient_data"}

        # 计算统计基准
        mean_amount = df['amount'].mean()
        std_amount = df['amount'].std()
        mean_change = df['change_pct'].mean()
        std_change = df['change_pct'].std()

        # 最新数据
        latest = df.iloc[-1]

        anomaly_signals = []

        # 成交额异常
        if latest['amount'] > mean_amount + sigma * std_amount:
            anomaly_signals.append("volume_surge")

        # 涨跌幅异常
        if abs(latest['change_pct']) > abs(mean_change) + sigma * std_change:
            if latest['change_pct'] > 0:
                anomaly_signals.append("price_spike")
            else:
                anomaly_signals.append("price_crash")

        return {
            "code": etf_code,
            "has_anomaly": len(anomaly_signals) > 0,
            "signals": anomaly_signals,
            "latest_amount": round(latest['amount'] / 1e8, 2),
            "latest_change_pct": round(latest['change_pct'], 2),
            "threshold_amount": round((mean_amount + sigma * std_amount) / 1e8, 2),
            "threshold_change": round(abs(mean_change) + sigma * std_change, 2)
        }

    def batch_analyze(
        self,
        etf_codes: List[str]
    ) -> Dict[str, Dict]:
        """
        批量分析 ETF

        Args:
            etf_codes: ETF 代码列表

        Returns:
            分析结果字典 {code: result}
        """
        results = {}

        print(f"[Quant] 开始批量分析 {len(etf_codes)} 个 ETF...")

        for code in etf_codes:
            print(f"[Quant] 分析 {code}...")

            # 资金流评分
            flow_result = self.calculate_fund_flow_score(code)

            # 异常检测
            anomaly_result = self.detect_anomaly(code)

            # 实时行情
            realtime_data = self.get_etf_realtime_data(code)

            results[code] = {
                "flow_analysis": flow_result,
                "anomaly_detection": anomaly_result,
                "realtime": realtime_data,
                "timestamp": datetime.now().isoformat()
            }

        print(f"[Quant] 批量分析完成")
        return results

    def generate_market_report(
        self,
        analysis_results: Dict[str, Dict]
    ) -> Dict:
        """
        生成市场数据报告（供其他 Agent 使用）

        Args:
            analysis_results: batch_analyze 的输出

        Returns:
            标准化的 Market_Data_JSON
        """
        report = {
            "timestamp": datetime.now().isoformat(),
            "total_analyzed": len(analysis_results),
            "etf_scores": {},
            "top_flow_etfs": [],
            "anomaly_alerts": []
        }

        # 提取评分
        for code, result in analysis_results.items():
            flow_score = result.get("flow_analysis", {}).get("flow_score", 0)
            report["etf_scores"][code] = flow_score

            # 异常提醒
            if result.get("anomaly_detection", {}).get("has_anomaly"):
                report["anomaly_alerts"].append({
                    "code": code,
                    "signals": result["anomaly_detection"]["signals"]
                })

        # 排序 Top 资金流 ETF
        sorted_etfs = sorted(
            report["etf_scores"].items(),
            key=lambda x: x[1],
            reverse=True
        )
        report["top_flow_etfs"] = [
            {"code": code, "score": score}
            for code, score in sorted_etfs[:5]
        ]

        return report


# ==================== 工具函数 ====================

def get_all_etf_codes_from_asset_map() -> List[str]:
    """从 asset_map.json 提取所有 ETF 代码"""
    import json
    from pathlib import Path

    asset_map_path = Path("core") / "asset_map.json"

    if not asset_map_path.exists():
        print("[Quant] 未找到 asset_map.json")
        return []

    with open(asset_map_path, 'r', encoding='utf-8') as f:
        asset_map = json.load(f)

    etf_codes = set()
    for sector_data in asset_map.get("sectors", {}).values():
        etf_codes.update(sector_data.get("etfs", []))

    return list(etf_codes)


if __name__ == "__main__":
    print("=" * 50)
    print("Skill B: Quant 量化研究员测试")
    print("=" * 50)

    # 创建分析器实例
    quant = QuantAnalyzer()

    # 测试单个 ETF
    test_code = "512480"  # 半导体 ETF
    print(f"\n测试 ETF: {test_code}")

    # 1. 获取实时数据
    print("\n1. 实时行情:")
    realtime = quant.get_etf_realtime_data(test_code)
    if realtime:
        print(f"   {realtime['name']}: ¥{realtime['price']}, 涨跌幅: {realtime['change_pct']}%")

    # 2. 资金流评分
    print("\n2. 资金流分析:")
    flow = quant.calculate_fund_flow_score(test_code)
    print(f"   资金流评分: {flow['flow_score']}")
    print(f"   成交额放量: {flow['amount_surge']}")
    print(f"   换手率提升: {flow['turnover_boost']}")

    # 3. 异常检测
    print("\n3. 异常检测:")
    anomaly = quant.detect_anomaly(test_code)
    print(f"   发现异常: {anomaly['has_anomaly']}")
    if anomaly['has_anomaly']:
        print(f"   异常信号: {anomaly['signals']}")

    # 4. 批量分析（测试少量 ETF）
    print("\n4. 批量分析测试:")
    test_codes = ["512480", "515980", "159928"]  # 半导体、AI、消费
    batch_results = quant.batch_analyze(test_codes)

    # 5. 生成报告
    report = quant.generate_market_report(batch_results)
    print(f"\n5. 市场报告:")
    print(f"   分析数量: {report['total_analyzed']}")
    print(f"   Top 资金流 ETF:")
    for item in report['top_flow_etfs']:
        print(f"      {item['code']}: {item['score']:.2f}")

    print("\n✅ Quant 模块测试完成")