#!/usr/bin/env python3
"""
AXIOM Server — Local data proxy for the AXIOM Paper Trading Terminal.
Replaces: python3 -m http.server 8080

Serves static files (axiom-alpaca.html) AND provides yfinance data endpoints
for consolidated market data, fundamentals, short interest, and earnings.

Usage:
    cd ~/AXIOM
    python3 axiom-server.py

Then open http://localhost:8080/axiom-alpaca.html in Safari.

Dependencies:
    pip3 install yfinance
"""

import http.server
import json
import os
import sys
import time
import threading
import urllib.parse
import urllib.request
import ssl
from datetime import datetime, timedelta
from email.utils import parsedate_to_datetime

try:
    import yfinance as yf
except ImportError:
    print("\n[AXIOM] ERROR: yfinance not installed.")
    print("[AXIOM] Run: pip3 install yfinance\n")
    sys.exit(1)

# ═══════════════════════════════════════════════════════════════
# CONFIGURATION
# ═══════════════════════════════════════════════════════════════

PORT = 8080
BIND = "0.0.0.0"  # Accessible from other devices on network

# Disable crawler on Oracle (saves bandwidth). Use: python3 axiom-server.py --no-crawler
NO_CRAWLER = "--no-crawler" in sys.argv

# Cache TTLs (seconds)
CACHE_TTL_QUOTE = 30       # Live quotes — refresh every 30s
CACHE_TTL_HISTORY = 3600   # Historical bars — 1 hour
CACHE_TTL_SHORT = 600      # Short interest — 10 minutes
CACHE_TTL_FUNDAMENTALS = 3600  # Fundamentals — 1 hour
CACHE_TTL_EARNINGS = 3600  # Earnings calendar — 1 hour

# ═══════════════════════════════════════════════════════════════
# IN-MEMORY CACHE
# ═══════════════════════════════════════════════════════════════

cache = {}
cache_lock = threading.Lock()

# ── Delisted Ticker Tracker ──
# Tracks symbols that repeatedly return no data from yfinance.
# Only counts a miss when >50% of the batch succeeded (proves API works).
# After 10 consecutive qualified failures + verification, symbol is confirmed.
DELISTED_THRESHOLD = 10
delisted_tracker = {}       # {sym: consecutive_fail_count}
delisted_confirmed = set()  # Symbols confirmed delisted
delisted_lock = threading.Lock()


def cache_get(key, ttl):
    """Return the cached payload for *key*, or None when absent or stale.

    An entry counts as fresh while fewer than *ttl* seconds have elapsed
    since it was stored. The lookup is performed while holding cache_lock.
    """
    with cache_lock:
        try:
            record = cache[key]
        except KeyError:
            return None
        age = time.time() - record["ts"]
        if age < ttl:
            return record["data"]
    return None


def cache_set(key, data):
    """Save *data* under *key*, stamped with the current wall-clock time."""
    record = {"data": data, "ts": time.time()}
    with cache_lock:
        cache[key] = record


# Shared SSL context (reused everywhere — avoids recreating per request)
# SECURITY NOTE: hostname checking and certificate verification are both
# switched off below, so HTTPS requests made with this context do not
# authenticate the remote server.
SSL_CTX = ssl.create_default_context()
SSL_CTX.check_hostname = False
SSL_CTX.verify_mode = ssl.CERT_NONE

# Shared User-Agent header
UA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"


# ═══════════════════════════════════════════════════════════════
# TICKER DATABASE — Loaded from SEC EDGAR for prefix matching
# ═══════════════════════════════════════════════════════════════

ticker_db = []           # List of {symbol, name} dicts sorted by symbol
ticker_db_loaded = False
ticker_db_lock = threading.Lock()

def load_ticker_db():
    """Populate the module-level ``ticker_db`` from SEC EDGAR company_tickers.json.

    The download happens at most once per process: the function returns
    immediately when ``ticker_db_loaded`` is already set, and the actual load
    runs under ``ticker_db_lock``.

    Symbols are upper-cased, de-duplicated, and skipped when they contain
    "." or "/". The resulting list is sorted by symbol length, then
    alphabetically, so short tickers come first.

    On any failure (network, JSON, unexpected payload shape) a small
    hard-coded list of single-letter tickers is installed instead and the DB
    is still marked as loaded, so the download is not retried.

    Returns:
        None. Results are published via the ``ticker_db`` global.
    """
    global ticker_db, ticker_db_loaded
    if ticker_db_loaded:
        return

    with ticker_db_lock:
        # Re-check under the lock: another thread may have completed the load
        # while this one was waiting.
        if ticker_db_loaded:
            return
        try:
            url = "https://www.sec.gov/files/company_tickers.json"
            req = urllib.request.Request(url, headers={
                "User-Agent": "AXIOM Trading Terminal support@axiom.local",
                "Accept": "application/json",
            })
            # The context manager closes the HTTP response (and its socket)
            # even when read() or JSON decoding raises.
            with urllib.request.urlopen(req, timeout=15, context=SSL_CTX) as resp:
                data = json.loads(resp.read().decode("utf-8"))
            seen = set()
            db = []
            for entry in data.values():
                sym = str(entry.get("ticker", "")).upper().strip()
                name = str(entry.get("title", "")).strip()
                if sym and sym not in seen and "." not in sym and "/" not in sym:
                    seen.add(sym)
                    db.append({"symbol": sym, "name": name, "exchange": "", "type": "EQUITY"})
            # Sort by symbol length then alphabetically (short tickers first)
            db.sort(key=lambda x: (len(x["symbol"]), x["symbol"]))
            ticker_db = db
            ticker_db_loaded = True
            print(f"[AXIOM] Ticker DB loaded: {len(db)} symbols from SEC EDGAR")
        except Exception as e:
            # Deliberate best-effort: any failure degrades to the fallback list
            # rather than propagating to the caller.
            print(f"[AXIOM] Ticker DB load failed: {e}")
            # Minimal fallback — some common single/double-letter tickers
            ticker_db = [
                {"symbol": "A", "name": "Agilent Technologies", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "B", "name": "Barnes Group", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "C", "name": "Citigroup", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "D", "name": "Dominion Energy", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "E", "name": "ENI SpA", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "F", "name": "Ford Motor Co", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "G", "name": "Genpact Ltd", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "H", "name": "Hyatt Hotels", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "J", "name": "Jacobs Solutions", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "K", "name": "Kellanova", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "L", "name": "Loews Corp", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "M", "name": "Macy's Inc", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "O", "name": "Realty Income", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "R", "name": "Ryder System", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "S", "name": "SentinelOne", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "T", "name": "AT&T Inc", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "U", "name": "Unity Software", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "V", "name": "Visa Inc", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "W", "name": "Wayfair Inc", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "X", "name": "US Steel Corp", "exchange": "NYSE", "type": "EQUITY"},
                {"symbol": "Z", "name": "Zillow Group", "exchange": "NASDAQ", "type": "EQUITY"},
            ]
            ticker_db_loaded = True

def search_ticker_db(query, limit=12):
    """Look up *query* in the ticker DB and return at most *limit* records.

    Ranking: a symbol equal to the query first, then symbols that start with
    the query, then records whose company name contains the query. Matching
    is case-insensitive; a blank query yields an empty list. The DB is loaded
    on demand via load_ticker_db().
    """
    load_ticker_db()
    needle = query.upper().strip()
    if not needle:
        return []

    def by_length_then_alpha(rec):
        return (len(rec["symbol"]), rec["symbol"])

    same_symbol, symbol_prefix, name_hit = [], [], []
    for rec in ticker_db:
        symbol = rec["symbol"]
        company = rec["name"].upper()
        if symbol == needle:
            same_symbol.append(rec)
        elif symbol.startswith(needle):
            symbol_prefix.append(rec)
        elif needle in company:
            name_hit.append(rec)

    # Shorter symbols rank ahead of longer ones inside each group
    symbol_prefix = sorted(symbol_prefix, key=by_length_then_alpha)
    name_hit = sorted(name_hit, key=by_length_then_alpha)

    # Name matches only fill whatever room the prefix matches leave
    room_for_names = max(0, limit - len(symbol_prefix))
    ranked = same_symbol + symbol_prefix[:limit] + name_hit[:room_for_names]
    return ranked[:limit]

# Warm the ticker DB from a daemon thread as soon as the module is imported
def _bg_load_ticker_db():
    """Thread target: pause briefly, then trigger the one-time ticker DB load."""
    startup_delay_seconds = 2  # Small delay so server starts fast
    time.sleep(startup_delay_seconds)
    load_ticker_db()

threading.Thread(target=_bg_load_ticker_db, daemon=True).start()


# ═══════════════════════════════════════════════════════════════
# BACKGROUND NEWS CRAWLER — Polls 12+ free sources every 60s
# ═══════════════════════════════════════════════════════════════

import re

crawler_store = {
    "headlines": [],
    "last_scan": 0,
    "scan_count": 0,
    "errors": [],
    "sources_ok": [],
}
crawler_lock = threading.Lock()

CRAWLER_INTERVAL = 60  # seconds between full crawl cycles

# ── Catalyst keywords — wire services ONLY pass if headline contains these ──
CATALYST_KEYWORDS = [
    'fda', 'approval', 'approved', 'clearance', 'breakthrough therapy',
    'merger', 'acquisition', 'acquire', 'buyout', 'takeover', 'tender offer',
    'earnings', 'revenue', 'eps', 'guidance', 'forecast', 'outlook', 'profit',
    'contract', 'awarded', 'partnership', 'collaboration', 'deal', 'agreement',
    'ipo', 'offering', 'secondary offering', 'shelf registration', 'public offering',
    'bankruptcy', 'restructuring', 'delisting', 'chapter 11', 'default',
    'investigation', 'subpoena', 'settlement', 'indictment', 'fraud',
    'ceo', 'cfo', 'resignation', 'appointed', 'terminated', 'executive',
    'dividend', 'buyback', 'repurchase', 'split', 'spinoff', 'spin-off',
    'patent', 'lawsuit', 'litigation', 'injunction',
    'clinical trial', 'phase 1', 'phase 2', 'phase 3', 'pdufa', 'nda filing',
    'upgrade', 'downgrade', 'price target', 'initiate coverage', 'rating',
    'recall', 'warning letter', 'crl', 'complete response',
    'data breach', 'cybersecurity', 'hack',
    'tariff', 'sanction', 'ban', 'regulation', 'antitrust',
    'stock split', 'reverse split', 'tender', 'proxy fight',
]

# ── Spam/clickbait filter for crawler (MAXIMUM AGGRESSION) ──
# Entries are matched as substrings of the LOWER-CASED source name, so every
# entry here must itself be lowercase or it can never match.
CRAWLER_SPAM_SOURCES = {
    # Investment opinion / clickbait sites
    'accesswire', 'marketbeat', 'investorplace', 'insidermonkey',
    'insider monkey', 'simplywall', 'simply wall', 'tipranks',
    'stockanalysis', 'gurumeditations', 'benzinga cannabis',
    'smarteranalyst', 'wallstreetzen', 'stocktwits',
    'motley fool', 'fool.com', 'zacks', 'zacks.com',
    '247wallst', '24/7 wall st', 'investopedia',
    'kiplinger', 'stocknews.com', 'stocknews',
    'investingcube', 'financhill', 'finbold',
    'coinspeaker', 'coindesk', 'cointelegraph',
    'wccftech', 'talkmarkets', 'gurufocus',
    'tradingview', 'yahoofinance',
    'nasdaq.com/articles', 'insidertrades',
    'smartkarma', 'alphaspread', 'marketscreener',
    'barchart', 'chartmill', 'financialmodeling',
    'macroaxis', 'wisesheets', 'danelfin',
    'thefly buzz', 'flyonthewall', 'briefing.com',
    'investrend', 'goldseiten', 'silberseiten',
    # Additional garbage sources
    'thestreet', 'thestreet.com', 'benzinga',
    'schaeffers', 'schaeffersresearch',
    'stockinvestor', 'stockmarket.com',
    'moneymorning', 'investinganswers',
    'stockgumshoe', 'stockresearchpro',
    'marketrealist', 'stocktargetadvisor',
    'topstocktips', 'pennystockdream',
    'microcapdaily', 'otcmarkets',
    'investorsalley', 'tradesmithdaily',
    'oxfordclub', 'moneymappress',
    'wealthdaily', 'energyandcapital',
    'biomedtracker', 'wallstreetdaily',
    'investinghaven', 'equitymaster',
    'dailyreckoning', 'agorafinancial',
    'stansberryresearch', 'banyanhill',
    'palmbeachgroup', 'legacyresearch',
    'wealthpress', 'tradehawk',
    'profitconfidential', 'investmentu',
}

CRAWLER_JUNK_PATTERNS = [
    # Stock recommendation clickbait
    r'top \d+ stocks', r'\d+ stocks to', r'best stocks',
    r'should you buy', r'is it time to', r'massive upside',
    r'skyrocket', r'moon shot', r'next big thing',
    r'millionaire', r'retire early', r'secret stock',
    r'penny stock', r'free stock', r'stock pick',
    r'buy the dip', r'buy now', r'sell now',
    r'wall street.?s? pick', r'undervalued gem',
    r'hidden gem', r'explosive growth', r'100% upside',
    r'stock to watch', r'stocks to watch', r'watchlist',
    r'portfolio', r'dividend aristocrat', r'passive income',
    r'warren buffett', r'buffett', r'cathie wood',
    r'must.?own', r'hot stock', r'reddit favorite',
    r'meme stock', r'to the moon', r'diamond hand',
    # Urgency / FOMO clickbait
    r'here.{0,3}s? why', r'you.?ll regret', r'don.?t miss',
    r'could soar', r'could surge', r'could rally',
    r'set to explode', r'about to break', r'primed for',
    r'\d+% return', r'double your', r'triple your',
    r'under the radar', r'flying under', r'sleeper stock',
    r'ai stock to', r'ev stock to', r'crypto stock',
    r'magnificent seven', r'mag.? ?7',
    r'market crash', r'recession proof', r'bear market',
    r'what.*investors.*need.*know',
    r'according to analysts', r'analysts say', r'analysts predict',
    r'wall street loves', r'wall street hates',
    # Ad / sponsored / promo
    r'sponsored', r'advertisement', r'partner content',
    r'promoted', r'\bad\b:', r'paid post', r'branded content',
    r'subscribe now', r'sign up', r'newsletter',
    r'affiliate', r'discount code', r'promo code',
    r'click here', r'learn more', r'limited time',
    r'exclusive offer', r'free trial', r'act now',
    r'congratulations', r'you.?ve been selected',
    r'giveaway', r'sweepstakes', r'contest',
    # Seeking Alpha specific junk patterns
    r'wall street breakfast', r'seeking alpha premium',
    r'deep value', r'deep dive', r'undervalued',
    r'dividend champion', r'dividend king', r'dividend growth',
    r'income investor', r'value investor', r'growth investor',
    r'alpha picks', r'alpha investor', r'quant rating',
    r'sa premium', r'sa marketplace', r'sa author',
    r'why i.?m buying', r'why i.?m selling', r'why i bought',
    r'my top pick', r'my portfolio', r'my dividend',
    r'strong buy', r'strong sell', r'hold rating',
    r'bear case', r'bull case', r'investment thesis',
    r'intrinsic value', r'fair value estimate',
    r'sum.of.the.parts', r'dcf model', r'dcf valuation',
    r'margin of safety', r'price.to.value',
    r'i.?m bullish', r'i.?m bearish', r'remain bullish', r'remain bearish',
    r'time to load up', r'back up the truck',
    r'once.in.a.lifetime', r'generational.*buy',
    r'no.brainer', r'slam dunk', r'easy money',
    # More general opinion clickbait
    r'why.*(?:is|are).*(?:buy|sell|hold)',
    r'reasons? to (?:buy|sell|avoid)',
    r'\d+ reasons?', r'here are \d+',
    r'everything you need to know',
    r'complete guide', r'ultimate guide', r'beginner.?s guide',
    r'how to (?:invest|trade|buy|pick)',
    r'what you need to know',
    r'the truth about', r'the real reason',
    r'don.?t (?:panic|worry|sell)', r'stay calm',
    r'is it (?:too late|over|done)',
    r'still a buy', r'still undervalued', r'still cheap',
    r'ignore the noise', r'ignore the bears',
    r'contrarian', r'against the crowd',
    r'beaten.down', r'oversold.*buy', r'buy.*oversold',
    r'rich valuation', r'overvalued.*sell',
    r'(?:weekly|daily|monthly) (?:recap|roundup|summary|wrap)',
    r'market (?:recap|roundup|wrap|update|commentary)',
    r'this week in', r'last week in',
    r'morning (?:brief|note|update)', r'evening (?:brief|note)',
    r'closing bell', r'opening bell',
    r'what to expect', r'week ahead', r'day ahead',
    r'preview:', r'outlook:', r'forecast:',
    r'technical analysis', r'chart analysis', r'chart pattern',
    r'support and resistance', r'fibonacci',
    r'moving average', r'rsi (?:shows|indicates|suggests)',
    r'macd (?:shows|indicates|suggests)',
    r'elliott wave', r'wave count',
]

# TRUSTED = only hard-news wire services and government sources
# Seeking Alpha is NOT trusted — too much opinion clickbait
CRAWLER_TRUSTED_SOURCES = {
    'reuters', 'bloomberg', 'cnbc', 'wsj', 'wall street journal',
    'financial times', 'ft.com', 'barron', 'marketwatch',
    'associated press', 'ap news', 'nytimes', 'new york times',
    'washington post', 'sec.gov', 'edgar', 'fda.gov',
    'the information', 'semafor', 'investing.com',
    'pr newswire', 'business wire', 'globenewswire',
    'federal reserve', 'us treasury', 'federalreserve.gov',
}

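# Sources that require EXTRA filtering — only pass catalyst headlines.
# NOTE: 'benzinga' and 'thestreet' also appear in CRAWLER_SPAM_SOURCES, so
# crawler_is_spam() rejects them outright before this catalyst check runs,
# unless the source string also matches a trusted source.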
CRAWLER_OPINION_SOURCES = {
    'seeking alpha', 'seekingalpha',
    'benzinga', 'thestreet',
}


def crawler_is_spam(headline, source):
    """Decide whether a crawled headline should be discarded.

    Returns True when any of these hold, False otherwise:
      * the source matches a blocked spam source and no trusted source;
      * the source is an opinion outlet and the headline has no catalyst keyword;
      * the lower-cased headline matches a junk/clickbait pattern
        (applied to every source, trusted ones included);
      * the headline is shorter than 20 characters;
      * the headline is longer than 30 characters and entirely upper-case;
      * the headline has 2+ '!', 3+ '?', or 3+ '$'.
    """
    text = headline or ''
    lowered = text.lower()
    origin = (source or '').lower()

    # Trusted outlets are exempt from the source blocklist only
    trusted = any(name in origin for name in CRAWLER_TRUSTED_SOURCES)
    if not trusted and any(name in origin for name in CRAWLER_SPAM_SOURCES):
        return True

    # Opinion outlets must carry a hard catalyst keyword to get through
    from_opinion_outlet = any(name in origin for name in CRAWLER_OPINION_SOURCES)
    if from_opinion_outlet and not crawler_has_catalyst(headline):
        return True

    # Clickbait phrasing is rejected regardless of who published it
    if any(re.search(pattern, lowered) for pattern in CRAWLER_JUNK_PATTERNS):
        return True

    # Very short headlines carry no usable information
    if len(text) < 20:
        return True

    # Long, fully upper-case headlines are treated as shouting/spam
    if len(text) > 30 and text == text.upper():
        return True

    # Piled-up punctuation (!!, ???, $$$) signals clickbait
    return text.count('!') >= 2 or text.count('?') >= 3 or text.count('$') >= 3


def crawler_has_catalyst(text):
    """Return True when any CATALYST_KEYWORDS entry occurs in *text*.

    Matching is a case-insensitive substring test; None or empty text
    yields False.
    """
    lowered = (text or '').lower()
    return any(keyword in lowered for keyword in CATALYST_KEYWORDS)


# ── Sentiment scoring: bull/bear classification for headlines ──
# Score ranges from -100 (most bearish) to +100 (most bullish)
# Used to sort all filings and news from most bullish to most bearish

SENTIMENT_BULLISH_KEYWORDS = {
    # Strong bull (+80 to +100)
    'fda approval': 95, 'fda approved': 95, 'fda clearance': 90,
    'breakthrough therapy': 90, 'fast track': 85,
    'earnings beat': 85, 'revenue beat': 85, 'tops estimates': 85,
    'guidance raised': 80, 'raises guidance': 80, 'raises outlook': 80,
    'raises forecast': 80, 'upside guidance': 80, 'above expectations': 80,
    'record revenue': 85, 'record earnings': 85, 'record profit': 85,
    'all-time high': 80, 'new high': 75,
    'strong buy': 80, 'analyst upgrade': 75, 'upgrade to buy': 75,
    'price target raised': 70, 'price target increased': 70,
    'initiated with buy': 70, 'initiated with outperform': 70,

    # Moderate bull (+40 to +70)
    'partnership': 60, 'collaboration': 55, 'strategic alliance': 60,
    'contract awarded': 65, 'wins contract': 65, 'new contract': 60,
    'acquisition of': 55, 'acquires': 55, 'to acquire': 55,
    'merger agreement': 50, 'merger approved': 60,
    'buyback': 55, 'share repurchase': 55, 'stock repurchase': 55,
    'dividend increase': 60, 'special dividend': 65, 'raises dividend': 60,
    'stock split': 50, 'insider buying': 65, 'insider purchase': 65,
    'beat expectations': 70, 'beat estimates': 70,
    'positive results': 65, 'positive data': 65, 'positive phase': 70,
    'ipo prices': 50, 'ipo priced above': 60,
    'upgrade': 55, 'outperform': 50, 'overweight': 50,

    # Mild bull (+10 to +35)
    'revenue growth': 30, 'sales growth': 30, 'profit growth': 30,
    'expansion': 25, 'new product': 25, 'launch': 20,
    'growth': 15, 'bullish': 30, 'rally': 25, 'surges': 30,
    'soars': 35, 'jumps': 25, 'gains': 15, 'rises': 10,
    'rebounds': 20, 'recovery': 15, 'improves': 15,
}

SENTIMENT_BEARISH_KEYWORDS = {
    # Strong bear (-80 to -100)
    'bankruptcy': -95, 'chapter 11': -95, 'chapter 7': -100,
    'fraud': -90, 'indictment': -90, 'criminal charges': -95,
    'sec investigation': -85, 'doj investigation': -90,
    'accounting fraud': -95, 'restatement': -80,
    'delisting': -90, 'delisted': -90,
    'short seller report': -85, 'hindenburg': -80, 'muddy waters': -80,
    'citron research': -75, 'iceberg research': -75,
    'fda rejection': -90, 'fda refuses': -85, 'complete response letter': -85,
    'clinical trial failure': -90, 'failed phase': -85, 'trial halted': -85,
    'data breach': -70, 'cybersecurity incident': -65,

    # Moderate bear (-40 to -70)
    'earnings miss': -75, 'revenue miss': -75, 'misses estimates': -75,
    'guidance cut': -70, 'lowers guidance': -70, 'lowers outlook': -70,
    'warns': -60, 'profit warning': -70, 'revenue warning': -65,
    'downgrade': -60, 'sell rating': -65, 'underperform': -55,
    'price target cut': -55, 'price target lowered': -55,
    'layoffs': -50, 'job cuts': -50, 'workforce reduction': -50,
    'recall': -55, 'product recall': -60, 'safety recall': -65,
    'lawsuit': -45, 'litigation': -40, 'sued': -45,
    'investigation': -50, 'subpoena': -55, 'probe': -45,
    'insider selling': -55, 'insider sale': -50,
    'secondary offering': -50, 'dilution': -55, 'shelf offering': -50,
    'default': -80, 'restructuring': -45,

    # Mild bear (-10 to -35)
    'decline': -20, 'drops': -15, 'falls': -15, 'slips': -10,
    'plunges': -35, 'tumbles': -30, 'crashes': -35, 'sinks': -25,
    'bearish': -30, 'selloff': -25, 'sell-off': -25,
    'weakness': -15, 'concern': -10, 'risk': -10,
    'misses': -20, 'disappoints': -25, 'below expectations': -30,
    'tariff': -20, 'sanction': -25, 'ban': -20,
}


def crawler_score_sentiment(headline, source_type="news", filing_label=""):
    """Rate a headline between -100 (most bearish) and +100 (most bullish).

    The strongest bullish keyword hit and the strongest bearish keyword hit
    are netted together. Filing-specific rules (INSIDER / IPO / ACTIVIST) and
    source-type defaults ("sec", "wire") then adjust the result.

    Returns:
        (score, label), where label is one of STRONG BULL, BULL, MILD BULL,
        NEUTRAL, MILD BEAR, BEAR, STRONG BEAR. An empty headline gives
        (0, "NEUTRAL").
    """
    if not headline:
        return 0, "NEUTRAL"

    text = headline.lower()

    # Strongest hit on each side; the 0 seed means "no keyword found"
    bull_peak = max(
        [0] + [weight for phrase, weight in SENTIMENT_BULLISH_KEYWORDS.items() if phrase in text]
    )
    bear_peak = min(
        [0] + [weight for phrase, weight in SENTIMENT_BEARISH_KEYWORDS.items() if phrase in text]
    )
    # One side is 0 when only the other matched, so a plain sum nets them
    score = bull_peak + bear_peak

    if filing_label == "INSIDER":
        # Insider buying is very bullish, selling is bearish
        if any(word in text for word in ("purchase", "buying", "bought")):
            score = max(score, 65)
        elif any(word in text for word in ("sale", "selling", "sold")):
            score = min(score, -50)
        elif score == 0:
            score = -20  # Most Form 4s are insider sales (compensation)
    elif score == 0 and filing_label in ("IPO", "ACTIVIST"):
        # IPO filings lean mildly bullish; activist stakes a little more so
        score = 35 if filing_label == "IPO" else 40
    # "8-K" and every other label: the headline content alone decides

    # Keyword-free items still get a small default by channel
    if score == 0:
        if source_type == "sec":
            score = 5  # Slightly positive — filing = something is happening
        elif source_type == "wire":
            score = 10  # Wire releases tend to be company-positive PR

    # Map the numeric score onto its label band
    if score >= 70:
        return score, "STRONG BULL"
    if score >= 40:
        return score, "BULL"
    if score >= 15:
        return score, "MILD BULL"
    if score > -15:
        return score, "NEUTRAL"
    if score > -40:
        return score, "MILD BEAR"
    if score > -70:
        return score, "BEAR"
    return score, "STRONG BEAR"


# Upper-case words/acronyms that look like tickers but usually are not.
# Built once at import time instead of on every call.
_CRAWLER_NON_TICKER_WORDS = frozenset({
    'CEO', 'CFO', 'FDA', 'SEC', 'IPO', 'ETF', 'NYSE', 'NASDAQ', 'EPS',
    'GDP', 'CPI', 'US', 'USA', 'EST', 'EDT', 'PST', 'PDT', 'RSS', 'API',
    'AI', 'COO', 'CTO', 'LLC', 'INC', 'LTD', 'THE', 'FOR',
    'AND', 'NOT', 'BUT', 'ARE', 'WAS', 'HAS', 'ITS', 'NEW', 'ALL', 'CAN',
    'MAY', 'ANY', 'NOW', 'OUR', 'ONE', 'TWO', 'TOP', 'KEY', 'BIG', 'LOW',
    'BUY', 'RUN', 'SET', 'OLD', 'END', 'PUT', 'GET', 'HOW', 'WHY', 'WHO',
    'LED', 'RED', 'NET', 'TAX', 'OIL', 'GAS', 'CAR', 'FED', 'IMF',
    'CDC', 'NIH', 'DOJ', 'FBI', 'CIA', 'NSA', 'EPA', 'FTC', 'DOD', 'FAA',
    'ATM', 'APR', 'APT', 'AMP', 'ALSO', 'JUST', 'LIKE', 'MORE', 'MOST',
    'MUCH', 'WILL', 'WHAT', 'WHEN', 'THAN', 'THEM', 'THEN', 'THAT', 'THIS',
    'FROM', 'HAVE', 'BEEN', 'WERE', 'INTO', 'OVER', 'EACH', 'AMID',
})

# Cashtag form: "$AAPL"
_CRAWLER_CASHTAG_RE = re.compile(r'\$([A-Z]{1,5})\b')
# Exchange-prefixed form: "NASDAQ: AAPL" / "NYSE XYZ". The trailing \b stops
# the capture from grabbing the first capital of an ordinary word
# ("NASDAQ Composite" -> "C") or truncating a longer all-caps token.
_CRAWLER_EXCHANGE_RE = re.compile(r'\b(?:NYSE|NASDAQ|AMEX|OTC)([:\s]+)([A-Z]{1,5})\b')


def crawler_extract_tickers(text):
    """Extract stock ticker symbols from *text*.

    Two notations are recognised:
      * cashtags such as ``$TSLA`` (common words/acronyms are ignored);
      * exchange-prefixed symbols such as ``NASDAQ: AAPL``. When the separator
        has no colon ("NYSE AND ..."), common words are ignored as well; an
        explicit colon form ("NYSE: ALL") is always accepted.

    Args:
        text: Free text to scan; None or empty text yields an empty list.

    Returns:
        A sorted list of unique ticker strings.
    """
    if not text:
        return []

    tickers = {
        sym for sym in _CRAWLER_CASHTAG_RE.findall(text)
        if sym not in _CRAWLER_NON_TICKER_WORDS
    }
    for separator, sym in _CRAWLER_EXCHANGE_RE.findall(text):
        # Without a colon this is likely prose ("NYSE AND NASDAQ"), not a listing
        if ':' not in separator and sym in _CRAWLER_NON_TICKER_WORDS:
            continue
        tickers.add(sym)
    # Sorted so callers get a deterministic order instead of set order
    return sorted(tickers)


def crawler_parse_rss(xml_text, source_name, source_type="news"):
    """Parse RSS feed XML into a list of filtered, scored headline dicts.

    Only the first 30 ``<item>`` elements are examined. An item is dropped
    when it has no title, when crawler_is_spam() flags it, or — for
    ``source_type == "wire"`` — when title + description contain no catalyst
    keyword.

    Args:
        xml_text: Raw feed XML as text; None or empty yields an empty list.
        source_name: Label stored in each result and passed to the spam filter.
        source_type: Channel tag stored in each result and passed to the
            sentiment scorer.

    Returns:
        A list of dicts with keys ``headline``, ``source``, ``source_type``,
        ``url``, ``desc`` (max 200 chars), ``ts`` (epoch milliseconds),
        ``tickers``, ``has_catalyst``, ``sentiment`` and ``sentiment_label``.
    """
    # Function-scope import so this block does not depend on the file's import list
    from html import unescape

    results = []
    items = re.findall(r'<item>(.*?)</item>', xml_text or "", re.DOTALL)

    for item_xml in items[:30]:
        # Prefer the CDATA-wrapped form, fall back to a plain element
        title_m = (re.search(r'<title><!\[CDATA\[(.*?)\]\]></title>', item_xml)
                   or re.search(r'<title>(.*?)</title>', item_xml))
        link_m = re.search(r'<link/?\s*>(.*?)<', item_xml) or re.search(r'<link>(.*?)</link>', item_xml)
        date_m = re.search(r'<pubDate>(.*?)</pubDate>', item_xml)
        desc_m = (re.search(r'<description><!\[CDATA\[(.*?)\]\]></description>', item_xml, re.DOTALL)
                  or re.search(r'<description>(.*?)</description>', item_xml, re.DOTALL))

        # Decode entities in a single pass, so numeric/named entities such as
        # &#8217; are handled and "&amp;lt;" is not decoded twice. Any markup
        # that surfaces is then stripped.
        title = unescape(title_m.group(1)) if title_m else ""
        title = re.sub(r'<[^>]+>', '', title).strip()
        if not title:
            continue

        link = link_m.group(1).strip() if link_m else ""
        pub_date = date_m.group(1) if date_m else ""
        desc = unescape(desc_m.group(1)) if desc_m else ""
        desc = re.sub(r'<[^>]+>', '', desc).strip()[:300]

        # Spam filter
        if crawler_is_spam(title, source_name):
            continue

        combined = title + " " + desc
        has_catalyst = crawler_has_catalyst(combined)

        # Wire services: ONLY pass headlines with real catalyst content
        if source_type == "wire" and not has_catalyst:
            continue

        # Timestamp: RFC 2822 date first, then ISO 8601, else "now".
        # Broad excepts are deliberate: any unparseable date falls through.
        try:
            ts = int(parsedate_to_datetime(pub_date).timestamp() * 1000)
        except Exception:
            try:
                iso_dt = datetime.fromisoformat(pub_date.replace("Z", "+00:00"))
                ts = int(iso_dt.timestamp() * 1000)
            except Exception:
                ts = int(time.time() * 1000)

        sent_score, sent_label = crawler_score_sentiment(combined, source_type)

        results.append({
            "headline": title,
            "source": source_name,
            "source_type": source_type,
            "url": link,
            "desc": desc[:200],
            "ts": ts,
            "tickers": crawler_extract_tickers(combined),
            "has_catalyst": has_catalyst,
            "sentiment": sent_score,
            "sentiment_label": sent_label,
        })

    return results


def crawler_fetch_rss(url, source_name, source_type="news"):
    """Fetch a single RSS feed and return its parsed, filtered headlines.

    Args:
        url: Feed URL to download.
        source_name: Source label forwarded to crawler_parse_rss().
        source_type: Channel tag forwarded to crawler_parse_rss().

    Returns:
        The list of headline dicts produced by crawler_parse_rss().

    Raises:
        Whatever urlopen() raises (network errors, timeouts, HTTP errors);
        nothing is caught here, so the caller must handle failures.
    """
    req = urllib.request.Request(url, headers={
        "User-Agent": UA,
        "Accept": "application/rss+xml, application/xml, text/xml, */*",
    })
    # The context manager closes the response (and its socket) even if read() raises
    with urllib.request.urlopen(req, timeout=12, context=SSL_CTX) as resp:
        # Undecodable bytes are dropped rather than failing the whole feed
        xml = resp.read().decode("utf-8", errors="ignore")
    return crawler_parse_rss(xml, source_name, source_type)


def crawler_fetch_reddit():
    """Fetch trending posts from financial subreddits (public JSON, no auth).

    Each subreddit listing is requested with ``limit=25``. Posts are kept only
    when they show some engagement (score >= 10 or at least 5 comments) and
    pass crawler_is_spam(). A failure on one subreddit is logged and does not
    stop the remaining subreddits.

    Returns:
        A list of dicts with keys ``headline``, ``source``, ``source_type``
        ("reddit"), ``url``, ``desc``, ``ts`` (epoch milliseconds, 0 when
        unknown), ``tickers``, ``has_catalyst``, ``score``, ``sentiment``,
        ``sentiment_label`` and ``comments``.
    """

    subreddits = [
        ("wallstreetbets", "hot"),
        ("stocks", "new"),
        ("pennystocks", "new"),
        ("stockmarket", "new"),
        ("investing", "new"),
        ("options", "hot"),
        ("Daytrading", "new"),
        ("SecurityAnalysis", "new"),
        ("SPACs", "new"),
        ("biotechplays", "new"),
        ("smallstreetbets", "hot"),
    ]

    results = []
    for sub, sort in subreddits:
        try:
            url = f"https://www.reddit.com/r/{sub}/{sort}.json?limit=25"
            req = urllib.request.Request(url, headers={
                "User-Agent": "AXIOM-Crawler/1.0 (financial research tool)",
            })
            # The context manager closes the response even if read()/decoding raises
            with urllib.request.urlopen(req, timeout=10, context=SSL_CTX) as resp:
                data = json.loads(resp.read().decode("utf-8"))

            posts = data.get("data", {}).get("children", [])
            for post in posts:
                post_data = post.get("data", {})
                # `or` fallbacks: a JSON null arrives as None, which .get()'s
                # default does not replace because the key is present.
                title = post_data.get("title") or ""
                score = post_data.get("score") or 0
                num_comments = post_data.get("num_comments") or 0
                created = post_data.get("created_utc") or 0
                permalink = post_data.get("permalink") or ""
                selftext = (post_data.get("selftext") or "")[:300]

                # Only keep posts with engagement
                if score < 10 and num_comments < 5:
                    continue

                # Spam filter
                if crawler_is_spam(title, f"reddit r/{sub}"):
                    continue

                tickers = crawler_extract_tickers(title + " " + selftext)

                reddit_sent, reddit_sent_label = crawler_score_sentiment(title + " " + selftext[:200], "reddit")
                results.append({
                    "headline": title,
                    "source": f"r/{sub}",
                    "source_type": "reddit",
                    "url": f"https://reddit.com{permalink}",
                    "desc": selftext[:200],
                    "ts": int(created * 1000) if created else 0,
                    "tickers": tickers,
                    "has_catalyst": crawler_has_catalyst(title),
                    "score": score,
                    "sentiment": reddit_sent,
                    "sentiment_label": reddit_sent_label,
                    "comments": num_comments,
                })
        except Exception as e:
            # Best-effort per subreddit: log and move on to the next one
            print(f"[CRAWLER] Reddit r/{sub} error: {e}")

    return results


def _sec_http_get(url, user_agent, accept, timeout):
    """Fetch *url* and return the raw response body as bytes.

    The response is opened in a ``with`` block so the connection is released
    even if reading the body raises.
    """
    req = urllib.request.Request(url, headers={
        "User-Agent": user_agent,
        "Accept": accept,
    })
    with urllib.request.urlopen(req, timeout=timeout, context=SSL_CTX) as resp:
        return resp.read()


def _sec_updated_to_ms(updated, fallback_ms):
    """Convert an Atom <updated> ISO-8601 string to epoch ms, else *fallback_ms*."""
    try:
        return int(datetime.fromisoformat(updated.strip()).timestamp() * 1000)
    except (ValueError, OverflowError, OSError):
        return fallback_ms


def _sec_make_item(headline, url, desc, tickers, ts_ms, sent_text, label):
    """Build one crawler headline dict for an SEC filing and score its sentiment."""
    sentiment, sentiment_label = crawler_score_sentiment(sent_text, "sec", label)
    return {
        "headline": headline,
        "source": "SEC EDGAR",
        "source_type": "sec",
        "url": url,
        "desc": desc,
        "ts": ts_ms,
        "tickers": tickers,
        "has_catalyst": True,
        "sentiment": sentiment,
        "sentiment_label": sentiment_label,
    }


def _sec_parse_atom(text, label):
    """Extract up to 40 filings from an EDGAR Atom feed body using regexes."""
    now_ms = int(time.time() * 1000)
    items = []
    for entry in re.findall(r'<entry>(.*?)</entry>', text, re.DOTALL)[:40]:
        title_m = re.search(r'<title[^>]*>(.*?)</title>', entry)
        link_m = re.search(r'<link[^>]*href="([^"]*)"', entry)
        summary_m = re.search(r'<summary[^>]*>(.*?)</summary>', entry, re.DOTALL)
        updated_m = re.search(r'<updated>(.*?)</updated>', entry)
        title = title_m.group(1) if title_m else ""
        link = link_m.group(1) if link_m else ""
        summary = summary_m.group(1).strip() if summary_m else ""
        updated = updated_m.group(1) if updated_m else ""

        # Titles look like "8-K - Company Name (CIK) (Filer)"; keep the name only.
        company = title
        dash_idx = title.find(" - ")
        if dash_idx > -1:
            company = re.sub(r'\s*\(\d+\)\s*\(.*?\)\s*$', '', title[dash_idx + 3:]).strip()

        if not link.startswith("http"):
            link = "https://www.sec.gov" + link

        headline = f"[SEC {label}] {company}"
        items.append(_sec_make_item(
            headline=headline,
            url=link,
            desc=summary[:120] if summary else "",
            tickers=[],
            # Prefer the feed's own timestamp; fall back to crawl time if unparsable.
            ts_ms=_sec_updated_to_ms(updated, now_ms),
            sent_text=headline + " " + summary,
            label=label,
        ))
    return items


def _sec_parse_html(text, filing_type, label):
    """Extract up to 40 filings from the non-Atom (HTML) EDGAR listing."""
    now_ms = int(time.time() * 1000)
    wanted_form = filing_type.replace("+", " ")
    items = []
    rows = re.findall(r'<a href="(/Archives/edgar/data/[^"]+)"[^>]*>([^<]+)</a>', text)
    for link_path, form_text in rows[:40]:
        if wanted_form not in form_text and label not in form_text:
            continue
        # Try to extract the company name from the HTML surrounding the link.
        idx = text.find(link_path)
        chunk = text[max(0, idx - 300):idx + 200]
        co_m = re.search(r'<a[^>]*class="company-name"[^>]*>([^<]+)</a>', chunk)
        company = co_m.group(1).strip() if co_m else form_text.strip()

        headline = f"[SEC {label}] {company}"
        items.append(_sec_make_item(
            headline=headline,
            url="https://www.sec.gov" + link_path,
            desc="",
            tickers=[],
            ts_ms=now_ms,
            sent_text=headline,
            label=label,
        ))
    return items


def crawler_fetch_sec_filings():
    """Fetch recent SEC filings from EDGAR as crawler headline dicts.

    Crawls the "current filings" listing for 8-K (material events), Form 4
    (insider trades), S-1 (IPO filings) and SC 13D (activist positions). For
    each type the Atom feed is tried first and the HTML listing second; the
    HTML listing is only requested when the Atom attempt produced nothing
    for that type. If no listing produced anything at all, the EFTS
    full-text search API is queried for today's 8-K filings as a fallback.

    Returns:
        A list of headline dicts (possibly empty). Network and parse errors
        are printed and never raised to the caller.
    """

    SEC_UA = "AXIOM jyeager89@hotmail.com"
    today = datetime.now().strftime("%Y-%m-%d")
    results = []

    # Filing types to crawl: (type, label, count)
    SEC_FILING_TYPES = [
        ("8-K", "8-K", 40),         # Material events
        ("4", "INSIDER", 30),       # Insider buying/selling
        ("S-1", "IPO", 15),         # IPO filings
        ("SC+13D", "ACTIVIST", 15), # Activist positions
    ]

    for filing_type, label, count in SEC_FILING_TYPES:
        base_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcurrent&type={filing_type}&dateb=&owner=include&count={count}"
        for rss_url, is_atom in ((base_url + "&output=atom", True), (base_url, False)):
            try:
                body = _sec_http_get(
                    rss_url, SEC_UA,
                    "application/atom+xml, text/xml, text/html, */*", 15)
                text = body.decode("utf-8", errors="ignore")

                if is_atom and "<entry>" in text:
                    found = _sec_parse_atom(text, label)
                elif not is_atom and "<a href=" in text.lower():
                    found = _sec_parse_html(text, filing_type, label)
                else:
                    found = []
            except Exception as e:
                print(f"[AXIOM] SEC RSS feed failed ({rss_url[:60]}...): {e}")
                continue

            # Decide per filing type: checking the cumulative list would skip
            # the HTML fallback for every type after the first success.
            if found:
                results.extend(found)
                print(f"[AXIOM] SEC {label} filings via RSS: {len(found)} found")
                break

    if results:
        return results

    # ── FALLBACK: EFTS full-text search API (only when the listings gave nothing) ──
    for endpoint in [
        f"https://efts.sec.gov/LATEST/search-index?forms=8-K&dateRange=custom&startdt={today}&enddt={today}",
        f"https://efts.sec.gov/LATEST/search-index?q=%228-K%22&forms=8-K&dateRange=custom&startdt={today}&enddt={today}",
    ]:
        try:
            body = _sec_http_get(endpoint, SEC_UA, "application/json", 12)
            data = json.loads(body.decode("utf-8"))

            hits = []
            if "hits" in data and "hits" in data["hits"]:
                hits = data["hits"]["hits"]
            elif "filings" in data:
                hits = data["filings"]

            if not hits:
                continue

            for hit in hits[:50]:
                src = hit.get("_source", hit)
                names = src.get("display_names", [])
                company = names[0] if names else src.get("entity_name", src.get("name", "Unknown"))
                tickers_raw = src.get("tickers", []) or []
                form_type = src.get("form_type", "8-K")
                filed = src.get("file_date", src.get("filed", today))
                file_num = src.get("file_num", "")
                accession = src.get("accession_no", src.get("accession_number", ""))

                # Default link: the company's 8-K listing on EDGAR.
                filing_url = f"https://www.sec.gov/cgi-bin/browse-edgar?action=getcompany&company={urllib.parse.quote(company)}&type=8-K&dateb=&owner=include&count=10"
                if accession:
                    clean_acc = accession.replace("-", "")
                    # NOTE(review): the Archives path segment is derived from
                    # file_num here — confirm that is the id EDGAR expects.
                    entity_id = file_num.replace("-", "").split("/")[-1] if file_num else ""
                    if entity_id:
                        # Without an id the path would contain "//" and be broken.
                        filing_url = f"https://www.sec.gov/Archives/edgar/data/{entity_id}/{clean_acc}/{accession}-index.htm"

                efts_headline = f"[SEC 8-K] {company} filed {form_type}"
                results.append(_sec_make_item(
                    headline=efts_headline,
                    url=filing_url,
                    desc=f"Filed: {filed} | File#: {file_num}",
                    tickers=[t.upper() for t in tickers_raw[:5]],
                    ts_ms=int(time.time() * 1000),
                    sent_text=efts_headline,
                    label="8-K",
                ))

            if results:
                print(f"[AXIOM] SEC 8-K filings via EFTS: {len(results)} found")
                return results

        except Exception as e:
            print(f"[AXIOM] SEC EFTS failed ({endpoint[:60]}...): {e}")
            continue

    print("[AXIOM] SEC 8-K: all endpoints failed, no filings returned")
    return results


# RSS/Atom feeds polled on every crawl cycle.
# Each entry is a (url, display_name, source_type) tuple; crawler_run_once()
# unpacks the tuple in that order and passes the three values to
# crawler_fetch_rss(). The source_type values used in this table are
# "news", "wire" and "fda".
CRAWLER_RSS_SOURCES = [
    # ── Major Financial News ──
    ("https://www.cnbc.com/id/100003114/device/rss/rss.html", "CNBC", "news"),
    ("https://www.cnbc.com/id/10001147/device/rss/rss.html", "CNBC Markets", "news"),
    ("https://feeds.marketwatch.com/marketwatch/topstories/", "MarketWatch", "news"),
    ("https://feeds.marketwatch.com/marketwatch/marketpulse/", "MW MarketPulse", "news"),
    ("https://finance.yahoo.com/news/rssindex", "Yahoo Finance", "news"),
    ("https://seekingalpha.com/market_currents.xml", "Seeking Alpha", "news"),
    ("https://www.investing.com/rss/news.rss", "Investing.com", "news"),
    ("https://feeds.bloomberg.com/markets/news.rss", "Bloomberg", "news"),
    ("https://feeds.reuters.com/reuters/businessNews", "Reuters Business", "news"),
    ("https://www.barrons.com/feed", "Barron's", "news"),
    ("https://www.ft.com/rss/home", "Financial Times", "news"),
    ("https://feeds.benzinga.com/benzinga", "Benzinga", "news"),
    ("https://www.thestreet.com/feeds/all.rss", "TheStreet", "news"),

    # ── Wire Services (corporate press releases) ──
    ("https://www.prnewswire.com/rss/financial-services-latest-news/financial-services-latest-news-list.rss", "PR Newswire", "wire"),
    ("https://www.prnewswire.com/rss/news-releases-list.rss", "PR Newswire All", "wire"),
    ("https://feed.businesswire.com/rss/home/?rss=G1QFDERJXkJeGVJQWQ==", "Business Wire", "wire"),
    ("https://www.globenewswire.com/RssFeed/subjectcode/01-BUS/feedTitle/GlobeNewswire+-+Business+News.xml", "GlobeNewsWire", "wire"),
    ("https://www.globenewswire.com/RssFeed/subjectcode/01-BAN/feedTitle/GlobeNewswire+-+Banking+and+Financial+Services.xml", "GlobeNW Finance", "wire"),

    # ── Government & Regulatory ──
    ("https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/press-releases/rss.xml", "FDA", "fda"),
    ("https://www.fda.gov/about-fda/contact-fda/stay-informed/rss-feeds/drug-safety-and-availability/rss.xml", "FDA Drug Safety", "fda"),
    ("https://www.federalreserve.gov/feeds/press_all.xml", "Federal Reserve", "news"),
    ("https://home.treasury.gov/system/files/136/press-releases.xml", "US Treasury", "news"),

    # ── Google News Topic Searches (broad coverage) ──
    # Each URL below is a Google News RSS search for one catalyst theme.
    ("https://news.google.com/rss/search?q=stock+market+breaking+OR+FDA+OR+merger+OR+acquisition+OR+earnings&hl=en-US&gl=US&ceid=US:en", "Google News", "news"),
    ("https://news.google.com/rss/search?q=FDA+approval+OR+FDA+clearance+OR+PDUFA&hl=en-US&gl=US&ceid=US:en", "Google FDA", "news"),
    ("https://news.google.com/rss/search?q=merger+OR+acquisition+OR+buyout+OR+takeover+stock&hl=en-US&gl=US&ceid=US:en", "Google M&A", "news"),
    ("https://news.google.com/rss/search?q=IPO+OR+%22initial+public+offering%22+OR+%22direct+listing%22+stock&hl=en-US&gl=US&ceid=US:en", "Google IPO", "news"),
    ("https://news.google.com/rss/search?q=earnings+beat+OR+earnings+miss+OR+revenue+beat+OR+guidance+raised&hl=en-US&gl=US&ceid=US:en", "Google Earnings", "news"),
    ("https://news.google.com/rss/search?q=%22insider+buying%22+OR+%22insider+selling%22+OR+%22insider+trading%22+SEC+filing&hl=en-US&gl=US&ceid=US:en", "Google Insider", "news"),
    ("https://news.google.com/rss/search?q=%22price+target%22+OR+%22analyst+upgrade%22+OR+%22analyst+downgrade%22+stock&hl=en-US&gl=US&ceid=US:en", "Google Analyst", "news"),
    ("https://news.google.com/rss/search?q=stock+%22short+seller%22+OR+%22short+report%22+OR+Hindenburg+OR+Citron+OR+%22Muddy+Waters%22&hl=en-US&gl=US&ceid=US:en", "Google Short Sellers", "news"),
    ("https://news.google.com/rss/search?q=Federal+Reserve+OR+%22interest+rate%22+OR+%22rate+cut%22+OR+%22rate+hike%22+OR+inflation+CPI+PPI&hl=en-US&gl=US&ceid=US:en", "Google Macro", "news"),
    ("https://news.google.com/rss/search?q=%22stock+halt%22+OR+%22trading+halted%22+OR+LULD+OR+%22circuit+breaker%22&hl=en-US&gl=US&ceid=US:en", "Google Halts", "news"),
    ("https://news.google.com/rss/search?q=semiconductor+OR+AI+chip+OR+NVIDIA+OR+%22artificial+intelligence%22+stock&hl=en-US&gl=US&ceid=US:en", "Google AI/Semi", "news"),
    ("https://news.google.com/rss/search?q=%22stock+split%22+OR+%22reverse+split%22+OR+%22special+dividend%22+OR+buyback&hl=en-US&gl=US&ceid=US:en", "Google Corp Actions", "news"),
    ("https://news.google.com/rss/search?q=biotech+OR+pharma+OR+%22clinical+trial%22+OR+%22phase+3%22+results&hl=en-US&gl=US&ceid=US:en", "Google Biotech", "news"),
]


def crawler_run_once():
    """Run one crawl cycle and publish the result to ``crawler_store``.

    Every RSS source in CRAWLER_RSS_SOURCES is fetched, followed by the SEC
    EDGAR and Reddit fetchers. A source that raises is recorded in the error
    list and does not stop the cycle. The combined headlines are
    de-duplicated on a normalised headline prefix, given a sentiment score
    if they lack one, ordered most-bullish-first (newest first within a
    tie), capped at 500 and stored under ``crawler_lock``.
    """
    collected = []
    failures = []
    succeeded = []

    # Express every source as (display name, zero-argument fetcher) so one
    # loop handles RSS feeds and the two custom fetchers alike.
    jobs = [
        (name, lambda u=url, n=name, t=stype: crawler_fetch_rss(u, n, t))
        for url, name, stype in CRAWLER_RSS_SOURCES
    ]
    jobs.append(("SEC EDGAR", lambda: crawler_fetch_sec_filings()))
    jobs.append(("Reddit", lambda: crawler_fetch_reddit()))

    for source_name, fetch in jobs:
        try:
            collected.extend(fetch())
            succeeded.append(source_name)
        except Exception as exc:
            failures.append(f"{source_name}: {exc}")

    # De-duplicate on the first 60 alphanumeric characters of the lowercased
    # headline; the first occurrence wins.
    seen_keys = set()
    deduped = []
    for entry in collected:
        fingerprint = re.sub(r'[^a-z0-9]', '', (entry["headline"] or "").lower())[:60]
        if not fingerprint or fingerprint in seen_keys:
            continue
        seen_keys.add(fingerprint)
        # Fetchers that did not score their items get scored here.
        if "sentiment" not in entry:
            entry["sentiment"], entry["sentiment_label"] = crawler_score_sentiment(
                entry.get("headline", "") + " " + entry.get("desc", ""),
                entry.get("source_type", "news")
            )
        deduped.append(entry)

    # Highest sentiment first; within equal sentiment, newest timestamp first.
    ranked = sorted(
        deduped,
        key=lambda h: (h.get("sentiment", 0), h.get("ts", 0)),
        reverse=True,
    )[:500]

    with crawler_lock:
        crawler_store["headlines"] = ranked
        crawler_store["last_scan"] = int(time.time() * 1000)
        crawler_store["scan_count"] += 1
        crawler_store["errors"] = failures
        crawler_store["sources_ok"] = succeeded

    # SEC and Reddit each count as one source regardless of how many
    # filing types / subreddits they cover.
    total = len(CRAWLER_RSS_SOURCES) + 2
    print(f"[CRAWLER] Scan #{crawler_store['scan_count']}: {len(ranked)} headlines from {len(succeeded)}/{total} sources")


def crawler_thread_main():
    """Body of the background crawler thread; never returns.

    Prints a startup banner, waits five seconds so the server can start
    first, then repeats forever: run one crawl cycle, sleep for
    CRAWLER_INTERVAL seconds. An exception escaping a cycle is printed and
    the loop carries on with the next cycle.
    """
    banner = "[CRAWLER] Starting background news crawler (30+ sources including RSS, Reddit, SEC, FDA)..."
    print(banner)

    startup_delay_s = 5
    time.sleep(startup_delay_s)

    while True:
        try:
            crawler_run_once()
        except Exception as exc:
            print(f"[CRAWLER] Fatal error in crawl cycle: {exc}")
        time.sleep(CRAWLER_INTERVAL)


def get_crawler_data(symbol=None):
    """Return a snapshot of the crawler store, optionally filtered by ticker.

    Args:
        symbol: Optional ticker symbol (any case). When given, only headlines
            that list the symbol in their ``tickers`` or mention it as a
            standalone token in the headline text are returned.

    Returns:
        A dict with ``headlines``, ``last_scan``, ``scan_count``, ``errors``,
        ``sources_ok`` and ``count``; plus ``symbol`` (upper-cased) when a
        filter was applied.
    """
    # Copy under the lock so the caller never sees a half-updated store.
    with crawler_lock:
        data = {
            "headlines": list(crawler_store["headlines"]),
            "last_scan": crawler_store["last_scan"],
            "scan_count": crawler_store["scan_count"],
            "errors": list(crawler_store["errors"]),
            "sources_ok": list(crawler_store["sources_ok"]),
        }

    if symbol:
        sym_upper = symbol.upper()
        # Match the symbol only as a whole token. A plain substring test makes
        # short tickers match inside ordinary words ("AI" in "SAID", "F" in
        # almost every headline).
        mention = re.compile(
            r'(?<![A-Z0-9])' + re.escape(sym_upper) + r'(?![A-Z0-9])'
        )
        data["headlines"] = [
            item for item in data["headlines"]
            # `or []` / `or ""` tolerate items whose field is present but None.
            if sym_upper in (item.get("tickers") or [])
            or mention.search((item.get("headline") or "").upper())
        ]
        data["symbol"] = sym_upper

    data["count"] = len(data["headlines"])
    return data


# ═══════════════════════════════════════════════════════════════
# YFINANCE DATA FETCHERS
# ═══════════════════════════════════════════════════════════════

def fetch_batch_quotes(symbols):
    """
    Fetch latest daily quotes for multiple symbols in one yfinance download.

    Args:
        symbols: List of ticker symbols.

    Returns:
        Dict mapping symbol -> quote dict (price, OHLC, volume, prevClose,
        prevVolume, marketCap, change, changePct, source). Symbols with no
        usable data are omitted. If the batch download itself raises, up to
        30 symbols are retried individually via ``fast_info``.

    Side effects:
        Updates the delisted-ticker tracker when the batch mostly succeeded,
        and stores the result in the cache.
    """
    if not symbols:
        return {}

    cache_key = "batch:" + ",".join(sorted(symbols))
    cached = cache_get(cache_key, CACHE_TTL_QUOTE)
    if cached:
        return cached

    result = {}
    try:
        # Use yf.download() for batch — single HTTP call for all symbols
        # Much faster and less rate-limiting than individual Ticker() calls
        import pandas as pd

        def _num(row, column):
            """Return row[column] as float, treating a missing/NaN cell as 0.0."""
            value = row.get(column, 0)
            return 0.0 if pd.isna(value) else float(value)

        sym_str = " ".join(symbols)
        hist = yf.download(sym_str, period="2d", interval="1d", group_by="ticker",
                           threads=True, progress=False, timeout=15)

        for sym in symbols:
            try:
                # Extract this symbol's frame. Newer yfinance returns
                # MultiIndex columns even for a single ticker, so decide by
                # the actual column type rather than by len(symbols).
                if hist is None:
                    continue
                if isinstance(hist.columns, pd.MultiIndex):
                    if sym not in hist.columns.get_level_values(0):
                        continue
                    sym_hist = hist[sym]
                elif len(symbols) == 1:
                    sym_hist = hist  # Flat columns: single-symbol download
                else:
                    continue

                if sym_hist is None or sym_hist.empty:
                    continue

                # Drop rows with no close (dates this symbol did not trade)
                sym_hist = sym_hist.dropna(subset=["Close"])
                if sym_hist.empty:
                    continue

                today_row = sym_hist.iloc[-1]
                price = round(_num(today_row, "Close"), 2)
                if price <= 0:
                    continue

                quote = {
                    "symbol": sym,
                    "price": price,
                    "open": round(_num(today_row, "Open"), 2),
                    "high": round(_num(today_row, "High"), 2),
                    "low": round(_num(today_row, "Low"), 2),
                    "close": price,
                    "volume": int(_num(today_row, "Volume")),
                    "prevClose": 0, "prevVolume": 0,
                    "marketCap": 0,
                    "change": 0, "changePct": 0,
                    "source": "yfinance"
                }

                if len(sym_hist) >= 2:
                    prev_row = sym_hist.iloc[-2]
                    quote["prevClose"] = round(_num(prev_row, "Close"), 2)
                    quote["prevVolume"] = int(_num(prev_row, "Volume"))

                # Calculate change
                if quote["prevClose"] > 0:
                    quote["change"] = round(price - quote["prevClose"], 2)
                    quote["changePct"] = round(
                        (price - quote["prevClose"]) / quote["prevClose"] * 100, 2
                    )

                result[sym] = quote

            except Exception as e:
                print(f"[AXIOM] Quote error for {sym}: {e}")
                continue

        print(f"[AXIOM] Batch download: requested {len(symbols)}, got {len(result)} quotes")

        # Track delisted tickers — only when batch mostly succeeded
        # This prevents false positives on weekends/API outages
        success_rate = len(result) / max(len(symbols), 1)
        if success_rate >= 0.5 and len(result) >= 3:
            # NOTE(review): the verification lookup below does network I/O
            # while delisted_lock is held — confirm no latency-sensitive
            # reader contends on this lock.
            with delisted_lock:
                for sym in symbols:
                    if sym in result:
                        # Got data — reset failure counter
                        delisted_tracker.pop(sym, None)
                        delisted_confirmed.discard(sym)
                    else:
                        # No data while most others succeeded
                        delisted_tracker[sym] = delisted_tracker.get(sym, 0) + 1
                        if delisted_tracker[sym] >= DELISTED_THRESHOLD and sym not in delisted_confirmed:
                            # Verify with a single-symbol lookup before confirming
                            try:
                                verify = yf.Ticker(sym)
                                vhist = verify.history(period="5d")
                                if vhist is not None and not vhist.empty:
                                    # Actually has data — false alarm
                                    delisted_tracker[sym] = 0
                                    print(f"[AXIOM] Ticker {sym} passed verification — NOT delisted")
                                    continue
                            except Exception as verify_err:
                                # A failed lookup still counts as "no data",
                                # but say so instead of hiding the error.
                                print(f"[AXIOM] Verification lookup for {sym} failed: {verify_err}")
                            delisted_confirmed.add(sym)
                            print(f"[AXIOM] Ticker {sym} confirmed DELISTED after {delisted_tracker[sym]} failures + verification")

    except Exception as e:
        print(f"[AXIOM] Batch quote error: {e}")
        # Fallback: try individual tickers if batch download fails
        try:
            tickers = yf.Tickers(" ".join(symbols[:30]))  # Cap fallback at 30
            for sym in symbols[:30]:
                try:
                    t = tickers.tickers.get(sym)
                    if not t:
                        continue
                    fi = t.fast_info
                    lp = float(fi.last_price or 0)
                    pc = float(fi.previous_close or 0)
                    if lp <= 0:
                        continue
                    result[sym] = {
                        "symbol": sym, "price": round(lp, 2),
                        "open": 0, "high": 0, "low": 0, "close": round(lp, 2),
                        "volume": 0, "prevClose": round(pc, 2), "prevVolume": 0,
                        "marketCap": int(fi.market_cap or 0),
                        "change": round(lp - pc, 2) if pc else 0,
                        "changePct": round((lp - pc) / pc * 100, 2) if pc > 0 else 0,
                        "source": "yfinance-fallback"
                    }
                except Exception:
                    # Best effort: skip a symbol whose fast_info lookup fails.
                    continue
            print(f"[AXIOM] Fallback quotes: got {len(result)}")
        except Exception as e2:
            print(f"[AXIOM] Fallback also failed: {e2}")

    cache_set(cache_key, result)
    return result


def fetch_history(symbol, days=90, interval="1d"):
    """
    Fetch historical OHLCV bars for one symbol.

    Args:
        symbol: Ticker symbol.
        days: Look-back window. Up to 365 it is requested as "<days>d";
            above that as whole years ("<days // 365>y").
        interval: yfinance bar interval, e.g. "1d".

    Returns:
        {"symbol": symbol, "bars": [{"date", "o", "h", "l", "c", "v"}, ...]}.
        Bars are empty when the lookup fails; the error is printed.
    """
    import math

    cache_key = f"hist:{symbol}:{days}:{interval}"
    cached = cache_get(cache_key, CACHE_TTL_HISTORY)
    if cached:
        return cached

    def _num(value):
        """Return *value* as float, or None when it is missing or NaN."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return None
        return None if math.isnan(number) else number

    result = {"symbol": symbol, "bars": []}
    try:
        t = yf.Ticker(symbol)
        # NOTE(review): floor division means e.g. days=500 requests "1y" —
        # confirm callers above 365 days expect whole-year truncation.
        period = f"{days}d" if days <= 365 else f"{days // 365}y"
        hist = t.history(period=period, interval=interval)

        for idx, row in hist.iterrows():
            close = _num(row.get("Close", 0))
            if close is None:
                # A bar without a close is unusable; skip it rather than let
                # int()/float() on NaN abort the loop and drop later bars.
                continue
            bar = {
                # NOTE(review): date-only format; intraday intervals would
                # yield repeated dates — confirm intended.
                "date": idx.strftime("%Y-%m-%d"),
                "o": round(_num(row.get("Open", 0)) or 0.0, 2),
                "h": round(_num(row.get("High", 0)) or 0.0, 2),
                "l": round(_num(row.get("Low", 0)) or 0.0, 2),
                "c": round(close, 2),
                "v": int(_num(row.get("Volume", 0)) or 0)
            }
            result["bars"].append(bar)

    except Exception as e:
        print(f"[AXIOM] History error for {symbol}: {e}")

    cache_set(cache_key, result)
    return result


def fetch_short_interest(symbol):
    """
    Fetch short interest data: short float %, shares short, short ratio.

    Uses yfinance as data source. Note: yfinance shortPercentOfFloat can be
    inaccurate because Yahoo uses a different float estimate than exchanges.
    Always prefer Finviz when available (handled in frontend priority logic).

    Args:
        symbol: Ticker symbol.

    Returns:
        Dict with shortPctFloat (percent), sharesShort, shortRatio,
        sharesFloat, sharesOutstanding, shortPriorMonth, dateShort
        ("YYYY-MM-DD" when Yahoo supplies an epoch) and source. Fields stay
        None when unavailable or when the lookup fails (error is printed).
    """
    from datetime import timezone

    cache_key = f"short:{symbol}"
    cached = cache_get(cache_key, CACHE_TTL_SHORT)
    if cached:
        return cached

    result = {
        "symbol": symbol,
        "shortPctFloat": None,
        "sharesShort": None,
        "shortRatio": None,
        "sharesFloat": None,
        "sharesOutstanding": None,
        "shortPriorMonth": None,
        "dateShort": None,
        "source": "yfinance"
    }

    try:
        t = yf.Ticker(symbol)
        info = t.info or {}

        # Short interest fields from Yahoo
        raw_spf = info.get("shortPercentOfFloat")
        result["sharesShort"] = info.get("sharesShort")
        result["shortRatio"] = info.get("shortRatio")
        result["sharesFloat"] = info.get("floatShares")
        result["sharesOutstanding"] = info.get("sharesOutstanding")
        result["shortPriorMonth"] = info.get("sharesShortPriorMonth")
        result["dateShort"] = info.get("dateShortInterest")

        # Convert epoch timestamp to readable date. Interpret it in UTC: the
        # local-time conversion moves a midnight-UTC stamp to the previous
        # calendar day on hosts west of UTC.
        if result["dateShort"] and isinstance(result["dateShort"], (int, float)):
            result["dateShort"] = datetime.fromtimestamp(
                result["dateShort"], tz=timezone.utc
            ).strftime("%Y-%m-%d")

        # yfinance shortPercentOfFloat is inconsistent:
        # Sometimes it's a decimal (0.41 = 41%), sometimes already a pct (41.03).
        # Best approach: if we have sharesShort AND sharesFloat, calculate it ourselves.
        if result["sharesShort"] and result["sharesFloat"] and result["sharesFloat"] > 0:
            calculated_pct = round((result["sharesShort"] / result["sharesFloat"]) * 100, 2)
            result["shortPctFloat"] = calculated_pct
            result["shortPctFloat_raw"] = raw_spf  # Keep raw for debugging
            print(f"[AXIOM] {symbol} short: calculated {calculated_pct}% from {result['sharesShort']}/{result['sharesFloat']}, raw yf={raw_spf}")
        elif raw_spf is not None:
            # Fallback: use raw value with heuristic conversion
            # If < 1, it's likely a decimal (0.41 = 41%). If >= 1, it's already percent.
            if raw_spf < 1:
                result["shortPctFloat"] = round(raw_spf * 100, 2)
            else:
                result["shortPctFloat"] = round(raw_spf, 2)

    except Exception as e:
        print(f"[AXIOM] Short interest error for {symbol}: {e}")

    cache_set(cache_key, result)
    return result


def fetch_fundamentals(symbol):
    """
    Fetch company fundamentals for one symbol from yfinance.

    Covers identity (name, sector, industry), valuation multiples, margins
    and growth, balance-sheet ratios, volume and float, analyst targets,
    ownership and the next earnings date.

    Returns a dict whose keys are always present; a value is None when Yahoo
    does not supply it or when the lookup fails (the error is printed).
    Growth, margin, yield, ROE and holding figures are multiplied by 100;
    multiples, targets and the 52-week range are rounded to two decimals.
    """
    cache_key = f"fund:{symbol}"
    hit = cache_get(cache_key, CACHE_TTL_FUNDAMENTALS)
    if hit:
        return hit

    # (output key, Yahoo info key) in output order. A None info key marks a
    # field that is not a straight copy from `info`.
    schema = (
        ("name", None),
        ("sector", "sector"),
        ("industry", "industry"),
        ("marketCap", "marketCap"),
        ("pe", "trailingPE"),
        ("forwardPe", "forwardPE"),
        ("peg", "pegRatio"),
        ("ps", "priceToSalesTrailing12Months"),
        ("pb", "priceToBook"),
        ("debtToEquity", "debtToEquity"),
        ("currentRatio", "currentRatio"),
        ("roe", "returnOnEquity"),
        ("revenueGrowth", "revenueGrowth"),
        ("earningsGrowth", "earningsGrowth"),
        ("profitMargin", "profitMargins"),
        ("operatingMargin", "operatingMargins"),
        ("revenue", "totalRevenue"),
        ("netIncome", "netIncomeToCommon"),
        ("freeCashFlow", "freeCashflow"),
        ("dividendYield", "dividendYield"),
        ("beta", "beta"),
        ("fiftyTwoWeekHigh", "fiftyTwoWeekHigh"),
        ("fiftyTwoWeekLow", "fiftyTwoWeekLow"),
        ("avgVolume", "averageVolume"),
        ("avgVolume10d", "averageVolume10days"),
        ("sharesFloat", "floatShares"),
        ("sharesOutstanding", "sharesOutstanding"),
        ("targetMeanPrice", "targetMeanPrice"),
        ("targetHighPrice", "targetHighPrice"),
        ("targetLowPrice", "targetLowPrice"),
        ("recommendationKey", "recommendationKey"),
        ("numberOfAnalysts", "numberOfAnalystOpinions"),
        ("earningsDate", None),
        ("exDividendDate", None),
        ("institutionalHolding", "heldPercentInstitutions"),
        ("insiderHolding", "heldPercentInsiders"),
    )
    percent_fields = (
        "roe", "revenueGrowth", "earningsGrowth", "profitMargin",
        "operatingMargin", "dividendYield", "institutionalHolding", "insiderHolding",
    )
    two_decimal_fields = (
        "pe", "forwardPe", "peg", "ps", "pb", "beta",
        "targetMeanPrice", "targetHighPrice", "targetLowPrice",
        "fiftyTwoWeekHigh", "fiftyTwoWeekLow", "debtToEquity", "currentRatio",
    )

    # Pre-fill every key with None so the response shape is fixed.
    result = {"symbol": symbol}
    for out_key, _ in schema:
        result[out_key] = None
    result["source"] = "yfinance"

    try:
        ticker = yf.Ticker(symbol)
        info = ticker.info or {}

        result["name"] = info.get("longName") or info.get("shortName")
        for out_key, info_key in schema:
            if info_key is not None:
                result[out_key] = info.get(info_key)

        # Next earnings date: best effort, the calendar may be a dict or a
        # DataFrame-like object; any failure leaves the field as None.
        try:
            calendar = ticker.calendar
            if isinstance(calendar, dict):
                dates = calendar.get("Earnings Date")
                if dates and len(dates) > 0:
                    first = dates[0]
                    result["earningsDate"] = str(first.date()) if hasattr(first, "date") else str(first)
            elif calendar is not None and hasattr(calendar, "iloc"):
                result["earningsDate"] = str(calendar.iloc[0, 0]) if calendar.shape[1] > 0 else None
        except Exception:
            pass

        # Decimal fractions -> percentages for readability.
        for field in percent_fields:
            value = result[field]
            if value is not None and isinstance(value, (int, float)):
                result[field] = round(value * 100, 2)

        # Trim float noise on multiples, targets and the 52-week range.
        for field in two_decimal_fields:
            value = result[field]
            if value is not None and isinstance(value, float):
                result[field] = round(value, 2)

    except Exception as e:
        print(f"[AXIOM] Fundamentals error for {symbol}: {e}")

    cache_set(cache_key, result)
    return result


def fetch_earnings_calendar(symbols):
    """
    Look up the next earnings date for each symbol in *symbols*.

    Returns a dict mapping symbol -> {"symbol", "earningsDate"}. The date is
    a string, or None when the calendar has no date or the lookup raised.
    The whole batch is cached under a key built from the sorted symbols.
    """
    cache_key = "earnings:" + ",".join(sorted(symbols))
    hit = cache_get(cache_key, CACHE_TTL_EARNINGS)
    if hit:
        return hit

    calendar_by_symbol = {}
    for ticker_symbol in symbols:
        next_date = None
        try:
            calendar = yf.Ticker(ticker_symbol).calendar
            if isinstance(calendar, dict):
                # Dict form: "Earnings Date" holds a sequence; use the first.
                dates = calendar.get("Earnings Date")
                if dates and len(dates) > 0:
                    first = dates[0]
                    next_date = str(first.date()) if hasattr(first, "date") else str(first)
            elif calendar is not None and hasattr(calendar, "iloc"):
                # DataFrame-like form: take the top-left cell when a column exists.
                next_date = str(calendar.iloc[0, 0]) if calendar.shape[1] > 0 else None
        except Exception:
            # Best effort per symbol: a failed lookup reports no date.
            next_date = None
        calendar_by_symbol[ticker_symbol] = {"symbol": ticker_symbol, "earningsDate": next_date}

    cache_set(cache_key, calendar_by_symbol)
    return calendar_by_symbol


def fetch_finra_short_volume(symbol):
    """
    Fetch FINRA daily short sale volume for a specific ticker.

    Walks backwards from today over 7 calendar days, skipping Saturdays and
    Sundays (so at most 5 files are requested), downloads the ``CNMSshvol``
    file for each date and stops at the first one that reports a non-zero
    short volume for ``symbol``. Python fetches directly from FINRA — no
    CORS proxy needed.

    The percentage is computed against the larger of FINRA's own total
    volume and the daily volume returned by ``fetch_history`` for the same
    date; ``volSource`` records which of the two was used.

    Returns a dict with ``symbol``, ``date`` (YYYY-MM-DD, or None),
    ``shortVolume``, ``totalVolume``, ``finraTotal``, ``pct``, ``volSource``
    and ``source``. When no file yields data, the numeric fields are 0 and
    an ``error`` key is added; that fallback result is not cached.
    """
    cache_key = f"finra:{symbol}"
    cached = cache_get(cache_key, CACHE_TTL_SHORT)
    if cached:
        return cached

    # Also get real consolidated volume from yfinance for accurate percentage
    real_vol_data = fetch_history(symbol, 5, "1d")
    real_vol_map = {}
    if real_vol_data and real_vol_data.get("bars"):
        for bar in real_vol_data["bars"]:
            real_vol_map[bar["date"]] = bar["v"]

    wanted = symbol.upper()
    # NOTE(review): uses the server's local clock; FINRA file dates are
    # presumably US trading dates — confirm if the server runs elsewhere.
    today = datetime.now()

    for offset in range(7):
        d = today - timedelta(days=offset)
        if d.weekday() >= 5:  # Skip weekends
            continue
        ds = d.strftime("%Y%m%d")
        iso_date = d.strftime("%Y-%m-%d")
        url = f"https://cdn.finra.org/equity/regsho/daily/CNMSshvol{ds}.txt"

        try:
            req = urllib.request.Request(url, headers={"User-Agent": UA})
            # Context manager guarantees the connection is released even if
            # reading or decoding fails.
            with urllib.request.urlopen(req, timeout=10, context=SSL_CTX) as resp:
                text = resp.read().decode("utf-8", errors="ignore")
            lines = text.strip().split("\n")
            print(f"[AXIOM] FINRA file {ds}: {len(lines)} lines, first line: {lines[0][:80] if lines else 'EMPTY'}")

            total_short = 0
            total_vol = 0
            found = False

            debug_printed = False
            for line in lines[1:]:  # Skip header
                # Layout: Date|Symbol|ShortVolume|ShortExemptVolume|TotalVolume|Market
                parts = line.split("|")
                if len(parts) < 5 or parts[1].strip().upper() != wanted:
                    continue
                if not debug_printed:
                    print(f"[AXIOM] FINRA raw line for {symbol}: {repr(line[:200])}")
                    print(f"[AXIOM] FINRA parts: p2={repr(parts[2])} p3={repr(parts[3])} p4={repr(parts[4])}")
                    debug_printed = True
                try:
                    short_txt = parts[2].strip()
                    total_txt = parts[4].strip()
                    short_vol = int(float(short_txt)) if short_txt else 0
                    tot = int(float(total_txt)) if total_txt else 0
                except ValueError:
                    continue
                # FINRA defines ShortVolume as short sale PLUS short sale
                # exempt trades, so ShortExemptVolume (column 3) is already
                # included and must not be added a second time.
                total_short += short_vol
                total_vol += tot
                found = True

            print(f"[AXIOM] FINRA {ds} for {symbol}: found={found}, short={total_short}, vol={total_vol}")

            if found and total_short > 0:
                # Use real consolidated volume if available; a missing or
                # None bar volume counts as 0 so max() cannot raise.
                real_vol = real_vol_map.get(iso_date) or 0
                denom_vol = max(real_vol, total_vol)
                pct = (total_short / denom_vol * 100) if denom_vol > 0 else 0
                vol_source = "consolidated" if real_vol > total_vol else "FINRA"

                result = {
                    "symbol": symbol,
                    "date": iso_date,
                    "shortVolume": total_short,
                    "totalVolume": denom_vol,
                    "finraTotal": total_vol,
                    "pct": round(pct, 1),
                    "volSource": vol_source,
                    "source": "FINRA"
                }
                cache_set(cache_key, result)
                return result

        except Exception as e:
            # A missing file (holiday, not yet published) or a network error
            # just moves on to the previous day.
            print(f"[AXIOM] FINRA fetch failed for {ds}: {e}")
            continue

    # Same keys as the success result so consumers can read either shape.
    return {
        "symbol": symbol, "date": None, "shortVolume": 0,
        "totalVolume": 0, "finraTotal": 0, "pct": 0, "volSource": "N/A",
        "error": "No FINRA data found", "source": "FINRA"
    }


def fetch_finviz_short(symbol):
    """
    Scrape Finviz for short float %, short ratio, shares short.

    Server-side — no CORS proxy needed. Uses a two-step approach: first
    visits the screener page with a cookie-aware opener, then fetches the
    quote page with whatever cookies that produced. A failure of the first
    step is logged and ignored.

    Returns a dict with ``symbol``, ``shortFloat``, ``shortRatio``,
    ``sharesShort`` (each the raw string scraped from the page, or None)
    and ``source``. On a block page or any exception an ``error`` key is
    added. The result — including error results — is cached for
    CACHE_TTL_SHORT seconds.
    """
    cache_key = f"finviz:{symbol}"
    cached = cache_get(cache_key, CACHE_TTL_SHORT)
    if cached:
        return cached

    result = {
        "symbol": symbol,
        "shortFloat": None,
        "shortRatio": None,
        "sharesShort": None,
        "source": "Finviz"
    }

    import re
    import http.cookiejar

    def parse_finviz_num(s):
        # Convert Finviz shorthand such as "1.25B" / "340M" / "12K" to a float.
        s = str(s).strip().upper()
        mult = 1
        if s.endswith("B"):
            mult = 1e9
            s = s[:-1]
        elif s.endswith("M"):
            mult = 1e6
            s = s[:-1]
        elif s.endswith("K"):
            mult = 1e3
            s = s[:-1]
        return float(s) * mult

    try:
        # Build a cookie-aware opener to handle Finviz session
        cj = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cj),
            urllib.request.HTTPSHandler(context=SSL_CTX)
        )

        base_headers = {
            "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
            "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
            "Accept-Language": "en-US,en;q=0.9",
            "Accept-Encoding": "identity",
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            "Upgrade-Insecure-Requests": "1",
        }

        # Step 1: Hit the screener page first to establish session cookies.
        # The body is not needed; the with-block only ensures the response
        # is closed once the cookie jar has been populated.
        try:
            screener_req = urllib.request.Request("https://finviz.com/screener.ashx", headers=base_headers)
            with opener.open(screener_req, timeout=8):
                pass
            print(f"[AXIOM] Finviz screener pre-fetch OK, cookies: {len(cj)}")
        except Exception as e1:
            print(f"[AXIOM] Finviz screener pre-fetch failed: {e1} (continuing anyway)")

        # Step 2: Fetch the actual quote page with cookies from step 1.
        # The symbol is percent-encoded so characters such as '&' or spaces
        # cannot alter the query string.
        ticker_q = urllib.parse.quote(str(symbol), safe="")
        url = f"https://finviz.com/quote.ashx?t={ticker_q}&ty=c&p=d&b=1"
        quote_req = urllib.request.Request(url, headers={
            **base_headers,
            "Referer": "https://finviz.com/screener.ashx",
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "same-origin",
            "Sec-Fetch-User": "?1",
        })
        with opener.open(quote_req, timeout=10) as resp:
            page = resp.read().decode("utf-8", errors="ignore")
        # Lower-cased once; reused by the block check and every label lookup.
        page_lower = page.lower()

        print(f"[AXIOM] Finviz HTML length for {symbol}: {len(page)}")

        # Debug: check if we got a real page or a block/captcha
        if 'captcha' in page_lower or 'access denied' in page_lower or len(page) < 5000:
            print(f"[AXIOM] Finviz BLOCKED for {symbol} — captcha/access denied/short page")
            result["error"] = "Finviz blocked request"
            cache_set(cache_key, result)
            return result

        # Helper: find a label in Finviz HTML, grab the VALUE in the NEXT <td>
        # CRITICAL: limit search to a small window (300 chars) to avoid matching
        # values from unrelated rows when regex crosses table boundaries
        def finviz_val(label):
            idx = page_lower.find(label.lower())
            if idx < 0:
                return None
            # Only look at the next 300 chars from the label position
            chunk = page[idx:idx+300]
            # Try: label...end-td...next-td with bold value
            m = re.search(r'</td>\s*<td[^>]*>(?:<[^b][^>]*>)*<b[^>]*>([^<]+)</b>', chunk, re.I)
            if m:
                val = m.group(1).strip()
                print(f"[AXIOM] Finviz '{label}' = '{val}' (primary)")
                return val
            # Fallback: just grab the first <b>val</b> after the label
            m2 = re.search(r'<b[^>]*>([^<]+)</b>', chunk)
            if m2:
                val = m2.group(1).strip()
                # Skip if it looks like it's just the label itself repeated
                if val.lower() != label.lower():
                    print(f"[AXIOM] Finviz '{label}' = '{val}' (fallback)")
                    return val
            return None

        result["shortFloat"] = finviz_val("Short Float")
        result["shortRatio"] = finviz_val("Short Ratio")
        result["sharesShort"] = finviz_val("Shs Short") or finviz_val("Short Interest")

        # Validate: if shortFloat looks unreasonable (>100%) and we have sharesShort data,
        # try to calculate it ourselves from the Finviz data
        if result["shortFloat"]:
            try:
                sf_pct = float(str(result["shortFloat"]).replace("%", ""))
                if sf_pct > 100:
                    print(f"[AXIOM] WARNING: Finviz shortFloat={sf_pct}% seems too high, may be wrong field")
                    # Try to find "Shs Float" (total float shares) and calculate
                    shs_float_str = finviz_val("Shs Float")
                    if shs_float_str and result["sharesShort"]:
                        shs_float = parse_finviz_num(shs_float_str)
                        shs_short = parse_finviz_num(result["sharesShort"])
                        if shs_float > 0:
                            recalc = round((shs_short / shs_float) * 100, 2)
                            print(f"[AXIOM] Finviz recalculated: {shs_short}/{shs_float} = {recalc}%")
                            result["shortFloat"] = str(recalc) + "%"
            except Exception as ve:
                # Unparseable values leave the scraped strings untouched.
                print(f"[AXIOM] Finviz validation error: {ve}")

        print(f"[AXIOM] Finviz final — SF: {result['shortFloat']}, SR: {result['shortRatio']}, SS: {result['sharesShort']}")

    except Exception as e:
        print(f"[AXIOM] Finviz scrape error for {symbol}: {e}")
        result["error"] = str(e)

    cache_set(cache_key, result)
    return result


def fetch_ticker_news(symbol):
    """
    Fetch news for a ticker from multiple free sources:
    1. Yahoo Finance (via yfinance) — up to the first 20 items
    2. Google News RSS — free, no API key, up to the first 15 items

    Every article is passed through ``crawler_is_spam`` (the same filter the
    crawler uses), de-duplicated on the first 60 alphanumeric characters of
    its lower-cased headline, and sorted newest first by ``ts``
    (milliseconds since the epoch, 0 when unknown).

    Returns ``{"symbol", "articles", "count", "source"}``. ``articles`` is
    capped at 30 entries while ``count`` is the number of unique articles
    before that cap. A failure of either source is logged and the other
    source is still used. The result is cached for 5 minutes.
    """
    # Function-scope imports so the block does not depend on imports that
    # may or may not exist elsewhere in the module.
    import re
    from html import unescape

    cache_key = f"news:{symbol}"
    cached = cache_get(cache_key, 300)  # 5 min cache
    if cached:
        return cached

    articles = []

    # 1. Yahoo Finance news via yfinance (handles both old and new API formats)
    try:
        raw_news = yf.Ticker(symbol).news
        news_items = []
        # New format (yfinance 1.x): dict with 'news' key containing list
        if isinstance(raw_news, dict) and "news" in raw_news:
            news_items = raw_news["news"] or []
        # Old format (yfinance 0.2.x): direct list of dicts
        elif isinstance(raw_news, list):
            news_items = raw_news
        print(f"[AXIOM] Yahoo news for {symbol}: got {len(news_items)} items (type={type(raw_news).__name__})")

        for item in news_items[:20]:
            if not isinstance(item, dict):
                continue
            # New format nests the fields under 'content'. A missing, None
            # or non-dict value falls back to the item itself so one odd
            # entry cannot abort the remaining items.
            content = item.get("content")
            if not isinstance(content, dict):
                content = item
            title = content.get("title", "") or item.get("title", "")
            # Publisher: new format uses provider.displayName, old uses publisher
            provider = content.get("provider", {})
            source = provider.get("displayName", "") if isinstance(provider, dict) else ""
            if not source:
                source = item.get("publisher", "")
            # URL: new format uses canonicalUrl.url, old uses link
            url_obj = content.get("canonicalUrl", {})
            link = url_obj.get("url", "") if isinstance(url_obj, dict) else ""
            if not link:
                link = item.get("link", "") or item.get("url", "")
            # Timestamp: new format uses pubDate, old uses providerPublishTime
            pub_ts = item.get("providerPublishTime", 0)
            if not pub_ts:
                pub_date_str = content.get("pubDate", "")
                if pub_date_str:
                    # Try RFC 2822 first, then ISO 8601 with a trailing "Z".
                    try:
                        pub_ts = int(parsedate_to_datetime(pub_date_str).timestamp())
                    except Exception:
                        try:
                            pub_ts = int(datetime.fromisoformat(pub_date_str.replace("Z", "+00:00")).timestamp())
                        except Exception:
                            pub_ts = 0

            if crawler_is_spam(title, source):
                continue
            if not title:
                continue
            articles.append({
                "headline": title,
                "source": source,
                "url": link,
                "created_at": datetime.fromtimestamp(pub_ts).isoformat() if pub_ts else "",
                "ts": pub_ts * 1000 if pub_ts else 0,
                "origin": "yahoo"
            })
    except Exception as e:
        print(f"[AXIOM] Yahoo news error for {symbol}: {e}")

    # 2. Google News RSS — free, no key needed
    try:
        query = urllib.parse.quote(f'"{symbol}" stock')
        url = f"https://news.google.com/rss/search?q={query}&hl=en-US&gl=US&ceid=US:en"
        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=8, context=SSL_CTX) as resp:
            xml = resp.read().decode("utf-8", errors="ignore")
        items = re.findall(r'<item>(.*?)</item>', xml, re.DOTALL)

        for item_xml in items[:15]:
            title_m = re.search(r'<title>(.*?)</title>', item_xml)
            link_m = re.search(r'<link/?\s*>(.*?)<', item_xml) or re.search(r'<link>(.*?)</link>', item_xml)
            date_m = re.search(r'<pubDate>(.*?)</pubDate>', item_xml)
            source_m = re.search(r'<source[^>]*>(.*?)</source>', item_xml)

            # html.unescape decodes each entity exactly once (so "&amp;lt;"
            # becomes "&lt;", not "<") and also covers numeric entities.
            title = unescape(title_m.group(1)) if title_m else ""
            link = link_m.group(1).strip() if link_m else ""
            source = unescape(source_m.group(1)) if source_m else ""
            pub_date = date_m.group(1) if date_m else ""

            if crawler_is_spam(title, source):
                continue

            # Parse date
            ts = 0
            try:
                ts = int(parsedate_to_datetime(pub_date).timestamp() * 1000)
            except Exception:
                pass  # leave ts at 0 so the article sorts last

            articles.append({
                "headline": title,
                "source": source,
                "url": link,
                "created_at": pub_date,
                "ts": ts,
                "origin": "google"
            })
    except Exception as e:
        print(f"[AXIOM] Google News error for {symbol}: {e}")

    # Deduplicate by headline similarity; entries whose key is empty
    # (no alphanumeric headline text) are dropped here.
    seen = set()
    unique = []
    for a in articles:
        key = re.sub(r'[^a-z0-9]', '', (a["headline"] or "").lower())[:60]
        if key and key not in seen:
            seen.add(key)
            unique.append(a)

    # Sort by newest first
    unique.sort(key=lambda a: a.get("ts", 0), reverse=True)

    result = {"symbol": symbol, "articles": unique[:30], "count": len(unique), "source": "multi"}
    print(f"[AXIOM] News for {symbol}: {len(unique)} articles (yahoo+google)")
    cache_set(cache_key, result)
    return result


def fetch_news_scan(symbol):
    """
    Search Google News RSS for short seller reports mentioning a ticker.

    Server-side fetch — no CORS proxy needed. Looks at up to the first 20
    RSS items and keeps those whose title contains a known short-seller
    name or one of the short-related keywords, stopping after 5 matches.

    Returns ``{"symbol", "articles", "source"}`` where each article has
    ``title``, ``link``, ``pubDate`` and ``sellerName`` (title-cased seller
    name, or "" when only a keyword matched). On failure an ``error`` key
    is added. The result — including an error result — is cached for
    CACHE_TTL_SHORT seconds.
    """
    import re
    from html import unescape

    cache_key = f"newsscan:{symbol}"
    cached = cache_get(cache_key, CACHE_TTL_SHORT)
    if cached:
        return cached

    SHORT_SELLER_NAMES = [
        "hindenburg", "muddy waters", "citron", "wolfpack", "spruce point",
        "kerrisdale", "grizzly", "iceberg", "gotham city", "blue orca",
        "viceroy", "quintessential", "bonitas", "fuzzy panda", "white diamond",
        "culper", "jehoshaphat", "bucephalus", "scorpion capital", "hunterbrook"
    ]
    SHORT_KEYWORDS = [
        "short report", "short attack", "short seller", "shorting",
        "bear case", "overvalued", "fraud"
    ]

    result = {"symbol": symbol, "articles": [], "source": "google_news"}

    try:
        query = f'"{symbol}" short seller OR "short report" OR Hindenburg OR "Muddy Waters" OR Citron'
        encoded = urllib.parse.quote(query)
        url = f"https://news.google.com/rss/search?q={encoded}&hl=en-US&gl=US&ceid=US:en"

        req = urllib.request.Request(url, headers={"User-Agent": UA})
        with urllib.request.urlopen(req, timeout=10, context=SSL_CTX) as resp:
            xml = resp.read().decode("utf-8", errors="ignore")

        # Simple XML parsing without external dependencies
        items = re.findall(r'<item>(.*?)</item>', xml, re.DOTALL)

        for item_xml in items[:20]:
            title_m = re.search(r'<title>(.*?)</title>', item_xml)
            link_m = re.search(r'<link/?\s*>(.*?)<', item_xml) or re.search(r'<link>(.*?)</link>', item_xml)
            date_m = re.search(r'<pubDate>(.*?)</pubDate>', item_xml)

            # html.unescape decodes every entity exactly once, including
            # numeric ones, unlike a chain of str.replace calls.
            title = unescape(title_m.group(1)) if title_m else ""
            link = link_m.group(1).strip() if link_m else ""
            pub_date = date_m.group(1) if date_m else ""

            title_lower = title.lower()

            # Check relevance: a seller name wins; otherwise any keyword.
            seller_name = next(
                (sn.title() for sn in SHORT_SELLER_NAMES if sn in title_lower), ""
            )
            is_relevant = bool(seller_name) or any(
                kw in title_lower for kw in SHORT_KEYWORDS
            )

            if is_relevant:
                result["articles"].append({
                    "title": title,
                    "link": link,
                    "pubDate": pub_date,
                    "sellerName": seller_name
                })

            if len(result["articles"]) >= 5:
                break

    except Exception as e:
        print(f"[AXIOM] News scan error for {symbol}: {e}")
        result["error"] = str(e)

    cache_set(cache_key, result)
    return result


def fetch_short_seller_feeds():
    """
    Fetch recent reports from known short seller research firms.

    Firms with an ``rss`` URL are parsed as RSS (first 10 items each).
    Firms with only a ``url`` have their page scanned for anchor tags
    with 15–200 characters of link text; the first 5 distinct absolute
    links are kept.

    Returns ``{"reports", "count", "source"}``. Each report has ``seller``,
    ``color``, ``title``, ``link``, ``pubDate``, ``desc`` and ``tickers``
    (symbols written as ``$ABC`` or ``(ABC)`` in the title, in order of
    first appearance). A firm whose fetch fails is logged and skipped.
    The result is cached for CACHE_TTL_SHORT seconds.
    """
    import re
    from html import unescape

    cache_key = "shortseller_feeds"
    cached = cache_get(cache_key, CACHE_TTL_SHORT)
    if cached:
        return cached

    # Known short seller RSS feeds
    SELLERS = [
        {"name": "Hindenburg Research", "rss": "https://hindenburgresearch.com/feed/", "color": "#ff2d55"},
        {"name": "Citron Research", "rss": None, "url": "https://citronresearch.com", "color": "#ff9500"},
        {"name": "Muddy Waters", "rss": None, "url": "https://www.muddywatersresearch.com", "color": "#ff3b30"},
        {"name": "Spruce Point", "rss": "https://www.sprucepointcap.com/feed/", "color": "#5856d6"},
        {"name": "Wolfpack Research", "rss": None, "url": "https://wolfpackresearch.com", "color": "#af52de"},
        {"name": "Kerrisdale Capital", "rss": None, "url": "https://www.kerrisdalecap.com", "color": "#ff6b6b"},
    ]
    CDATA_RE = r'<!\[CDATA\[(.*?)\]\]>'

    def extract_tickers(text):
        # $ABC or (ABC); dict.fromkeys de-duplicates while keeping a
        # stable first-seen order (a set would reorder between runs).
        found = re.findall(r'\$([A-Z]{1,5})', text) + re.findall(r'\(([A-Z]{1,5})\)', text)
        return list(dict.fromkeys(found))

    all_reports = []

    for seller in SELLERS:
        feed_url = seller.get("rss") or seller.get("url")
        if not feed_url:
            continue

        try:
            req = urllib.request.Request(feed_url, headers={
                "User-Agent": UA
            })
            with urllib.request.urlopen(req, timeout=10, context=SSL_CTX) as resp:
                text = resp.read().decode("utf-8", errors="ignore")

            if seller.get("rss"):
                # Parse RSS
                items = re.findall(r'<item>(.*?)</item>', text, re.DOTALL)
                for item_xml in items[:10]:
                    title_m = re.search(r'<title>(.*?)</title>', item_xml)
                    link_m = re.search(r'<link/?\s*>(.*?)<', item_xml) or re.search(r'<link>(.*?)</link>', item_xml)
                    date_m = re.search(r'<pubDate>(.*?)</pubDate>', item_xml)
                    desc_m = re.search(r'<description>(.*?)</description>', item_xml, re.DOTALL)

                    title = title_m.group(1) if title_m else ""
                    title = unescape(re.sub(CDATA_RE, r'\1', title))

                    link = link_m.group(1).strip() if link_m else seller.get("url", "")
                    pub_date = date_m.group(1) if date_m else ""

                    desc = desc_m.group(1) if desc_m else ""
                    # Unwrap CDATA BEFORE stripping tags: the tag regex would
                    # otherwise treat "<![CDATA[ ... first '>'" as one tag and
                    # delete the leading text of the description with it.
                    desc = re.sub(CDATA_RE, r'\1', desc, flags=re.DOTALL)
                    # Decode entities first so entity-escaped markup is
                    # removed by the tag strip as well.
                    desc = re.sub(r'<[^>]+>', '', unescape(desc)).strip()[:200]

                    all_reports.append({
                        "seller": seller["name"],
                        "color": seller["color"],
                        "title": title,
                        "link": link,
                        "pubDate": pub_date,
                        "desc": desc,
                        "tickers": extract_tickers(title)
                    })
            else:
                # Parse HTML page for links
                links = re.findall(r'<a[^>]+href=["\']([^"\']+)["\'][^>]*>([^<]{15,200})</a>', text)
                seen = set()
                for href, link_text in links:
                    if len(seen) >= 5:
                        break  # cap reached; no need to scan the rest
                    link_text = link_text.strip()
                    if link_text in seen or not href.startswith("http"):
                        continue
                    seen.add(link_text)
                    all_reports.append({
                        "seller": seller["name"],
                        "color": seller["color"],
                        "title": link_text,
                        "link": href,
                        "pubDate": "Recent",
                        "desc": "",
                        "tickers": extract_tickers(link_text)
                    })

        except Exception as e:
            print(f"[AXIOM] Short seller feed error for {seller['name']}: {e}")
            continue

    result = {"reports": all_reports, "count": len(all_reports), "source": "rss_feeds"}
    cache_set(cache_key, result)
    return result


def fetch_trading_halts():
    """
    Fetch trading halts from the NASDAQ Trader RSS feed.

    Each RSS item's description is read either as HTML table cells or, when
    fewer than six cells are found, as a pipe-delimited record. Only halts
    whose reason code is LUDP, LUDS, T5 or T6 are returned.

    Returns ``{"halts", "count", "source"}``; each halt carries symbol,
    name, market, halt/resume date and time, reason code and text, a
    ``resumed`` flag, the raw ``pubDate`` and ``ts`` (epoch milliseconds,
    0 when the date cannot be parsed). A fetch failure is logged and
    whatever was collected so far is returned. Cached for 30 seconds.
    """
    cache_key = "halts:all"
    hit = cache_get(cache_key, 30)  # 30s cache — halts are time-critical
    if hit:
        return hit

    # Halt reason codes → human-readable descriptions
    HALT_REASONS = {
        "T1": "News Pending",
        "T2": "News Released",
        "T3": "News & Resumption Times",
        "T5": "Single Stock Trading Pause (10% move in 5 min)",
        "T6": "Extraordinary Market Activity",
        "T8": "ETF Halt",
        "T12": "IPO Halt — Not Yet Trading",
        "H4": "Non-Compliance Halt",
        "H9": "Not Current in Required Filings",
        "H10": "SEC Trading Suspension",
        "H11": "Regulatory Concern",
        "LUDP": "Volatility Trading Pause (LULD)",
        "LUDS": "Volatility Straddle (LULD)",
        "M1": "Corporate Action",
        "M2": "Quotation Not Available",
        "IPO1": "IPO — Not Yet Trading",
        "R1": "New Issue Available",
        "R2": "Issue Available",
        "R4": "Qualifications Issues Reviewed",
        "D": "Deficiency — Below Listing Standards",
    }
    # The only reason codes that are reported; everything else is dropped.
    VOLATILITY_CODES = {"LUDP", "LUDS", "T5", "T6"}

    def tag_text(pattern, haystack, flags=0):
        # Stripped first capture group of ``pattern``, or "" when absent.
        found = re.search(pattern, haystack, flags)
        return found.group(1).strip() if found else ""

    halts = []

    try:
        request = urllib.request.Request(
            "https://www.nasdaqtrader.com/rss.aspx?feed=tradehalts",
            headers={"User-Agent": UA},
        )
        resp = urllib.request.urlopen(request, timeout=10, context=SSL_CTX)
        xml = resp.read().decode("utf-8", errors="ignore")

        for item_xml in re.findall(r'<item>(.*?)</item>', xml, re.DOTALL):
            title = tag_text(r'<title>(.*?)</title>', item_xml)
            desc = tag_text(r'<description>(.*?)</description>', item_xml, re.DOTALL)
            pub_date = tag_text(r'<pubDate>(.*?)</pubDate>', item_xml)

            # Title of the form "Symbol - Reason" supplies a provisional
            # symbol, used only if the description cannot be parsed.
            symbol = title.split(" - ", 1)[0].strip() if " - " in title else ""
            halt_date = halt_time = name = market = reason_code = ""
            resume_date = resume_time = ""

            # Prefer HTML table cells; fall back to a pipe-delimited record.
            columns = re.findall(r'<td[^>]*>(.*?)</td>', desc, re.DOTALL)
            if len(columns) < 6:
                columns = desc.replace("&amp;", "&").split("|")

            if len(columns) >= 6:
                # Positions read: 0 halt date, 1 halt time, 2 symbol,
                # 3 name, 4 market, 5 reason; 6/7 resume date/time if present.
                halt_date, halt_time, symbol, name, market, reason_code = (
                    cell.strip() for cell in columns[:6]
                )
                if len(columns) > 6:
                    resume_date = columns[6].strip()
                if len(columns) > 7:
                    resume_time = columns[7].strip()

            if not symbol:
                continue

            # Remove any markup tags left inside the symbol and name cells.
            symbol = re.sub(r'<[^>]+>', '', symbol).strip()
            name = re.sub(r'<[^>]+>', '', name).strip()

            if reason_code not in VOLATILITY_CODES:
                continue

            # Parse timestamp
            try:
                ts = int(parsedate_to_datetime(pub_date).timestamp() * 1000)
            except Exception:
                ts = 0

            halts.append({
                "symbol": symbol,
                "name": name,
                "market": market,
                "haltDate": halt_date,
                "haltTime": halt_time,
                "reasonCode": reason_code,
                "reasonText": HALT_REASONS.get(reason_code, reason_code),
                "resumeDate": resume_date,
                "resumeTime": resume_time,
                # A non-blank resume time marks the halt as resumed.
                "resumed": bool(resume_time.strip()),
                "pubDate": pub_date,
                "ts": ts
            })

    except Exception as e:
        print(f"[AXIOM] Trading halts error: {e}")

    result = {"halts": halts, "count": len(halts), "source": "nasdaqtrader"}
    print(f"[AXIOM] Trading halts: {len(halts)} halts found")
    cache_set(cache_key, result)
    return result


def fetch_extended_hours(symbol):
    """
    Fetch pre-market and after-hours data using yfinance.
    Uses ticker.info for pre/post market prices AND
    history(prepost=True) for detailed bars with H/L/V.
    This replaces the broken Polygon/Alpaca extended hours approach.

    Args:
        symbol: Ticker symbol; used verbatim for the yfinance lookup and
            as part of the cache key ("extended:<symbol>").

    Returns:
        dict with the keys "symbol", "regularMarketPrice",
        "regularMarketClose", "regularMarketVolume", "avgVolume",
        "previousClose", "source", plus two nested dicts "preMarket" and
        "afterHours" (each with price/change/changePct/high/low/volume).
        Any value that could not be determined stays None. If the outer
        try block fails, an "error" key with the exception text is added.

    Notes:
        The result is stored via cache_set even when it carries an "error"
        key, so a failed lookup is also served from cache until it expires.
    """
    cache_key = f"extended:{symbol}"
    cached = cache_get(cache_key, 30)  # 30s cache — extended hours prices move
    if cached:
        return cached

    # Response skeleton: every field starts as None and is filled in only
    # when a usable value is found below.
    result = {
        "symbol": symbol,
        "regularMarketPrice": None,
        "regularMarketClose": None,
        "regularMarketVolume": None,
        "avgVolume": None,
        "previousClose": None,
        "preMarket": {
            "price": None, "change": None, "changePct": None,
            "high": None, "low": None, "volume": None
        },
        "afterHours": {
            "price": None, "change": None, "changePct": None,
            "high": None, "low": None, "volume": None
        },
        "source": "yfinance"
    }

    try:
        t = yf.Ticker(symbol)

        # ── Step 1: Get reference prices (regular close + previous close) ──
        reg_price = None
        prev_close = None
        reg_volume = None
        try:
            fi = t.fast_info
            reg_price = float(fi.last_price) if fi.last_price else None
            prev_close = float(fi.previous_close) if fi.previous_close else None
            # Official consolidated session volume (NOT a sum of intraday bars,
            # which undercounts off-exchange/auction prints).
            lv = getattr(fi, "last_volume", None)
            if lv:
                reg_volume = int(lv)
        except Exception as e:
            print(f"[AXIOM] Extended fast_info error: {e}")

        # ticker.info is fetched separately; a failure here is logged and
        # leaves `info` empty so the .get() fallbacks below just return None.
        info = {}
        try:
            info = t.info or {}
        except Exception as e:
            print(f"[AXIOM] Extended info error: {e}")

        # Fill anything fast_info did not provide from the info dict.
        if not reg_price:
            reg_price = info.get("regularMarketPrice") or info.get("currentPrice")
        if not prev_close:
            prev_close = info.get("regularMarketPreviousClose") or info.get("previousClose")
        if not reg_volume:
            reg_volume = info.get("regularMarketVolume") or info.get("volume")
        if reg_volume:
            result["regularMarketVolume"] = int(reg_volume)

        # regularMarketClose = today's close (the reference for after-hours change)
        # Prefers info["regularMarketPrice"]; falls back to reg_price from above.
        reg_close = info.get("regularMarketPrice") or reg_price
        result["regularMarketPrice"] = round(float(reg_price), 2) if reg_price else None
        result["previousClose"] = round(float(prev_close), 2) if prev_close else None
        result["regularMarketClose"] = round(float(reg_close), 2) if reg_close else result["regularMarketPrice"]

        # ── Step 2: Get BARS with timestamps (most reliable source) ──
        # Bars tell us EXACTLY when trades happened — no stale data ambiguity
        pre_bars = []
        ah_bars = []
        reg_bars = []
        try:
            hist = t.history(period="5d", interval="5m", prepost=True)
            if hist is not None and len(hist) > 0:
                # Only use TODAY's bars (most recent trading day)
                # NOTE(review): the date filter compares idx.date() in the
                # index's own timezone, before the New York conversion below.
                last_date = hist.index[-1].date()
                for idx, row in hist.iterrows():
                    if idx.date() != last_date:
                        continue
                    # Session bucketing is done in New York local time; if the
                    # conversion fails the raw index timestamp is used as-is.
                    try:
                        ts = idx.tz_convert('America/New_York')
                    except Exception:
                        ts = idx
                    mins = ts.hour * 60 + ts.minute  # minutes since midnight

                    bar_h = float(row.get("High", 0))
                    bar_l = float(row.get("Low", 0))
                    bar_c = float(row.get("Close", 0))
                    bar_v = int(row.get("Volume", 0))
                    bar = {"h": bar_h, "l": bar_l, "v": bar_v, "c": bar_c}

                    # Bars outside 4:00 AM - 8:00 PM fall through and are dropped.
                    if mins >= 240 and mins < 570:       # 4:00 AM - 9:30 AM = pre-market
                        pre_bars.append(bar)
                    elif mins >= 570 and mins < 960:      # 9:30 AM - 4:00 PM = regular
                        reg_bars.append(bar)
                    elif mins >= 960 and mins < 1200:     # 4:00 PM - 8:00 PM = after-hours
                        ah_bars.append(bar)

                print(f"[AXIOM] Extended bars for {symbol} (date={last_date}): pre={len(pre_bars)}, reg={len(reg_bars)}, ah={len(ah_bars)}, total={len(hist)}")
        except Exception as e:
            # Any failure while reading bars (including a bad value in a single
            # row) ends bar collection; bars appended so far are still used.
            print(f"[AXIOM] Extended bars error for {symbol}: {e}")

        # Use last regular bar close as the most accurate regular close
        if reg_bars:
            reg_close = reg_bars[-1]["c"]
            result["regularMarketClose"] = round(reg_close, 2)
            # Fallback only: if the official consolidated volume wasn't available,
            # approximate from today's regular 5m bars. (Bar sums undercount
            # off-exchange/auction prints, so this is a last resort.)
            if not result["regularMarketVolume"]:
                result["regularMarketVolume"] = sum(b["v"] for b in reg_bars)

        # Average daily volume (for RVOL denominator on the Analysis page)
        try:
            avg_vol = info.get("averageVolume") or info.get("averageVolume10days")
            if avg_vol:
                result["avgVolume"] = int(avg_vol)
        except Exception:
            pass

        # ── Step 3: Build pre-market data from BARS (primary) ──
        if pre_bars:
            pre_price = round(pre_bars[-1]["c"], 2)  # Latest pre-market trade
            result["preMarket"]["price"] = pre_price
            result["preMarket"]["high"] = round(max(b["h"] for b in pre_bars), 2)
            # Ignore non-positive lows so a zero placeholder never becomes the session low.
            low_bars = [b["l"] for b in pre_bars if b["l"] > 0]
            result["preMarket"]["low"] = round(min(low_bars), 2) if low_bars else None
            result["preMarket"]["volume"] = sum(b["v"] for b in pre_bars)
        else:
            # Fallback: use info fields only if they look reasonable
            info_pre = info.get("preMarketPrice")
            if info_pre and info_pre > 0 and prev_close:
                # Sanity check: pre-market price must be within 15% of prev close
                pct_diff = abs(info_pre - prev_close) / prev_close
                if pct_diff < 0.15:
                    result["preMarket"]["price"] = round(float(info_pre), 2)
                    print(f"[AXIOM] Using info preMarketPrice={info_pre} (passed sanity check)")
                else:
                    print(f"[AXIOM] Rejecting stale info preMarketPrice={info_pre} (pct_diff={pct_diff:.2%} vs prevClose={prev_close})")

        # ── Step 4: Build after-hours data from BARS (primary) ──
        if ah_bars:
            ah_price = round(ah_bars[-1]["c"], 2)  # Latest after-hours trade
            result["afterHours"]["price"] = ah_price
            result["afterHours"]["high"] = round(max(b["h"] for b in ah_bars), 2)
            # Same non-positive-low filter as the pre-market branch.
            low_bars = [b["l"] for b in ah_bars if b["l"] > 0]
            result["afterHours"]["low"] = round(min(low_bars), 2) if low_bars else None
            result["afterHours"]["volume"] = sum(b["v"] for b in ah_bars)
        else:
            # Fallback: use info postMarketPrice
            # (unlike the pre-market fallback, no sanity check is applied here)
            info_post = info.get("postMarketPrice")
            if info_post and info_post > 0:
                result["afterHours"]["price"] = round(float(info_post), 2)

        # ── Step 5: ALWAYS calculate change/changePct ourselves ──
        # Never trust yfinance's preMarketChangePercent/postMarketChangePercent —
        # the format is inconsistent and causes bugs (e.g., -32.68% instead of -0.35%)

        # Pre-market change = vs previous close
        if result["preMarket"]["price"] and prev_close and prev_close > 0:
            pp = result["preMarket"]["price"]
            pc = float(prev_close)
            result["preMarket"]["change"] = round(pp - pc, 2)
            result["preMarket"]["changePct"] = round((pp - pc) / pc * 100, 2)

        # After-hours change = vs regular market close (today's close)
        if result["afterHours"]["price"] and reg_close and reg_close > 0:
            ap = result["afterHours"]["price"]
            rc = float(reg_close)
            result["afterHours"]["change"] = round(ap - rc, 2)
            result["afterHours"]["changePct"] = round((ap - rc) / rc * 100, 2)

        print(f"[AXIOM] Extended FINAL: pre=${result['preMarket']['price']} ({result['preMarket']['changePct']}%), ah=${result['afterHours']['price']} ({result['afterHours']['changePct']}%), reg=${result['regularMarketPrice']}, prevClose={prev_close}")

    except Exception as e:
        print(f"[AXIOM] Extended hours FATAL error for {symbol}: {e}")
        # NOTE(review): relies on a module-level `import traceback`, which is
        # not among the imports at the top of the file header — confirm.
        traceback.print_exc()
        result["error"] = str(e)

    # Cached unconditionally, including partially filled or error results.
    cache_set(cache_key, result)
    return result


# Hosts the /yf/proxy endpoint may contact. fetch_proxy accepts an exact match
# or any subdomain of an entry (host == d or host.endswith("." + d)).
ALLOWED_PROXY_DOMAINS = [
    "finnhub.io",
    "api.polygon.io",
    "api.nasdaq.com",
    "query1.finance.yahoo.com",
    "nfs.faireconomy.media",
    "sec.gov",
    "www.sec.gov",
    "efts.sec.gov",
    "apewisdom.io",
    "api.tradestie.com",
]


def fetch_proxy(url):
    """
    General-purpose server-side proxy for whitelisted API domains.
    Replaces browser CORS proxies for Finnhub and Polygon calls.
    Only allows requests to whitelisted domains for security.

    Args:
        url: Absolute URL supplied by the client. It is untrusted, so both
            the host and the scheme are validated before anything is fetched.

    Returns:
        - the decoded JSON payload on success;
        - {"_raw": <text>} when the upstream body is not valid JSON;
        - {"error": ..., "allowed": [...]} when the host or scheme is
          rejected (handle_proxy turns this shape into HTTP 403);
        - {"error": <message>} when the upstream request itself fails.

    Successful and raw responses are cached under "proxy:<url>"; cache
    lookups pass 30 to cache_get (30s cache for proxied requests).
    """
    # Security: only proxy to allowed domains
    parsed = urllib.parse.urlparse(url)
    domain = parsed.hostname or ""
    allowed = any(
        domain == d or domain.endswith("." + d)
        for d in ALLOWED_PROXY_DOMAINS
    )
    if not allowed:
        return {"error": f"Domain not allowed: {domain}", "allowed": ALLOWED_PROXY_DOMAINS}

    # Security: urlopen also understands ftp:// and file://, so a whitelisted
    # host alone is not enough — restrict the proxy to plain web requests.
    # The "allowed" key is kept so the caller reports this as a 403 as well.
    if parsed.scheme not in ("http", "https"):
        return {
            "error": f"Scheme not allowed: {parsed.scheme or '(none)'}",
            "allowed": ALLOWED_PROXY_DOMAINS,
        }

    # Check cache
    cache_key = f"proxy:{url}"
    cached = cache_get(cache_key, 30)  # 30s cache for proxied requests
    if cached:
        return cached

    try:
        # Use appropriate Accept header and User-Agent based on domain
        accept = "application/json"
        agent = UA
        if domain == "sec.gov" or domain.endswith(".sec.gov"):
            # SEC requires identifying User-Agent with email, blocks browser UAs
            agent = "AXIOM-Crawler/1.0 jyeager89@hotmail.com"
            is_efts = domain == "efts.sec.gov" or domain.endswith(".efts.sec.gov")
            if not is_efts:
                # www.sec.gov Atom feeds return XML (EFTS full-text search
                # keeps the JSON Accept header set above)
                accept = "application/atom+xml, application/xml, text/xml, */*"

        req = urllib.request.Request(url, headers={
            "User-Agent": agent,
            "Accept": accept
        })
        # Context manager guarantees the upstream connection is released.
        with urllib.request.urlopen(req, timeout=10, context=SSL_CTX) as resp:
            text = resp.read().decode("utf-8", errors="ignore")

        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # Return raw text wrapped in JSON for non-JSON responses
            data = {"_raw": text}

        cache_set(cache_key, data)
        return data
    except Exception as e:
        return {"error": str(e)}


# ═══════════════════════════════════════════════════════════════
# HTTP REQUEST HANDLER
# ═══════════════════════════════════════════════════════════════

class AXIOMHandler(http.server.SimpleHTTPRequestHandler):
    """
    Serves static files AND handles /yf/ API endpoints.
    """

    def do_OPTIONS(self):
        """Handle CORS preflight requests."""
        self.send_response(204)
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Access-Control-Allow-Methods", "GET, POST, DELETE, PATCH, OPTIONS")
        self.send_header("Access-Control-Allow-Headers", "Content-Type, APCA-API-KEY-ID, APCA-API-SECRET-KEY")
        self.send_header("Access-Control-Max-Age", "86400")
        self.end_headers()

    def _handle_write_method(self, method):
        """Route POST/DELETE/PATCH to Alpaca proxy or AI endpoints."""
        path = urllib.parse.urlparse(self.path).path
        if path.startswith("/alpaca/"):
            self._alpaca_proxy(method)
        elif path == "/yf/ai-analyze" and method == "POST":
            self.handle_ai_analyze()
        elif path == "/yf/ethics-rating" and method == "POST":
            self.handle_ethics_rating()
        elif path == "/yf/ai-research" and method == "POST":
            self.handle_ai_research()
        else:
            self.send_error_json("Not found", 404)

    def do_POST(self):   self._handle_write_method("POST")
    def do_DELETE(self):  self._handle_write_method("DELETE")
    def do_PATCH(self):   self._handle_write_method("PATCH")

    def do_GET(self):
        parsed = urllib.parse.urlparse(self.path)
        path = parsed.path
        params = urllib.parse.parse_qs(parsed.query)

        # ── ALPACA PROXY ─────────────────────────────────────
        if path.startswith("/alpaca/"):
            self._alpaca_proxy("GET")
            return

        # ── API ROUTES ──────────────────────────────────────
        if path == "/yf/batch":
            self.handle_batch(params)
        elif path == "/yf/history":
            self.handle_history(params)
        elif path == "/yf/short":
            self.handle_short(params)
        elif path == "/yf/fundamentals":
            self.handle_fundamentals(params)
        elif path == "/yf/earnings":
            self.handle_earnings(params)
        elif path == "/yf/finra":
            self.handle_finra(params)
        elif path == "/yf/finviz":
            self.handle_finviz(params)
        elif path == "/yf/news":
            self.handle_news(params)
        elif path == "/yf/news-scan":
            self.handle_news_scan(params)
        elif path == "/yf/short-sellers":
            self.handle_short_sellers()
        elif path == "/yf/extended":
            self.handle_extended(params)
        elif path == "/yf/halts":
            self.handle_halts()
        elif path == "/yf/proxy":
            self.handle_proxy(params)
        elif path == "/yf/crawler":
            self.handle_crawler(params)
        elif path == "/yf/delisted":
            self.handle_delisted()
        elif path == "/yf/status":
            self.handle_status()
        elif path == "/yf/search":
            self.handle_search(params)
        else:
            # ── STATIC FILE SERVING ─────────────────────────
            super().do_GET()

    def send_json(self, data, status=200):
        """Send a JSON response with CORS headers."""
        body = json.dumps(data, default=str).encode("utf-8")
        self.send_response(status)
        self.send_header("Content-Type", "application/json")
        self.send_header("Access-Control-Allow-Origin", "*")
        self.send_header("Content-Length", str(len(body)))
        self.end_headers()
        self.wfile.write(body)

    def send_error_json(self, message, status=400):
        self.send_json({"error": message}, status)

    def _alpaca_proxy(self, method):
        """
        Proxy requests to Alpaca APIs — eliminates all CORS issues.
        /alpaca/trade/v2/account  → https://paper-api.alpaca.markets/v2/account
        /alpaca/data/v2/stocks/...  → https://data.alpaca.markets/v2/stocks/...
        """
        parsed = urllib.parse.urlparse(self.path)
        # Path format: /alpaca/{trade|data}/rest/of/path
        parts = parsed.path.split("/", 3)  # ['', 'alpaca', 'trade|data', 'v2/...']
        if len(parts) < 4:
            self.send_error_json("Invalid alpaca proxy path", 400)
            return

        target_type = parts[2]  # 'trade' or 'data'
        rest_path = "/" + parts[3]  # '/v2/account' etc.
        if parsed.query:
            rest_path += "?" + parsed.query

        if target_type == "trade":
            base = "https://paper-api.alpaca.markets"
        elif target_type == "data":
            base = "https://data.alpaca.markets"
        else:
            self.send_error_json("Invalid target: use 'trade' or 'data'", 400)
            return

        url = base + rest_path

        # Forward Alpaca auth headers
        key_id = self.headers.get("APCA-API-KEY-ID", "")
        secret = self.headers.get("APCA-API-SECRET-KEY", "")

        if not key_id or not secret:
            self.send_error_json("Missing Alpaca API keys in headers", 401)
            return

        # Read request body for POST/PATCH
        body = None
        content_length = int(self.headers.get("Content-Length", 0))
        if content_length > 0:
            body = self.rfile.read(content_length)

        try:
            req = urllib.request.Request(url, data=body, method=method)
            req.add_header("APCA-API-KEY-ID", key_id)
            req.add_header("APCA-API-SECRET-KEY", secret)
            req.add_header("Accept", "application/json")
            if body:
                req.add_header("Content-Type", "application/json")

            ctx = ssl.create_default_context()
            resp = urllib.request.urlopen(req, timeout=15, context=ctx)
            resp_body = resp.read()

            self.send_response(resp.status)
            self.send_header("Content-Type", "application/json")
            self.send_header("Access-Control-Allow-Origin", "*")
            self.send_header("Content-Length", str(len(resp_body)))
            self.end_headers()
            self.wfile.write(resp_body)

        except urllib.error.HTTPError as e:
            err_body = e.read().decode("utf-8", errors="replace")
            self.send_response(e.code)
            self.send_header("Content-Type", "application/json")
            self.send_header("Access-Control-Allow-Origin", "*")
            resp_bytes = err_body.encode("utf-8")
            self.send_header("Content-Length", str(len(resp_bytes)))
            self.end_headers()
            self.wfile.write(resp_bytes)

        except Exception as e:
            print(f"[AXIOM] Alpaca proxy error: {e}")
            self.send_error_json(f"Proxy error: {str(e)}", 502)

    def handle_batch(self, params):
        """GET /yf/batch?symbols=AAPL,TSLA,GPN"""
        symbols_str = params.get("symbols", [""])[0]
        if not symbols_str:
            self.send_error_json("Missing 'symbols' parameter")
            return

        symbols = [s.strip().upper() for s in symbols_str.split(",") if s.strip()]
        if len(symbols) > 50:
            symbols = symbols[:50]  # Cap at 50 per request

        data = fetch_batch_quotes(symbols)
        self.send_json(data)

    def handle_history(self, params):
        """GET /yf/history?symbol=GPN&days=90&interval=1d"""
        symbol = params.get("symbol", [""])[0].strip().upper()
        if not symbol:
            self.send_error_json("Missing 'symbol' parameter")
            return

        days = int(params.get("days", ["90"])[0])
        interval = params.get("interval", ["1d"])[0]

        # Validate interval
        valid_intervals = ["1m", "2m", "5m", "15m", "30m", "60m", "90m", "1h", "1d", "5d", "1wk", "1mo"]
        if interval not in valid_intervals:
            interval = "1d"

        data = fetch_history(symbol, days, interval)
        self.send_json(data)

    def handle_short(self, params):
        """GET /yf/short?symbol=SOFI"""
        symbol = params.get("symbol", [""])[0].strip().upper()
        if not symbol:
            self.send_error_json("Missing 'symbol' parameter")
            return

        data = fetch_short_interest(symbol)
        self.send_json(data)

    def handle_fundamentals(self, params):
        """GET /yf/fundamentals?symbol=SOFI"""
        symbol = params.get("symbol", [""])[0].strip().upper()
        if not symbol:
            self.send_error_json("Missing 'symbol' parameter")
            return

        data = fetch_fundamentals(symbol)
        self.send_json(data)

    def handle_earnings(self, params):
        """GET /yf/earnings?symbols=AAPL,TSLA"""
        symbols_str = params.get("symbols", [""])[0]
        if not symbols_str:
            self.send_error_json("Missing 'symbols' parameter")
            return

        symbols = [s.strip().upper() for s in symbols_str.split(",") if s.strip()]
        if len(symbols) > 30:
            symbols = symbols[:30]

        data = fetch_earnings_calendar(symbols)
        self.send_json(data)

    def _get_symbol(self, params):
        """Extract and validate symbol param. Returns uppercase symbol or None (sends error)."""
        sym = params.get("symbol", [""])[0].strip().upper()
        if not sym:
            self.send_error_json("Missing 'symbol' parameter")
            return None
        return sym

    def _symbol_route(self, params, fetcher):
        """Common pattern: extract symbol → call fetcher → return JSON."""
        sym = self._get_symbol(params)
        if sym:
            self.send_json(fetcher(sym))

    def handle_finra(self, params):
        """GET /yf/finra?symbol=... — single-symbol route backed by fetch_finra_short_volume."""
        self._symbol_route(params, fetch_finra_short_volume)
    def handle_finviz(self, params):
        """GET /yf/finviz?symbol=... — single-symbol route backed by fetch_finviz_short."""
        self._symbol_route(params, fetch_finviz_short)
    def handle_news(self, params):
        """GET /yf/news?symbol=... — single-symbol route backed by fetch_ticker_news."""
        self._symbol_route(params, fetch_ticker_news)
    def handle_news_scan(self, params):
        """GET /yf/news-scan?symbol=... — single-symbol route backed by fetch_news_scan."""
        self._symbol_route(params, fetch_news_scan)
    def handle_extended(self, params):
        """GET /yf/extended?symbol=... — single-symbol route backed by fetch_extended_hours."""
        self._symbol_route(params, fetch_extended_hours)
    def handle_short_sellers(self):
        """GET /yf/short-sellers — respond with the result of fetch_short_seller_feeds()."""
        feeds = fetch_short_seller_feeds()
        self.send_json(feeds)
    def handle_halts(self):
        """GET /yf/halts — respond with the result of fetch_trading_halts()."""
        halts_payload = fetch_trading_halts()
        self.send_json(halts_payload)

    def handle_proxy(self, params):
        url = params.get("url", [""])[0]
        if not url:
            self.send_error_json("Missing 'url' parameter")
            return
        data = fetch_proxy(url)
        self.send_json(data, 403 if ("error" in data and "allowed" in data) else 200)

    def handle_crawler(self, params):
        """GET /yf/crawler[?symbol=...] — respond with get_crawler_data for the symbol.

        The symbol is optional: a missing or blank value is passed to
        get_crawler_data as None.
        """
        raw_symbol = params.get("symbol", [""])[0]
        ticker = raw_symbol.strip().upper()
        self.send_json(get_crawler_data(ticker if ticker else None))

    def handle_delisted(self):
        """GET /yf/delisted — returns list of tickers confirmed as delisted.

        Response shape:
            {"delisted": [sorted confirmed symbols],
             "suspect": {symbol: fail_count}}  # count >= 1 and not yet confirmed
        """
        # Snapshot the shared state under the lock, then release it before
        # serializing and writing to the socket, so a slow client cannot
        # stall other threads waiting on delisted_lock.
        with delisted_lock:
            confirmed = sorted(delisted_confirmed)
            suspects = {
                sym: cnt
                for sym, cnt in delisted_tracker.items()
                if cnt >= 1 and sym not in delisted_confirmed
            }

        self.send_json({
            "delisted": confirmed,
            "suspect": suspects,
        })

    def handle_status(self):
        """GET /yf/status — report server health counters as JSON.

        Includes the cache entry count, whole seconds since SERVER_START,
        and the crawler's scan count and number of entries in its
        "sources_ok" list.
        """
        uptime_seconds = int(time.time() - SERVER_START)
        ok_sources = crawler_store.get("sources_ok", [])
        report = {
            "status": "ok",
            "version": "3.0",
            "cache_entries": len(cache),
            "uptime": uptime_seconds,
            "crawler_scans": crawler_store.get("scan_count", 0),
            "crawler_sources": len(ok_sources),
        }
        self.send_json(report)

    def handle_ai_analyze(self):
        """POST /yf/ai-analyze — Batch-analyze SEC filings via Anthropic API.
        Proxies through the server to avoid browser CORS issues."""
        try:
            length = int(self.headers.get("Content-Length", 0))
            body = json.loads(self.rfile.read(length).decode("utf-8"))
        except Exception:
            self.send_error_json("Invalid JSON body", 400)
            return

        api_key = body.get("key", "")
        filings = body.get("filings", [])
        model = body.get("model", "claude-sonnet-4-20250514")

        if not api_key:
            self.send_error_json("Missing Anthropic API key", 400)
            return
        if not filings:
            self.send_error_json("No filings to analyze", 400)
            return

        # Build a single batched prompt for all filings
        filing_lines = []
        for i, f in enumerate(filings[:20]):  # Cap at 20 to keep prompt reasonable
            filing_lines.append(
                f"{i+1}. COMPANY: {f.get('company','Unknown')} | "
                f"FORM: {f.get('formType','8-K')} | "
                f"FILED: {f.get('date','')} | "
                f"TICKER: {f.get('ticker','')} | "
                f"SUMMARY: {f.get('summary','')}"
            )

        prompt = (
            "You are a day-trading analyst. Analyze each SEC 8-K filing below. "
            "For EACH filing, determine the trading impact.\n\n"
            "FILINGS:\n" + "\n".join(filing_lines) + "\n\n"
            "Respond with a JSON array where each element has:\n"
            '{"idx": 1, "sentiment": "BULLISH|BEARISH|NEUTRAL", '
            '"impact": "HIGH|MEDIUM|LOW", '
            '"what_happened": "1-2 sentence explanation", '
            '"stock_impact": "How this affects the stock", '
            '"trade_idea": "Actionable trade setup if any", '
            '"watch_for": "What to monitor next"}\n\n'
            "Return ONLY a valid JSON array, no other text."
        )

        try:
            req_body = json.dumps({
                "model": model,
                "max_tokens": 3000,
                "messages": [{"role": "user", "content": prompt}]
            }).encode("utf-8")

            req = urllib.request.Request(
                "https://api.anthropic.com/v1/messages",
                data=req_body,
                headers={
                    "Content-Type": "application/json",
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                },
            )
            resp = urllib.request.urlopen(req, timeout=60, context=SSL_CTX)
            data = json.loads(resp.read().decode("utf-8"))

            text = ""
            if "content" in data and len(data["content"]) > 0:
                text = data["content"][0].get("text", "")

            # Robust JSON extraction — find the array in the response
            text = text.replace("```json", "").replace("```", "").strip()
            arr_start = text.find("[")
            arr_end = text.rfind("]")
            if arr_start >= 0 and arr_end > arr_start:
                text = text[arr_start:arr_end + 1]

            analyses = json.loads(text)
            self.send_json({"analyses": analyses})

        except urllib.error.HTTPError as e:
            err_body = e.read().decode("utf-8", errors="ignore")[:200]
            print(f"[AXIOM] AI analyze API error {e.code}: {err_body}")
            self.send_error_json(f"Anthropic API error {e.code}: {err_body}", e.code)
        except json.JSONDecodeError as e:
            print(f"[AXIOM] AI analyze JSON parse failed: {text[:200]}")
            self.send_error_json(f"Could not parse AI response as JSON", 500)
        except Exception as e:
            print(f"[AXIOM] AI analyze error: {e}")
            self.send_error_json(str(e), 500)

    def handle_search(self, params):
        """GET /yf/search?q=AAPL — Search for tickers.
        Uses local SEC EDGAR ticker database for fast prefix matching,
        supplemented by Yahoo Finance for fuzzy/name matching."""
        query = params.get("q", [""])[0].strip()
        if not query:
            self.send_json([])
            return

        # Step 1: Local ticker DB — instant prefix matching (exact > prefix > name contains)
        local_results = search_ticker_db(query, limit=12)

        # Step 2: Try Yahoo Finance for additional fuzzy/relevance matches
        yahoo_results = []
        try:
            url = f"https://query1.finance.yahoo.com/v1/finance/search?q={urllib.parse.quote(query)}&quotesCount=10&newsCount=0&listsCount=0&enableFuzzyQuery=true&quotesQueryId=tss_match_phrase_query&lang=en-US&region=US"
            req = urllib.request.Request(url, headers={
                "User-Agent": UA,
                "Accept": "application/json, text/plain, */*",
                "Accept-Language": "en-US,en;q=0.9",
                "Origin": "https://finance.yahoo.com",
                "Referer": "https://finance.yahoo.com/",
            })
            resp = urllib.request.urlopen(req, timeout=5, context=SSL_CTX)
            data = json.loads(resp.read().decode("utf-8"))
            for q in data.get("quotes", []):
                qtype = q.get("quoteType", "")
                if qtype not in ("EQUITY", "ETF", "MUTUALFUND"):
                    continue
                yahoo_results.append({
                    "symbol": q.get("symbol", ""),
                    "name": q.get("shortname") or q.get("longname") or "",
                    "exchange": q.get("exchDisp") or q.get("exchange") or "",
                    "type": qtype,
                })
        except Exception as e:
            print(f"[AXIOM] Yahoo search failed: {e}")

        # Step 3: Merge — local results first, then unique Yahoo results
        seen = set()
        merged = []
        for r in local_results:
            if r["symbol"] not in seen:
                seen.add(r["symbol"])
                merged.append(r)
        for r in yahoo_results:
            if r["symbol"] not in seen:
                seen.add(r["symbol"])
                # Yahoo has better exchange info — add it
                merged.append(r)
        # Also backfill exchange info from Yahoo into local results
        yahoo_map = {r["symbol"]: r for r in yahoo_results}
        for r in merged:
            if not r["exchange"] and r["symbol"] in yahoo_map:
                r["exchange"] = yahoo_map[r["symbol"]].get("exchange", "")
                r["type"] = yahoo_map[r["symbol"]].get("type", r["type"])
            # Capitalize company names nicely if they're ALL CAPS from SEC
            if r["name"] and r["name"] == r["name"].upper() and len(r["name"]) > 3:
                r["name"] = r["name"].title()

        # Cap at 12 results
        merged = merged[:12]
        print(f"[AXIOM] Search '{query}' -> {len(merged)} results (local={len(local_results)}, yahoo={len(yahoo_results)})")
        self.send_json(merged)

    def handle_ethics_rating(self):
        """POST /yf/ethics-rating — AI-generated ethics/sin rating for a company."""
        try:
            length = int(self.headers.get("Content-Length", 0))
            body = json.loads(self.rfile.read(length).decode("utf-8"))
        except Exception:
            self.send_error_json("Invalid JSON body", 400)
            return

        api_key = body.get("key", "")
        symbol = body.get("symbol", "").upper().strip()
        company_name = body.get("company_name", symbol)
        model = body.get("model", "claude-sonnet-4-20250514")

        if not api_key:
            self.send_error_json("Missing Anthropic API key", 400)
            return
        if not symbol:
            self.send_error_json("Missing symbol", 400)
            return

        prompt = (
            f"You are an ESG and corporate ethics analyst. Rate the company {company_name} (ticker: {symbol}) "
            "on ethical conduct across ALL of the following categories. Use a 0-100 scale where:\n"
            "  0-20 = Deeply unethical / major harm\n"
            "  21-40 = Significant concerns\n"
            "  41-60 = Mixed record / average\n"
            "  61-80 = Generally ethical / above average\n"
            "  81-100 = Exemplary / industry leader\n\n"
            "CATEGORIES TO RATE:\n"
            "1. environmental - Carbon emissions, pollution, sustainability, resource usage\n"
            "2. labor - Worker treatment, wages, safety, supply chain labor practices\n"
            "3. governance - Board independence, executive pay, transparency, accounting\n"
            "4. controversies - Lawsuits, scandals, fines, regulatory actions\n"
            "5. product_impact - Does the product help or harm society? (tobacco, weapons = low; healthcare, education = high)\n"
            "6. lobbying - Political spending, lobbying against public interest, regulatory capture\n"
            "7. diversity - Workforce diversity, inclusion, pay equity\n"
            "8. legal - Compliance record, antitrust, fraud, corruption\n\n"
            "For EACH category, provide:\n"
            "- A numeric score (0-100)\n"
            "- A short 1-sentence reason explaining the score\n"
            "- 1-2 verifiable source references with real URLs to news articles, SEC filings, "
            "EPA records, court cases, sustainability reports, or official company pages that "
            "support the rating. Use REAL URLs from major outlets (Reuters, Bloomberg, AP, "
            "WSJ, NYT, SEC.gov, EPA.gov, company investor relations pages, ESG rating sites). "
            "Do NOT fabricate URLs — only include links you are confident exist.\n\n"
            "Also provide:\n"
            "- overall_score: weighted average (product_impact and controversies weighted 1.5x)\n"
            "- sin_industry: true/false if in tobacco, alcohol, gambling, weapons, fossil fuels, private prisons, predatory lending\n"
            "- summary: 2-3 sentence plain-English summary of the company's ethical standing\n"
            "- top_concern: single biggest ethical issue\n"
            "- top_positive: single biggest ethical strength\n\n"
            "Respond with ONLY valid JSON, no markdown or explanation. Use this exact structure:\n"
            '{"overall_score": 65, "sin_industry": false, '
            '"environmental": {"score": 70, "reason": "Strong renewable energy commitments but high water usage in manufacturing", '
            '"sources": [{"title": "Company 2024 Sustainability Report", "url": "https://..."}, '
            '{"title": "EPA Compliance Record", "url": "https://..."}]}, '
            '"labor": {"score": 55, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"governance": {"score": 72, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"controversies": {"score": 50, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"product_impact": {"score": 80, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"lobbying": {"score": 45, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"diversity": {"score": 60, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"legal": {"score": 68, "reason": "...", "sources": [{"title": "...", "url": "..."}]}, '
            '"summary": "...", "top_concern": "...", "top_positive": "..."}'
        )

        try:
            req_body = json.dumps({
                "model": model,
                "max_tokens": 4000,
                "messages": [{"role": "user", "content": prompt}]
            }).encode("utf-8")

            req = urllib.request.Request(
                "https://api.anthropic.com/v1/messages",
                data=req_body,
                headers={
                    "Content-Type": "application/json",
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                },
            )
            resp = urllib.request.urlopen(req, timeout=90, context=SSL_CTX)
            data = json.loads(resp.read().decode("utf-8"))

            text = ""
            if "content" in data and len(data["content"]) > 0:
                text = data["content"][0].get("text", "")

            text = text.replace("```json", "").replace("```", "").strip()
            obj_start = text.find("{")
            obj_end = text.rfind("}")
            if obj_start >= 0 and obj_end > obj_start:
                text = text[obj_start:obj_end + 1]

            rating = json.loads(text)
            rating["symbol"] = symbol
            self.send_json(rating)

        except urllib.error.HTTPError as e:
            err_body = e.read().decode("utf-8", errors="ignore")[:200]
            print(f"[AXIOM] Ethics rating API error {e.code}: {err_body}")
            self.send_error_json(f"Anthropic API error {e.code}: {err_body}", e.code)
        except json.JSONDecodeError:
            print(f"[AXIOM] Ethics rating JSON parse failed: {text[:200]}")
            self.send_error_json("Could not parse AI response as JSON", 500)
        except Exception as e:
            print(f"[AXIOM] Ethics rating error: {e}")
            self.send_error_json(str(e), 500)

    def handle_ai_research(self):
        """POST /yf/ai-research — Synthesize web-scraped news + fundamentals into
        a plain-English research brief explaining what's moving a stock.

        Request body (JSON object):
            key           Anthropic API key (required).
            symbol        Ticker symbol (required; upper-cased and stripped).
            company_name  Display name used in the prompt; falls back to symbol.
            headlines     List of dicts with optional "source", "source_type",
                          "headline" and "sentiment_label" keys. Only the first
                          40 entries are considered and each headline is cut
                          to 180 characters. Anything else is ignored.
            fundamentals  Dict of fundamentals fields (all optional).
            price         Dict with "current", "prevClose", "changePct".
            model         Anthropic model id (defaults to
                          claude-sonnet-4-20250514).

        Responds with the model's JSON brief plus a "symbol" field. On failure
        a JSON error is sent instead: 400 for a malformed request, the upstream
        status code for Anthropic HTTP errors, 500 for everything else.
        """
        try:
            length = int(self.headers.get("Content-Length", 0))
            body = json.loads(self.rfile.read(length).decode("utf-8"))
        except Exception:
            self.send_error_json("Invalid JSON body", 400)
            return
        # Valid JSON that is not an object (e.g. a list or a bare string) has
        # no .get(); reject it here instead of crashing without a response.
        if not isinstance(body, dict):
            self.send_error_json("Invalid JSON body", 400)
            return

        api_key = body.get("key", "")
        symbol = str(body.get("symbol") or "").upper().strip()
        company_name = body.get("company_name") or symbol
        model = body.get("model") or "claude-sonnet-4-20250514"

        # Optional sections: explicit nulls or wrong types degrade to "empty"
        # so the prompt is still built rather than raising mid-request.
        headlines = body.get("headlines")
        if not isinstance(headlines, list):
            headlines = []
        fundamentals = body.get("fundamentals")
        if not isinstance(fundamentals, dict):
            fundamentals = {}
        price_ctx = body.get("price")
        if not isinstance(price_ctx, dict):
            price_ctx = {}

        if not api_key:
            self.send_error_json("Missing Anthropic API key", 400)
            return
        if not symbol:
            self.send_error_json("Missing symbol", 400)
            return

        # Build a compact headline digest for the prompt (cap to keep it fast)
        hl_lines = []
        for h in headlines[:40]:
            if not isinstance(h, dict):
                continue  # skip malformed entries instead of failing the request
            src = h.get("source", "")
            stype = h.get("source_type", "news")
            hl = str(h.get("headline") or "")[:180]
            sent = h.get("sentiment_label", "")
            hl_lines.append(f"- [{stype}/{src}] {hl}" + (f" (sentiment: {sent})" if sent else ""))
        hl_block = "\n".join(hl_lines) if hl_lines else "(no recent headlines found)"

        f = fundamentals
        fund_block = (
            f"Sector: {f.get('sector')}; Industry: {f.get('industry')}; "
            f"MarketCap: {f.get('marketCap')}; P/E: {f.get('pe')}; "
            f"RevGrowth: {f.get('revenueGrowth')}; ProfitMargin: {f.get('profitMargin')}; "
            f"52wHigh: {f.get('fiftyTwoWeekHigh')}; 52wLow: {f.get('fiftyTwoWeekLow')}; "
            f"InstHold%: {f.get('institutionalHolding')}; InsiderHold%: {f.get('insiderHolding')}; "
            f"AnalystTarget: {f.get('targetMeanPrice')}; Rec: {f.get('recommendationKey')}"
        )
        price_block = (
            f"Current: {price_ctx.get('current')}; PrevClose: {price_ctx.get('prevClose')}; "
            f"ChangePct: {price_ctx.get('changePct')}"
        )

        prompt = (
            f"You are an elite equity research analyst. Using ONLY the data provided below, write a "
            f"sharp, current research brief on {company_name} ({symbol}) for an active trader who wants "
            f"to understand what is moving the stock and learn the company fast.\n\n"
            f"PRICE CONTEXT: {price_block}\n\n"
            f"FUNDAMENTALS: {fund_block}\n\n"
            f"RECENT WEB-SCRAPED HEADLINES:\n{hl_block}\n\n"
            "Synthesize across the headlines (do not just list them). Be concrete and cite the relevant "
            "headline themes. If data is missing, say so rather than inventing it.\n\n"
            "Respond with ONLY valid JSON, no markdown, in this exact structure:\n"
            '{"whats_moving": "2-4 sentences: the dominant driver(s) of the stock right now", '
            '"why": "2-4 sentences: the mechanism/context behind the move", '
            '"background": "3-5 sentences: what the company does and its current narrative", '
            '"key_points": ["concise bullet", "..."], '
            '"catalysts_ahead": ["upcoming events/catalysts to watch", "..."], '
            '"bull_case": "2-3 sentences", '
            '"bear_case": "2-3 sentences", '
            '"insider_read": "1-2 sentences interpreting insider/institutional ownership signals", '
            '"sentiment": "BULLISH|BEARISH|NEUTRAL|MIXED", '
            '"summary": "one punchy sentence takeaway"}'
        )

        try:
            req_body = json.dumps({
                "model": model,
                "max_tokens": 2500,
                "messages": [{"role": "user", "content": prompt}]
            }).encode("utf-8")

            req = urllib.request.Request(
                "https://api.anthropic.com/v1/messages",
                data=req_body,
                headers={
                    "Content-Type": "application/json",
                    "x-api-key": api_key,
                    "anthropic-version": "2023-06-01",
                },
            )
            # Context manager guarantees the upstream connection is released
            # even if reading or decoding the response fails.
            with urllib.request.urlopen(req, timeout=90, context=SSL_CTX) as resp:
                data = json.loads(resp.read().decode("utf-8"))

            text = ""
            if "content" in data and len(data["content"]) > 0:
                text = data["content"][0].get("text", "")

            # The model is asked for bare JSON, but strip markdown fences and
            # any prose around the outermost {...} in case it adds them anyway.
            text = text.replace("```json", "").replace("```", "").strip()
            obj_start = text.find("{")
            obj_end = text.rfind("}")
            if obj_start >= 0 and obj_end > obj_start:
                text = text[obj_start:obj_end + 1]

            brief = json.loads(text)
            brief["symbol"] = symbol
            self.send_json(brief)

        except urllib.error.HTTPError as e:
            err_body = e.read().decode("utf-8", errors="ignore")[:200]
            print(f"[AXIOM] AI research API error {e.code}: {err_body}")
            self.send_error_json(f"Anthropic API error {e.code}: {err_body}", e.code)
        except json.JSONDecodeError:
            print("[AXIOM] AI research JSON parse failed")
            self.send_error_json("Could not parse AI response as JSON", 500)
        except Exception as e:
            print(f"[AXIOM] AI research error: {e}")
            self.send_error_json(str(e), 500)

    def log_message(self, format, *args):
        """Print request logs for /yf/ API calls only; stay silent otherwise.

        The first positional argument is inspected for the "/yf/" marker.
        Messages without it (static file hits and the like) are dropped to
        keep the console readable.
        """
        first_arg = str(args[0]) if args else ""
        if "/yf/" not in first_arg:
            return  # Suppress static file logs to reduce noise
        client = self.address_string()
        rendered = format % args
        print(f"[AXIOM API] {client} - {rendered}")


# ═══════════════════════════════════════════════════════════════
# SERVER STARTUP
# ═══════════════════════════════════════════════════════════════

# Wall-clock timestamp (seconds since the epoch) captured once, when this
# module is loaded. Presumably read elsewhere to report uptime — verify
# against its users.
SERVER_START = time.time()


def main():
    """Print the startup banner, start the crawler thread, and serve requests.

    Static files are served from the current working directory. Unless the
    process was started with --no-crawler, a daemon thread running
    crawler_thread_main is launched first. The call then blocks in
    serve_forever() until interrupted; Ctrl-C is reported as a clean stop.
    The listening socket is always closed on the way out, whatever ended
    the serve loop.
    """
    # Determine the directory to serve static files from
    # (currently the launch directory, so the chdir is effectively a no-op).
    serve_dir = os.getcwd()
    os.chdir(serve_dir)

    # The "+ 2" counts sources beyond the RSS list; what they are is not
    # visible from here — see the crawler for the definitive list.
    total_sources = len(CRAWLER_RSS_SOURCES) + 2
    mode = "REMOTE (no crawler)" if NO_CRAWLER else "LOCAL (full)"
    print("=" * 50)
    print(f"  AXIOM v3.0 | http://localhost:{PORT}/axiom-alpaca.html")
    print(f"  Mode: {mode}")
    if not NO_CRAWLER:
        print(f"  Crawler: {total_sources} sources every {CRAWLER_INTERVAL}s")
    print("=" * 50)

    # Start background crawler thread (skip on Oracle to save bandwidth)
    if not NO_CRAWLER:
        crawler_t = threading.Thread(target=crawler_thread_main, daemon=True)
        crawler_t.start()
        print("[AXIOM] Background crawler thread started.")
    else:
        print("[AXIOM] Crawler DISABLED (--no-crawler mode)")

    server = http.server.ThreadingHTTPServer((BIND, PORT), AXIOMHandler)

    try:
        server.serve_forever()
    except KeyboardInterrupt:
        print("\n[AXIOM] Server stopped.")
    finally:
        # Release the listening socket on every exit path, not just Ctrl-C.
        server.server_close()


# Start the server only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
