#!/usr/bin/env python3
import argparse
import json
import math
import re
import sys
from collections import Counter, defaultdict
from typing import Dict, Any, List

try:
    from transformers import pipeline
except Exception as e:
    # Fail fast with a clear message if the ML dependency is missing/broken,
    # rather than raising an opaque traceback later at first use.
    sys.stderr.write(f"Failed to import transformers: {e}\n")
    sys.exit(1)

# Initialize sentiment pipeline lazily
# Module-level cache for the HF pipeline; populated on first call to
# get_sentiment_pipe() so importing this module stays cheap.
_sentiment_pipe = None

def get_sentiment_pipe():
    """Return the shared sentiment-analysis pipeline, creating it on first use.

    The pipeline is cached in the module-level ``_sentiment_pipe`` so the
    model is loaded at most once per process. ``device=-1`` forces CPU.
    """
    global _sentiment_pipe
    if _sentiment_pipe is not None:
        return _sentiment_pipe
    _sentiment_pipe = pipeline(
        "sentiment-analysis",
        model="distilbert-base-uncased-finetuned-sst-2-english",
        device=-1,
    )
    return _sentiment_pipe

# Words excluded from keyword extraction. The first chunk is generic English
# stopwords; the second chunk is CRM field names and domain terms that occur
# in nearly every interaction and would otherwise dominate the top-keyword
# counts (note: several of them still act as scoring/NBA signals below).
STOPWORDS = set(
    """
    a an and are as at be but by for if in into is it no not of on or such that the their then there these they this to was will with from you your we our can could should would may might about after again against all also am among around because been before being between both came come did do does doing down during each few further had has have having here how i just more most much near now off once only other over own same some than too under until up very via what when where which while who whom why won yet across along already else ever get got like many next past per rather than though through via without within upon onto out outside inside across toward towards
    subject body summary notes description email call meeting task opportunity proposal pricing budget quote timeline demo trial contract procurement legal issue concern problem delay blocker churn cancel competitor interest decision stakeholder implementation
    """.split()
)

# Substring signals mixed into the base opportunity score (see
# opportunity_score): buying signals add points, risk signals subtract.
# Matching is plain substring search on the lowercased text, so e.g.
# "cancel" also matches "cancellation".
KEYWORD_SIGNALS_POS = {
    "budget": 10, "timeline": 8, "decision": 8, "stakeholder": 6, "proposal": 10,
    "pilot": 8, "expand": 8, "upgrade": 8, "pricing": 8, "quote": 8, "next": 5,
}
KEYWORD_SIGNALS_NEG = {
    "delay": -10, "issue": -10, "problem": -10, "blocker": -12, "churn": -20,
    "cancel": -18, "concern": -8, "risk": -8, "competitor": -10, "stuck": -8,
}

# Ordered (condition, recommendation) rules evaluated first-match-wins by
# next_best_action(). Each condition receives the lowercased interaction
# type ``t``, the lowercased text ``txt``, and the signed sentiment score
# ``s`` (negative = negative sentiment). Risk handling is listed first so
# concerns outrank commercial follow-ups.
NBA_RULES = [
    (lambda t, txt, s: any(k in txt for k in ["issue","concern","problem","delay","blocker"]) or s < -0.2,
     "Schedule follow-up to address concerns and propose corrective actions."),
    (lambda t, txt, s: any(k in txt for k in ["pricing","quote","budget"]),
     "Send detailed proposal/quote and confirm budget alignment."),
    (lambda t, txt, s: any(k in txt for k in ["demo","trial","proof of concept","poc"]),
     "Offer a tailored demo or trial with success criteria."),
    (lambda t, txt, s: any(k in txt for k in ["contract","procurement","legal"]),
     "Engage legal/procurement to accelerate contracting."),
    (lambda t, txt, s: t == "email" and s >= 0.2,
     "Send recap email with clear next steps and timeline."),
    (lambda t, txt, s: t in ("call","meeting") and s >= 0.2,
     "Propose next meeting to align stakeholders on implementation plan."),
]


def normalize_text(item: Dict[str, Any]) -> str:
    """Concatenate an interaction's free-text fields into one whitespace-normalized string.

    Pulls subject/body/summary/notes/description (skipping falsy values),
    joins them, and collapses all runs of whitespace to single spaces.
    """
    parts = []
    for key in ("subject", "body", "summary", "notes", "description"):
        value = item.get(key, "")
        if value:
            parts.append(str(value))
    combined = "\n".join(parts).strip()
    return re.sub(r"\s+", " ", combined)


def analyze_sentiment(text: str) -> Dict[str, Any]:
    """Classify *text* with the HF model and return label/probability/signed score.

    Returns a dict with keys ``label`` (POSITIVE/NEGATIVE/NEUTRAL),
    ``probability`` (model confidence, rounded) and ``score`` (signed:
    positive for POSITIVE labels, negative otherwise). Empty input short-
    circuits to a neutral result without touching the model.
    """
    if not text:
        return {"label": "NEUTRAL", "probability": 0.5, "score": 0.0}
    # Cap input length to keep the transformer call cheap.
    result = get_sentiment_pipe()(text[:4500])[0]
    label = result.get("label", "NEUTRAL").upper()
    prob = float(result.get("score", 0.5))
    signed = prob if label.startswith("POS") else -prob
    # Low-confidence outputs are relabeled NEUTRAL; the signed score keeps
    # its small magnitude for downstream thresholds.
    if abs(signed) < 0.2:
        label = "NEUTRAL"
    return {"label": label, "probability": round(prob, 4), "score": round(signed, 4)}


def extract_keywords(text: str, top_k: int = 6) -> List[str]:
    """Return up to *top_k* most frequent non-stopword tokens from *text*.

    Tokens are lowercase alphabetic runs (3+ chars, hyphens/apostrophes
    allowed); anything of length <= 3 or in STOPWORDS is dropped. Ties are
    broken by first occurrence (Counter insertion order).
    """
    if not text:
        return []
    tokens = re.findall(r"[a-zA-Z][a-zA-Z\-']{2,}", text.lower())
    counts = Counter(
        tok for tok in tokens if len(tok) > 3 and tok not in STOPWORDS
    )
    return [word for word, _count in counts.most_common(top_k)]


def opportunity_score(text: str, sentiment: Dict[str, Any]) -> int:
    """Compute a 0-100 opportunity score from sentiment plus keyword signals.

    Starts at 50; sentiment contributes asymmetrically (up to +15 for
    positive, up to -20 for negative), then each matching substring from
    the positive/negative signal tables adds its weight. The result is
    rounded and clamped to [0, 100].
    """
    base = 50.0
    s = float(sentiment.get("score", 0.0))
    if s > 0:
        base += 15.0 * min(1.0, s)
    elif s < 0:
        base -= 20.0 * min(1.0, abs(s))
    lowered = text.lower()
    # Substring matching on purpose: "cancel" should also hit "cancellation".
    for signals in (KEYWORD_SIGNALS_POS, KEYWORD_SIGNALS_NEG):
        for word, weight in signals.items():
            if word in lowered:
                base += weight
    return int(max(0, min(100, round(base))))


def next_best_action(itype: str, text: str, sentiment: Dict[str, Any]) -> str:
    """Return the first NBA_RULES recommendation whose condition matches.

    Conditions receive the lowercased interaction type, lowercased text and
    the signed sentiment score. A rule that raises is skipped (a malformed
    rule must never break the pipeline); if nothing matches, a generic
    follow-up recommendation is returned.
    """
    kind = (itype or '').lower()
    lowered = text.lower()
    s = float(sentiment.get("score", 0.0))
    for condition, recommendation in NBA_RULES:
        try:
            matched = condition(kind, lowered, s)
        except Exception:
            continue
        if matched:
            return recommendation
    return "Send recap and propose next step with clear owner and date."


def aggregate(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """Roll up per-interaction results into overall and per-client aggregates.

    Overall: counts by sentiment band (>0.2 positive, <-0.2 negative, else
    neutral), mean sentiment/opportunity, and the 10 most common keywords.
    Per-client: count plus mean sentiment/opportunity, keyed by client_id
    (rows with an empty client_id are excluded from the per-client view).
    """
    overall = {
        'count': len(results),
        'positive': 0,
        'neutral': 0,
        'negative': 0,
        'avg_sentiment': 0.0,
        'avg_opportunity': 0.0,
        'top_keywords': []
    }
    if not results:
        return {'overall': overall, 'by_client': {}}

    by_client: Dict[str, Dict[str, Any]] = defaultdict(
        lambda: {'count': 0, 'avg_sentiment': 0.0, 'avg_opportunity': 0.0}
    )
    keyword_counts = Counter()
    sentiment_total = 0.0
    opportunity_total = 0.0

    for row in results:
        score = float(row.get('sentiment', {}).get('score', 0.0))
        if score > 0.2:
            overall['positive'] += 1
        elif score < -0.2:
            overall['negative'] += 1
        else:
            overall['neutral'] += 1
        sentiment_total += score

        opportunity = int(row.get('opportunity_score', 0))
        opportunity_total += opportunity
        keyword_counts.update(row.get('keywords', []))

        client = str(row.get('client_id') or '')
        if client:
            stats = by_client[client]
            stats['count'] += 1
            # Accumulate sums here; converted to means after the loop.
            stats['avg_sentiment'] += score
            stats['avg_opportunity'] += opportunity

    n = len(results)
    overall['avg_sentiment'] = round(sentiment_total / n, 4)
    overall['avg_opportunity'] = round(opportunity_total / n, 2)
    overall['top_keywords'] = [word for word, _ in keyword_counts.most_common(10)]

    for stats in by_client.values():
        if stats['count'] > 0:
            stats['avg_sentiment'] = round(stats['avg_sentiment'] / stats['count'], 4)
            stats['avg_opportunity'] = round(stats['avg_opportunity'] / stats['count'], 2)

    return {'overall': overall, 'by_client': dict(by_client)}


def main():
    """CLI entry point: read interactions JSON, analyze each item, print a JSON report.

    Input is read from STDIN (--stdin) or a file (--input). Exits 2 when no
    input source is given and 3 on malformed JSON. Non-dict entries in the
    ``interactions`` list are skipped.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--stdin', action='store_true', help='Read input JSON from STDIN')
    parser.add_argument('--input', type=str, help='Path to input JSON')
    args = parser.parse_args()

    if args.stdin:
        raw = sys.stdin.read()
    elif args.input:
        with open(args.input, 'r', encoding='utf-8') as f:
            raw = f.read()
    else:
        sys.stderr.write('No input provided. Use --stdin or --input path.\n')
        sys.exit(2)

    try:
        data = json.loads(raw)
    except Exception as e:
        sys.stderr.write(f'Invalid JSON: {e}\n')
        sys.exit(3)

    results = []
    for item in (data.get('interactions') or []):
        if not isinstance(item, dict):
            continue
        itype = (item.get('type') or '').lower()
        text = normalize_text(item)
        sent = analyze_sentiment(text)
        preview = text[:240] + ('…' if len(text) > 240 else '')
        results.append({
            'id': item.get('id') or '',
            'type': itype,
            'client_id': item.get('client_id') or '',
            'sentiment': sent,
            'next_best_action': next_best_action(itype, text, sent),
            'opportunity_score': opportunity_score(text, sent),
            'keywords': extract_keywords(text),
            'text_preview': preview
        })

    report = {'results': results, 'aggregates': aggregate(results)}
    print(json.dumps(report, ensure_ascii=False))

if __name__ == '__main__':
    main()
