#!/usr/bin/env python3
"""
CyberDudeBivash Threat Hunting Script (Utility Package)
Defensive • Alert-First • SOC-Ready JSONL output

- Monitors process starts by taking snapshots and detecting new PIDs
- Applies explainable heuristics to score risk
- Writes events to JSON Lines (JSONL) for SIEM ingestion

Usage:
  python cyberdudebivash_threat_hunt.py --config ../config/shield_policy.json --out ../logs/cyberdudebivash_threat_hunt.jsonl [--allowlist PATH]
"""

from __future__ import annotations

import argparse
import hashlib
import json
import os
import socket
import time
from dataclasses import dataclass, asdict
from typing import Dict, Any, List, Tuple, Optional

import psutil


@dataclass
class Event:
    """One alert record; main() builds it and emit_jsonl() writes it as a JSON line."""

    # Unix epoch time in whole seconds (see utc_now()).
    timestamp: int
    # Name of the machine that produced the event (see hostname()).
    host: str
    # Event type; main() sets "suspicious_process" or "watch_process".
    event: str
    # Name of the process that was scored.
    process: str
    # Command line of the process as a single space-joined string.
    cmdline: str
    # Risk score as returned by score_process() (clamped there to 0..100).
    score: int
    # Names of the heuristics that contributed to the score.
    reasons: List[str]
    # Value of "policy_version" from the loaded policy file.
    policy_version: str


def utc_now() -> int:
    """Return the current Unix epoch time truncated to whole seconds."""
    epoch_seconds = time.time()
    return int(epoch_seconds)


def hostname() -> str:
    """Return the local machine's host name as reported by the socket module."""
    local_name = socket.gethostname()
    return local_name


def sha256_text(s: str) -> str:
    """Return the hex SHA-256 digest of ``s`` encoded as UTF-8.

    Characters that cannot be encoded (e.g. lone surrogates) are dropped
    before hashing rather than raising.
    """
    hasher = hashlib.sha256()
    hasher.update(s.encode("utf-8", errors="ignore"))
    return hasher.hexdigest()


def load_json(path: str) -> Dict[str, Any]:
    """Read the UTF-8 file at ``path`` and return its parsed JSON content.

    Errors from opening the file or from parsing invalid JSON propagate
    to the caller.
    """
    with open(path, mode="r", encoding="utf-8") as handle:
        raw_text = handle.read()
    return json.loads(raw_text)


def load_allowlist(path: Optional[str]) -> List[str]:
    """Load allowlist rules from a text file, one rule per line.

    Blank lines and lines starting with ``#`` are skipped; every remaining
    line is stripped of surrounding whitespace and lower-cased.

    Args:
        path: Allowlist file path. ``None``, an empty string, or a path
            that does not exist yields an empty list (no allowlisting).

    Returns:
        The rules in file order.
    """
    if not path or not os.path.exists(path):
        return []
    # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading byte-order
    # mark. With plain "utf-8" the BOM stays glued to the first rule (strip()
    # does not remove U+FEFF), so that rule could never match anything.
    with open(path, "r", encoding="utf-8-sig") as f:
        stripped_lines = (raw.strip() for raw in f)
        return [
            entry.lower()
            for entry in stripped_lines
            if entry and not entry.startswith("#")
        ]


def ensure_parent_name(ppid: int) -> str:
    """Return the name of the process with PID ``ppid``, or "" if unavailable.

    A falsy ``ppid`` (0) short-circuits to "". Any failure while looking the
    process up is swallowed on purpose: the parent name is best-effort
    enrichment and must not interrupt monitoring.
    """
    if not ppid:
        return ""
    try:
        parent = psutil.Process(ppid)
        return parent.name()
    except Exception:
        return ""


def is_allowlisted(allowlist: List[str], proc_name: str, exe_path: str, cmdline: str, parent_name: str) -> bool:
    """Return True if any allowlist rule is a substring of a process field.

    Matching is case-insensitive and checks the process name, executable
    path, command line and parent process name.

    Args:
        allowlist: Substring rules. Empty or whitespace-only rules are ignored.
        proc_name: Process name (``None`` is treated as "").
        exe_path: Executable path (``None`` is treated as "").
        cmdline: Command line string (``None`` is treated as "").
        parent_name: Parent process name (``None`` is treated as "").

    Returns:
        True when at least one non-empty rule matches, otherwise False.
    """
    fields = tuple(
        (value or "").lower()
        for value in (proc_name, exe_path, cmdline, parent_name)
    )
    for rule in allowlist or ():
        # Normalize here as well so rules not produced by load_allowlist()
        # (mixed case, stray whitespace) still match.
        needle = (rule or "").strip().lower()
        if not needle:
            # "" is a substring of every string; honoring it would silently
            # allowlist every process and disable all detection.
            continue
        if any(needle in field for field in fields):
            return True
    return False


def _lowered_terms(values: Any) -> List[str]:
    """Return the non-empty string entries of a policy list, lower-cased."""
    return [v.lower() for v in (values or []) if isinstance(v, str) and v]


def score_process(p: Dict[str, Any], policy: Dict[str, Any]) -> Tuple[int, List[str]]:
    """
    Explainable scoring based on practical SOC heuristics.
    Tune weights and lists inside config/shield_policy.json.

    Args:
        p: Process record; the keys "exe", "cmdline", "parent_name" and
            "name" are read (missing or None values are treated as "").
        policy: Policy dict; the keys "weights",
            "user_writable_path_hints", "suspicious_cmd_fragments",
            "office_parents" and "shell_children" are read. Missing or
            null entries fall back to built-in defaults / empty lists.

    Returns:
        ``(score, reasons)`` where ``score`` is clamped to 0..100 and
        ``reasons`` lists the heuristics that fired, in evaluation order.
    """
    # `or {}` also covers an explicit `"weights": null` in the policy file.
    weights = policy.get("weights") or {}
    # Empty strings are dropped from the substring lists: "" is contained in
    # every string, so a single blank entry would flag every process.
    user_writable_hints = _lowered_terms(policy.get("user_writable_path_hints"))
    suspicious_frags = _lowered_terms(policy.get("suspicious_cmd_fragments"))
    office_parents = _lowered_terms(policy.get("office_parents"))
    shell_children = _lowered_terms(policy.get("shell_children"))

    exe = (p.get("exe") or "").lower()
    cmd = (p.get("cmdline") or "").lower()
    parent = (p.get("parent_name") or "").lower()
    name = (p.get("name") or "").lower()

    score = 0
    reasons: List[str] = []

    # Executable path contains one of the configured user-writable hints.
    if any(hint in exe for hint in user_writable_hints):
        score += int(weights.get("exec_from_user_writable_path", 20))
        reasons.append("exec_from_user_writable_path")

    # PowerShell started with an encoded-command switch.
    if "powershell" in name and (" -enc " in cmd or " -encodedcommand " in cmd):
        score += int(weights.get("encoded_powershell", 30))
        reasons.append("encoded_powershell")

    # Configured office parent spawning a configured shell child (exact names).
    if parent in office_parents and name in shell_children:
        score += int(weights.get("office_spawn_shell", 40))
        reasons.append("office_spawn_shell")

    if any(frag in cmd for frag in suspicious_frags):
        # Small additive signal; do not overcount. The contribution is capped
        # at 15 even when the policy configures a larger weight.
        add = int(weights.get("persistence_intent_strings", 15))
        score += min(add, 15)
        reasons.append("persistence_intent_strings")

    score = max(0, min(100, score))
    return score, reasons


def emit_jsonl(evt: Event, out_path: str) -> None:
    """Append ``evt`` to ``out_path`` as one JSON object per line.

    The parent directory is created if it does not exist. Keys are written
    in sorted order.
    """
    parent_dir = os.path.dirname(out_path)
    if not parent_dir:
        # A bare file name has no directory part; use the current directory.
        parent_dir = "."
    os.makedirs(parent_dir, exist_ok=True)

    with open(out_path, "a", encoding="utf-8") as sink:
        record = json.dumps(asdict(evt), sort_keys=True)
        sink.write(record + "\n")


def _snapshot_processes() -> List[Dict[str, Any]]:
    """Return one normalized dict per process currently listed by psutil."""
    snapshot: List[Dict[str, Any]] = []
    for p in psutil.process_iter(attrs=["pid", "ppid", "name", "exe", "cmdline", "create_time"]):
        try:
            info = p.info
            snapshot.append({
                "pid": int(info.get("pid") or 0),
                "ppid": int(info.get("ppid") or 0),
                "name": info.get("name") or "",
                "exe": info.get("exe") or "",
                "cmdline": " ".join(info.get("cmdline") or []),
                "create_time": int(info.get("create_time") or 0),
            })
        except Exception:
            # Best effort: one unreadable process must not abort the snapshot.
            continue
    return snapshot


def main() -> int:
    """Poll the process table and append scored alerts to a JSONL file.

    Command-line arguments:
        --config     Path to the shield policy JSON (required).
        --out        Output JSONL path (required).
        --allowlist  Optional allowlist file; empty by default.

    Policy keys read here: "poll_seconds" (default 5), "alert_threshold"
    (default 40), "watch_threshold" (default 25) and "policy_version"
    (default "shield-policy-v1").

    Runs until interrupted with Ctrl+C.

    Returns:
        0 after a Ctrl+C shutdown.
    """
    ap = argparse.ArgumentParser(description="CyberDudeBivash Threat Hunting Script (Defensive)")
    ap.add_argument("--config", required=True, help="Path to shield policy JSON")
    ap.add_argument("--out", required=True, help="Output JSONL path")
    ap.add_argument("--allowlist", default="", help="Optional allowlist file")
    args = ap.parse_args()

    policy = load_json(args.config)
    allowlist = load_allowlist(args.allowlist)

    # Clamp to at least one second: time.sleep() raises ValueError on a
    # negative value, and 0 would turn the loop into a busy full-table scan.
    poll_seconds = max(1, int(policy.get("poll_seconds", 5)))
    alert_threshold = int(policy.get("alert_threshold", 40))
    watch_threshold = int(policy.get("watch_threshold", 25))
    policy_version = str(policy.get("policy_version", "shield-policy-v1"))

    # pid -> fingerprint (exe|cmd|create_time) to dedupe per process lifetime
    seen: Dict[int, str] = {}

    try:
        while True:
            current_pids = set()
            for proc in _snapshot_processes():
                pid = proc["pid"]
                if pid <= 0:
                    continue
                current_pids.add(pid)

                fp = sha256_text(f"{proc.get('exe')}|{proc.get('cmdline')}|{proc.get('create_time')}")
                # Compare the fingerprint, not just the PID: if a PID is
                # reused between two polls the new process has a different
                # fingerprint and must still be evaluated.
                if seen.get(pid) == fp:
                    continue
                seen[pid] = fp

                # Resolve the parent only for processes that are actually
                # evaluated instead of for every process on every poll.
                proc["parent_name"] = ensure_parent_name(proc["ppid"])

                # allowlist check
                if is_allowlisted(allowlist, proc["name"], proc["exe"], proc["cmdline"], proc["parent_name"]):
                    continue

                score, reasons = score_process(proc, policy)
                if score < watch_threshold:
                    continue

                action = "suspicious_process" if score >= alert_threshold else "watch_process"
                evt = Event(
                    timestamp=utc_now(),
                    host=hostname(),
                    event=action,
                    process=proc["name"],
                    cmdline=proc["cmdline"],
                    score=score,
                    reasons=reasons,
                    policy_version=policy_version,
                )
                emit_jsonl(evt, args.out)

            # Forget PIDs that have exited so the map does not grow forever.
            seen = {pid: fp for pid, fp in seen.items() if pid in current_pids}

            time.sleep(poll_seconds)
    except KeyboardInterrupt:
        # Ctrl+C is the normal way to stop the monitor; exit without a traceback.
        return 0


# Run the monitor only when executed as a script; main()'s return value is
# passed to SystemExit and becomes the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
