#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""The autonomous Mapper — a model-in-the-loop that maps an app by itself.

No human, no Claude Code. You launch it; it drives the tool layer (WinTools)
with a BRAIN model running through the Gateway, explores the target application,
and writes `software-map.json` + a human-readable `REVIEW.md` — the first review
of the system.

The loop, each step:
  1. show the BRAIN the current screenshot + a compact running state,
  2. the BRAIN replies with ONE JSON action (a tool call),
  3. we execute it via WinTools (safety gate enforced in the tools, not here),
  4. we feed the result back, update the running state, and repeat
until the BRAIN calls `finish` (after `emit_verdict`), or we hit the step/time
cap, or it gets stuck. Every model call — brain and vision — is via the Gateway,
so OpenRouter<->local is a one-line config switch.

The BRAIN is a vision model: it SEES each screenshot and decides where to click /
what to read, so grounding (where things are) is the model's job and reading
(what they say) is the vision role's job — both through the Gateway.
"""

from __future__ import annotations

import json
import os
import re
import time

from gateway import Gateway, image_part, text_part
from tools import WinTools

# Directory that contains this module; sibling resources are resolved from it.
HERE = os.path.dirname(os.path.abspath(__file__))
# Markdown file whose full text MapperAgent.__init__ reads as the brain's
# system prompt.
SYSTEM_PROMPT_PATH = os.path.join(HERE, "MAPPER_SYSTEM.md")

# Tools the brain may call: action "tool" name -> (WinTools method name,
# accepted arg keys). MapperAgent._dispatch looks the method up on the
# WinTools instance and forwards only the listed keys that are present in the
# action's "args" as keyword arguments; any other key in "args" is dropped.
TOOLSPEC = {
    "screenshot":          ("screenshot", []),
    "probe_accessibility": ("probe_accessibility", []),
    "probe_windows":       ("probe_windows", []),
    "probe_keyboard":      ("probe_keyboard", ["keys"]),
    "read_region":         ("read_region", ["x", "y", "w", "h", "question"]),
    "locate":              ("locate", ["goal"]),
    "click":               ("click", ["x", "y", "label"]),
    "type_text":           ("type_text", ["text"]),
    "press_key":           ("press_key", ["key"]),
    "scroll":              ("scroll", ["notches", "x", "y"]),
    "save_map_entry":      ("save_map_entry", ["obj"]),
    "save_template":       ("save_template", ["name", "region"]),
    "emit_verdict":        ("emit_verdict", ["obj"]),
}
# Tool names that end the loop; _dispatch answers them itself without calling
# any WinTools method.
TERMINAL = {"finish"}


def _short(v, n=240):
    """Render *v* as a short string for log lines and state summaries.

    Strings are used as-is; any other value is JSON-encoded with non-ASCII
    characters kept readable. A result longer than *n* characters is cut to
    *n* characters and gets a trailing "…" (so it may be n + 1 long).

    Values JSON cannot encode (bytes, sets, arbitrary objects, circular
    structures) fall back to ``repr`` instead of raising: this helper is
    applied to raw tool results, and a formatting helper should not be the
    thing that aborts a run.
    """
    if isinstance(v, str):
        s = v
    else:
        try:
            s = json.dumps(v, ensure_ascii=False)
        except (TypeError, ValueError):
            # TypeError: not JSON-serializable; ValueError: circular reference
            s = repr(v)
    return s if len(s) <= n else s[:n] + "…"


class MapperAgent:
    def __init__(self, client, window_title=None, exe_path=None,
                 target_software=None, login=None,
                 provider=None, max_steps=140, max_minutes=40, config=None):
        """Build the gateway, the tool layer and the empty running state.

        Args:
            client: client identifier; forwarded to WinTools and echoed in
                the state text and the review.
            window_title, exe_path, target_software, config: forwarded
                unchanged to WinTools.
            login: optional mapping; when it carries a non-empty "username",
                a log-in section containing that username and the "password"
                value (empty string if absent) is appended to the system
                prompt.
            provider: passed to Gateway as ``provider_override``.
            max_steps: upper bound on loop iterations in ``run``.
            max_minutes: time budget, turned into an absolute deadline here.
        """
        gateway = Gateway(provider_override=provider)
        self.gw = gateway
        self.tools = WinTools(client=client, gateway=gateway, config=config,
                              target_software=target_software,
                              window_title=window_title, exe_path=exe_path)
        self.client = client
        self.max_steps = max_steps
        self.deadline = time.time() + max_minutes * 60

        with open(SYSTEM_PROMPT_PATH, "r", encoding="utf-8") as prompt_file:
            prompt = prompt_file.read()

        # An app that opens on a login screen: tell the brain to log in first.
        if login and login.get("username"):
            credentials = (login["username"], login.get("password", ""))
            login_section = (
                "\n\n# FIRST STEP — LOG IN\n"
                "The application opens on a LOGIN screen. Before any mapping, log in:\n"
                "1. Find the **username** field (use `locate` or read the screen), "
                "`click` it to focus, then `type_text` the username: «%s».\n"
                "2. Find the **password** field, `click` it to focus, then "
                "`type_text` the password: «%s».\n"
                "3. Submit by `press_key` **Enter** (preferred — it is never blocked). "
                "If Enter does nothing, `click` the login button (כניסה / התחבר / "
                "Login) — those are allowed; only commit/destructive buttons are blocked.\n"
                "Logging in is NOT a destructive commit. Once the screen has clearly "
                "changed past the login form, begin the mapping methodology above. "
                "If you are already past the login screen, ignore this section.\n"
            )
            prompt += login_section % credentials
        self.system_prompt = prompt

        # Running state that is summarised back to the brain on every turn.
        self.tech_verdict = None
        self.screens = []          # [{id, description}]
        self.popups = []           # [{id, policy}]
        self.recent = []           # summaries of past actions and their results
        self.escalations = []      # clicks the tool layer refused and escalated
        self.models_used = set()
        self.step = 0
        self.verdict_emitted = False
        self.log_lines = []

    # ----------------------------------------------------------------------
    def log(self, msg):
        """Record *msg* with a zero-padded step prefix and echo it to stdout.

        The formatted line is kept in ``self.log_lines`` and printed with an
        immediate flush.
        """
        entry = "[{:03d}] {}".format(self.step, msg)
        self.log_lines.append(entry)
        print(entry, flush=True)

    def _state_text(self):
        """Compose the text part of the per-turn message shown to the brain.

        The text is a fixed header, a JSON dump (indent 1, non-ASCII kept) of
        the running state — limited to the last 5 escalations and the last 8
        recent actions — and a fixed instruction to reply with one JSON action.
        """
        snapshot = dict(
            client=self.client,
            step=self.step,
            steps_left=self.max_steps - self.step,
            technology_verdict=self.tech_verdict,
            screens_mapped=self.screens,
            popups_mapped=self.popups,
            escalations_so_far=self.escalations[-5:],
            recent_actions=self.recent[-8:],
        )
        pieces = [
            "STATE (what you have already established — do not redo it):\n",
            json.dumps(snapshot, ensure_ascii=False, indent=1),
            "\n\nThe image below is the CURRENT screen. ",
            "Reply with exactly ONE JSON action object and nothing else.",
        ]
        return "".join(pieces)

    def _ingest_map_entry(self, obj):
        """Fold a map entry produced by the brain into the running state.

        Reads three optional keys of *obj*:
          * "technology_verdict" — replaces ``self.tech_verdict`` when a dict;
          * "screens" — each dict with a truthy "id" not seen before is added
            to ``self.screens`` as ``{"id", "description"}``;
          * "popups" — likewise added to ``self.popups`` as
            ``{"id", "policy"}`` (taken from "recommended_policy").

        *obj* comes from model output, so anything malformed is skipped
        rather than raised: a non-dict *obj*, a "screens"/"popups" value that
        is not a list (e.g. a number or ``true``), and non-dict or id-less
        items are all ignored.
        """
        if not isinstance(obj, dict):
            return
        tv = obj.get("technology_verdict")
        if isinstance(tv, dict):
            self.tech_verdict = tv

        # (key in obj, target list, field read from item, field stored)
        plan = (
            ("screens", self.screens, "description", "description"),
            ("popups", self.popups, "recommended_policy", "policy"),
        )
        for key, known, src_field, dst_field in plan:
            items = obj.get(key)
            if not isinstance(items, (list, tuple)):
                continue
            for item in items:
                if not (isinstance(item, dict) and item.get("id")):
                    continue
                # compare with == (not a set) so unhashable ids cannot raise
                if any(k["id"] == item["id"] for k in known):
                    continue
                known.append({"id": item["id"],
                              dst_field: item.get(src_field, "")})

    # ----------------------------------------------------------------------
    def _ask_brain(self, screenshot_path):
        """Ask the brain for its next action given the current screenshot.

        Sends the system prompt plus one user message (state text followed by
        the screenshot image) through the Gateway under the "brain" role,
        records the model name reported in the reply metadata, and returns
        the reply text.
        """
        encoded_png = self.tools.png_b64(screenshot_path)
        system_msg = {"role": "system", "content": self.system_prompt}
        user_msg = {
            "role": "user",
            "content": [text_part(self._state_text()), image_part(encoded_png)],
        }
        reply, meta = self.gw.chat("brain", [system_msg, user_msg])
        self.models_used.add("brain:%s" % meta.get("model"))
        return reply

    @staticmethod
    def _parse_action(text):
        # take the last {...} JSON object in the reply
        cands = re.findall(r'\{(?:[^{}]|\{[^{}]*\})*\}', text or "", re.S)
        for raw in reversed(cands):
            try:
                obj = json.loads(raw)
                if isinstance(obj, dict) and "tool" in obj:
                    return obj
            except Exception:  # noqa: BLE001
                continue
        return None

    def _dispatch(self, action):
        """Execute one parsed action and apply its effects on the running state.

        Args:
            action: dict parsed from the brain's reply; "tool" selects the
                entry in TOOLSPEC and "args" (when a dict) supplies the
                keyword arguments — only keys listed for that tool are
                forwarded.

        Returns:
            ``(result, tool)`` where *result* is always a dict. For a
            terminal tool the result carries ``"terminal": True`` and *tool*
            is ``None``; for an unknown tool the result is an error and
            *tool* is ``None``; otherwise *result* is what the WinTools
            method returned.

        The action comes from model output, so failures are reported back as
        ``{"ok": False, "error": ...}`` instead of raising: a tool method
        that raises (e.g. a required argument is missing), a tool that
        returns something other than a dict, or a "tool" value that is not a
        string all end up as an error result the brain can react to.
        """
        tool = action.get("tool")
        if not isinstance(tool, str):
            # a list/dict here is unhashable and would make the lookups raise
            return {"ok": False, "error": "unknown tool %r" % (tool,)}, None
        if tool in TERMINAL:
            return {"ok": True, "data": "finish", "terminal": True}, None
        if tool not in TOOLSPEC:
            return {"ok": False, "error": "unknown tool %r" % (tool,)}, None

        method_name, keys = TOOLSPEC[tool]
        raw_args = action.get("args")
        args = raw_args if isinstance(raw_args, dict) else {}
        kwargs = {k: args[k] for k in keys if k in args}
        try:
            tr = getattr(self.tools, method_name)(**kwargs)
        except Exception as e:  # noqa: BLE001 - a bad tool call must not end the run
            tr = {"ok": False,
                  "error": "%s raised %s: %s" % (tool, type(e).__name__, e)}
        if not isinstance(tr, dict):
            tr = {"ok": False,
                  "error": "%s returned a non-dict result: %r" % (tool, tr)}

        # Side effects on running state — only for calls the tool accepted,
        # so the state shown to the brain never claims something was saved
        # (or a verdict was emitted) when the tool reported failure.
        succeeded = bool(tr.get("ok"))
        if tool == "save_map_entry" and succeeded:
            self._ingest_map_entry(args.get("obj"))
        if tool == "emit_verdict" and succeeded:
            self.verdict_emitted = True
            # _ingest_map_entry reads only technology_verdict/screens/popups,
            # so this hand-off leaves the running state unchanged today.
            self._ingest_map_entry({"automation_verdict": args.get("obj")})

        # A refused click that the tool layer marked for escalation is kept
        # for the review's safety section.
        data = tr.get("data")
        data = data if isinstance(data, dict) else {}
        if tool == "click" and not succeeded and data.get("escalate"):
            self.escalations.append({"x": args.get("x"), "y": args.get("y"),
                                     "seen": data.get("seen_text"),
                                     "reason": tr.get("error")})
        return tr, tool

    # ----------------------------------------------------------------------
    def run(self):
        """Open the target app, drive the brain/tool loop, then write the review.

        Each iteration asks the brain for one action, dispatches it, records
        a summary in ``self.recent`` and advances to the newest screenshot the
        tool returned. The loop ends when the brain calls a terminal tool or
        when the step or time budget is exhausted. If no verdict was emitted
        by then, a partial "incomplete" verdict is emitted through the tool
        layer before the review is written.

        A failing tool call never aborts the run: an exception from dispatch
        or a non-dict tool result is turned into an ``ok: False`` result that
        is fed back to the brain, and a failure to save the synthesized
        verdict is only logged, so the review is still written.

        Returns:
            ``False`` when the app could not be opened (a review with the
            abort reason is still written), ``True`` otherwise.
        """
        self.log("opening target app …")
        op = self.tools.open_app()
        if not op.get("ok"):
            self.log("open_app FAILED: %s" % op.get("error"))
            self._write_review(aborted="open_app failed: %s" % op.get("error"))
            return False
        shot = op.get("screenshot")
        stuck = 0  # consecutive actuations that reported no screen change

        while self.step < self.max_steps and time.time() < self.deadline:
            self.step += 1
            try:
                reply = self._ask_brain(shot)
            except Exception as e:  # noqa: BLE001
                # a failed model call still costs a step, so the loop is bounded
                self.log("brain call failed: %s" % e)
                time.sleep(2)
                continue
            action = self._parse_action(reply)
            if not action:
                self.log("no valid JSON action parsed; nudging brain")
                self.recent.append({"action": "(unparseable)",
                                    "note": _short(reply, 120)})
                continue

            tool = action.get("tool")
            thought = action.get("thought", "")
            self.log("%s %s  | %s" % (tool, _short(action.get("args", {}), 120),
                                      _short(thought, 80)))
            try:
                tr, _ = self._dispatch(action)
            except Exception as e:  # noqa: BLE001 - report to the brain instead
                self.log("tool %s raised: %s" % (tool, e))
                tr = {"ok": False, "error": "%s: %s" % (type(e).__name__, e)}
            if not isinstance(tr, dict):
                tr = {"ok": False,
                      "error": "tool returned a non-dict result: %r" % (tr,)}

            # terminal?
            if tr.get("terminal"):
                self.log("brain called finish.")
                break

            ok = tr.get("ok")
            changed = tr.get("changed")
            summary = {"action": tool, "args": _short(action.get("args", {}), 100),
                       "ok": ok, "changed": changed,
                       "result": _short((tr.get("data") if ok else tr.get("error")), 160)}
            self.recent.append(summary)

            # advance the current screenshot
            if tr.get("screenshot") and os.path.exists(tr["screenshot"]):
                shot = tr["screenshot"]

            # stuck detection (action that should change something, didn't)
            if tool in ("click", "press_key", "type_text", "scroll"):
                stuck = stuck + 1 if (ok and not changed) else 0
                if stuck >= 5:
                    self.recent.append({"note": "STUCK: 5 actuations with no screen "
                                        "change. Try a probe, scroll, or a different "
                                        "navigation path; if the app is fully mapped, "
                                        "emit_verdict then finish."})
                    self.log("stuck=5 — injected hint")
                    stuck = 0

        else:
            # while/else: reached only when the loop condition went false,
            # i.e. not when the brain's finish broke out of the loop
            self.log("reached step/time cap")

        if not self.verdict_emitted:
            self.log("no verdict emitted by brain — synthesizing a partial one")
            try:
                self.tools.emit_verdict({
                    "level": "incomplete",
                    "rationale": "Mapping ended (cap/stop) before the brain emitted a "
                                 "verdict. See screens/popups gathered so far.",
                    "per_area": []})
            except Exception as e:  # noqa: BLE001 - still write the review
                self.log("synthesized verdict could not be saved: %s" % e)
        self._write_review()
        return True

    # ----------------------------------------------------------------------
    def _write_review(self, aborted=None):
        """Write REVIEW.md and mapping_run.log into the client directory.

        The review is built from ``software-map.json`` in the client
        directory when that file exists and parses, with the in-memory
        technology verdict as a fallback, plus the escalations and flags
        collected during the run.

        The map file is written by the tool layer from model-provided data,
        so it is read defensively: a file that cannot be read or parsed is
        logged and treated as empty, and sections whose value is ``null`` or
        of the wrong type are rendered as empty instead of raising.

        Args:
            aborted: optional reason text; when given, a "mapping stopped"
                banner with this text is added near the top of the review.

        Returns:
            Path of the written REVIEW.md.
        """
        def _as_dict(value):
            return value if isinstance(value, dict) else {}

        def _as_list(value):
            return value if isinstance(value, list) else []

        map_path = os.path.join(self.tools.client_dir, "software-map.json")
        smap = {}
        if os.path.exists(map_path):
            try:
                with open(map_path, encoding="utf-8") as f:
                    smap = _as_dict(json.load(f))
            except Exception as e:  # noqa: BLE001 - best effort, but not silent
                self.log("could not load software-map.json (%s); the review "
                         "uses in-memory state only" % e)
                smap = {}
        tv = _as_dict(smap.get("technology_verdict") or self.tech_verdict or {})
        av = _as_dict(smap.get("automation_verdict"))
        screens = _as_list(smap.get("screens"))
        popups = _as_list(smap.get("popups"))
        transitions = _as_list(smap.get("transitions"))

        out = []
        out.append("# REVIEW — מיפוי ראשוני של המערכת")
        out.append("> client: **%s**  ·  steps: %d  ·  provider/models: %s" %
                   (self.client, self.step, ", ".join(sorted(self.models_used)) or "—"))
        if aborted:
            out.append("\n> ⚠️ **המיפוי נעצר:** %s\n" % aborted)
        out.append("\n## 1. מה האפליקציה (verdict טכנולוגי)")
        out.append("- מסגרת UI: **%s**" % tv.get("ui_framework", "—"))
        out.append("- עץ נגישות (UIA): **%s**" % tv.get("accessibility_tree", "—"))
        out.append("- מניית חלונות: **%s**" % tv.get("window_enumeration", "—"))
        out.append("- ניווט מקלדת: **%s**" % tv.get("keyboard_nav", "—"))
        out.append("- נדרשת ראייה (vision): **%s**" % tv.get("vision_required", "—"))

        out.append("\n## 2. מסכים שמופו (%d)" % len(screens))
        for s in screens:
            if isinstance(s, dict) and s.get("id"):
                els = s.get("elements", []) or []
                grids = s.get("grids", []) or []
                out.append("- **%s** — %s  _(%d אלמנטים, %d גרידים; הגעה: %s)_" %
                           (s.get("id"), s.get("description", ""), len(els), len(grids),
                            s.get("reached_by", "—")))

        out.append("\n## 3. פופאפים שנלכדו (%d)" % len(popups))
        for p in popups:
            if isinstance(p, dict) and p.get("id"):
                out.append("- **%s** — מדיניות: `%s`  (%s)" %
                           (p.get("id"), p.get("recommended_policy", "—"),
                            p.get("title", "")))

        # the heading shows the full count; only the first 40 are listed
        out.append("\n## 4. מעברים (%d)" % len(transitions))
        for t in transitions[:40]:
            if isinstance(t, dict):
                out.append("- `%s` --%s--> `%s`" %
                           (t.get("from", "?"), t.get("action", "?"), t.get("to", "?")))

        out.append("\n## 5. verdict אוטומציה")
        out.append("- רמה: **%s**" % av.get("level", "—"))
        out.append("- נימוק: %s" % av.get("rationale", "—"))
        for pa in _as_list(av.get("per_area")):
            if isinstance(pa, dict):
                out.append("  - %s → %s" % (pa.get("area", "?"), pa.get("strategy", "?")))

        out.append("\n## 6. בטיחות — פעולות שנחסמו (escalations: %d)" % len(self.escalations))
        if self.escalations:
            out.append("המערכת חסמה בקוד לחיצות על פקדים הרסניים (commit/destructive). "
                       "הן לא בוצעו — דורשות החלטת אדם:")
            for e in self.escalations:
                out.append("- (%s,%s) טקסט שזוהה: %r — %s" %
                           (e.get("x"), e.get("y"), _short(e.get("seen"), 40),
                            _short(e.get("reason"), 100)))
        else:
            out.append("לא נחסמו לחיצות הרסניות במהלך המיפוי.")

        out.append("\n## 7. הסתייגויות / מה לא הושלם")
        if not self.verdict_emitted:
            out.append("- ה-brain לא הספיק verdict סופי לפני העצירה — המפה חלקית.")
        out.append("- דיוק קריאה/grounding על המסכים האמיתיים לא נמדד אוטומטית כאן — "
                   "מומלץ לדגום ידנית 10–20 קריאות מול האמת לפני בניית זרימות.")
        out.append("- כל טענה במפה מבוססת על קריאת-כלי בפועל; אין oracle.")

        out.append("\n---\n_REVIEW נוצר אוטומטית ע\"י Mapper. המפה המלאה: `software-map.json`. "
                   "צילומי הסשן: `session/`._")

        review_path = os.path.join(self.tools.client_dir, "REVIEW.md")
        with open(review_path, "w", encoding="utf-8") as f:
            f.write("\n".join(out) + "\n")
        # also dump the run log
        with open(os.path.join(self.tools.client_dir, "mapping_run.log"), "w",
                  encoding="utf-8") as f:
            f.write("\n".join(self.log_lines) + "\n")
        self.log("wrote REVIEW.md + software-map.json + mapping_run.log -> %s"
                 % self.tools.client_dir)
        return review_path