Add human-in-the-loop survey helper (GUI + CLI + Agent tool) (#181)

* Add human-in-the-loop survey helper Co-authored-by: nic <nicsins@users.noreply.github.com> * Make survey helper launcher robust and add CLI fallback Co-authored-by: nic <nicsins@users.noreply.github.com> * Detect missing display for tkinter GUI Co-authored-by: nic <nicsins@users.noreply.github.com> * Add prediction dataset + review workflow for uncertain survey answers Co-authored-by: nic <nicsins@users.noreply.github.com> --------- Co-authored-by: Cursor Agent <cursoragent@cursor.com> Co-authored-by: nic <nicsins@users.noreply.github.com>
2026-05-25 06:27:33 +00:00 · 2026-03-17 04:02:15 -05:00 · 2026-03-17 04:02:15 -05:00 · b7efe4992a
commit b7efe4992a
parent 37e696b104
9 changed files with 526 additions and 49 deletions
--- a/python/tools/survey_helper.py
+++ b/python/tools/survey_helper.py
@ -4,7 +4,15 @@ from python.helpers.tool import Tool, Response
 from python.survey_assistant.browser_render import render_url_async
 from python.survey_assistant.extract import extract_form_fields
 from python.survey_assistant.profile import SurveyProfile
-from python.survey_assistant.llm import ollama_available, suggest_answers_with_ollama
+from python.survey_assistant.llm import ollama_available, predict_answers_json_with_ollama
+from python.survey_assistant.predictions import (
+    DEFAULT_PREDICTIONS_PATH,
+    PredictionRecord,
+    Candidate,
+    append_predictions,
+    build_question_id,
+    utc_now_iso,
+)


 class SurveyHelper(Tool):
@ -20,6 +28,9 @@ class SurveyHelper(Tool):
        html: str = "",
        include_suggestions: bool = False,
        ollama_model: str = "llama3",
+        top_k: int = 3,
+        record_predictions: bool = False,
+        predictions_path: str = str(DEFAULT_PREDICTIONS_PATH),
        **kwargs,
    ) -> Response:
        if not url and not html:
@ -46,31 +57,68 @@ class SurveyHelper(Tool):

        if include_suggestions:
            profile = SurveyProfile.load()
-            questions_lines = []
-            for i, f in enumerate(fields, start=1):
-                label = f.label or f.name or f.id or "(unlabeled)"
-                t = f.input_type or f.kind
-                req = " (required)" if f.required else ""
-                questions_lines.append(f"{i}. {label} — {t}{req}")
-                if f.options:
-                    for opt in f.options[:30]:
-                        questions_lines.append(f"   - {opt.get('label')}")
-                    if len(f.options) > 30:
-                        questions_lines.append("   - ...")
-
            if ollama_available():
                try:
-                    payload["suggestions"] = suggest_answers_with_ollama(
+                    pred = predict_answers_json_with_ollama(
                        model=ollama_model,
-                        questions_text="\n".join(questions_lines),
+                        url=final_url,
+                        title=page_title,
+                        fields_json=json.dumps([f.to_dict() for f in fields], ensure_ascii=False),
                        profile_json=json.dumps(profile.as_dict(), indent=2, ensure_ascii=False),
+                        top_k=max(1, min(8, int(top_k or 3))),
                    )
+                    payload["predictions"] = pred.get("predictions", [])
+                    if pred.get("error"):
+                        payload["predictions_error"] = pred.get("error")
+                        payload["predictions_raw"] = pred.get("raw")
+
+                    if record_predictions and isinstance(payload.get("predictions"), list):
+                        records: list[PredictionRecord] = []
+                        for item in payload["predictions"]:
+                            try:
+                                idx = int(item.get("field_index"))
+                            except Exception:
+                                continue
+                            if idx < 1 or idx > len(fields):
+                                continue
+                            if not bool(item.get("needs_clarification")):
+                                continue
+                            field_dict = fields[idx - 1].to_dict()
+                            qid = build_question_id(url=final_url, field=field_dict)
+                            cand_objs: list[Candidate] = []
+                            for c in (item.get("candidates") or [])[: max(1, min(10, top_k))]:
+                                try:
+                                    cand_objs.append(
+                                        Candidate(
+                                            value=str(c.get("value", "")),
+                                            confidence=float(c.get("confidence", 0.0)),
+                                        )
+                                    )
+                                except Exception:
+                                    continue
+                            records.append(
+                                PredictionRecord(
+                                    id=qid,
+                                    timestamp=utc_now_iso(),
+                                    url=final_url,
+                                    title=page_title,
+                                    field_index=idx,
+                                    field=field_dict,
+                                    selected=str(item.get("selected", "")),
+                                    confidence=float(item.get("confidence", 0.0) or 0.0),
+                                    candidates=cand_objs,
+                                    rationale=str(item.get("rationale", "")),
+                                    needs_clarification=True,
+                                    source="llm",
+                                )
+                            )
+                        if records:
+                            p = append_predictions(records, path=predictions_path)
+                            payload["recorded_predictions_path"] = str(p)
                except Exception as exc:
-                    payload["suggestions_error"] = str(exc)
+                    payload["predictions_error"] = str(exc)
            else:
-                payload["suggestions_error"] = (
-                    "Ollama not available at http://localhost:11434"
-                )
+                payload["predictions_error"] = "Ollama not available at http://localhost:11434"

        return Response(message=json.dumps(payload, indent=2, ensure_ascii=False), break_loop=False)