agent-zero/python/surveys/db.py

from __future__ import annotations

import json
import os
import sqlite3
import time
from dataclasses import asdict
from typing import Any, Iterable

from python.helpers import memory as memory_helper
from agent import Agent

from .schemas import Persona, UserProfile, SurveyField


def _now_ts() -> int:
    return int(time.time())


class SurveyDB:
    """Structured persistence for personas, profiles, and survey answers."""

    def __init__(self, path: str):
        self.path = path
        os.makedirs(os.path.dirname(self.path) or ".", exist_ok=True)
        self._conn = sqlite3.connect(self.path, check_same_thread=False)
        self._conn.execute("PRAGMA journal_mode=WAL;")
        self._conn.execute("PRAGMA foreign_keys=ON;")
        self._init_schema()

    @staticmethod
    def for_agent(agent: Agent) -> "SurveyDB":
        base = memory_helper.get_memory_subdir_abs(agent)
        path = os.path.join(base, "survey_profiles.db")
        return SurveyDB(path)

    def close(self) -> None:
        try:
            self._conn.close()
        except Exception:
            pass

    def _init_schema(self) -> None:
        cur = self._conn.cursor()
        cur.executescript(
            """
            CREATE TABLE IF NOT EXISTS personas (
              id TEXT PRIMARY KEY,
              name TEXT NOT NULL,
              description TEXT NOT NULL,
              constraints_json TEXT NOT NULL,
              created_at INTEGER NOT NULL
            );

            CREATE TABLE IF NOT EXISTS profiles (
              id TEXT PRIMARY KEY,
              persona_id TEXT NULL REFERENCES personas(id) ON DELETE SET NULL,
              data_json TEXT NOT NULL,
              updated_at INTEGER NOT NULL
            );

            CREATE TABLE IF NOT EXISTS survey_sessions (
              id TEXT PRIMARY KEY,
              url TEXT NOT NULL,
              persona_id TEXT NULL REFERENCES personas(id) ON DELETE SET NULL,
              profile_id TEXT NULL REFERENCES profiles(id) ON DELETE SET NULL,
              started_at INTEGER NOT NULL,
              completed_at INTEGER NULL,
              status TEXT NOT NULL,
              notes TEXT NULL
            );

            CREATE TABLE IF NOT EXISTS survey_answers (
              id TEXT PRIMARY KEY,
              session_id TEXT NOT NULL REFERENCES survey_sessions(id) ON DELETE CASCADE,
              question_text TEXT NULL,
              field_kind TEXT NOT NULL,
              selector TEXT NULL,
              answer_text TEXT NOT NULL,
              field_json TEXT NOT NULL,
              raw_json TEXT NULL,
              processed INTEGER NOT NULL DEFAULT 0,
              created_at INTEGER NOT NULL
            );

            CREATE INDEX IF NOT EXISTS idx_survey_answers_processed
              ON survey_answers(processed, created_at);
            """
        )
        self._conn.commit()

    # ---- Persona/profile API -------------------------------------------------

    def upsert_persona(self, persona: Persona) -> None:
        self._conn.execute(
            """
            INSERT INTO personas(id, name, description, constraints_json, created_at)
            VALUES (?, ?, ?, ?, ?)
            ON CONFLICT(id) DO UPDATE SET
              name=excluded.name,
              description=excluded.description,
              constraints_json=excluded.constraints_json;
            """,
            (
                persona.id,
                persona.name,
                persona.description,
                json.dumps(persona.constraints or {}, ensure_ascii=False),
                _now_ts(),
            ),
        )
        self._conn.commit()

    def get_persona(self, persona_id: str) -> Persona | None:
        row = self._conn.execute(
            "SELECT id, name, description, constraints_json FROM personas WHERE id=?",
            (persona_id,),
        ).fetchone()
        if not row:
            return None
        return Persona(
            id=row[0],
            name=row[1],
            description=row[2],
            constraints=json.loads(row[3] or "{}"),
        )

    def upsert_profile(self, profile: UserProfile) -> None:
        self._conn.execute(
            """
            INSERT INTO profiles(id, persona_id, data_json, updated_at)
            VALUES (?, ?, ?, ?)
            ON CONFLICT(id) DO UPDATE SET
              persona_id=excluded.persona_id,
              data_json=excluded.data_json,
              updated_at=excluded.updated_at;
            """,
            (
                profile.id,
                profile.persona_id,
                json.dumps(profile.data or {}, ensure_ascii=False),
                _now_ts(),
            ),
        )
        self._conn.commit()

    def get_profile(self, profile_id: str) -> UserProfile | None:
        row = self._conn.execute(
            "SELECT id, persona_id, data_json FROM profiles WHERE id=?", (profile_id,)
        ).fetchone()
        if not row:
            return None
        return UserProfile(
            id=row[0],
            persona_id=row[1],
            data=json.loads(row[2] or "{}"),
        )

    # ---- Survey session/answers API -----------------------------------------

    def create_session(
        self,
        session_id: str,
        url: str,
        persona_id: str | None,
        profile_id: str | None,
        status: str = "running",
        notes: str | None = None,
    ) -> None:
        self._conn.execute(
            """
            INSERT INTO survey_sessions(id, url, persona_id, profile_id, started_at, status, notes)
            VALUES (?, ?, ?, ?, ?, ?, ?)
            """,
            (session_id, url, persona_id, profile_id, _now_ts(), status, notes),
        )
        self._conn.commit()

    def complete_session(self, session_id: str, status: str = "completed") -> None:
        self._conn.execute(
            """
            UPDATE survey_sessions
            SET completed_at=?, status=?
            WHERE id=?
            """,
            (_now_ts(), status, session_id),
        )
        self._conn.commit()

    def insert_answer(
        self,
        answer_id: str,
        session_id: str,
        question_text: str | None,
        field: SurveyField,
        answer_text: str,
        raw: dict[str, Any] | None = None,
    ) -> None:
        self._conn.execute(
            """
            INSERT INTO survey_answers(
              id, session_id, question_text, field_kind, selector, answer_text,
              field_json, raw_json, processed, created_at
            )
            VALUES (?, ?, ?, ?, ?, ?, ?, ?, 0, ?)
            """,
            (
                answer_id,
                session_id,
                question_text,
                field.kind.value,
                field.selector,
                answer_text,
                json.dumps(asdict(field), ensure_ascii=False),
                json.dumps(raw, ensure_ascii=False) if raw else None,
                _now_ts(),
            ),
        )
        self._conn.commit()

    def fetch_unprocessed_answers(
        self, limit: int = 200
    ) -> list[dict[str, Any]]:
        rows = self._conn.execute(
            """
            SELECT id, session_id, question_text, field_kind, selector, answer_text, field_json, raw_json, created_at
            FROM survey_answers
            WHERE processed=0
            ORDER BY created_at ASC, id ASC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()
        results: list[dict[str, Any]] = []
        for row in rows:
            results.append(
                {
                    "id": row[0],
                    "session_id": row[1],
                    "question_text": row[2],
                    "field_kind": row[3],
                    "selector": row[4],
                    "answer_text": row[5],
                    "field": json.loads(row[6] or "{}"),
                    "raw": json.loads(row[7]) if row[7] else None,
                    "created_at": row[8],
                }
            )
        return results

    def fetch_unprocessed_answer_events(self, limit: int = 200) -> list[dict[str, Any]]:
        """Fetch unprocessed answers with session context (url/profile/persona)."""
        rows = self._conn.execute(
            """
            SELECT
              a.id, a.session_id, a.question_text, a.field_kind, a.selector, a.answer_text,
              a.field_json, a.raw_json, a.created_at,
              s.url, s.persona_id, s.profile_id
            FROM survey_answers a
            JOIN survey_sessions s ON s.id = a.session_id
            WHERE a.processed=0
            ORDER BY a.created_at ASC, a.id ASC
            LIMIT ?
            """,
            (limit,),
        ).fetchall()
        results: list[dict[str, Any]] = []
        for row in rows:
            results.append(
                {
                    "id": row[0],
                    "session_id": row[1],
                    "question_text": row[2],
                    "field_kind": row[3],
                    "selector": row[4],
                    "answer_text": row[5],
                    "field": json.loads(row[6] or "{}"),
                    "raw": json.loads(row[7]) if row[7] else None,
                    "created_at": row[8],
                    "url": row[9],
                    "persona_id": row[10],
                    "profile_id": row[11],
                }
            )
        return results

    def mark_answers_processed(self, answer_ids: Iterable[str]) -> None:
        ids = list(answer_ids)
        if not ids:
            return
        cur = self._conn.cursor()
        cur.executemany(
            "UPDATE survey_answers SET processed=1 WHERE id=?",
            [(i,) for i in ids],
        )
        self._conn.commit()