aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--README.md28
-rw-r--r--build_index.py192
-rw-r--r--cache_model.py34
-rw-r--r--cards.jsonl31
-rw-r--r--cheat.dbbin0 -> 118784 bytes
-rw-r--r--init_db.py65
-rw-r--r--inspect_db.py34
-rw-r--r--query_index.py148
-rw-r--r--requirements.txt2
10 files changed, 536 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5b7a10c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+venv
+models \ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..534001e
--- /dev/null
+++ b/README.md
@@ -0,0 +1,28 @@
+# cheat
+
+Local command-helper retrieval system using JSONL, SQLite, and sentence-transformers.
+
+## Setup
+
+```sh
+export HF_HOME="$PWD/models/hf"
+export SENTENCE_TRANSFORMERS_HOME="$PWD/models/hf"
+python -m venv venv
+source venv/bin/activate
+pip install -U pip
+pip install -r requirements.txt
+python init_db.py
+python build_index.py
+```
+
+Then run a query like this:
+
+```sh
+python query_index.py "get free disk space"
+```
+
+To add commands, add to `./cards.jsonl` and rebuild the index:
+
+```sh
+python build_index.py
+```
diff --git a/build_index.py b/build_index.py
new file mode 100644
index 0000000..8597c68
--- /dev/null
+++ b/build_index.py
@@ -0,0 +1,192 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import json
+import sqlite3
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from sentence_transformers import SentenceTransformer
+
+import os
+from pathlib import Path
+
# Paths and model configuration for the indexer.
DB_PATH = Path("cheat.db")
CARDS_PATH = Path("./cards.jsonl")
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Keep the Hugging Face cache inside the project tree so model files travel
# with the checkout instead of landing in ~/.cache.  (The original assigned
# LOCAL_CACHE_DIR twice with the same value; the duplicate is removed.)
LOCAL_CACHE_DIR = Path("models/hf")
os.environ.setdefault("HF_HOME", str(LOCAL_CACHE_DIR.resolve()))
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(LOCAL_CACHE_DIR.resolve()))
+
# Every card in cards.jsonl must carry exactly these keys; load_cards()
# raises ValueError for any line missing one of them.
REQUIRED_FIELDS = {
    "id",
    "intent",
    "command",
    "alternatives",
    "explanation",
    "requires",
    "packages",
    "tags",
    "platform",
    "shell",
    "safety",
}
+
+
def load_cards(path: Path) -> list[dict[str, Any]]:
    """Parse the JSONL card file into dicts, validating required fields.

    Blank lines are skipped. Raises ValueError (with the 1-based line
    number) on malformed JSON or on a card missing any REQUIRED_FIELDS key.
    """
    loaded: list[dict[str, Any]] = []
    with path.open("r", encoding="utf-8") as handle:
        for line_no, raw in enumerate(handle, start=1):
            text = raw.strip()
            if not text:
                continue

            try:
                record = json.loads(text)
            except json.JSONDecodeError as exc:
                raise ValueError(f"Invalid JSON on line {line_no}: {exc}") from exc

            absent = REQUIRED_FIELDS - set(record.keys())
            if absent:
                raise ValueError(f"Missing required fields on line {line_no}: {sorted(absent)}")

            loaded.append(record)
    return loaded
+
+
def build_search_text(card: dict[str, Any]) -> str:
    """Render a card into the compact text blob that gets embedded.

    This is what the retriever searches over. Only non-empty fields
    contribute a line, so sparse cards stay short. Field order is fixed:
    intents, tags, command, alternatives, explanation, requires, platform.
    """
    lines: list[str] = []

    def add_joined(label: str, sep: str, values: list[str]) -> None:
        # List-valued fields become one labelled, separator-joined line.
        if values:
            lines.append(label + sep.join(values))

    add_joined("Intents: ", " | ", card.get("intent", []))
    add_joined("Tags: ", ", ", card.get("tags", []))

    command = card.get("command", "")
    if command:
        lines.append("Command: " + command)

    add_joined("Alternatives: ", " | ", card.get("alternatives", []))

    explanation = card.get("explanation", "")
    if explanation:
        lines.append("Explanation: " + explanation)

    add_joined("Requires: ", ", ", card.get("requires", []))
    add_joined("Platform: ", ", ", card.get("platform", []))

    return "\n".join(lines)
+
+
def serialize_embedding(vec: np.ndarray) -> bytes:
    """Serialize an embedding vector to raw float32 bytes for BLOB storage."""
    as_f32 = vec.astype(np.float32)
    return as_f32.tobytes()
+
+
def upsert_card(conn: sqlite3.Connection, card: dict[str, Any], search_text: str) -> None:
    """Insert or update one card row keyed by id.

    List/dict-valued card fields are stored as JSON text columns; on
    conflict every column is refreshed and updated_at is bumped. The
    caller is responsible for committing.
    """
    conn.execute("""
        INSERT INTO cards (
            id, command, explanation, intent_json, alternatives_json, requires_json,
            packages_json, tags_json, platform_json, shell_json, safety, search_text, updated_at
        )
        VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, CURRENT_TIMESTAMP)
        ON CONFLICT(id) DO UPDATE SET
            command=excluded.command,
            explanation=excluded.explanation,
            intent_json=excluded.intent_json,
            alternatives_json=excluded.alternatives_json,
            requires_json=excluded.requires_json,
            packages_json=excluded.packages_json,
            tags_json=excluded.tags_json,
            platform_json=excluded.platform_json,
            shell_json=excluded.shell_json,
            safety=excluded.safety,
            search_text=excluded.search_text,
            updated_at=CURRENT_TIMESTAMP
    """, (
        card["id"],
        card["command"],
        card["explanation"],
        # ensure_ascii=False keeps non-ASCII text readable in the DB.
        json.dumps(card["intent"], ensure_ascii=False),
        json.dumps(card["alternatives"], ensure_ascii=False),
        json.dumps(card["requires"], ensure_ascii=False),
        json.dumps(card["packages"], ensure_ascii=False),
        json.dumps(card["tags"], ensure_ascii=False),
        json.dumps(card["platform"], ensure_ascii=False),
        json.dumps(card["shell"], ensure_ascii=False),
        card["safety"],
        search_text,
    ))
+
+
def upsert_embedding(
    conn: sqlite3.Connection,
    card_id: str,
    model_name: str,
    vec: np.ndarray,
) -> None:
    """Insert or replace the embedding row for a card.

    Stores the vector as a float32 BLOB plus its dimension so readers can
    validate length on deserialization. The caller commits.
    """
    conn.execute("""
        INSERT INTO card_embeddings (
            card_id, model_name, embedding_blob, embedding_dim
        )
        VALUES (?, ?, ?, ?)
        ON CONFLICT(card_id) DO UPDATE SET
            model_name=excluded.model_name,
            embedding_blob=excluded.embedding_blob,
            embedding_dim=excluded.embedding_dim
    """, (
        card_id,
        model_name,
        serialize_embedding(vec),
        int(vec.shape[0]),
    ))
+
+
def main() -> None:
    """Embed every card from cards.jsonl and upsert rows into cheat.db.

    Requires the database schema (init_db.py) and a locally cached model
    (cache_model.py) to exist already; never touches the network.
    """
    if not DB_PATH.exists():
        # Fixed: the old message said "scripts/init_db.py", but this commit
        # places init_db.py at the repository root.
        raise FileNotFoundError(
            f"Database not found at {DB_PATH}. Run init_db.py first."
        )
    if not CARDS_PATH.exists():
        raise FileNotFoundError(f"Cards file not found at {CARDS_PATH}")

    cards = load_cards(CARDS_PATH)

    # local_files_only=True forces use of the pre-populated cache.
    model = SentenceTransformer(
        MODEL_NAME,
        cache_folder=str(LOCAL_CACHE_DIR.resolve()),
        local_files_only=True,
    )

    search_texts = [build_search_text(card) for card in cards]
    # normalize_embeddings=True lets the query side rank by plain dot product.
    embeddings = model.encode(
        search_texts,
        normalize_embeddings=True,
        convert_to_numpy=True,
        show_progress_bar=True,
    )

    conn = sqlite3.connect(DB_PATH)
    try:
        conn.execute("PRAGMA foreign_keys=ON;")
        for card, vec, search_text in zip(cards, embeddings, search_texts):
            upsert_card(conn, card, search_text)
            upsert_embedding(conn, card["id"], MODEL_NAME, vec)
        conn.commit()
        print(f"Indexed {len(cards)} cards into {DB_PATH}")
    finally:
        conn.close()


if __name__ == "__main__":
    main()
diff --git a/cache_model.py b/cache_model.py
new file mode 100644
index 0000000..6478a50
--- /dev/null
+++ b/cache_model.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+from sentence_transformers import SentenceTransformer
+
# Embedding model to pre-fetch and the project-local directory that caches it.
MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"
LOCAL_CACHE_DIR = Path("models/hf")
+
+
def main() -> None:
    """Download the embedding model into the project-local cache directory."""
    LOCAL_CACHE_DIR.mkdir(parents=True, exist_ok=True)

    cache_dir = str(LOCAL_CACHE_DIR.resolve())
    os.environ.setdefault("HF_HOME", cache_dir)
    os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", cache_dir)

    print(f"Caching model: {MODEL_NAME}")
    print(f"Cache dir: {LOCAL_CACHE_DIR.resolve()}")

    model = SentenceTransformer(
        MODEL_NAME,
        cache_folder=cache_dir,
    )

    # An actual encode call makes sure every file the model needs is fetched,
    # not just the configuration.
    _ = model.encode(["test"], convert_to_numpy=True)

    print("Model cached successfully.")


if __name__ == "__main__":
    main()
diff --git a/cards.jsonl b/cards.jsonl
new file mode 100644
index 0000000..ed19844
--- /dev/null
+++ b/cards.jsonl
@@ -0,0 +1,31 @@
+{"id":"glxinfo_opengl_renderer","intent":["show which GPU OpenGL is using","see OpenGL renderer","check OpenGL vendor and renderer","find what graphics device OpenGL is using"],"command":"glxinfo -B","alternatives":["glxinfo | grep \"OpenGL\""],"explanation":"Shows the OpenGL vendor, renderer, and version in a short summary.","requires":["glxinfo"],"packages":{"fedora":["mesa-demos"],"debian":["mesa-utils"], "arch":["mesa-utils"]},"tags":["gpu","opengl","graphics","diagnostics"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"vulkaninfo_gpu","intent":["show which GPU Vulkan is using","see Vulkan driver info","check Vulkan renderer","find Vulkan device information"],"command":"vulkaninfo --summary","alternatives":["vulkaninfo | grep driverName"],"explanation":"Shows a concise summary of Vulkan devices and drivers.","requires":["vulkaninfo"],"packages":{"fedora":["vulkan-tools"],"debian":["vulkan-tools"]},"tags":["gpu","vulkan","graphics","diagnostics"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"lsblk_block_devices","intent":["list disks and partitions","show block devices","see drives and partitions","inspect storage devices"],"command":"lsblk","alternatives":["lsblk -f"],"explanation":"Lists block devices such as disks, partitions, and mount points.","requires":["lsblk"],"packages":{},"tags":["disk","storage","devices"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"df_h_disk_usage","intent":["show disk free space","check filesystem usage","see disk usage by filesystem","how full are my disks"],"command":"df -h","alternatives":[],"explanation":"Shows filesystem size, used space, and available space in human-readable units.","requires":["df"],"packages":{},"tags":["disk","filesystem","space"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"du_sh_directory_size","intent":["show size of current directory","check folder size","how large is this directory","measure directory disk usage"],"command":"du -sh .","alternatives":["du -sh *"],"explanation":"Shows total size of the current directory.","requires":["du"],"packages":{},"tags":["disk","directory","size"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"find_large_files","intent":["find large files","show biggest files under current directory","locate files larger than 500MB","search for huge files"],"command":"find . -type f -size +500M","alternatives":["find /path -type f -size +1G"],"explanation":"Finds files larger than a given size.","requires":["find"],"packages":{},"tags":["find","files","disk","cleanup"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"find_name_case_insensitive","intent":["find a file by name ignoring case","search for filename case insensitive","locate file by partial name","find matching filename"],"command":"find . -iname '*pattern*'","alternatives":[],"explanation":"Searches recursively for files whose names match a case-insensitive pattern.","requires":["find"],"packages":{},"tags":["find","files","search"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"rg_text_recursive","intent":["search text recursively in files","grep through a project","find text in source tree","search contents of files fast"],"command":"rg 'pattern'","alternatives":["grep -R \"pattern\" ."],"explanation":"Searches recursively for text, usually faster and cleaner than grep -R.","requires":["rg"],"packages":{"fedora":["ripgrep"],"debian":["ripgrep"]},"tags":["search","text","grep","files","code"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"ss_listening_ports","intent":["show open listening ports","list listening sockets","see what ports are open locally","check listening network services"],"command":"ss -ltnp","alternatives":["sudo ss -ltnp"],"explanation":"Shows listening TCP sockets and associated processes when permitted.","requires":["ss"],"packages":{},"tags":["network","ports","sockets","diagnostics"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"ss_process_on_port","intent":["find process using port 8080","see what is listening on a port","which program owns port 3000","identify service bound to port"],"command":"ss -ltnp | grep ':8080'","alternatives":["sudo ss -ltnp | grep ':8080'"],"explanation":"Filters listening sockets to the requested port.","requires":["ss","grep"],"packages":{},"tags":["network","ports","process"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"ip_addr_show","intent":["show IP addresses","list network interfaces and addresses","see local IPs","inspect interface addresses"],"command":"ip addr","alternatives":["ip -brief addr"],"explanation":"Shows network interfaces and assigned IP addresses.","requires":["ip"],"packages":{},"tags":["network","ip","interfaces"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"ip_route_show","intent":["show routing table","see default route","inspect routes","what route is this host using"],"command":"ip route","alternatives":[],"explanation":"Displays the kernel routing table, including the default route.","requires":["ip"],"packages":{},"tags":["network","routing"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"ping_basic","intent":["test network reachability","ping a host","check if a machine is reachable","see if DNS and network work"],"command":"ping -c 4 example.com","alternatives":[],"explanation":"Sends a few ICMP echo requests to test reachability and latency.","requires":["ping"],"packages":{},"tags":["network","diagnostics","latency"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"curl_headers","intent":["show HTTP headers","inspect response headers from a URL","check server response headers","see HTTP status and headers"],"command":"curl -I https://example.com","alternatives":["curl -sSI https://example.com"],"explanation":"Fetches only the response headers from a URL.","requires":["curl"],"packages":{},"tags":["http","curl","headers","web"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"curl_download_file","intent":["download a file from a URL","save URL to local file","fetch a file with curl","download file without browser"],"command":"curl -LO https://example.com/file.tar.gz","alternatives":["wget https://example.com/file.tar.gz"],"explanation":"Downloads a file and saves it with the remote name.","requires":["curl"],"packages":{},"tags":["http","download","curl"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"journalctl_boot_errors","intent":["show boot errors","inspect errors from current boot","check systemd errors since boot","look at boot logs"],"command":"journalctl -b -p err","alternatives":["journalctl -b"],"explanation":"Shows error-priority messages from the current boot.","requires":["journalctl"],"packages":{},"tags":["logs","systemd","boot","errors"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"journalctl_service_follow","intent":["follow logs for a systemd service","tail service logs","watch logs from sshd","stream unit logs"],"command":"journalctl -u sshd -f","alternatives":[],"explanation":"Follows logs for a specific systemd unit in real time.","requires":["journalctl"],"packages":{},"tags":["logs","systemd","service"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"ps_grep_process","intent":["find a running process by name","check if process is running","search process list","locate process from command name"],"command":"ps aux | grep '[n]ame'","alternatives":["pgrep -a name"],"explanation":"Searches the process list while avoiding matching the grep process itself.","requires":["ps","grep"],"packages":{},"tags":["process","ps","grep"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"pgrep_full_cmdline","intent":["find process by name with pid","show pid for matching process","lookup running process and command line","find process quickly"],"command":"pgrep -a name","alternatives":[],"explanation":"Prints matching PIDs along with their command lines.","requires":["pgrep"],"packages":{},"tags":["process","pid","lookup"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"free_h_memory","intent":["show memory usage","check RAM usage","how much memory is free","inspect memory consumption summary"],"command":"free -h","alternatives":[],"explanation":"Shows total, used, and available memory in human-readable units.","requires":["free"],"packages":{},"tags":["memory","ram","diagnostics"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"top_overview","intent":["show top processes","inspect CPU usage live","watch system activity","interactive process viewer"],"command":"top","alternatives":["htop"],"explanation":"Shows a live interactive overview of processes and resource usage.","requires":["top"],"packages":{"fedora":["htop"],"debian":["htop"]},"tags":["cpu","memory","process","monitoring"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"chmod_executable","intent":["make a script executable","add execute permission to file","chmod script so it can run directly","set executable bit"],"command":"chmod +x script.sh","alternatives":[],"explanation":"Adds execute permission so the file can be run directly.","requires":["chmod"],"packages":{},"tags":["permissions","chmod","scripts"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-modifying"}
+{"id":"tar_extract_gz","intent":["extract a tar.gz archive","unpack tgz file","untar compressed archive","open tarball"],"command":"tar -xvf archive.tar.gz","alternatives":["tar -xzf archive.tar.gz"],"explanation":"Extracts files from a tar archive; some users prefer -z explicitly for gzip.","requires":["tar"],"packages":{},"tags":["archive","tar","extract"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-modifying"}
+{"id":"tar_create_gz","intent":["create a tar.gz archive","compress a directory into tar.gz","make a tarball","archive files into gzip tar"],"command":"tar -cvzf archive.tar.gz dir/","alternatives":[],"explanation":"Creates a gzip-compressed tar archive from a directory.","requires":["tar"],"packages":{},"tags":["archive","tar","compress"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-modifying"}
+{"id":"systemctl_status_service","intent":["check status of a service","see if systemd service is running","inspect service health","show systemctl status"],"command":"systemctl status sshd","alternatives":["systemctl --no-pager status sshd"],"explanation":"Shows service state, recent logs, and unit metadata.","requires":["systemctl"],"packages":{},"tags":["systemd","service","status"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"systemctl_restart_service","intent":["restart a service","bounce systemd unit","restart sshd","reload service by restarting it"],"command":"sudo systemctl restart sshd","alternatives":["sudo systemctl try-restart sshd"],"explanation":"Restarts a systemd service; usually requires root privileges.","requires":["systemctl","sudo"],"packages":{},"tags":["systemd","service","restart"],"platform":["linux"],"shell":["bash","zsh","fish"],"safety":"privileged-modifying"}
+{"id":"dnf_which_package_owns_file","intent":["find which package owns a file","what rpm provides this path","determine package for installed file","lookup file owner package"],"command":"rpm -qf /path/to/file","alternatives":["dnf provides /path/to/file"],"explanation":"Shows which installed package owns a given file; dnf provides can search repos too.","requires":["rpm"],"packages":{},"tags":["package","rpm","dnf","files"],"platform":["linux","fedora"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"dnf_search_package","intent":["search for a package in dnf","find package by name or description","lookup package in repositories","search repo packages"],"command":"dnf search keyword","alternatives":[],"explanation":"Searches package metadata in enabled repositories.","requires":["dnf"],"packages":{},"tags":["package","dnf","search"],"platform":["linux","fedora"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"git_status_short","intent":["show git status briefly","check repo state","see changed files in git","short git status"],"command":"git status --short","alternatives":["git status"],"explanation":"Shows tracked and untracked file changes in a compact format.","requires":["git"],"packages":{},"tags":["git","status","repo"],"platform":["linux","macos"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"git_log_oneline_graph","intent":["show concise git history","see commit graph briefly","inspect recent commits compactly","git log one line graph"],"command":"git log --oneline --graph --decorate -n 20","alternatives":[],"explanation":"Shows a compact decorated commit graph for recent history.","requires":["git"],"packages":{},"tags":["git","history","commits"],"platform":["linux","macos"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
+{"id":"git_show_changed_files","intent":["show files changed in last commit","list files changed by commit","see modified files from HEAD","inspect changed paths in commit"],"command":"git show --name-only --oneline HEAD","alternatives":["git diff --name-only HEAD~1 HEAD"],"explanation":"Shows the last commit summary and the files it changed.","requires":["git"],"packages":{},"tags":["git","diff","files","commits"],"platform":["linux","macos"],"shell":["bash","zsh","fish"],"safety":"safe-readonly"}
diff --git a/cheat.db b/cheat.db
new file mode 100644
index 0000000..ce146cf
--- /dev/null
+++ b/cheat.db
Binary files differ
diff --git a/init_db.py b/init_db.py
new file mode 100644
index 0000000..a401a91
--- /dev/null
+++ b/init_db.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import sqlite3
+from pathlib import Path
+
# SQLite database file, created in the current working directory.
DB_PATH = Path("cheat.db")
+
+
def main() -> None:
    """Create the cheat.db schema: cards, card_embeddings, and indexes.

    Idempotent — every statement uses IF NOT EXISTS, so re-running is safe.
    """
    DB_PATH.parent.mkdir(parents=True, exist_ok=True)

    conn = sqlite3.connect(DB_PATH)
    try:
        # WAL journal for better concurrent reads; foreign_keys so the
        # embeddings table's ON DELETE CASCADE actually fires.
        conn.execute("PRAGMA journal_mode=WAL;")
        conn.execute("PRAGMA foreign_keys=ON;")

        # One row per command card; list/dict fields are JSON text columns.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS cards (
                id TEXT PRIMARY KEY,
                command TEXT NOT NULL,
                explanation TEXT NOT NULL,
                intent_json TEXT NOT NULL,
                alternatives_json TEXT NOT NULL,
                requires_json TEXT NOT NULL,
                packages_json TEXT NOT NULL,
                tags_json TEXT NOT NULL,
                platform_json TEXT NOT NULL,
                shell_json TEXT NOT NULL,
                safety TEXT NOT NULL,
                search_text TEXT NOT NULL,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                updated_at TEXT DEFAULT CURRENT_TIMESTAMP
            );
        """)

        # One embedding per card; deleting a card removes its embedding.
        conn.execute("""
            CREATE TABLE IF NOT EXISTS card_embeddings (
                card_id TEXT PRIMARY KEY,
                model_name TEXT NOT NULL,
                embedding_blob BLOB NOT NULL,
                embedding_dim INTEGER NOT NULL,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP,
                FOREIGN KEY(card_id) REFERENCES cards(id) ON DELETE CASCADE
            );
        """)

        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_cards_command
            ON cards(command);
        """)

        conn.execute("""
            CREATE INDEX IF NOT EXISTS idx_cards_safety
            ON cards(safety);
        """)

        conn.commit()
        print(f"Initialized database at {DB_PATH}")
    finally:
        conn.close()


if __name__ == "__main__":
    main()
diff --git a/inspect_db.py b/inspect_db.py
new file mode 100644
index 0000000..0419f26
--- /dev/null
+++ b/inspect_db.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import sqlite3
+from pathlib import Path
+
# Fixed: was "storage/cmdhelp.db", a stale path from an earlier project name.
# Every other script (init_db.py, build_index.py, query_index.py) and the
# committed database use cheat.db at the repo root.
DB_PATH = Path("cheat.db")
+
+
def main() -> None:
    """Print row counts and a small sample of cards as a sanity check."""
    conn = sqlite3.connect(DB_PATH)
    try:
        n_cards = conn.execute("SELECT COUNT(*) FROM cards").fetchone()[0]
        n_embeddings = conn.execute("SELECT COUNT(*) FROM card_embeddings").fetchone()[0]

        print(f"cards: {n_cards}")
        print(f"embeddings: {n_embeddings}")
        print()

        # Show the first few cards so schema problems are visible at a glance.
        sample = conn.execute("""
            SELECT id, command, safety
            FROM cards
            ORDER BY id
            LIMIT 10
        """).fetchall()

        for entry in sample:
            print(entry)
    finally:
        conn.close()


if __name__ == "__main__":
    main()
diff --git a/query_index.py b/query_index.py
new file mode 100644
index 0000000..7dee4d3
--- /dev/null
+++ b/query_index.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import argparse
+import json
+import sqlite3
+from pathlib import Path
+from typing import Any
+
+import numpy as np
+from sentence_transformers import SentenceTransformer
+import os
+from pathlib import Path
+
# Database produced by init_db.py and populated by build_index.py.
DB_PATH = Path("cheat.db")

# Resolve the embedding model from the project-local cache; the env vars are
# set before any model load so Hugging Face looks in models/hf first.
LOCAL_CACHE_DIR = Path("models/hf")
os.environ.setdefault("HF_HOME", str(LOCAL_CACHE_DIR.resolve()))
os.environ.setdefault("SENTENCE_TRANSFORMERS_HOME", str(LOCAL_CACHE_DIR.resolve()))
+
def deserialize_embedding(blob: bytes, dim: int) -> np.ndarray:
    """Decode a float32 BLOB into a 1-D vector, validating its length.

    Raises ValueError if the decoded length differs from the stored dim.
    """
    decoded = np.frombuffer(blob, dtype=np.float32)
    if decoded.shape[0] != dim:
        raise ValueError(f"Embedding length mismatch: expected {dim}, got {decoded.shape[0]}")
    return decoded
+
+
def load_index(conn: sqlite3.Connection) -> tuple[list[dict[str, Any]], np.ndarray, str]:
    """Load every indexed card and its embedding from SQLite.

    Returns (cards, matrix, model_name) where matrix row i is the embedding
    of cards[i]. Raises RuntimeError if no rows are indexed yet or if rows
    were embedded with more than one model.
    """
    rows = conn.execute("""
        SELECT
            c.id,
            c.command,
            c.explanation,
            c.intent_json,
            c.alternatives_json,
            c.requires_json,
            c.packages_json,
            c.tags_json,
            c.platform_json,
            c.shell_json,
            c.safety,
            e.model_name,
            e.embedding_blob,
            e.embedding_dim
        FROM cards c
        JOIN card_embeddings e ON c.id = e.card_id
        ORDER BY c.id
    """).fetchall()

    if not rows:
        raise RuntimeError("No indexed cards found. Run build_index.py first.")

    cards: list[dict[str, Any]] = []
    vectors: list[np.ndarray] = []
    model_name: str | None = None

    for row in rows:
        # Column order matches the SELECT list above.
        (
            card_id,
            command,
            explanation,
            intent_json,
            alternatives_json,
            requires_json,
            packages_json,
            tags_json,
            platform_json,
            shell_json,
            safety,
            row_model_name,
            embedding_blob,
            embedding_dim,
        ) = row

        # All embeddings must come from one model, or similarity scores
        # would not be comparable.
        if model_name is None:
            model_name = row_model_name
        elif model_name != row_model_name:
            raise RuntimeError("Mixed embedding models found in the index.")

        cards.append({
            "id": card_id,
            "command": command,
            "explanation": explanation,
            "intent": json.loads(intent_json),
            "alternatives": json.loads(alternatives_json),
            "requires": json.loads(requires_json),
            "packages": json.loads(packages_json),
            "tags": json.loads(tags_json),
            "platform": json.loads(platform_json),
            "shell": json.loads(shell_json),
            "safety": safety,
        })
        vectors.append(deserialize_embedding(embedding_blob, embedding_dim))

    matrix = np.vstack(vectors)
    return cards, matrix, model_name
+
+
def search(
    query: str,
    top_k: int = 5,
) -> list[dict[str, Any]]:
    """Return the top_k cards most similar to a natural-language query.

    Each result is a card dict plus a float "score" key. Embeddings were
    stored normalized and the query is encoded with
    normalize_embeddings=True, so the dot product is cosine similarity.
    """
    conn = sqlite3.connect(DB_PATH)
    try:
        cards, matrix, model_name = load_index(conn)
    finally:
        conn.close()

    # Use the same model the index was built with, from the local cache only.
    model = SentenceTransformer(
        model_name,
        cache_folder=str(LOCAL_CACHE_DIR.resolve()),
        local_files_only=True,
    )

    qvec = model.encode([query], normalize_embeddings=True, convert_to_numpy=True)[0]

    # Rank by similarity, best first.
    scores = matrix @ qvec
    top_indices = np.argsort(scores)[::-1][:top_k]

    results: list[dict[str, Any]] = []
    for idx in top_indices:
        # Copy so the score key does not mutate the loaded card dicts.
        card = dict(cards[idx])
        card["score"] = float(scores[idx])
        results.append(card)

    return results
+
+
def main() -> None:
    """CLI entry point: parse the query, run search(), print ranked hits."""
    parser = argparse.ArgumentParser(description="Query the local command card index.")
    parser.add_argument("query", type=str, help="Natural language query")
    parser.add_argument("--top-k", type=int, default=5, help="Number of results to return")
    args = parser.parse_args()

    results = search(args.query, top_k=args.top_k)

    for i, result in enumerate(results, start=1):
        print(f"[{i}] score={result['score']:.4f} id={result['id']}")
        print(f"    command: {result['command']}")
        print(f"    explanation: {result['explanation']}")
        if result["alternatives"]:
            print(f"    alternatives: {', '.join(result['alternatives'])}")
        print(f"    intent: {', '.join(result['intent'][:3])}")
        print()


if __name__ == "__main__":
    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..24c6e0b
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+sentence-transformers>=3.0.0
+numpy>=1.26.0