Initial commit

author: twells46 <173561638+twells46@users.noreply.github.com> 2026-04-01 15:20:50 -0500
committer: twells46 <173561638+twells46@users.noreply.github.com> 2026-04-01 15:20:50 -0500
commit: 2f37974a4c84f7ffdd07e2c223eba2d8bd981b61 (patch)
tree: 1741f17884077e9d4e0dbfe5908305fc21661ced /cache_model.py
1 files changed, 34 insertions, 0 deletions
diff --git a/cache_model.py b/cache_model.py
new file mode 100644
index 0000000..6478a50
--- /dev/null
+++ b/cache_model.py
@@ -0,0 +1,34 @@
+#!/usr/bin/env python3
+from __future__ import annotations
+
+import os
+from pathlib import Path
+
+from sentence_transformers import SentenceTransformer
+
+MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"  # model identifier handed to SentenceTransformer in main()
+LOCAL_CACHE_DIR = Path("models/hf")  # relative path; main() creates it and resolves it to an absolute path
+
+
+def main() -> None:
+    """Cache MODEL_NAME under LOCAL_CACHE_DIR and run one test encode."""
+    LOCAL_CACHE_DIR.mkdir(parents=True, exist_ok=True)
+    cache_dir = str(LOCAL_CACHE_DIR.resolve())
+
+    # setdefault leaves any value the caller already exported untouched.
+    for env_var in ("HF_HOME", "SENTENCE_TRANSFORMERS_HOME"):
+        os.environ.setdefault(env_var, cache_dir)
+
+    print(f"Caching model: {MODEL_NAME}")
+    print(f"Cache dir: {cache_dir}")
+
+    embedder = SentenceTransformer(MODEL_NAME, cache_folder=cache_dir)
+
+    # Run a real encode so every file the model needs gets loaded.
+    _ = embedder.encode(["test"], convert_to_numpy=True)
+
+    print("Model cached successfully.")
+
+
+if __name__ == "__main__":  # run main() only when executed as a script, not on import
+    main()
author	twells46 <173561638+twells46@users.noreply.github.com>	2026-04-01 15:20:50 -0500
committer	twells46 <173561638+twells46@users.noreply.github.com>	2026-04-01 15:20:50 -0500
commit	2f37974a4c84f7ffdd07e2c223eba2d8bd981b61 (patch)
tree	1741f17884077e9d4e0dbfe5908305fc21661ced /cache_model.py