Maintenance_website / retriever.py
Prathamesh1420's picture
Create retriever.py
8077ead verified
import os
import json
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Shared sentence-embedding model, instantiated once when this module is
# imported. Both build_embeddings() and find_video() encode text through this
# same object, so stored descriptions and incoming queries are comparable.
model = SentenceTransformer("all-MiniLM-L6-v2")
def build_embeddings(video_folder="data/videos", output_file="data/video_metadata.json"):
    """Index the videos in *video_folder* and write their embeddings to JSON.

    Every file whose name ends in ``.mp4``, ``.mov`` or ``.avi`` (matched
    case-insensitively) gets a description derived from its file name: the
    extension is dropped and underscores become spaces. The description is
    encoded with the module-level ``model`` and stored next to the file name.

    Args:
        video_folder: Directory to scan (not recursive).
        output_file: Path of the JSON file to write. Missing parent
            directories are created.

    The output is a JSON list of objects with the keys ``"file"``,
    ``"description"`` and ``"embedding"`` (the embedding as a list of floats).
    """
    video_extensions = (".mp4", ".mov", ".avi")
    metadata = []

    # Sort the listing: os.listdir() returns entries in arbitrary order, and a
    # stable order keeps the index (and tie-breaking at query time) repeatable.
    for file in sorted(os.listdir(video_folder)):
        if not file.lower().endswith(video_extensions):
            continue

        # Strip only the trailing extension. Chained str.replace() calls would
        # also delete ".mp4"/".mov"/".avi" occurring in the middle of a name.
        stem = os.path.splitext(file)[0]
        description = stem.replace("_", " ")
        embedding = model.encode(description).tolist()
        metadata.append({
            "file": file,
            "description": description,
            "embedding": embedding,
        })

    # Make sure the destination directory exists; dirname is "" when the
    # output path has no directory component, in which case nothing is needed.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(metadata, f)
def find_video(user_query, metadata_file="data/video_metadata.json"):
    """Return the indexed video whose description best matches *user_query*.

    The query is encoded with the module-level ``model`` and compared, by
    cosine similarity, against every embedding stored in *metadata_file*
    (the JSON list written by ``build_embeddings``).

    Args:
        user_query: Free-text query to match against video descriptions.
        metadata_file: Path to the JSON index to search.

    Returns:
        A ``(file, description)`` tuple for the highest-scoring entry. When
        several entries tie, the first one in the index wins.

    Raises:
        ValueError: If the index contains no videos.
    """
    with open(metadata_file, "r", encoding="utf-8") as f:
        videos = json.load(f)

    # Fail early with an actionable message instead of letting an empty index
    # surface later as an opaque "max() arg is an empty sequence" error.
    if not videos:
        raise ValueError(
            f"No videos indexed in {metadata_file!r}; run build_embeddings() first."
        )

    query_embedding = model.encode(user_query).reshape(1, -1)

    # Score all videos in a single call: the result has shape (1, n_videos),
    # so row 0 holds one similarity per indexed video.
    video_embeddings = [v["embedding"] for v in videos]
    similarities = cosine_similarity(query_embedding, video_embeddings)[0]

    # argmax returns the first maximum, matching list.index(max(...)).
    best = videos[int(similarities.argmax())]
    return best["file"], best["description"]