from fastapi import FastAPI, Query
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from together import Together
import os

# Embed queries with the same sentence-transformers model used to build the index.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Open the persisted Chroma index (built ahead of time in ./chroma_db).
vectordb = Chroma(persist_directory="chroma_db", embedding_function=embedding)

# Together AI client; the key is read from the environment rather than hard-coded.
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", "")
client = Together(api_key=TOGETHER_API_KEY)


def call_llama(prompt: str) -> str:
    """Send the prompt to Llama 3 8B via Together's chat completions API."""
    response = client.chat.completions.create(
        model="meta-llama/Llama-3-8b-chat-hf",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content


app = FastAPI()


# A plain `def` (not `async def`) lets FastAPI run this handler in a threadpool:
# both the vector search and the Together call are blocking, and awaiting nothing
# inside an `async def` would stall the event loop for every concurrent request.
@app.get("/ask")
def ask(q: str = Query(..., description="Your question")):
    # Retrieve the three most similar chunks and stitch them into a context block.
    docs = vectordb.similarity_search(q, k=3)
    context = "\n".join(doc.page_content for doc in docs)
    final_prompt = (
        "Use the context below to answer the question.\n\n"
        f"Context:\n{context}\n\nQuestion: {q}"
    )
    answer = call_llama(final_prompt)
    return {"answer": answer}
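
# --- Usage notes (a sketch, not part of the original snippet) ---
# The service assumes a Chroma index already exists in ./chroma_db. If you need
# to build one, something like the following works with the same embedding
# model (the sample texts here are purely illustrative):
#
#     from langchain_community.vectorstores import Chroma
#     from langchain_community.embeddings import HuggingFaceEmbeddings
#
#     texts = ["Your documents go here.", "One string per chunk."]
#     embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
#     Chroma.from_texts(texts, embedding, persist_directory="chroma_db")
#
# Then run the app (the filename main.py is an assumption) and query it:
#
#     export TOGETHER_API_KEY="your-key"
#     uvicorn main:app --reload
#     curl "http://127.0.0.1:8000/ask?q=What+do+the+docs+say%3F"
#     # -> {"answer": "..."}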