from fastapi import FastAPI, Query
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from together import Together
import os

# Embedding model and the persisted Chroma vector store built from it
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory="chroma_db", embedding_function=embedding)
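
# chroma_db is assumed to have been populated beforehand. A minimal
# indexing sketch (hypothetical ingest script; the source file name and
# chunking parameters below are placeholders, but the embedding model
# must match the one used at query time):
#
#   from langchain_community.vectorstores import Chroma
#   from langchain_community.embeddings import HuggingFaceEmbeddings
#   from langchain_text_splitters import RecursiveCharacterTextSplitter
#
#   splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
#   chunks = splitter.split_text(open("docs.txt").read())
#   Chroma.from_texts(
#       chunks,
#       embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
#       persist_directory="chroma_db",
#   )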

# Together client; the API key is read from the TOGETHER_API_KEY env var
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", "")
client = Together(api_key=TOGETHER_API_KEY)


def call_llama(prompt: str) -> str:
    """Send the prompt to Llama 3 via the Together chat completions API."""
    response = client.chat.completions.create(
        model="meta-llama/Llama-3-8b-chat-hf",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content

app = FastAPI()


@app.get("/ask")
def ask(q: str = Query(..., description="Your question")):
    # Plain def (not async): FastAPI runs the blocking vector-store and
    # LLM calls in its threadpool instead of stalling the event loop.
    # Retrieve the three most similar chunks and stuff them into the prompt.
    docs = vectordb.similarity_search(q, k=3)
    context = "\n".join(doc.page_content for doc in docs)
    final_prompt = f"Use the context below to answer the question.\n\nContext:\n{context}\n\nQuestion: {q}"
    answer = call_llama(final_prompt)
    return {"answer": answer}