from fastapi import FastAPI, Query
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from together import Together
import os

# Embed queries locally with a sentence-transformers model and open the
# Chroma index persisted in the chroma_db directory.
embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
vectordb = Chroma(persist_directory="chroma_db", embedding_function=embedding)

# The Together client picks up the API key from the environment.
TOGETHER_API_KEY = os.getenv("TOGETHER_API_KEY", "")
client = Together(api_key=TOGETHER_API_KEY)


def call_llama(prompt: str) -> str:
    """Send the prompt to Llama 3 via the Together API and return the reply text."""
    response = client.chat.completions.create(
        model="meta-llama/Llama-3-8b-chat-hf",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": prompt},
        ],
    )
    return response.choices[0].message.content


app = FastAPI()


@app.get("/ask")
async def ask(q: str = Query(..., description="Your question")):
    # Retrieve the three most similar chunks, join them into a single
    # context block, and have the model answer from that context.
    docs = vectordb.similarity_search(q, k=3)
    context = "\n".join(doc.page_content for doc in docs)
    final_prompt = (
        "Use the context below to answer the question.\n\n"
        f"Context:\n{context}\n\nQuestion: {q}"
    )
    answer = call_llama(final_prompt)
    return {"answer": answer}
|
|
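The endpoint assumes a Chroma index already exists in chroma_db. A minimal ingestion sketch could build one with the same embedding model; the docs.txt filename here is a placeholder for whatever corpus you want to index, and the chunk sizes are just reasonable defaults, not values from the original setup:

# Hypothetical one-off ingestion script: chunk a local text file and
# persist the chunks into the chroma_db directory the API reads from.
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_text_splitters import RecursiveCharacterTextSplitter

embedding = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)

with open("docs.txt") as f:  # placeholder corpus file, not from the original
    chunks = splitter.split_text(f.read())

Chroma.from_texts(chunks, embedding, persist_directory="chroma_db")

The embedding model must match between ingestion and query time, which is why both scripts use all-MiniLM-L6-v2.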
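To try it out, assuming the file is saved as main.py: export TOGETHER_API_KEY, start the server with uvicorn main:app --reload, and request http://localhost:8000/ask?q=your+question to get back a JSON body of the form {"answer": "..."}.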