from fastapi import FastAPI, HTTPException, Request
from fastapi.responses import JSONResponse, RedirectResponse
from pydantic import BaseModel
from sentence_transformers import SentenceTransformer, util
from transformers import pipeline
from typing import List
import numpy as np

app = FastAPI()

# Load models once at startup: a sentence-embedding model for retrieval,
# an extractive question-answering model, and an abstractive summarizer.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
question_model = "deepset/tinyroberta-squad2"
nlp = pipeline('question-answering', model=question_model, tokenizer=question_model)
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# Minimum semantic-search score for a context passage to count as relevant.
SCORE_THRESHOLD = 0.4


class ModifyQueryRequest_v3(BaseModel):
    """Request body for /modify_query_v3: a batch of query strings to embed."""
    query_string_list: List[str]


class T5QuestionRequest(BaseModel):
    """Request body for /t5answer: free text to be summarized."""
    context: str


class T5Response(BaseModel):
    """Response body for /t5answer."""
    answer: str


@app.post("/modify_query")
async def modify_query(request: Request):
    """Encode a single query string into a binary embedding.

    Expects JSON: ``{"query_string": "<text>"}``.
    Returns JSON: ``{"embeddings": [...]}`` — the binary embedding as a list.
    Raises HTTP 500 on any failure (missing key, encoding error).
    """
    try:
        raw_data = await request.json()
        binary_embeddings = model.encode([raw_data['query_string']], precision="binary")
        return JSONResponse(content={'embeddings': binary_embeddings[0].tolist()})
    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e))


@app.post("/modify_query_v3")
async def modify_query_v3(request: ModifyQueryRequest_v3):
    """Encode a batch of query strings into float embeddings.

    Uses the declared ``ModifyQueryRequest_v3`` model (previously defined but
    unused), so malformed request bodies are rejected with a 422 validation
    error instead of surfacing as opaque 500s. The success-path contract is
    unchanged: JSON in ``{"query_string_list": [...]}``, JSON out
    ``{"embeddings": [[...], ...]}``.
    """
    try:
        embeddings = model.encode(request.query_string_list)
        return JSONResponse(content={'embeddings': [emb.tolist() for emb in embeddings]})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in modifying query v3: {str(e)}")


@app.post("/answer_question")
async def answer_question(request: Request):
    """Answer a question against a list of context passages.

    Expects JSON with keys ``question`` (str), ``context`` and ``locations``
    (assumed to be parallel lists — inferred from the shared indexing below;
    TODO confirm against callers). Semantic search ranks the passages against
    the question; passages scoring above SCORE_THRESHOLD are concatenated and
    fed to the extractive QA pipeline. Returns the answer plus the locations
    of the passages used, or an apology string when nothing is relevant.
    Raises HTTP 500 on any failure.
    """
    try:
        raw_data = await request.json()
        res_locs = []
        context_string = ''
        corpus_embeddings = model.encode(raw_data['context'], convert_to_tensor=True)
        query_embeddings = model.encode(raw_data['question'], convert_to_tensor=True)
        hits = util.semantic_search(query_embeddings, corpus_embeddings)
        # Collect the passages (and their locations) deemed relevant enough.
        for hit in hits[0]:
            if hit['score'] > SCORE_THRESHOLD:
                loc = hit['corpus_id']
                res_locs.append(raw_data['locations'][loc])
                context_string += raw_data['context'][loc] + ' '
        if not res_locs:
            # No passage cleared the threshold — return a canned apology.
            answer = "Sorry, I couldn't find any results for your query. Please try again!"
        else:
            # Extract an answer span from the concatenated relevant passages.
            QA_input = {
                'question': raw_data['question'],
                'context': context_string.replace('\n', ' ')
            }
            result = nlp(QA_input)
            answer = result['answer']
        return JSONResponse(content={'answer': answer, "location": res_locs})
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in answering question: {str(e)}")


@app.post("/t5answer", response_model=T5Response)
async def t5answer(request: T5QuestionRequest):
    """Summarize the supplied context with the BART summarization pipeline.

    NOTE(review): despite the endpoint name, the model used is
    facebook/bart-large-cnn, not T5. Raises HTTP 500 on any failure.
    """
    try:
        response = summarizer(request.context, max_length=130, min_length=30, do_sample=False)
        return T5Response(answer=response[0]["summary_text"])
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error in T5 summarization: {str(e)}")


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=8000)