from src.embeddings_model import GEmbeddings
from src.text_generation_model import GLLM
from src.pinecone_index import PineconeIndex
from typing import List
import asyncio
from llama_index.core.evaluation import SemanticSimilarityEvaluator, EvaluationResult
from llama_index.core.base.embeddings.base import SimilarityMode
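
# Prompt template for role-playing as Gerard. Its placeholders are filled with
# the formatted chat history, the context retrieved from the index, and the
# latest user query before the prompt is sent to the LLM.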
prompt_template = """
<system instruction>
You are Gerard Lee. Gerard is a data enthusiast and humble about his success. Imagine you are in a conversation with a potential employer.
Reply as faithfully as possible and in no more than 5 complete sentences unless the <user query> requests to elaborate in detail. Use contents from <context> only, without prior knowledge, except referring to <chat history> for seamless conversation.
</system instruction>
<chat history>
{context_history}
</chat history>
<context>
{context_from_index}
</context>
<user query>
{user_query}
</user query>
"""


class GLlamaIndex:
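    """Retrieval-augmented generation helper that pairs a Pinecone-backed
    llama-index with an LLM to answer questions as Gerard, evaluating
    candidate contexts whenever chat history is available."""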
def __init__(
self,
logger,
emb_model: GEmbeddings,
text_model: GLLM,
index: PineconeIndex,
similarity_threshold: float
) -> None:
self.logger = logger
self.emb_model = emb_model
self.llm = text_model
self.index = index
self.evaluator = self._set_evaluator(similarity_threshold)
        self.prompt_template = prompt_template

    def _set_evaluator(self, similarity_threshold: float) -> SemanticSimilarityEvaluator:
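        """Build a semantic similarity evaluator with the given score threshold."""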
sem_evaluator = SemanticSimilarityEvaluator(
similarity_mode=SimilarityMode.DEFAULT,
similarity_threshold=similarity_threshold,
)
        return sem_evaluator

    def format_history(self, history: List[str]) -> str:
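        """Join non-empty history entries into a newline-separated string."""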
return "\n".join(list(filter(None, history)))
async def aget_context_with_history(
self,
query: str,
history: List[str]
) -> str:
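        """Retrieve context for the query. With history, retrieve for both the
        bare query and a history-extended query, then keep the context whose
        evaluation score is higher."""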
if not history:
result = await self.index.retrieve_context(query)
return result["result"]
extended_query = f"<chat history>]\n{history[-1]}\n</chat history><new query>\n{query}\n</new query>"
print(history[-1], history[:-1])
results = await self.index.aretrieve_context_multi(
[query, extended_query]
)
        self.logger.debug(f"Retrieved contexts: {results}\n")
eval_results = await self.aevaluate_context_multi(
[query, extended_query],
[r["result"] for r in results]
)
        self.logger.debug(f"Evaluation results: {eval_results}\n")
return results[0]["result"] if eval_results[0].score > eval_results[1].score \
else results[1]["result"]
async def aevaluate_context(
self,
query: str,
returned_context: str
    ) -> EvaluationResult:
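        """Score the semantic similarity between the query and the retrieved context."""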
result = await self.evaluator.aevaluate(
response=returned_context,
reference=query,
)
        return result

    async def aevaluate_context_multi(
self,
query_list: List[str],
returned_context_list: List[str]
    ) -> List[EvaluationResult]:
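        """Evaluate multiple (query, context) pairs concurrently."""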
        results = await asyncio.gather(
            *(
                self.aevaluate_context(query, returned_context)
                for query, returned_context in zip(query_list, returned_context_list)
            )
        )
        return results

def generate_text(
self,
query: str,
history: List[str],
) -> str:
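        """Entry point for one chat turn: retrieve (and, when history exists,
        evaluate) context, then generate a reply with the prompt template."""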
# get chat history
context_history = self.format_history(history=history)
# get retrieval context(s) from llama-index vectorstore index
try:
# without history, single context retrieval without evaluation
if not history:
# w&b trace retrieval context
result_query_only = self.index.retrieve_context(query)
context_from_index_selected = result_query_only["result"]
# with history, multiple context retrieval with async, then evaluation to determine which context to choose
else:
                context_from_index_selected = asyncio.run(
                    self.aget_context_with_history(query=query, history=history)
                )
        except Exception as e:
            self.logger.error(f"Exception {e} occurred when retrieving context\n")
            return "Something went wrong. Please try again later."
self.logger.info(f"Context from Llama-Index:\n{context_from_index_selected}\n")
        # Fill the prompt template so the LLM role-plays as Gerard.
        prompt_with_context = self.prompt_template.format(
            context_history=context_history,
            context_from_index=context_from_index_selected,
            user_query=query,
        )
        try:
            result = self.llm.gai_generate_content(
                prompt=prompt_with_context,
                temperature=0.5,
            )
            if result is None:
                result = "Seems something went wrong. Please try again later."
                self.logger.error("Result with 'None' received\n")
        except Exception as e:
            result = "Seems something went wrong. Please try again later."
            self.logger.error(f"Exception {e} occurred\n")
        return result
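
# Minimal usage sketch (hypothetical wiring; assumes constructed GEmbeddings,
# GLLM, and PineconeIndex instances plus a standard logging.Logger, and an
# illustrative similarity threshold):
#
#   gli = GLlamaIndex(logger, emb_model, text_model, index, similarity_threshold=0.7)
#   reply = gli.generate_text(query="Tell me about your data projects", history=[])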