Nivetha29 committed
Commit fe30477 · 1 Parent(s): d08aea1

Add application file

Files changed (1): llm_qa_bot.py +116 -0
llm_qa_bot.py ADDED
@@ -0,0 +1,116 @@
+ # -*- coding: utf-8 -*-
+ """LLM-QA-BOT.ipynb
+
+ Automatically generated by Colaboratory.
+
+ Original file is located at
+ https://colab.research.google.com/drive/1nx-Eaf7yni5D_8u2SkfAdYg_zcXwOhBA
+ """
+
+ !nvidia-smi
+
+ !pip install -q langchain gpt-index llama-index transformers sentence_transformers
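+
+ # NOTE: this notebook assumes the early-2023 APIs of llama-index/langchain
+ # (GPTSimpleVectorIndex, LLMPredictor, LangchainEmbedding). The unpinned
+ # install above will pull newer releases where these names no longer exist;
+ # pinning contemporaneous versions, e.g. "llama-index<0.5" "langchain<0.1",
+ # is one plausible way to reproduce the original environment.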
+
+ from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper, LLMPredictor
+ import torch
+ from langchain.embeddings.huggingface import HuggingFaceEmbeddings
+ from langchain.llms.base import LLM
+ from transformers import pipeline
+
+ class FlanLLM(LLM):
+     """Custom LangChain LLM wrapping a local Flan-T5 text2text pipeline."""
+     model_name = "google/flan-t5-base"
+     pipeline = pipeline("text2text-generation", model=model_name, device=0,
+                         model_kwargs={"torch_dtype": torch.bfloat16})
+
+     def _call(self, prompt, stop=None):
+         return self.pipeline(prompt, max_length=9999)[0]["generated_text"]
+
+     # LangChain's base LLM defines these two as properties.
+     @property
+     def _identifying_params(self):
+         return {"name_of_model": self.model_name}
+
+     @property
+     def _llm_type(self):
+         return "custom"
+
+
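+ # Optional sanity check: confirm the wrapped pipeline generates text before
+ # wiring it into the index (hypothetical snippet, not part of the app):
+ # print(FlanLLM.pipeline("What is computer vision?")[0]["generated_text"])
+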
+ llm_predictor = LLMPredictor(llm=FlanLLM())
+
+ hfemb = HuggingFaceEmbeddings()
+ embed_model = LangchainEmbedding(hfemb)
+
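+ # With no arguments, HuggingFaceEmbeddings falls back to its default
+ # sentence-transformers encoder (all-mpnet-base-v2 in langchain releases of
+ # this era); pass model_name=... to swap in a different one.
+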
+ text1 = """Computer vision is a field of artificial intelligence (AI) that enables computers and systems to derive meaningful information from digital images, videos and other visual inputs — and take actions or make recommendations based on that information. If AI enables computers to think, computer vision enables them to see, observe and understand.
+
+ Computer vision works much the same as human vision, except humans have a head start. Human sight has the advantage of lifetimes of context to train how to tell objects apart, how far away they are, whether they are moving and whether there is something wrong in an image.
+
+ Computer vision trains machines to perform these functions, but it has to do it in much less time with cameras, data and algorithms rather than retinas, optic nerves and a visual cortex. Because a system trained to inspect products or watch a production asset can analyze thousands of products or processes a minute, noticing imperceptible defects or issues, it can quickly surpass human capabilities.
+
+ Computer vision is used in industries ranging from energy and utilities to manufacturing and automotive – and the market is continuing to grow. It is expected to reach USD 48.6 billion by 2022."""
+
+ # documents = SimpleDirectoryReader('data').load_data()
+
+ from llama_index import Document
+
+ text_list = [text1]
+ documents = [Document(t) for t in text_list]
+
+ # set number of output tokens
+ num_output = 250
+ # set maximum input size
+ max_input_size = 512
+ # set maximum chunk overlap
+ max_chunk_overlap = 20
+
+ prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
+
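+ # max_input_size=512 matches flan-t5's encoder context window; num_output=250
+ # reserves room for the generated answer when PromptHelper packs document
+ # chunks into each prompt, and max_chunk_overlap controls how many tokens
+ # adjacent chunks share.
+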
+ # index = GPTSimpleVectorIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor)
+
+ index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+
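+ # GPTListIndex stores the chunks as a flat list and scans all of them at query
+ # time, which is fine for one short document. The commented-out
+ # GPTSimpleVectorIndex variant instead embeds each chunk and retrieves only
+ # the most similar ones per query, as the Gradio app below does.
+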
+ # index.save_to_disk('index.json')
+
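+ # The matching call to reload a saved index under this API would be, e.g.:
+ # index = GPTListIndex.load_from_disk('index.json', llm_predictor=llm_predictor, embed_model=embed_model)
+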
+ import logging
+
+ # Suppress the INFO-level token-usage logs llama_index emits on every query.
+ logging.getLogger().setLevel(logging.CRITICAL)
+
+ # response = index.query("What is computer vision?")
+ response = index.query("It is expected to reach USD 48.6 billion by")
+ print(response)
+
+ # Frontend
+
+ !pip install -q gradio
+
+ import gradio as gr
+
+ # Global index, rebuilt from whatever text the user pastes in.
+ index = None
+
+ def build_the_bot(input_text):
+     text_list = [input_text]
+     documents = [Document(t) for t in text_list]
+     global index
+     index = GPTSimpleVectorIndex(documents, embed_model=embed_model,
+                                  llm_predictor=llm_predictor, prompt_helper=prompt_helper)
+     return 'Index saved successfully!'
+
+ def chat(chat_history, user_input):
+     bot_response = index.query(user_input)
+     # Stream the answer character by character so the bot appears to type.
+     response = ""
+     for letter in bot_response.response:
+         response += letter
+         yield chat_history + [(user_input, response)]
+
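+ # chat() is a generator, so Gradio streams each yielded history update into
+ # the Chatbot; this relies on the queue enabled by demo.queue() at launch.
+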
+ with gr.Blocks() as demo:
+     gr.Markdown('# Q&A Bot with Hugging Face Models')
+     with gr.Tab("Input Text Document"):
+         text_input = gr.Textbox()
+         text_output = gr.Textbox("Start Building the Bot")
+         text_button = gr.Button("Build the Bot!!!")
+         text_button.click(build_the_bot, text_input, text_output)
+     with gr.Tab("Knowledge Bot"):
+         # inputbox = gr.Textbox("Input your text to build a Q&A Bot here.....")
+         chatbot = gr.Chatbot()
+         message = gr.Textbox("What is this document about?")
+         message.submit(chat, [chatbot, message], chatbot)
+
+ demo.queue().launch(debug=True)
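+ # debug=True keeps the notebook cell attached to the running app (handy in
+ # Colab). Outside a notebook, standard launch() options such as share=True or
+ # server_port=7860 may be more useful.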