# -*- coding: utf-8 -*-
"""LLM-QA-BOT.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1nx-Eaf7yni5D_8u2SkfAdYg_zcXwOhBA
"""

# !nvidia-smi

# !pip install -q langchain llama-index transformers sentence_transformers  # gpt-index was renamed to llama-index; installing both is redundant

from llama_index import SimpleDirectoryReader, LangchainEmbedding, GPTListIndex, GPTSimpleVectorIndex, PromptHelper
from langchain.embeddings.huggingface import HuggingFaceEmbeddings
from llama_index import LLMPredictor
import torch
from langchain.llms.base import LLM
from transformers import pipeline

class FlanLLM(LLM):
    """LangChain LLM wrapper around a local Flan-T5 text2text pipeline."""

    model_name = "google/flan-t5-base"
    pipeline = pipeline("text2text-generation", model=model_name, device=0,
                        model_kwargs={"torch_dtype": torch.bfloat16})

    def _call(self, prompt, stop=None):
        # max_length caps the generated sequence, not the input.
        return self.pipeline(prompt, max_length=9999)[0]["generated_text"]

    @property
    def _identifying_params(self):
        return {"name_of_model": self.model_name}

    @property
    def _llm_type(self):
        return "custom"


llm_predictor = LLMPredictor(llm=FlanLLM())

hfemb = HuggingFaceEmbeddings()
embed_model = LangchainEmbedding(hfemb)
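
# Sanity check (assumes HuggingFaceEmbeddings' default model,
# sentence-transformers/all-mpnet-base-v2, which produces 768-dim vectors):
print("embedding dim:", len(hfemb.embed_query("computer vision")))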

text1 = """Computer vision is a field of artificial intelligence (AI) that enables computers and systems to derive meaningful information from digital images, videos and other visual inputs — and take actions or make recommendations based on that information. If AI enables computers to think, computer vision enables them to see, observe and understand.

Computer vision works much the same as human vision, except humans have a head start. Human sight has the advantage of lifetimes of context to train how to tell objects apart, how far away they are, whether they are moving and whether there is something wrong in an image.

Computer vision trains machines to perform these functions, but it has to do it in much less time with cameras, data and algorithms rather than retinas, optic nerves and a visual cortex. Because a system trained to inspect products or watch a production asset can analyze thousands of products or processes a minute, noticing imperceptible defects or issues, it can quickly surpass human capabilities.

Computer vision is used in industries ranging from energy and utilities to manufacturing and automotive – and the market is continuing to grow. It is expected to reach USD 48.6 billion by 2022."""

#documents = SimpleDirectoryReader('data').load_data()

from llama_index import Document

text_list = [text1]

documents = [Document(t) for t in text_list]

# set number of output tokens
num_output = 250
# set maximum input size
max_input_size = 512
# set maximum chunk overlap
max_chunk_overlap = 20


prompt_helper = PromptHelper(max_input_size, num_output, max_chunk_overlap)
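
# With these settings, roughly 512 - 250 = 262 tokens (minus the prompt
# template) remain for each context chunk, with 20 tokens of overlap between
# adjacent chunks; 512 matches the input length Flan-T5 was trained with.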

#index = GPTSimpleVectorIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor)

# Pass prompt_helper so the chunking settings above actually take effect.
index = GPTListIndex(documents, embed_model=embed_model, llm_predictor=llm_predictor,
                     prompt_helper=prompt_helper)

#index.save_to_disk('index.json')
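
# A saved index can be reloaded later without re-embedding (llama_index
# 0.4.x-era API to match the calls above; load_from_disk is assumed to accept
# the same service kwargs as the constructor):
# index = GPTListIndex.load_from_disk('index.json',
#                                     embed_model=embed_model,
#                                     llm_predictor=llm_predictor)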

import logging

# Silence verbose llama_index logging during queries.
logging.getLogger().setLevel(logging.CRITICAL)

# response = index.query( "What is computer vision?") 
response = index.query("It is expected to reach USD 48.6 billion by")
print(response)
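
# Besides the answer text, the Response object exposes the chunks it was
# synthesized from (attribute names per the same llama_index era; an
# illustrative, commented-out inspection):
# for node in response.source_nodes:
#     print(node.source_text[:200])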

# Frontend (Gradio)

# !pip install -q gradio

import gradio as gr
index = None  # reset; the UI rebuilds the index from user-supplied text

def build_the_bot(input_text):
    text_list = [input_text]
    documents = [Document(t) for t in text_list]
    global index
    index = GPTSimpleVectorIndex(documents, embed_model=embed_model,
                                 llm_predictor=llm_predictor, prompt_helper=prompt_helper)
    return 'Index built successfully!'

def chat(chat_history, user_input):
    if index is None:
        yield chat_history + [(user_input, "Build the bot first in the 'Input Text Document' tab.")]
        return
    bot_response = index.query(user_input)
    # Stream the answer one character at a time so the bot appears to type.
    response = ""
    for letter in bot_response.response:
        response += letter
        yield chat_history + [(user_input, response)]

with gr.Blocks() as demo:
    gr.Markdown('# Q&A Bot with Hugging Face Models')
    with gr.Tab("Input Text Document"):
        text_input = gr.Textbox()
        text_output = gr.Textbox("Start Building the Bot")
        text_button = gr.Button("Build the Bot!!!")
        text_button.click(build_the_bot, text_input, text_output)
    with gr.Tab("Knowledge Bot"):
        # inputbox = gr.Textbox("Input your text to build a Q&A Bot here.....")
        chatbot = gr.Chatbot()
        message = gr.Textbox("What is this document about?")
        message.submit(chat, [chatbot, message], chatbot)

demo.queue().launch(debug=True)
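
# To expose the Colab-hosted demo via a public URL, Gradio's launch()
# also accepts share=True:
# demo.queue().launch(debug=True, share=True)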