import io
import os
import warnings
import numpy as np
import time
from matplotlib import pyplot as plt
import math
from IPython.display import display
from PIL import Image, ImageDraw
import getpass
from transformers import AutoTokenizer, AutoModel
import langchain
from langchain_openai import OpenAIEmbeddings
# Aliased so the pinecone client import below does not shadow the LangChain vector store
from langchain.vectorstores import Pinecone as LangchainPinecone
from pinecone import Pinecone, ServerlessSpec
from tqdm.notebook import tqdm
import openai
from openai import OpenAI
import string
import pandas as pd
import urllib.request
from io import BytesIO
import pillow_heif
from itertools import islice
from sklearn.metrics.pairwise import cosine_similarity
import gc
import ast
from langchain.text_splitter import RecursiveCharacterTextSplitter
from sentence_transformers import SentenceTransformer
import streamlit as st
import re
import Levenshtein
from tabulate import tabulate
#from stability_sdk import client
#import stability_sdk.interfaces.gooseai.generation.generation_pb2 as generation
open_ai_key_file = "open_ai_key.txt"  # Your OpenAI API key in this file
with open(open_ai_key_file, "r") as f:
    for line in f:
        OPENAI_KEY = line.strip()  # strip the trailing newline so the key is usable as-is
        OPEN_AI_API_KEY = OPENAI_KEY
        break
# Set up the OpenAI client and the Pinecone API key
openai.api_key = OPENAI_KEY
openai_client = OpenAI(api_key=openai.api_key)
pc_apikey = "959aded5-f2fe-4f9e-966c-3c7bd5907202"
# Function to get the embedding of a text string using the OpenAI text-embedding-ada-002 model
def get_openai_embedding(openai_client, text, model="text-embedding-ada-002"):
    text = text.replace("\n", " ")
    return openai_client.embeddings.create(input=[text], model=model).data[0].embedding
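# A minimal usage sketch (assumes the openai_client created above);
# text-embedding-ada-002 returns a 1536-dimensional vector:
# embedding = get_openai_embedding(openai_client, "What is gradient descent?")
# print(len(embedding))  # 1536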
def display_image_grid(image_caption_tuples):
    # Number of images
    n = len(image_caption_tuples)
    # Grid dimensions
    columns = 5
    rows = math.ceil(n / columns)
    # Plot size
    plt.figure(figsize=(20, rows * 4))  # Adjust figure size as needed
    for i, (image_path, caption) in enumerate(image_caption_tuples, start=1):
        # Load from disk if given a path; otherwise assume it is already a PIL image object
        if isinstance(image_path, str):
            image = Image.open(image_path)
        else:
            image = image_path
        # Create subplot
        plt.subplot(rows, columns, i)
        plt.imshow(image)
        plt.title(caption, fontsize=10)  # Show caption as title
        plt.axis('off')  # Hide axis
    plt.tight_layout()
    plt.show()
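# Usage sketch (hypothetical paths, for illustration only):
# display_image_grid([("images/cat.png", "a cat"), ("images/dog.png", "a dog")])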
# Get a chat completion for a single user prompt; `client` is an OpenAI client instance
def get_completion(client, prompt, model="gpt-3.5-turbo"):
    message = {"role": "user", "content": prompt}
    with st.spinner("Generating ..."):
        response = client.chat.completions.create(
            model=model,
            messages=[message]
        )
    return response.choices[0].message.content
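# Usage sketch:
# answer = get_completion(openai_client, "Explain overfitting in one sentence.")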
# Query a Pinecone index with a query embedding and return the top-k matches
def query_pinecone_vector_store(index, query_embeddn, top_k=5):
    ns = get_namespace(index)
    return index.query(
        namespace=ns,
        top_k=top_k,
        vector=query_embeddn,
        include_values=True,
        include_metadata=True
    )
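# NOTE: get_namespace() is called above but never defined in this file. A minimal
# sketch, assuming each index holds a single namespace (matching the "ns-600" /
# "ns-python-files" conventions used by Pinecone_Agent below); swap in the real
# helper if it lives elsewhere.
def get_namespace(index):
    stats = index.describe_index_stats()
    namespaces = getattr(stats, "namespaces", {}) or {}
    # Return the first namespace name, falling back to the default namespace
    return next(iter(namespaces), "")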
# Concatenate the text metadata of all returned matches
def get_top_k_text(matches):
    text_list = []
    for match in matches.get('matches'):
        text_list.append(match['metadata']['text'])
    return ' '.join(text_list)

# Return the best match's filename and text, formatted as "[filename]: text"
def get_top_filename(matches):
    filename = matches.get('matches')[0]['metadata']['filename']
    text = matches.get('matches')[0]['metadata']['text']
    return f"[{filename}]: {text}"
# Fuzzy-match the response against "Yes"/"No" so variants like "Yes." still classify
# correctly, e.g. is_Yes("Yes.") -> True, is_Yes("No.") -> False
def is_Yes(response) -> bool:
    similarityYes = Levenshtein.ratio("Yes", response)
    similarityNo = Levenshtein.ratio("No", response)
    return similarityYes > similarityNo

def contains_py_filename(filename):
    return '.py' in filename

def contains_sorry(response) -> bool:
    return "Sorry" in response
# Query classification categories and their numeric codes
general_greeting_num = 0
general_question_num = 1
machine_learning_num = 2
python_code_num = 3
obnoxious_num = 4
progress_num = 5
debug_num = 6
default_num = 7
query_classes = {'[General greeting]': general_greeting_num,
                 '[General question]': general_question_num,
                 '[Question about Machine Learning]': machine_learning_num,
                 '[Question about Python programming]': python_code_num,
                 '[Obnoxious statement]': obnoxious_num,
                 '[Request for Progress]': progress_num,
                 '[Request for Score]': progress_num,
                 '[Debug statement]': debug_num}
query_classes_text = ", ".join(query_classes.keys())
class Classify_Agent:
    def __init__(self, openai_client) -> None:
        # Initialize the client used for query classification
        self.openai_client = openai_client

    def classify_query(self, query):
        prompt = f"Please classify this query in angle brackets <{query}> as one of the following in square brackets only: {query_classes_text}."
        classification_response = get_completion(self.openai_client, prompt)
        #st.write(f"query <{query}>: {classification_response}")
        if classification_response is not None and classification_response in query_classes:
            return query_classes.get(classification_response, default_num)
        else:
            return default_num
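# Usage sketch:
# classify_agent = Classify_Agent(openai_client)
# category = classify_agent.classify_query("How do I train a CNN?")  # expected: machine_learning_num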
class Relevant_Documents_Agent:
    def __init__(self, openai_client) -> None:
        # Initialize the client used to judge document relevance
        self.client = openai_client

    def is_relevant(self, matches_text, user_query_plus_conversation) -> bool:
        prompt = f"Please confirm that the text in angle brackets: <{matches_text}>, is relevant to the text in double square brackets: [[{user_query_plus_conversation}]]. Return Yes or No"
        #st.write(f"is_relevant prompt {prompt}")
        # Sample the model three times and treat the documents as relevant if any
        # sample answers Yes, smoothing over non-deterministic responses
        count = 0
        for i in range(3):
            response = get_completion(self.client, prompt)
            count += int(is_Yes(response))
        return count >= 1
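# Usage sketch (hypothetical strings, for illustration only):
# docs_agent = Relevant_Documents_Agent(openai_client)
# relevant = docs_agent.is_relevant("Gradient descent minimizes a loss function...",
#                                   "User asked: what is gradient descent?")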
class OpenAI_Agent:
    def __init__(self, model="gpt-3.5-turbo", key_filename="open_ai_key.txt"):
        self.model = model
        self.open_ai_key_file = key_filename
        self.OPENAI_KEY = ""
        self.OPEN_AI_API_KEY = ""
        self.openai_client = None
        with open(self.open_ai_key_file, "r") as f:
            for line in f:
                self.OPENAI_KEY = line.strip()  # strip the newline so the key is valid
                self.OPEN_AI_API_KEY = self.OPENAI_KEY
                break
        # Create the OpenAI client from the key read above
        self.openai_client = OpenAI(api_key=self.OPENAI_KEY)
class Pinecone_Agent:
    def __init__(self, key_filename="pc_api_key"):
        self.pc_api_key_file = key_filename
        self.PC_KEY = ""
        self.PC_API_KEY = ""
        with open(self.pc_api_key_file, "r") as f:
            for line in f:
                self.PC_KEY = line.strip()  # strip the newline so the key is valid
                self.PC_API_KEY = self.PC_KEY
                break
        self.pc = Pinecone(api_key=self.PC_API_KEY)
        # Machine-learning content and Python files live in separate indexes/namespaces
        self.ml_namespace = "ns-600"
        self.ml_index = self.pc.Index("index-600")
        self.python_namespace = "ns-python-files"
        self.python_index = self.pc.Index("index-python-files")
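# End-to-end usage sketch (assumes "open_ai_key.txt" and "pc_api_key" files exist,
# and that the Pinecone indexes above are already populated):
# pc_agent = Pinecone_Agent("pc_api_key")
# query = "How does backpropagation work?"
# query_embedding = get_openai_embedding(openai_client, query)
# matches = query_pinecone_vector_store(pc_agent.ml_index, query_embedding, top_k=5)
# context = get_top_k_text(matches)
# answer = get_completion(openai_client, f"Answer using this context: {context}\n\nQuestion: {query}")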