Spaces:

tferhan
/

data_gov_ma

Sleeping

App Files Files Community

Ferhan taha committed on Mar 8, 2024

Commit

1d555fd

verified ·

1 Parent(s): 7692855

Delete app.py

Browse files

Files changed (1) hide show

app.py +0 -134

app.py DELETED Viewed

@@ -1,134 +0,0 @@
-# -*- coding: utf-8 -*-
-"""app.ipynb
-Automatically generated by Colaboratory.
-Original file is located at
-    https://colab.research.google.com/drive/14JJlKx1Oj4px4gdVwHn55FstUl2Dvh9z
-"""
-#|export
-import os
-from langchain.document_loaders import PyPDFLoader
-from langchain.text_splitter import RecursiveCharacterTextSplitter
-from langchain.vectorstores import Chroma
-from langchain.chains import ConversationalRetrievalChain
-from langchain.embeddings import HuggingFaceEmbeddings
-from langchain.llms import HuggingFacePipeline
-from langchain.chains import ConversationChain
-from langchain.memory import ConversationBufferMemory
-from langchain.llms import HuggingFaceHub
-import pandas as pd
-from pathlib import Path
-import chromadb
-import gradio as gr
-from transformers import AutoTokenizer
-import transformers
-import torch
-import tqdm
-import accelerate
-#|export
-def initialize_database(file_path):
-    """Build a vector database from a single PDF file.
-
-    The collection name is derived from the file name without its
-    extension: spaces become hyphens, the name is limited to 50
-    characters, and a non-alphanumeric first or last character is
-    replaced by 'A' or 'Z' respectively.
-
-    Args:
-        file_path: Path of the PDF to load and index.
-
-    Returns:
-        A tuple ``(vector_db, collection_name, "Complete!")``.
-    """
-    # Remove spaces and limit the length to 50 characters.
-    collection_name = Path(file_path).stem.replace(" ", "-")[:50]
-    # Enforce an alphanumeric first and last character. Strings are
-    # immutable, so the name is rebuilt by slicing; item assignment
-    # (collection_name[0] = 'A') raises TypeError. The truthiness checks
-    # avoid an IndexError when the stem is empty.
-    if collection_name and not collection_name[0].isalnum():
-        collection_name = 'A' + collection_name[1:]
-    if collection_name and not collection_name[-1].isalnum():
-        collection_name = collection_name[:-1] + 'Z'
-    print('Collection name: ', collection_name)
-    # Load the document and create splits.
-    doc_splits = load_doc(file_path)
-    # Create the vector database for this collection.
-    vector_db = create_db(doc_splits, collection_name)
-    return vector_db, collection_name, "Complete!"
-#|export
-def load_doc(file_path):
-    """Load a PDF and return it split into overlapping text chunks.
-
-    Args:
-        file_path: Path of the PDF handed to ``PyPDFLoader``.
-
-    Returns:
-        The result of ``split_documents`` on the loaded pages, using a
-        chunk size of 600 and a chunk overlap of 50.
-    """
-    loaded_pages = PyPDFLoader(file_path).load()
-    chunker = RecursiveCharacterTextSplitter(
-        chunk_size=600,
-        chunk_overlap=50,
-    )
-    return chunker.split_documents(loaded_pages)
-#|export
-def create_db(splits, collection_name):
-    """Index document chunks in a Chroma collection on an ephemeral client.
-
-    Args:
-        splits: Document chunks to embed and store.
-        collection_name: Name of the Chroma collection to create.
-
-    Returns:
-        The vector store returned by ``Chroma.from_documents``.
-    """
-    embedder = HuggingFaceEmbeddings()
-    # An ephemeral client is used here; no persist directory is configured.
-    ephemeral_client = chromadb.EphemeralClient()
-    store_options = {
-        "documents": splits,
-        "embedding": embedder,
-        "client": ephemeral_client,
-        "collection_name": collection_name,
-    }
-    return Chroma.from_documents(**store_options)
-#|export
-# Notebook-style module-level run: the statements below execute on import.
-# Load 'data.pdf' and split it into chunks.
-splt = load_doc('data.pdf')
-#|export
-# initialize_database loads and indexes the same PDF again internally;
-# `vec` holds its (vector_db, collection_name, status) tuple.
-vec = initialize_database('data.pdf')
-#|export
-# Build a vector store from the chunks loaded above under the collection
-# name 'data'; this store is the one passed to initialize_llmchain below.
-vec_cre = create_db(splt, 'data')
-# Bare expression (notebook cell output); it has no effect in a script.
-vec_cre
-#|export
-def initialize_llmchain(temperature, max_tokens, top_k, vector_db):
-    """Assemble a conversational retrieval chain over ``vector_db``.
-
-    Args:
-        temperature: Passed to the model as ``temperature``.
-        max_tokens: Passed to the model as ``max_new_tokens``.
-        top_k: Passed to the model as ``top_k``.
-        vector_db: Vector store; its ``as_retriever()`` feeds the chain.
-
-    Returns:
-        A ``ConversationalRetrievalChain`` with buffer memory that also
-        returns the source documents.
-    """
-    chat_memory = ConversationBufferMemory(
-        memory_key="chat_history",
-        output_key='answer',
-        return_messages=True,
-    )
-    # NOTE(review): "load_in_8bit" is forwarded with the generation
-    # settings; confirm the hub endpoint actually accepts it.
-    generation_settings = {
-        "temperature": temperature,
-        "max_new_tokens": max_tokens,
-        "top_k": top_k,
-        "load_in_8bit": True,
-    }
-    hub_llm = HuggingFaceHub(
-        repo_id='mistralai/Mixtral-8x7B-Instruct-v0.1',
-        model_kwargs=generation_settings,
-    )
-    return ConversationalRetrievalChain.from_llm(
-        hub_llm,
-        retriever=vector_db.as_retriever(),
-        chain_type="stuff",
-        memory=chat_memory,
-        return_source_documents=True,
-        verbose=False,
-    )
-#|export
-# Module-level chain used by conversation(): temperature 0.7,
-# max_tokens 1024, top_k 1, retrieving from the `vec_cre` store.
-qa = initialize_llmchain(0.7, 1024, 1, vec_cre)
-#|export
-def format_chat_history(message, chat_history):
-    """Flatten (user, bot) message pairs into labelled transcript lines.
-
-    Args:
-        message: Current user message; accepted but not used here.
-        chat_history: Iterable of ``(user_message, bot_message)`` pairs.
-
-    Returns:
-        A list with two strings per pair, in order:
-        ``"User: ..."`` followed by ``"Assistant: ..."``.
-    """
-    return [
-        line
-        for user_turn, bot_turn in chat_history
-        for line in (f"User: {user_turn}", f"Assistant: {bot_turn}")
-    ]
-#|export
-def conversation(message, history):
-    """Answer ``message`` with the module-level ``qa`` chain.
-
-    Args:
-        message: The user's question.
-        history: Prior ``(user_message, bot_message)`` pairs.
-
-    Returns:
-        The chain's ``"answer"`` text. If it contains the marker
-        ``"Helpful Answer:"``, only the text after the last occurrence
-        of that marker is returned.
-    """
-    transcript = format_chat_history(message, history)
-    answer = qa({"question": message, "chat_history": transcript})["answer"]
-    marker = "Helpful Answer:"
-    if marker in answer:
-        return answer.split(marker)[-1]
-    return answer
-#|export
-# Serve `conversation` through a Gradio chat interface; this runs at module
-# level, so the app launches as soon as the file is executed.
-gr.ChatInterface(conversation).launch()