|
import streamlit as st |
|
from langchain import OpenAI, PromptTemplate, LLMChain |
|
from langchain.text_splitter import CharacterTextSplitter |
|
from langchain.chains.mapreduce import MapReduceChain |
|
from langchain.prompts import PromptTemplate |
|
from langchain.chat_models import AzureChatOpenAI |
|
from langchain.chains.summarize import load_summarize_chain |
|
from langchain.chains import AnalyzeDocumentChain |
|
from PyPDF2 import PdfReader |
|
from langchain.document_loaders import TextLoader |
|
from langchain.indexes import VectorstoreIndexCreator |
|
from langchain.document_loaders import PyPDFLoader |
|
|
|
|
|
|
|
import os |
|
|
|
|
|
# Export the Azure OpenAI settings as process environment variables before the
# chat model is built (presumably read by the OpenAI/LangChain client --
# confirm against the installed library version).
os.environ.update(
    {
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_VERSION": "2023-03-15-preview",
    }
)

# Chat model used by the summarization chain further down in this script.
llm = AzureChatOpenAI(model_name="gpt-35-turbo", deployment_name="esujnand")
|
|
|
|
|
|
|
# Page heading followed by the two usage steps shown to the user.
st.title("Wipro CSRD AI 1")

for step_text in (
    "Step 1: Summary of your selected section of CSRD... Sections in this are enviormental topic1, enviornamtal topic2 ",
    "Step 2: Ask your specfici questions regarding a CSRD disclosure requirments",
):
    st.write(step_text)
|
|
|
|
|
|
|
# Uploaded PDF; the value is None until the user has picked a file.
pdf_file = st.file_uploader("Upload file", type=["pdf"])

# Fallback page count, used only while no PDF has been uploaded.
numberofpages = 100

if pdf_file is not None:
    # Streamlit re-executes the whole script on every interaction, so the page
    # count is computed on each run rather than only when a button is clicked;
    # otherwise the slider bound below would fall back to the default of 100.
    numberofpages = len(PdfReader(pdf_file).pages)

if st.button("How many pages? "):
    if pdf_file is None:
        st.warning("Please upload a PDF first.")
    else:
        st.write("length is ", numberofpages)

if st.button("table of contents? "):
    if pdf_file is None:
        st.warning("Please upload a PDF first.")
    else:
        reader = PdfReader(pdf_file)
        # The script shows the third page (index 2) as the table of contents;
        # guard against documents that are too short to have one.
        if len(reader.pages) > 2:
            page = reader.pages[2].extract_text()
            st.write(page)
        else:
            st.warning("This PDF has fewer than 3 pages; no page at index 2 to show.")

# Page indices are zero-based, so the last selectable start page is
# numberofpages - 1. A slider needs a non-empty range, so documents with at
# most one page skip the widget and start at page 0.
if numberofpages > 1:
    startpage = st.slider('Which section to look at', 0, numberofpages - 1, 1)
else:
    startpage = 0

st.write("starting section page", startpage)

# Number of consecutive pages, starting at startpage, to read for the summary.
pagecount = st.slider('How many pages', 1, 5, 1)

st.write("pages to read", pagecount)
|
|
|
|
|
def extract_text_from_pdf(reader=None, start=None, count=None):
    """Return the text of a run of consecutive pages, joined by single spaces.

    Args:
        reader: Object exposing a ``pages`` sequence whose items provide
            ``extract_text()``. When omitted, a ``PdfReader`` is built from
            the module-level ``pdf_file``.
        start: Zero-based index of the first page to read. Defaults to the
            module-level ``startpage``.
        count: Number of pages to read. Defaults to the module-level
            ``pagecount``.

    Returns:
        The extracted page texts joined with ``" "``. Pages beyond the end of
        the document are skipped, so the result is ``""`` when the requested
        range lies entirely outside the document.

    Raises:
        ValueError: If no reader is given and no PDF has been uploaded.
    """
    if reader is None:
        if pdf_file is None:
            raise ValueError("No PDF uploaded; upload a file before extracting text.")
        reader = PdfReader(pdf_file)

    first = startpage if start is None else start
    span = pagecount if count is None else count

    pages = reader.pages
    # Clamp the window to the document so a start page near the end does not
    # raise IndexError when start + count runs past the last page.
    first = max(first, 0)
    stop = min(first + span, len(pages))

    return " ".join(pages[i].extract_text() for i in range(first, stop))
|
|
|
|
|
def extract_text_from_pdf2(reader=None):
    """Return the text of every page in the document, joined by single spaces.

    Args:
        reader: Object exposing an iterable ``pages`` sequence whose items
            provide ``extract_text()``. When omitted, a ``PdfReader`` is built
            from the module-level ``pdf_file``.

    Returns:
        The extracted page texts joined with ``" "`` (``""`` for a document
        without pages).

    Raises:
        ValueError: If no reader is given and no PDF has been uploaded.
    """
    if reader is None:
        if pdf_file is None:
            raise ValueError("No PDF uploaded; upload a file before extracting text.")
        reader = PdfReader(pdf_file)

    # Iterate the pages directly instead of indexing through range(len(...)).
    return " ".join(page.extract_text() for page in reader.pages)
|
|
|
# Summarize the selected page range with a map-reduce summarization chain.
if st.button("Summerize "):
    if pdf_file is None:
        # Without an upload there is nothing to read; tell the user instead of
        # letting the PDF reader fail on a missing file.
        st.warning("Please upload a PDF before summarizing.")
    else:
        with st.spinner("Extracting Text..."):
            text = extract_text_from_pdf()

        if not text.strip():
            # Avoid calling the model when the selected pages yielded no text.
            st.warning("No extractable text was found on the selected pages.")
        else:
            with st.spinner("Summarizing..."):
                summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
                summarize_document_chain = AnalyzeDocumentChain(
                    combine_docs_chain=summary_chain, verbose=True
                )
                result = summarize_document_chain.run(text)

            st.write(result)
|
|
|
|