File size: 2,514 Bytes
4fe6e77
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
51f1445
 
4fe6e77
8722edc
4fe6e77
8722edc
4fe6e77
8722edc
 
 
4fe6e77
 
2275cf7
 
4fe6e77
 
2275cf7
 
 
4fe6e77
 
 
 
2275cf7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4fe6e77
 
 
 
2275cf7
4fe6e77
 
 
 
 
2275cf7
 
 
 
 
 
 
4fe6e77
2275cf7
4fe6e77
 
 
 
 
 
 
2275cf7
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
from langchain import OpenAI, PromptTemplate, LLMChain
from langchain.text_splitter import CharacterTextSplitter
from langchain.chains.mapreduce import MapReduceChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import AzureChatOpenAI
from langchain.chains.summarize import load_summarize_chain
from langchain.chains import AnalyzeDocumentChain
from PyPDF2 import PdfReader
from langchain.document_loaders import TextLoader
from langchain.indexes import VectorstoreIndexCreator
from langchain.document_loaders import PyPDFLoader



import os


# Select the Azure flavour of the OpenAI API and pin the API version through
# environment variables (presumably read by the OpenAI/LangChain client when
# the model below is created -- the endpoint and key are not set here and are
# expected to come from the environment).
os.environ.update(
    {
        "OPENAI_API_TYPE": "azure",
        "OPENAI_API_VERSION": "2023-03-15-preview",
    }
)

# Single chat model instance used by the summarization chain further down.
llm = AzureChatOpenAI(model_name="gpt-35-turbo", deployment_name="esujnand")



st.title("Wipro CSRD AI 1")

# Short usage instructions shown under the title.
st.write("Step 1: Summary of your selected section of CSRD... Sections in this are enviormental  topic1, enviornamtal topic2 ")
st.write("Step 2: Ask your specfici questions regarding a CSRD disclosure requirments")


# PDF upload; `pdf_file` stays None until the user has picked a file.
pdf_file = st.file_uploader("Upload file", type=["pdf"])

# Streamlit re-executes this script from the top on every interaction, so a
# page count assigned only inside a button handler is lost on the next run.
# Derive it from the upload on every run instead; 100 is just the placeholder
# used while no file has been uploaded.
numberofpages = 100
if pdf_file is not None:
    numberofpages = len(PdfReader(pdf_file).pages)

if st.button("How many pages? "):
    if pdf_file is None:
        st.warning("Please upload a PDF file first.")
    else:
        st.write("length is ", numberofpages)

if st.button("table of contents? "):
    if pdf_file is None:
        st.warning("Please upload a PDF file first.")
    elif numberofpages < 3:
        # The handler shows the third page (index 2), which must exist.
        st.warning("This PDF has fewer than 3 pages, so page 3 cannot be shown.")
    else:
        st.write(PdfReader(pdf_file).pages[2].extract_text())


# Page indices are zero-based, so the last valid start page is
# numberofpages - 1 (the previous upper bound of numberofpages pointed one
# past the end of the document).
last_page_index = max(numberofpages - 1, 0)
if last_page_index > 0:
    startpage = st.slider('Which section to look at', 0, last_page_index, min(1, last_page_index))
else:
    # Only one possible start page: skip the slider entirely.
    startpage = 0
st.write("starting section page", startpage)


pagecount = st.slider('How many pages', 1, 5, 1)
st.write("pages to read", pagecount)


def extract_text_from_pdf():
    """Return the text of the currently selected page range of the upload.

    Reads the module-level ``pdf_file``, ``startpage`` and ``pagecount``.
    The requested range is clamped to the pages that actually exist, so a
    selection that runs past the end of the document yields the remaining
    pages instead of raising ``IndexError``.

    Returns:
        str: The extracted page texts joined by single spaces; an empty
        string when the clamped range contains no pages.
    """
    reader = PdfReader(pdf_file)
    first = max(startpage, 0)
    stop = min(startpage + pagecount, len(reader.pages))
    # `or ""` is defensive: it keeps the join safe should a page yield no
    # text object at all (not expected, but cheap to guard against).
    return " ".join(
        reader.pages[index].extract_text() or "" for index in range(first, stop)
    )


def extract_text_from_pdf2():
    """Return the text of every page of the uploaded PDF as one string.

    Reads the module-level ``pdf_file``; the per-page texts are joined with
    a single space.
    """
    document = PdfReader(pdf_file)
    return " ".join(page.extract_text() for page in document.pages)

# Summarize the selected page range with a map-reduce summarization chain.
if st.button("Summerize "):
    text = ""
    if pdf_file is None:
        # Nothing to read yet; avoid handing None to the PDF reader.
        st.warning("Please upload a PDF file first.")
    else:
        with st.spinner("Extracting Text..."):
            try:
                text = extract_text_from_pdf()
            except IndexError:
                # Raised when the selected range points past the last page.
                st.error("The selected page range is outside the document.")
            else:
                if not text.strip():
                    st.warning("No extractable text was found on the selected pages.")

    # Only call the model when there is actual text to summarize.
    if text.strip():
        summary_chain = load_summarize_chain(llm, chain_type="map_reduce")
        summarize_document_chain = AnalyzeDocumentChain(combine_docs_chain=summary_chain, verbose=True)
        with st.spinner("Summarizing..."):
            result = summarize_document_chain.run(text)
        st.write(result)