Adignite committed on
Commit
b0b0f10
·
verified ·
1 Parent(s): 9c60683

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +144 -0
app.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sentence_transformers import SentenceTransformer
5
+ from transformers import pipeline
6
+ from sklearn.metrics.pairwise import cosine_similarity
7
+ from docx import Document
8
+ import io
9
+
10
class CarbonCreditDocGenerator:
    """Build a carbon credit project document (.docx) from "key: value" text.

    Each section is drafted with a GPT-2 text-generation pipeline, grounded
    with the knowledge-base sentences most similar to the section query
    (ranked by Sentence-BERT cosine similarity), then post-processed so that
    every input value and every retrieved sentence appears in the text.
    """

    # Section headings, emitted in this order by create_document().
    SECTIONS = (
        "Executive Summary",
        "Certificate Identification",
        "Emission Reduction Details",
        "Project Information",
        "Verification and Certification",
        "Issuance and Expiration Dates",
        "Market Type",
        "Transferability Information",
        "Legal Framework",
        "Accountability Measures",
        "Contact Information",
    )

    def __init__(self):
        """Load the embedding model, the generation pipeline and the knowledge base."""
        self.sbert_model = SentenceTransformer('all-MiniLM-L6-v2')
        self.nlg_pipeline = pipeline("text-generation", model="gpt2", max_length=500)

        # Load your knowledge base here
        self.knowledge_base = self.load_knowledge_base()

    def load_knowledge_base(self):
        """Return the list of domain-knowledge sentences used for retrieval."""
        # This should load your carbon credit domain knowledge
        return [
            "Carbon credits represent the reduction of one metric ton of carbon dioxide emissions.",
            "Afforestation projects involve planting trees in areas where there were none before.",
            "The Verified Carbon Standard (VCS) is a widely recognized certification for carbon credits.",
            "Carbon credit projects must demonstrate additionality, meaning the reductions wouldn't have occurred without the project.",
            "Monitoring, reporting, and verification (MRV) are crucial components of carbon credit projects.",
            # Add more knowledge base entries...
        ]

    def process_input_data(self, input_text):
        """Parse "key: value" lines of *input_text* into a dict.

        Only the first colon on a line separates key from value, so values
        may themselves contain colons (URLs, timestamps). Lines without a
        colon, and lines whose key is empty, are ignored. When a key occurs
        more than once the last occurrence wins.
        """
        # In a real scenario, you'd parse the input document more thoroughly
        data = {}
        for line in input_text.splitlines():
            key, separator, value = line.partition(':')
            key = key.strip()
            if not separator or not key:
                # No colon at all, or nothing before it: not a usable field.
                continue
            data[key] = value.strip()
        return data

    def _get_knowledge_embeddings(self):
        """Return embeddings for the knowledge base, encoding it only when it changed."""
        snapshot = tuple(self.knowledge_base)
        # getattr: the cache attribute is created lazily on first use.
        cached = getattr(self, "_knowledge_cache", None)
        if cached is None or cached[0] != snapshot:
            cached = (snapshot, self.sbert_model.encode(list(snapshot)))
            self._knowledge_cache = cached
        return cached[1]

    def retrieve_relevant_knowledge(self, query, top_k=3):
        """Return up to *top_k* knowledge sentences most similar to *query*.

        Results are ordered from most to least similar. An empty list is
        returned when the knowledge base is empty or *top_k* is not positive.
        """
        # Guard first: a slice of [-0:] would select *every* entry, and
        # cosine_similarity cannot handle an empty embedding matrix.
        if top_k <= 0 or not self.knowledge_base:
            return []

        query_embedding = self.sbert_model.encode([query])[0]
        knowledge_embeddings = self._get_knowledge_embeddings()

        similarities = cosine_similarity([query_embedding], knowledge_embeddings)[0]
        # argsort is ascending: take the last top_k and reverse for best-first.
        top_indices = np.argsort(similarities)[-top_k:][::-1]

        return [self.knowledge_base[i] for i in top_indices]

    def generate_section_content(self, section_title, input_data, max_length=500):
        """Generate the text for one document section.

        Args:
            section_title: Heading of the section to write.
            input_data: Parsed input fields (see process_input_data).
            max_length: Forwarded unchanged as the pipeline's ``max_length``.

        Returns:
            The generated text after the corrective pass.
        """
        query = f"Generate content for the '{section_title}' section of a carbon credit document."
        relevant_knowledge = self.retrieve_relevant_knowledge(query)

        context = f"Input data: {input_data}\n\nRelevant knowledge: {' '.join(relevant_knowledge)}"
        prompt = f"{context}\n\nTask: {query}\n\nContent:"

        # return_full_text=False: keep only the continuation. Otherwise the
        # prompt is echoed into the document, and because the prompt already
        # contains every value and knowledge sentence, the corrective pass
        # below could never detect anything missing.
        outputs = self.nlg_pipeline(
            prompt,
            max_length=max_length,
            num_return_sequences=1,
            return_full_text=False,
        )
        generated_text = outputs[0]['generated_text'].strip()

        # Apply corrective RAG
        corrected_text = self.apply_corrective_rag(generated_text, input_data, relevant_knowledge)

        return corrected_text.strip()

    def apply_corrective_rag(self, generated_text, input_data, relevant_knowledge):
        """Append any input value or knowledge sentence missing from the text.

        Presence is a case-insensitive substring check against the text
        accumulated so far. Missing input fields are appended as
        " key: value." and missing knowledge sentences as " sentence".
        """
        # This is a simplified version of corrective RAG
        corrected_text = generated_text

        # Ensure all input data is represented
        for key, value in input_data.items():
            value = str(value)  # tolerate non-string values from other callers
            if value.lower() not in corrected_text.lower():
                corrected_text += f" {key}: {value}."

        # Ensure relevant knowledge is incorporated
        for knowledge in relevant_knowledge:
            if knowledge.lower() not in corrected_text.lower():
                corrected_text += f" {knowledge}"

        return corrected_text

    def create_document(self, input_text):
        """Build the document: a title, then one heading and paragraph per section."""
        doc = Document()
        doc.add_heading('Carbon Credit Project Document', 0)

        input_data = self.process_input_data(input_text)

        for section in self.SECTIONS:
            doc.add_heading(section, level=1)
            content = self.generate_section_content(section, input_data)
            doc.add_paragraph(content)

        return doc

    def generate_document(self, input_text):
        """Return the generated .docx as a BytesIO positioned at offset 0."""
        doc = self.create_document(input_text)

        # Save the document to a BytesIO object
        doc_io = io.BytesIO()
        doc.save(doc_io)
        doc_io.seek(0)  # rewind so callers can read from the beginning

        return doc_io
114
+
115
# Streamlit app
@st.cache_resource(show_spinner=False)
def _get_generator():
    """Create the document generator once and reuse it across reruns.

    Constructing the generator loads two models, so it is cached instead of
    being rebuilt on every button click.
    """
    return CarbonCreditDocGenerator()


def main():
    """Render the UI: upload a text file, generate the document, offer it for download."""
    st.title("Carbon Credit Document Generator")

    # File uploader
    uploaded_file = st.file_uploader("Choose a text file", type="txt")
    if uploaded_file is None:
        return

    # Read the file
    try:
        # utf-8-sig decodes plain UTF-8 too, and drops a leading BOM that
        # would otherwise become part of the first parsed key.
        input_text = uploaded_file.read().decode("utf-8-sig")
    except UnicodeDecodeError:
        st.error("The uploaded file is not valid UTF-8 text.")
        return
    st.text_area("Input Data", input_text, height=200)

    if not st.button("Generate Document"):
        return

    with st.spinner("Generating document..."):
        generator = _get_generator()
        doc_io = generator.generate_document(input_text)

    st.success("Document generated successfully!")

    # Provide download button
    st.download_button(
        label="Download Carbon Credit Document",
        data=doc_io.getvalue(),
        file_name="carbon_credit_document.docx",
        mime="application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    )


if __name__ == "__main__":
    main()