Spaces:
Sleeping
Sleeping
| #!/usr/bin/env -S poetry run python | |
| import os | |
| import json | |
| import pdfplumber | |
| import streamlit as st | |
| from openai import OpenAI | |
# Shared OpenAI client used by main() for chat completions. It is built with
# no explicit arguments, so credentials come from the library's defaults
# (presumably the OPENAI_API_KEY environment variable -- confirm for the
# deployment environment).
client = OpenAI()
def load_user_data(user_id):
    """Load the stored JSON record for ``user_id``.

    The record is read from ``data/user_data/user_data_<user_id>.json``,
    resolved relative to the current working directory.

    Args:
        user_id: Identifier interpolated into the file name.

    Returns:
        The decoded JSON content, or an empty dict when the file does not
        exist.

    Raises:
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    file_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
    # EAFP: open directly instead of exists()-then-open(), which can race
    # with a concurrent delete between the two calls. The encoding is pinned
    # so reading does not depend on the platform default.
    try:
        with open(file_path, "r", encoding="utf-8") as file:
            return json.load(file)
    except FileNotFoundError:
        return {}
def _parse_amount(line):
    """Return the second-to-last token of ``line`` as a float, or ``None``.

    ``None`` is returned when the line has fewer than two tokens or when the
    token is not a number, so callers can skip headings and free text.
    """
    tokens = line.split()
    if len(tokens) < 2:
        return None
    raw = tokens[-2]
    if ',' in raw:
        if raw.rfind(',') > raw.rfind('.'):
            # "1.234,56": dots group thousands, the comma is the decimal mark.
            raw = raw.replace('.', '').replace(',', '.')
        else:
            # "1,234.56": commas group thousands, the dot is the decimal mark.
            raw = raw.replace(',', '')
    try:
        return float(raw)
    except ValueError:
        return None


def parse_pdf_to_json(pdf_path):
    """Extract the cost lines of a telecom bill PDF into a dict.

    Every text line of every page is matched against an ordered list of
    markers. The first rule whose markers all occur in the line wins, and the
    amount is taken from the second-to-last whitespace-separated token of
    that line. When several lines match the same rule, the last parsable
    amount is kept. Lines whose amount token is missing or not numeric are
    skipped instead of raising.

    Only cost entries are returned; invoice date, invoice number and client
    account are not part of the result.

    Args:
        pdf_path: Path or file-like object accepted by ``pdfplumber.open``.

    Returns:
        dict mapping a cost label to its amount as ``float``. Labels whose
        line never appears in the PDF are absent.
    """
    # Ordered (required substrings, result key) rules. Order matters: the
    # more specific 'Total factura curenta fara TVA' must be tested before
    # the generic 'Total factura curenta'. Matching is case-sensitive.
    # NOTE: the key 'Abonamente si extraopiuni' is misspelt but is kept
    # verbatim so that already-stored bills keep matching.
    cost_rules = (
        (('Sold precedent',), 'Sold precedent'),
        (('din sold precedent',), 'Total platit din sold precedent'),
        (('TVA', '%'), 'TVA'),
        (('Abonamente',), 'Abonamente si extraopiuni'),
        (('Total factura curenta fara TVA',), 'Total factura curenta fara TVA'),
        (('Servicii utilizate',), 'Servicii utilizate'),
        (('Rate terminal',), 'Rate terminal'),
        (('Total factura curenta',), 'Total factura curenta'),
    )

    costuri = {}
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # No extractable text on this page.
                continue
            for line in text.split('\n'):
                for markers, key in cost_rules:
                    if all(marker in line for marker in markers):
                        amount = _parse_amount(line)
                        if amount is not None:
                            costuri[key] = amount
                        # First matching rule wins, even if no amount parsed.
                        break
    return costuri
def check_related_keys(question, user_id):
    """Return the stored bill keys that are mentioned in ``question``.

    A key counts as mentioned when its lowercase form is a substring of the
    lowercase question.

    Args:
        question: Free-text question from the user.
        user_id: Identifier passed to ``load_user_data`` to fetch the bills.

    Returns:
        Alphabetically sorted list of matching keys, collected across all of
        the user's stored bills. Sorting keeps the result (and any prompt
        built from it) stable between runs, which plain set iteration does
        not guarantee.
    """
    user_data = load_user_data(user_id)
    # Lowercase the question once rather than once per candidate key.
    question_lower = question.lower()
    bill_keys = set()
    for bill in user_data.get("bills", []):
        bill_keys.update(bill.keys())
    return sorted(key for key in bill_keys if key.lower() in question_lower)
def process_query(query, user_id):
    """Build the LLM prompt that embeds the user's bills and the question.

    The prompt asks the model to answer only about the bill keys mentioned in
    the question, or about the bill in general when no key is mentioned. The
    prompt and its length are echoed to the page.

    Args:
        query: The user's question.
        user_id: Identifier whose stored bills are embedded in the prompt.

    Returns:
        The prompt string, or ``None`` (after showing a warning) when the
        prompt is longer than 550 characters.
    """
    bill_info = load_user_data(user_id).get("bills", [])
    matched_keys = check_related_keys(query, user_id)
    # "N/A" doubles as the marker for "no bill key was mentioned".
    related_keys_str = ", ".join(matched_keys) if matched_keys else "N/A"

    preamble = f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
    if related_keys_str == "N/A":
        context = preamble + (
            f"si raspunde la intrebarea: '{query}' dar numai cu info legate de factura"
        )
    else:
        context = preamble + (
            f"si raspunde la intrebarea: '{query}' dar numai cu info legate de: {related_keys_str}"
        )

    max_input_length = 550
    st.write(f"Context:\n{context}")
    st.write(f"Context size: {len(context)} characters")

    if len(context) <= max_input_length:
        return context

    st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
    return None
def main():
    """Run the Streamlit page: pick a user, ingest bill PDFs, and chat.

    Flow on every script run:
      1. Read the user ID from the sidebar and remember it in session state.
         Changing the ID resets the chat so one user's bill context is never
         carried into another user's conversation.
      2. If a PDF is uploaded, parse it and append it to the user's stored
         bills, once per upload rather than once per rerun.
      3. Show the stored bills.
      4. Show the chat history and, on a new question, send the whole
         conversation to the model. The first question is wrapped in the
         bill context built by ``process_query``.
    """
    st.title("Telecom Bill Chat with LLM Agent")

    if "user_id" not in st.session_state:
        st.session_state.user_id = None

    user_id = st.sidebar.text_input("Introdu numărul de telefon:")
    # The ID is interpolated into a file name below and in load_user_data;
    # refuse path separators so it cannot point outside data/user_data.
    if user_id and ("/" in user_id or "\\" in user_id):
        st.error("ID invalid: caracterele '/' și '\\' nu sunt permise.")
        user_id = ""

    if user_id and user_id != st.session_state.user_id:
        if load_user_data(user_id):
            st.success("Utilizator găsit!")
        else:
            st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
        # The ID is stored even when no data exists yet, so a first bill can
        # be uploaded for a new user.
        st.session_state.user_id = user_id
        # New user: start a fresh conversation and rebuild the bill context.
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
        st.session_state.context_prompt_added = False

    uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
    if uploaded_file and st.session_state.user_id:
        # The uploader keeps returning the same file on every rerun (e.g. on
        # each chat message). Remember what was saved so the bill is appended
        # only once. A different file with identical name and size for the
        # same user is treated as the same upload.
        upload_key = (st.session_state.user_id, uploaded_file.name, uploaded_file.size)
        if st.session_state.get("saved_upload_key") != upload_key:
            bill_data = parse_pdf_to_json(uploaded_file)
            existing_data = load_user_data(st.session_state.user_id)
            existing_data.setdefault("bills", []).append(bill_data)
            file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
            os.makedirs(os.path.dirname(file_path), exist_ok=True)
            with open(file_path, "w", encoding="utf-8") as file:
                json.dump(existing_data, file)
            st.session_state["saved_upload_key"] = upload_key
            st.success("Factura a fost încărcată și salvată cu succes!")

    if st.session_state.user_id:
        data = load_user_data(st.session_state.user_id)
        st.write(f"Phone Number: {st.session_state.user_id}")
        st.write("Facturi existente:")
        for bill in data.get("bills", []):
            st.write(bill)
    else:
        st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")

    # Initialize the conversation in the session state.
    # "context_prompt_added" records whether the bill-context prompt has
    # already been injected into this conversation.
    if "messages" not in st.session_state:
        st.session_state["messages"] = [
            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
        ]
    if "context_prompt_added" not in st.session_state:
        st.session_state.context_prompt_added = False

    st.write("---")
    st.subheader("Chat")
    for msg in st.session_state["messages"]:
        st.chat_message(msg["role"]).write(msg["content"])

    if prompt := st.chat_input("Introduceți întrebarea aici:"):
        if not st.session_state.user_id:
            st.error("Trebuie să introduceți un număr de telefon valid sau să încărcați date.")
            return

        # Inject the bill context once, with the first question; afterwards
        # send the user's raw question only.
        if not st.session_state.context_prompt_added:
            final_prompt = process_query(prompt, st.session_state["user_id"])
            if final_prompt is None:
                # Context too long: process_query already warned the user.
                st.stop()
            st.session_state["messages"].append({"role": "user", "content": final_prompt})
            st.session_state.context_prompt_added = True
        else:
            st.session_state["messages"].append({"role": "user", "content": prompt})

        # Display the latest user message in the chat.
        st.chat_message("user").write(st.session_state["messages"][-1]["content"])

        # Call the model with the entire conversation so far.
        completion = client.chat.completions.create(
            model="gpt-4",
            messages=st.session_state["messages"],
        )
        # The message content is optional in the response; treat a missing
        # value as an empty reply instead of failing on None.strip().
        response_text = (completion.choices[0].message.content or "").strip()
        st.session_state["messages"].append({"role": "assistant", "content": response_text})
        st.chat_message("assistant").write(response_text)

        # The usage attribute can exist and still be None.
        usage = getattr(completion, "usage", None)
        if usage is not None:
            st.write("Prompt tokens:", usage.prompt_tokens)
            st.write("Completion tokens:", usage.completion_tokens)
            st.write("Total tokens:", usage.total_tokens)
# Start the app only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()