amiguel committed on
Commit
c9f0dd1
·
verified ·
1 Parent(s): cb3c52a

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +135 -0
app.py ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import time
4
+ import PyPDF2
5
+ from docx import Document
6
+ import pandas as pd
7
+ from dotenv import load_dotenv
8
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
9
+
10
# Load environment variables
load_dotenv()

# Avatars and bios
# Both avatar URLs point at a specific commit hash of the image repository.
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"

# Markdown biography that generate_response streams back, line by line,
# when the prompt contains one of its bio trigger phrases.
ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** 🤖

**Background:**
- 🎓 Mechanical Engineering (BSc)
- ⛽ Oil & Gas Engineering (MSc Specialization)
- 🔧 17+ years in Oil & Gas Industry
- 🔍 Current: Topside Inspection Methods Engineer @ TotalEnergies
- 🤖 AI Practitioner Specialist
- 🚀 Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)

**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration

Ask me about engineering challenges, AI applications, or industry best practices!
"""
36
+
37
# UI Setup
# Raw CSS is injected through markdown to swap the page font and adjust the
# padding of one Streamlit-generated container class.
st.markdown("""
<style>
@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
* { font-family: 'Tw Cen MT', sans-serif; }
.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
</style>
""", unsafe_allow_html=True)

st.title("🚀 Ataliba o Agent Nerdx 🚀")

# Sidebar
with st.sidebar:
    st.header("⚡️ Hugging Face Model Loaded")
    st.markdown("Model: amiguel/unsloth_finetune_test")
    # The uploader restricts selectable files to these four extensions;
    # the result is None until the user picks a file.
    uploaded_file = st.file_uploader(
        "Upload technical documents",
        type=["pdf", "docx", "xlsx", "xlsm"],
    )
53
+
54
# Session state
# Seed each key only when it is absent, so values written during earlier
# script runs are kept. A factory is used so the chat history gets its own
# fresh list rather than a shared one.
for _state_key, _make_default in (
    ("file_context", lambda: None),
    ("chat_history", list),
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
59
+
60
+ # File parser
61
def parse_file(file):
    """Extract plain text from an uploaded PDF, Word, or Excel document.

    The format is chosen from the upload's MIME type, falling back to the
    file extension. The fallback matters for ``.xlsm`` workbooks: the
    uploader accepts them, but their MIME type is not one of the two Excel
    types checked here.

    Args:
        file: A file-like object exposing ``type`` (MIME string) and
            ``name`` attributes, as returned by ``st.file_uploader``.

    Returns:
        The extracted text, or ``None`` when the format is not recognised
        or parsing fails (in which case the error is shown via
        ``st.error``).
    """
    docx_mime = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    excel_mimes = (
        "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        "application/vnd.ms-excel",
    )

    mime_type = getattr(file, "type", "") or ""
    extension = os.path.splitext(getattr(file, "name", "") or "")[1].lower()

    try:
        if mime_type == "application/pdf" or extension == ".pdf":
            reader = PyPDF2.PdfReader(file)
            # extract_text() may yield None for pages without a text layer;
            # substitute "" so the join cannot raise.
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        if mime_type == docx_mime or extension == ".docx":
            doc = Document(file)
            return "\n".join(para.text for para in doc.paragraphs)
        if mime_type in excel_mimes or extension in (".xlsx", ".xlsm"):
            df = pd.read_excel(file)
            return df.to_string()
    except Exception as e:
        # Top-level UI boundary: report the problem instead of crashing the app.
        st.error(f"Error processing file: {str(e)}")
    return None
75
+
76
# Process file
# Parse the upload when nothing is loaded yet OR when the user has swapped in
# a different file. Checking only "no context yet" would silently ignore
# every upload after the first successful one.
if uploaded_file:
    upload_signature = (uploaded_file.name, uploaded_file.size)
    is_new_upload = st.session_state.get("file_signature") != upload_signature
    if is_new_upload or not st.session_state.file_context:
        # Rewind first: a previous run may have left the stream at EOF.
        uploaded_file.seek(0)
        st.session_state.file_context = parse_file(uploaded_file)
        st.session_state.file_signature = upload_signature
        if st.session_state.file_context:
            st.sidebar.success("✅ Document loaded successfully")
81
+
82
+ # Load model
83
@st.cache_resource
def load_custom_model():
    """Build the text-classification pipeline for the fine-tuned checkpoint.

    Wrapped in ``st.cache_resource`` so Streamlit can reuse the returned
    pipeline instead of rebuilding it on each script rerun.

    Returns:
        A ``transformers`` "text-classification" pipeline built from the
        tokenizer and sequence-classification model of
        ``amiguel/unsloth_finetune_test``.
    """
    checkpoint = "amiguel/unsloth_finetune_test"
    # Tokenizer is loaded before the model, in the same order as before.
    return pipeline(
        "text-classification",
        tokenizer=AutoTokenizer.from_pretrained(checkpoint),
        model=AutoModelForSequenceClassification.from_pretrained(checkpoint),
    )
89
+
90
+ # Generate response
91
def _stream_lines(text, delay=0.1):
    """Yield *text* one line at a time, sleeping *delay* seconds after each."""
    for line in text.split('\n'):
        yield line + '\n'
        time.sleep(delay)


def generate_response(prompt):
    """Stream the assistant's reply to *prompt* as newline-terminated chunks.

    If the prompt contains any bio trigger phrase (case-insensitive
    substring match), the biography in ``ATALIBA_BIO`` is streamed and
    nothing else happens. Otherwise the prompt is classified with the
    pipeline from ``load_custom_model`` and the predicted label, its
    confidence, and an excerpt of the loaded document are streamed.

    Args:
        prompt: The user's chat message.

    Yields:
        Successive pieces of the markdown reply. If classification fails,
        a single "Model Error" message is yielded instead.
    """
    bio_triggers = ('who are you', 'ataliba', 'yourself', 'skilled at',
                    'background', 'experience', 'valonylabs', 'totalenergies')

    lowered_prompt = prompt.lower()
    if any(trigger in lowered_prompt for trigger in bio_triggers):
        yield from _stream_lines(ATALIBA_BIO)
        return

    try:
        classifier = load_custom_model()
        result = classifier(prompt)[0]
        label = result['label']
        score = result['score']
        context = st.session_state.file_context or "No document loaded."

        # Collapse whitespace so the excerpt stays on one line inside the
        # inline-code span, and add an ellipsis only if text was cut off.
        excerpt = " ".join(context[:300].split())
        if len(context) > 300:
            excerpt += "..."

        response_text = (
            f"\n📘 **Prediction**: `{label}`"
            f"\n📊 **Confidence**: `{score:.2%}`"
            f"\n🗂️ **Context**: `{excerpt}`"
        )
        yield from _stream_lines(response_text)

    except Exception as e:
        # UI boundary: surface the failure in the chat instead of crashing.
        yield f"⚠️ Model Error: {str(e)}"
115
+
116
# Chat interface
# Replay the stored conversation first so it is redrawn on every script run.
for msg in st.session_state.chat_history:
    avatar = USER_AVATAR if msg["role"] == "user" else BOT_AVATAR
    with st.chat_message(msg["role"], avatar=avatar):
        st.markdown(msg["content"])

if prompt := st.chat_input("Ask about documents or technical matters..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)

    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        full_response = ""

        for chunk in generate_response(prompt):
            full_response += chunk
            # The trailing block cursor marks the reply as still streaming.
            response_placeholder.markdown(full_response + "▌")

        # Final render without the cursor.
        response_placeholder.markdown(full_response)
    st.session_state.chat_history.append({"role": "assistant", "content": full_response})