Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,327 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import difflib
|
3 |
+
import os
|
4 |
+
import re
|
5 |
+
import hashlib
|
6 |
+
from groq import Groq
|
7 |
+
|
8 |
+
# --- Page config ---
# Browser tab title and wide layout for the whole app.
st.set_page_config(
    page_title="π AI Assistant with Workflow + Semantic Search",
    layout="wide",
)

# --- Groq API Setup ---
# The API key is read from the environment; when it is missing or empty the
# app shows an error and stops instead of building a client.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if GROQ_API_KEY is None or GROQ_API_KEY == "":
    st.error("β Please set your GROQ_API_KEY environment variable.")
    st.stop()

# Module-level client that groq_call() uses for every chat completion.
client = Groq(api_key=GROQ_API_KEY)
|
17 |
+
|
18 |
+
# --- Cache for embeddings ---
# Maps the SHA-256 hex digest of a text to the vector computed for it.
embedding_cache = {}

# Every embedding has exactly this many components.
_EMBEDDING_DIM = 512


def get_embedding(text):
    """Return a fixed-length hashed bag-of-words vector for ``text``.

    The text is lower-cased and split into runs of letters/digits (underscores
    separate tokens, so ``get_user`` contributes ``get`` and ``user``). Each
    token is hashed with SHA-256 into one of ``_EMBEDDING_DIM`` buckets and the
    bucket is incremented, so two texts that share words share non-zero
    components regardless of where the words appear.

    Args:
        text: The string to embed.

    Returns:
        list[float]: A list of ``_EMBEDDING_DIM`` non-negative token counts.
        The same list object is stored in ``embedding_cache`` and returned on
        later calls with identical text, so callers should not mutate it.
    """
    key = hashlib.sha256(text.encode()).hexdigest()
    cached = embedding_cache.get(key)
    if cached is not None:
        return cached

    embedding = [0.0] * _EMBEDDING_DIM
    # [^\W_]+ = word characters except underscore, Unicode-aware.
    for token in re.findall(r"[^\W_]+", text.lower()):
        # hashlib rather than hash(): the built-in string hash is salted per
        # process, which would make bucket positions unstable between runs.
        digest = hashlib.sha256(token.encode()).digest()
        bucket = int.from_bytes(digest[:4], "big") % _EMBEDDING_DIM
        embedding[bucket] += 1.0

    embedding_cache[key] = embedding
    return embedding
|
28 |
+
|
29 |
+
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two numeric sequences.

    The dot product runs over the paired elements only (``zip`` stops at the
    shorter input), while each norm covers its whole vector. A small constant
    is added to the denominator so all-zero or empty inputs give ``0.0``
    rather than a division error.
    """
    def magnitude(vector):
        return sum(component * component for component in vector) ** 0.5

    dot_product = sum(left * right for left, right in zip(vec1, vec2))
    denominator = magnitude(vec1) * magnitude(vec2) + 1e-8
    return dot_product / denominator
|
34 |
+
|
35 |
+
def split_code_into_chunks(code, lang):
    """Split Python source into one chunk per ``def``/``class`` statement.

    A chunk starts at the beginning of a line whose first token is ``def``,
    ``async def`` or ``class`` (at any indentation) and runs up to the next
    such line. Only the start of the statement is matched, so signatures with
    return annotations or spread over several lines are recognised too. Any
    non-blank text before the first definition (imports, module-level code) is
    kept as its own leading chunk, so joining the chunks reproduces the input
    apart from a whitespace-only preamble.

    Args:
        code: Source text to split.
        lang: Language name; only ``"python"`` (case-insensitive) is split.

    Returns:
        list[str]: The chunks in source order, or ``[code]`` when the language
        is not Python or no definition line is found.
    """
    if lang.lower() != "python":
        return [code]

    definition_start = re.compile(
        r'^[ \t]*(?:async[ \t]+)?(?:def|class)[ \t]+\w+', re.MULTILINE
    )
    starts = [match.start() for match in definition_start.finditer(code)]
    if not starts:
        return [code]

    chunks = []
    preamble = code[:starts[0]]
    if preamble.strip():
        chunks.append(preamble)

    # Each chunk runs from one definition start to the next (or to the end).
    boundaries = starts + [len(code)]
    chunks.extend(code[begin:end] for begin, end in zip(boundaries, boundaries[1:]))
    return chunks
|
47 |
+
|
48 |
+
def groq_call(prompt):
    """Send ``prompt`` as a single user message and return the reply text.

    Uses the module-level ``client`` with the ``llama3-70b-8192`` model and
    returns the message content of the first choice in the response.
    """
    conversation = [{"role": "user", "content": prompt}]
    completion = client.chat.completions.create(
        messages=conversation,
        model="llama3-70b-8192",
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
|
54 |
+
|
55 |
+
def semantic_search_improved(code, question, lang, skill, role, explain_lang):
    """Answer ``question`` about ``code`` using its three best-matching chunks.

    The code is split with ``split_code_into_chunks``, every chunk is scored
    against the question by ``cosine_similarity`` of their ``get_embedding``
    vectors, and the three highest-scoring chunks (ties keep source order) are
    placed in a prompt that is sent through ``groq_call``.

    Args:
        code: Source text to search.
        question: The user's question.
        lang: Programming language name, used for chunking and in the prompt.
        skill: Skill level inserted into the prompt.
        role: User role inserted into the prompt.
        explain_lang: Natural language the answer is requested in.

    Returns:
        Whatever ``groq_call`` returns for the assembled prompt.
    """
    pieces = split_code_into_chunks(code, lang)
    query_vector = get_embedding(question)

    ranked = sorted(
        [(cosine_similarity(query_vector, get_embedding(piece)), piece) for piece in pieces],
        key=lambda pair: pair[0],
        reverse=True,
    )
    best_pieces = [piece for _score, piece in ranked[:3]]
    combined_code = "\n\n".join(best_pieces)

    prompt_lines = [
        f"You are a friendly and insightful {lang} expert helping a {skill} {role}.\n",
        f"Based on these relevant code snippets:\n{combined_code}\n",
        f"Answer this question in {explain_lang}:\n{question}\n",
        f"Explain which parts handle the question and how to modify them if needed.",
    ]
    return groq_call("".join(prompt_lines))
|
73 |
+
|
74 |
+
def error_detection_and_fixes(refactored_code, lang, skill, role, explain_lang):
    """Ask the model to find bugs, security flaws and performance issues.

    Args:
        refactored_code: Code to analyse; appended to the end of the prompt.
        lang: Programming language named in the prompt.
        skill: Accepted for a uniform signature; not used in the prompt.
        role: Accepted for a uniform signature; not used in the prompt.
        explain_lang: Natural language the explanations are requested in.

    Returns:
        Whatever ``groq_call`` returns for the assembled prompt.
    """
    instructions = (
        f"You are a senior {lang} developer. Analyze this code for bugs, security flaws, "
        f"and performance issues. Suggest fixes with explanations in {explain_lang}:"
    )
    request = instructions + f"\n\n{refactored_code}"
    return groq_call(request)
|
80 |
+
|
81 |
+
def agentic_workflow(code, skill_level, programming_language, explanation_language, user_role):
    """Run the five-step assistant pipeline over ``code`` and collect the results.

    Steps, in order: explain the original code, refactor it, review the
    refactored code, look for bugs/security/performance problems, and generate
    unit tests. Each step is one ``groq_call`` (error detection goes through
    ``error_detection_and_fixes``). Every step after the refactor works on the
    refactored code, not on the original.

    Args:
        code: The user's source code.
        skill_level: Skill level inserted into the prompts.
        programming_language: Language name inserted into the prompts and used
            to locate the refactored code inside the model's reply.
        explanation_language: Natural language requested for the explanations.
        user_role: User role inserted into the prompts.

    Returns:
        tuple: ``(timeline, suggestions)``. ``timeline`` is a list with one
        dict per step, keyed ``"step"``, ``"description"``, ``"output"`` and
        ``"code"``; ``suggestions`` is a list with one advice string per step.
    """
    timeline = []
    suggestions = []

    # Explanation
    explain_prompt = (
        f"You are a friendly and insightful {programming_language} expert helping a {skill_level} {user_role}. "
        f"Explain this code in {explanation_language} with clear examples, analogies, and why each part matters:\n\n{code}"
    )
    explanation = groq_call(explain_prompt)
    timeline.append({"step": "Explain", "description": "Detailed explanation", "output": explanation, "code": code})
    suggestions.append("Consider refactoring your code to improve readability and performance.")

    # Refactor
    refactor_prompt = (
        f"Refactor this {programming_language} code. Explain the changes like a mentor helping a {skill_level} {user_role}. "
        f"Include best practices and improvements:\n\n{code}"
    )
    refactor_response = groq_call(refactor_prompt)
    # Keep only the fenced code for the selected language when the reply has
    # one; otherwise the whole reply is carried forward as the "code".
    refactored_code = _extract_refactored_code(refactor_response, programming_language)
    timeline.append({"step": "Refactor", "description": "Refactored code with improvements", "output": refactored_code, "code": refactored_code})
    suggestions.append("Review the refactored code and adapt it to your style or project needs.")

    # Review
    review_prompt = (
        f"As a senior {programming_language} developer, review the refactored code. "
        f"Give constructive feedback on strengths, weaknesses, performance, security, and improvements in {explanation_language}:\n\n{refactored_code}"
    )
    review = groq_call(review_prompt)
    timeline.append({"step": "Review", "description": "Code review and suggestions", "output": review, "code": refactored_code})
    suggestions.append("Incorporate review feedback for cleaner, robust code.")

    # Error detection & fixes
    errors = error_detection_and_fixes(refactored_code, programming_language, skill_level, user_role, explanation_language)
    timeline.append({"step": "Error Detection", "description": "Bugs, security, performance suggestions", "output": errors, "code": refactored_code})
    suggestions.append("Apply fixes to improve code safety and performance.")

    # Test generation
    test_prompt = (
        f"Write clear, effective unit tests for this {programming_language} code. "
        f"Explain what each test does in {explanation_language}, for a {skill_level} {user_role}:\n\n{refactored_code}"
    )
    tests = groq_call(test_prompt)
    timeline.append({"step": "Test Generation", "description": "Generated unit tests", "output": tests, "code": tests})
    suggestions.append("Run generated tests locally to validate changes.")

    return timeline, suggestions


# Fence info-string tags accepted for each language name. The display names
# "C++" and "C#" are usually not what a Markdown fence is tagged with, so the
# usual spellings are listed explicitly.
_FENCE_TAGS = {
    "python": {"python", "python3", "py"},
    "javascript": {"javascript", "js", "jsx", "node"},
    "typescript": {"typescript", "ts", "tsx"},
    "c++": {"c++", "cpp", "cxx", "cc"},
    "c#": {"c#", "csharp", "cs"},
    "java": {"java"},
}


def _extract_refactored_code(response, programming_language):
    """Return the first non-empty fenced block tagged for the language, else ``response``."""
    language = programming_language.lower()
    accepted_tags = _FENCE_TAGS.get(language, {language})
    # Splitting on the fence marker leaves the fence interiors at odd indices,
    # so the prose between code blocks is never mistaken for code.
    for fenced in response.split("```")[1::2]:
        tag_line, _, body = fenced.strip().partition("\n")
        # Exact tag comparison, so "java" does not match a "javascript" fence.
        if tag_line.strip().lower() in accepted_tags and body.strip():
            return body
    return response
|
138 |
+
|
139 |
+
def get_inline_diff_html(original, modified):
    """Return an HTML side-by-side diff table wrapped in a scrollable ``<div>``.

    Both inputs are split into lines and compared with ``difflib.HtmlDiff``
    (tab size 4, wrapping at column 80). The columns are titled "Original" and
    "Refactored", and only changed regions with two lines of context are
    shown.
    """
    before_lines = original.splitlines()
    after_lines = modified.splitlines()
    table = difflib.HtmlDiff(tabsize=4, wrapcolumn=80).make_table(
        before_lines,
        after_lines,
        fromdesc="Original",
        todesc="Refactored",
        context=True,
        numlines=2,
    )
    return '<div style="overflow-x:auto; max-height:400px;">' + table + '</div>'
|
146 |
+
|
147 |
+
def detect_code_type(code, programming_language):
    """Classify ``code`` as data-science, backend, frontend or unknown.

    Classification is by case-insensitive substring search for the keywords
    below, so a keyword also matches inside longer identifiers. Checks, in
    priority order:

    1. any data-science keyword -> ``'data_science'``
    2. Python/Java/C# code with a backend keyword -> ``'backend'``
    3. any frontend keyword -> ``'frontend'``
    4. otherwise the language default: Python/Java/C# -> ``'backend'``,
       JavaScript/TypeScript -> ``'frontend'``, anything else -> ``'unknown'``

    The backend check runs before the frontend one so that server code which
    merely mentions ``render`` or ``html`` (for example a Flask view calling
    ``render_template``) is not reported as frontend.

    Args:
        code: Source text to inspect.
        programming_language: Selected language name (case-insensitive).

    Returns:
        str: ``'data_science'``, ``'backend'``, ``'frontend'`` or ``'unknown'``.
    """
    backend_keywords = [
        'flask', 'django', 'express', 'fastapi', 'spring', 'controller', 'api', 'server', 'database', 'sql', 'mongoose'
    ]
    frontend_keywords = [
        'react', 'vue', 'angular', 'component', 'html', 'css', 'document.getelementbyid', 'window.', 'render', 'jsx',
        '<html', '<body', '<script', '<div', 'getelementbyid', 'queryselector', 'addeventlistener', 'innerhtml'
    ]
    data_science_keywords = [
        'pandas', 'numpy', 'sklearn', 'matplotlib', 'seaborn', 'plt', 'train_test_split', 'randomforestclassifier', 'classification_report'
    ]
    code_lower = code.lower()
    language = programming_language.lower()
    is_backend_language = language in ('python', 'java', 'c#')

    def mentions(keywords):
        return any(word in code_lower for word in keywords)

    if mentions(data_science_keywords):
        return 'data_science'
    if is_backend_language and mentions(backend_keywords):
        return 'backend'
    if mentions(frontend_keywords):
        return 'frontend'
    if is_backend_language:
        return 'backend'
    if language in ('javascript', 'typescript'):
        return 'frontend'
    return 'unknown'
|
174 |
+
|
175 |
+
def code_complexity(code):
    """Return a one-line summary of simple substring counts for ``code``.

    Lines are the number of newline characters plus one. Functions, classes
    and comments are the raw occurrence counts of ``'def '``, ``'class '`` and
    ``'#'`` anywhere in the text, including inside strings.
    """
    stats = (
        ("Lines", code.count('\n') + 1),
        ("Functions", code.count('def ')),
        ("Classes", code.count('class ')),
        ("Comments", code.count('#')),
    )
    return ", ".join(f"{label}: {value}" for label, value in stats)
|
181 |
+
|
182 |
+
def code_matches_language(code: str, language: str) -> bool:
    """Report whether ``code`` contains at least one marker of ``language``.

    The check is a case-insensitive substring search: both the code and every
    marker are lower-cased before comparison, so mixed-case markers such as
    ``System.out.println`` or ``Console.WriteLine`` are found as well. One
    matching marker is enough, which makes this a permissive sanity check
    rather than real language detection.

    Args:
        code: Source text to inspect.
        language: Language name (case-insensitive); must be a key of the
            marker table below.

    Returns:
        True if any marker of the language occurs in the code; False if none
        does or the language has no marker list.
    """
    code_lower = code.strip().lower()
    language = language.lower()

    patterns = {
        "python": [
            "def ", "class ", "import ", "from ", "try:", "except", "raise", "lambda",
            "with ", "yield", "async ", "await", "print(", "self.", "__init__", "__name__",
            "if __name__ == '__main__':", "#!",  # shebang for executable scripts
        ],
        "c++": [
            "#include", "int main(", "std::", "::", "cout <<", "cin >>", "new ", "delete ",
            "try {", "catch(", "template<", "using namespace", "class ", "struct ", "#define",
        ],
        "java": [
            "package ", "import java.", "public class", "private ", "protected ", "public static void main",
            "System.out.println", "try {", "catch(", "throw new ", "implements ", "extends ",
            "@Override", "interface ", "enum ", "synchronized ", "final ",
        ],
        "c#": [
            "using System", "namespace ", "class ", "interface ", "public static void Main",
            "Console.WriteLine", "try {", "catch(", "throw ", "async ", "await ", "get;", "set;",
            "List<", "Dictionary<", "[Serializable]", "[Obsolete]",
        ],
        "javascript": [
            "function ", "const ", "let ", "var ", "document.", "window.", "console.log",
            "if(", "for(", "while(", "switch(", "try {", "catch(", "export ", "import ", "async ",
            "await ", "=>", "this.", "class ", "prototype", "new ", "$(",
        ],
        "typescript": [
            "function ", "const ", "let ", "interface ", "type ", ": string", ": number", ": boolean",
            "implements ", "extends ", "enum ", "public ", "private ", "protected ", "readonly ",
            "import ", "export ", "console.log", "async ", "await ", "=>", "this.",
        ],
        "html": [
            "<!doctype html", "<html", "<head>", "<body>", "<script", "<style", "<meta ", "<link ",
            "<title>", "<div", "<span", "<p>", "<h1>", "<ul>", "<li>", "<form", "<input", "<button",
            "<table", "<footer", "<header", "<section", "<article", "<nav", "<img", "<a ", "</html>",
        ],
    }

    # The code was lower-cased above, so each marker must be lower-cased too;
    # otherwise markers containing capitals could never match.
    return any(pattern.lower() in code_lower for pattern in patterns.get(language, ()))
|
229 |
+
|
230 |
+
|
231 |
+
# --- Sidebar ---
# Choices offered by each sidebar selector.
language_options = ["Python", "JavaScript", "C++", "Java", "C#", "TypeScript"]
skill_options = ["Beginner", "Intermediate", "Expert"]
role_options = ["Student", "Frontend Developer", "Backend Developer", "Data Scientist"]
explanation_language_options = ["English", "Spanish", "Chinese", "Urdu"]

st.sidebar.title("π§ Configuration")
# These four selections are read by both tabs below.
lang = st.sidebar.selectbox("Programming Language", language_options)
skill = st.sidebar.selectbox("Skill Level", skill_options)
role = st.sidebar.selectbox("Your Role", role_options)
explain_lang = st.sidebar.selectbox("Explanation Language", explanation_language_options)

# Separator followed by the attribution footer.
st.sidebar.markdown("---")
st.sidebar.markdown(
    "<span style='color:#fff;'>Powered by <b>BLACKBOX.AI</b></span>",
    unsafe_allow_html=True,
)

# tabs[0] hosts the full workflow, tabs[1] the semantic search.
tabs = st.tabs(["π§ Full AI Workflow", "π Semantic Search"])
|
241 |
+
# --- Tab 1: Full AI Workflow ---
# Collects code (upload or paste), validates it against the sidebar choices,
# runs agentic_workflow() and renders every step plus a downloadable report.
with tabs[0]:
    st.title("π§ Full AI Workflow")
    # Upload extensions accepted for each language offered in the sidebar.
    file_types = {
        "Python": ["py"],
        "JavaScript": ["js"],
        "C++": ["cpp", "h", "hpp"],
        "Java": ["java"],
        "C#": ["cs"],
        "TypeScript": ["ts"],
    }
    # Detected code category -> (label used in the error message, matching role).
    role_for_code_type = {
        "data_science": ("Data science", "Data Scientist"),
        "frontend": ("Frontend", "Frontend Developer"),
        "backend": ("Backend", "Backend Developer"),
    }

    uploaded_file = st.file_uploader(
        f"Upload {', '.join(file_types.get(lang, []))} file(s)",
        type=file_types.get(lang, None)
    )
    if uploaded_file:
        # "utf-8-sig" drops a leading byte-order mark, and errors="replace"
        # keeps a file in another encoding from raising UnicodeDecodeError.
        code_input = uploaded_file.read().decode("utf-8-sig", errors="replace")
    else:
        code_input = st.text_area("Your Code", height=300, placeholder="Paste your code here...")

    if code_input:
        st.markdown(f"<b>Complexity:</b> {code_complexity(code_input)}", unsafe_allow_html=True)

    if st.button("Run AI Workflow"):
        if not code_input.strip():
            st.warning("Please paste or upload your code.")
        elif not code_matches_language(code_input, lang):
            st.error(f"The pasted code doesn’t look like valid {lang} code. Please check your code or select the correct language.")
        else:
            code_type = detect_code_type(code_input, lang)
            label, required_role = role_for_code_type.get(code_type, (None, None))
            # "Student" matches no code category, and detect_code_type() falls
            # back to backend/frontend for most languages, so gating students
            # on the role would lock them out of almost every input.
            if required_role is not None and role not in ("Student", required_role):
                st.error(f"{label} code detected. Please select '{required_role}' role.")
            else:
                with st.spinner("Running agentic workflow..."):
                    timeline, suggestions = agentic_workflow(code_input, skill, lang, explain_lang, role)

                # Show each step in an expander
                for step in timeline:
                    # NOTE(review): the leading glyph of this label was garbled
                    # and split across two lines in the extracted source; it is
                    # restored here as a check mark - confirm.
                    with st.expander(f"✅ {step['step']} - {step['description']}"):
                        if step['step'] == "Refactor":
                            # The refactor step shows a diff against the input
                            # first, then the refactored code itself.
                            diff_html = get_inline_diff_html(code_input, step['code'])
                            st.markdown(diff_html, unsafe_allow_html=True)
                            st.code(step['output'], language=lang.lower())
                        else:
                            st.markdown(step['output'])

                st.markdown("#### Agent Suggestions")
                for s in suggestions:
                    st.markdown(f"- {s}")

                # Download buttons after suggestions
                st.markdown("---")
                st.markdown("### π₯ Download Results")

                # One "## <step>" section per timeline entry.
                report_text = "".join(
                    f"## {step['step']}\n{step['description']}\n\n{step['output']}\n\n"
                    for step in timeline
                )

                st.download_button(
                    label="π Download Full Workflow Report",
                    data=report_text,
                    file_name="ai_workflow_report.txt",
                    mime="text/plain",
                )
|
310 |
+
|
311 |
+
|
312 |
+
|
313 |
+
# --- Tab 2: Semantic Search ---
# Takes code and a question, then shows the answer from semantic_search_improved().
with tabs[1]:
    st.title("π Semantic Search")
    sem_code = st.text_area("Your Code", height=300, placeholder="Paste your code...")
    sem_q = st.text_input("Your Question", placeholder="E.g., What does this function do?")

    if st.button("Run Semantic Search"):
        # Both fields must hold something other than whitespace.
        if sem_code.strip() and sem_q.strip():
            with st.spinner("Running semantic search..."):
                answer = semantic_search_improved(sem_code, sem_q, lang, skill, role, explain_lang)
            st.markdown("### π Answer")
            st.markdown(answer)
        else:
            st.warning("Code and question required.")
|
326 |
+
|
327 |
+
st.markdown("---")
|