Naz786 committed on
Commit
8b44e94
·
verified ·
1 Parent(s): e8241b0

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +327 -0
app.py ADDED
@@ -0,0 +1,327 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import difflib
3
+ import os
4
+ import re
5
+ import hashlib
6
+ from groq import Groq
7
+
8
# --- Page config ---
st.set_page_config(page_title="🚀 AI Assistant with Workflow + Semantic Search", layout="wide")

# --- Groq API Setup ---
# The key is read from the environment rather than hard-coded in the source.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
if not GROQ_API_KEY:
    # Without a key no client can be built, so report the problem in the UI
    # and stop this script run before anything below tries to use `client`.
    st.error("❌ Please set your GROQ_API_KEY environment variable.")
    st.stop()
client = Groq(api_key=GROQ_API_KEY)
17
+
18
# --- Cache for embeddings ---
# Maps the SHA-256 hex digest of a text to the pseudo-embedding computed for it.
embedding_cache = {}


def get_embedding(text):
    """Return a cheap, deterministic pseudo-embedding for *text*.

    This is a character-code fingerprint, not a learned embedding: each of
    the first 512 characters ``c`` contributes ``ord(c) % 100 / 100``, so the
    result has ``min(len(text), 512)`` floats, each in ``[0.0, 0.99]``.

    Results are memoised in ``embedding_cache`` under the SHA-256 digest of
    the full text.

    Args:
        text: The string to embed.

    Returns:
        A list of floats. On a cache hit the stored list object itself is
        returned, so callers must not mutate it.
    """
    key = hashlib.sha256(text.encode()).hexdigest()
    try:
        return embedding_cache[key]
    except KeyError:
        pass

    embedding = [ord(char) % 100 / 100 for char in text[:512]]
    embedding_cache[key] = embedding
    return embedding
28
+
29
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity of two numeric sequences.

    The dot product is taken over the common prefix of the two vectors
    (``zip`` stops at the shorter one) while each norm uses the full vector.
    That is equivalent to zero-padding the shorter vector.

    Args:
        vec1: First sequence of numbers.
        vec2: Second sequence of numbers.

    Returns:
        ``dot / (|vec1| * |vec2| + 1e-8)`` as a float. The small epsilon
        keeps the division defined for empty or all-zero vectors, which
        yield ``0.0``.
    """
    dot = sum(a * b for a, b in zip(vec1, vec2))
    norm1 = sum(a * a for a in vec1) ** 0.5
    norm2 = sum(b * b for b in vec2) ** 0.5
    return dot / (norm1 * norm2 + 1e-8)
34
+
35
def split_code_into_chunks(code, lang):
    """Split source code into chunks at ``def`` / ``class`` boundaries.

    Only Python is split; any other language is returned as a single chunk.
    A chunk starts at the beginning of a line that opens a ``def``,
    ``async def`` or ``class`` statement (at any indentation, so methods
    become their own chunks) and runs up to the next such line. Matching the
    statement keyword rather than the full signature means headers with
    return annotations or multi-line parameter lists are still recognised.

    Module-level code that precedes the first definition is kept as a
    leading chunk when it contains anything other than whitespace, so it
    stays available to the search.

    Args:
        code: The source text to split.
        lang: Language name; compared case-insensitively against "python".

    Returns:
        A list of strings. ``[code]`` when the language is not Python or no
        definition is found.
    """
    if lang.lower() != "python":
        return [code]

    header = re.compile(r"^[ \t]*(?:async[ \t]+)?(?:def|class)[ \t]+\w+", re.MULTILINE)
    starts = [match.start() for match in header.finditer(code)]
    if not starts:
        return [code]

    chunks = []
    preamble = code[:starts[0]]
    if preamble.strip():
        chunks.append(preamble)

    # Pair each start offset with the next one (or end of text) to slice chunks.
    bounds = starts + [len(code)]
    chunks.extend(code[begin:end] for begin, end in zip(bounds, bounds[1:]))
    return chunks
47
+
48
def groq_call(prompt):
    """Send *prompt* as a single user message and return the model's reply text.

    Uses the module-level ``client`` and the ``llama3-70b-8192`` model.

    Args:
        prompt: The full prompt text.

    Returns:
        The content of the first choice's message, or ``""`` when that
        content is missing.
    """
    resp = client.chat.completions.create(
        messages=[{"role": "user", "content": prompt}],
        model="llama3-70b-8192",
    )
    # NOTE(review): OpenAI-style response schemas type `content` as optional;
    # normalising None to "" keeps callers' string operations (splitting on
    # code fences, Markdown rendering) from raising. Confirm against the SDK.
    return resp.choices[0].message.content or ""
54
+
55
def semantic_search_improved(code, question, lang, skill, role, explain_lang):
    """Answer *question* about *code* using the most similar code chunks.

    The code is split with ``split_code_into_chunks``, each chunk is scored
    against the question with ``cosine_similarity`` over ``get_embedding``
    vectors, and the three highest-scoring chunks are placed in a prompt
    sent through ``groq_call``.

    Args:
        code: Source code to search.
        question: The user's question.
        lang: Programming language name; used for chunking and in the prompt.
        skill: Skill level label inserted into the prompt.
        role: User role label inserted into the prompt.
        explain_lang: Natural language the answer should be written in.

    Returns:
        The model's answer text as returned by ``groq_call``.
    """
    question_emb = get_embedding(question)
    scored_chunks = [
        (cosine_similarity(question_emb, get_embedding(chunk)), chunk)
        for chunk in split_code_into_chunks(code, lang)
    ]
    # Stable sort on the score only, so ties keep their original source order.
    scored_chunks.sort(key=lambda pair: pair[0], reverse=True)
    top_chunks = [chunk for _, chunk in scored_chunks[:3]]
    combined_code = "\n\n".join(top_chunks)
    prompt = (
        f"You are a friendly and insightful {lang} expert helping a {skill} {role}.\n"
        f"Based on these relevant code snippets:\n{combined_code}\n"
        f"Answer this question in {explain_lang}:\n{question}\n"
        f"Explain which parts handle the question and how to modify them if needed."
    )
    return groq_call(prompt)
73
+
74
def error_detection_and_fixes(refactored_code, lang, skill, role, explain_lang):
    """Ask the model to find bugs, security flaws and performance issues.

    Args:
        refactored_code: The code to analyse.
        lang: Programming language name inserted into the prompt.
        skill: Accepted but not used in the prompt.
        role: Accepted but not used in the prompt.
        explain_lang: Natural language the explanations should be written in.

    Returns:
        The model's analysis text as returned by ``groq_call``.
    """
    prompt = (
        f"You are a senior {lang} developer. Analyze this code for bugs, security flaws, "
        f"and performance issues. Suggest fixes with explanations in {explain_lang}:\n\n{refactored_code}"
    )
    return groq_call(prompt)
80
+
81
# Markdown fence tags that may label a code block for each selectable language.
_FENCE_TAG_ALIASES = {
    "python": ("python", "python3", "py"),
    "javascript": ("javascript", "js"),
    "typescript": ("typescript", "ts"),
    "c++": ("c++", "cpp"),
    "c#": ("c#", "csharp", "cs"),
    "java": ("java",),
}


def _extract_refactored_code(response, programming_language):
    """Return the code from the most relevant Markdown fence in *response*.

    Preference order: the first fenced block tagged with the requested
    language (or a known alias), then the first fenced block with any body,
    then the whole response when no usable fence exists.
    """
    # Splitting on the fence marker leaves prose at even indexes and fenced
    # content at odd indexes, so only the odd segments can be code.
    fenced_blocks = response.split("```")[1::2]
    language = programming_language.lower()
    accepted_tags = _FENCE_TAG_ALIASES.get(language, (language,))

    fallback = ""
    for block in fenced_blocks:
        # The first line of a fenced block is its (possibly empty) info tag.
        tag, newline, body = block.partition("\n")
        body = body.rstrip()
        if not newline or not body.strip():
            continue
        if tag.strip().lower() in accepted_tags:
            return body
        if not fallback:
            fallback = body
    return fallback or response


def agentic_workflow(code, skill_level, programming_language, explanation_language, user_role):
    """Run the explain, refactor, review, error-detection and test-generation steps.

    Each step sends one prompt through ``groq_call``. The refactor step's
    reply is reduced to its code (see ``_extract_refactored_code``), and that
    code is the input to the review, error-detection and test steps.

    Args:
        code: The user's original source code.
        skill_level: Skill level label inserted into prompts.
        programming_language: Language name inserted into prompts and used to
            pick the fenced code block out of the refactor reply.
        explanation_language: Natural language for explanations.
        user_role: Role label inserted into prompts.

    Returns:
        A ``(timeline, suggestions)`` tuple. ``timeline`` is a list of dicts
        with keys ``"step"``, ``"description"``, ``"output"`` and ``"code"``,
        one per step in execution order. ``suggestions`` is a list of
        follow-up hint strings, one per step.
    """
    timeline = []
    suggestions = []

    # Explanation
    explain_prompt = (
        f"You are a friendly and insightful {programming_language} expert helping a {skill_level} {user_role}. "
        f"Explain this code in {explanation_language} with clear examples, analogies, and why each part matters:\n\n{code}"
    )
    explanation = groq_call(explain_prompt)
    timeline.append({"step": "Explain", "description": "Detailed explanation", "output": explanation, "code": code})
    suggestions.append("Consider refactoring your code to improve readability and performance.")

    # Refactor
    refactor_prompt = (
        f"Refactor this {programming_language} code. Explain the changes like a mentor helping a {skill_level} {user_role}. "
        f"Include best practices and improvements:\n\n{code}"
    )
    refactor_response = groq_call(refactor_prompt)
    refactored_code = _extract_refactored_code(refactor_response, programming_language)
    timeline.append({"step": "Refactor", "description": "Refactored code with improvements", "output": refactored_code, "code": refactored_code})
    suggestions.append("Review the refactored code and adapt it to your style or project needs.")

    # Review
    review_prompt = (
        f"As a senior {programming_language} developer, review the refactored code. "
        f"Give constructive feedback on strengths, weaknesses, performance, security, and improvements in {explanation_language}:\n\n{refactored_code}"
    )
    review = groq_call(review_prompt)
    timeline.append({"step": "Review", "description": "Code review and suggestions", "output": review, "code": refactored_code})
    suggestions.append("Incorporate review feedback for cleaner, robust code.")

    # Error detection & fixes
    errors = error_detection_and_fixes(refactored_code, programming_language, skill_level, user_role, explanation_language)
    timeline.append({"step": "Error Detection", "description": "Bugs, security, performance suggestions", "output": errors, "code": refactored_code})
    suggestions.append("Apply fixes to improve code safety and performance.")

    # Test generation
    test_prompt = (
        f"Write clear, effective unit tests for this {programming_language} code. "
        f"Explain what each test does in {explanation_language}, for a {skill_level} {user_role}:\n\n{refactored_code}"
    )
    tests = groq_call(test_prompt)
    timeline.append({"step": "Test Generation", "description": "Generated unit tests", "output": tests, "code": tests})
    suggestions.append("Run generated tests locally to validate changes.")

    return timeline, suggestions
138
+
139
def get_inline_diff_html(original, modified):
    """Render a side-by-side HTML diff of two texts inside a scrollable div.

    Args:
        original: The text shown in the "Original" column.
        modified: The text shown in the "Refactored" column.

    Returns:
        An HTML string: a ``difflib.HtmlDiff`` table (context mode, two
        context lines, lines wrapped at 80 columns) wrapped in a ``<div>``
        that scrolls horizontally and is capped at 400px in height.
    """
    differ = difflib.HtmlDiff(tabsize=4, wrapcolumn=80)
    table = differ.make_table(
        original.splitlines(),
        modified.splitlines(),
        "Original",
        "Refactored",
        context=True,
        numlines=2,
    )
    return f'<div style="overflow-x:auto; max-height:400px;">{table}</div>'
146
+
147
def detect_code_type(code, programming_language):
    """Classify code as data science, frontend, backend or unknown.

    Keyword checks are plain case-insensitive substring tests on the code and
    are applied in priority order: data-science keywords first, then frontend
    keywords. If neither matches, the selected language decides: Python, Java
    and C# are treated as backend, JavaScript and TypeScript as frontend.

    Args:
        code: The source text to inspect.
        programming_language: The selected language name (case-insensitive).

    Returns:
        One of ``'data_science'``, ``'frontend'``, ``'backend'`` or
        ``'unknown'``.
    """
    frontend_keywords = [
        'react', 'vue', 'angular', 'component', 'html', 'css', 'document.getelementbyid', 'window.', 'render', 'jsx',
        '<html', '<body', '<script', '<div', 'getelementbyid', 'queryselector', 'addeventlistener', 'innerhtml'
    ]
    data_science_keywords = [
        'pandas', 'numpy', 'sklearn', 'matplotlib', 'seaborn', 'plt', 'train_test_split', 'randomforestclassifier', 'classification_report'
    ]
    code_lower = code.lower()
    language = programming_language.lower()

    if any(word in code_lower for word in data_science_keywords):
        return 'data_science'
    if any(word in code_lower for word in frontend_keywords):
        return 'frontend'
    # No separate backend-keyword check is needed: for these languages the
    # result is 'backend' whether or not a backend keyword is present.
    if language in ('python', 'java', 'c#'):
        return 'backend'
    if language in ('javascript', 'typescript'):
        return 'frontend'
    return 'unknown'
174
+
175
def code_complexity(code):
    """Return a one-line summary of rough size metrics for *code*.

    The metrics are plain substring counts, not parsed structure: lines are
    newline characters plus one, functions are occurrences of ``'def '``,
    classes are occurrences of ``'class '`` and comments are occurrences of
    ``'#'``.

    Args:
        code: The source text to measure.

    Returns:
        A string of the form
        ``"Lines: N, Functions: N, Classes: N, Comments: N"``.
    """
    lines = code.count('\n') + 1
    functions = code.count('def ')
    classes = code.count('class ')
    comments = code.count('#')
    return f"Lines: {lines}, Functions: {functions}, Classes: {classes}, Comments: {comments}"
181
+
182
def code_matches_language(code: str, language: str) -> bool:
    """Check whether *code* contains at least one marker of *language*.

    The check is a case-insensitive substring search for a fixed list of
    characteristic snippets per language. Both the code and the patterns are
    lowercased before comparison, so mixed-case markers such as
    ``System.out.println`` or ``Console.WriteLine`` can match.

    Args:
        code: The source text to inspect.
        language: The language name (case-insensitive).

    Returns:
        True if any pattern for the language occurs in the code. False for
        languages without a pattern list and for code with no match.
    """
    code_lower = code.strip().lower()
    language = language.lower()

    patterns = {
        "python": [
            "def ", "class ", "import ", "from ", "try:", "except", "raise", "lambda",
            "with ", "yield", "async ", "await", "print(", "self.", "__init__", "__name__",
            "if __name__ == '__main__':", "#!",  # shebang for executable scripts
        ],
        "c++": [
            "#include", "int main(", "std::", "::", "cout <<", "cin >>", "new ", "delete ",
            "try {", "catch(", "template<", "using namespace", "class ", "struct ", "#define",
        ],
        "java": [
            "package ", "import java.", "public class", "private ", "protected ", "public static void main",
            "System.out.println", "try {", "catch(", "throw new ", "implements ", "extends ",
            "@Override", "interface ", "enum ", "synchronized ", "final ",
        ],
        "c#": [
            "using System", "namespace ", "class ", "interface ", "public static void Main",
            "Console.WriteLine", "try {", "catch(", "throw ", "async ", "await ", "get;", "set;",
            "List<", "Dictionary<", "[Serializable]", "[Obsolete]",
        ],
        "javascript": [
            "function ", "const ", "let ", "var ", "document.", "window.", "console.log",
            "if(", "for(", "while(", "switch(", "try {", "catch(", "export ", "import ", "async ",
            "await ", "=>", "this.", "class ", "prototype", "new ", "$(",
        ],
        "typescript": [
            "function ", "const ", "let ", "interface ", "type ", ": string", ": number", ": boolean",
            "implements ", "extends ", "enum ", "public ", "private ", "protected ", "readonly ",
            "import ", "export ", "console.log", "async ", "await ", "=>", "this.",
        ],
        "html": [
            "<!doctype html", "<html", "<head>", "<body>", "<script", "<style", "<meta ", "<link ",
            "<title>", "<div", "<span", "<p>", "<h1>", "<ul>", "<li>", "<form", "<input", "<button",
            "<table", "<footer", "<header", "<section", "<article", "<nav", "<img", "<a ", "</html>",
        ],
    }

    # Patterns are lowercased here because the code was lowercased above;
    # comparing them as written would make every capitalised pattern dead.
    return any(pattern.lower() in code_lower for pattern in patterns.get(language, ()))
229
+
230
+
231
# --- Sidebar ---
# These four selections are read by both tabs below.
st.sidebar.title("🔧 Configuration")
lang = st.sidebar.selectbox("Programming Language", ["Python", "JavaScript", "C++", "Java", "C#", "TypeScript"])
skill = st.sidebar.selectbox("Skill Level", ["Beginner", "Intermediate", "Expert"])
role = st.sidebar.selectbox("Your Role", ["Student", "Frontend Developer", "Backend Developer", "Data Scientist"])
explain_lang = st.sidebar.selectbox("Explanation Language", ["English", "Spanish", "Chinese", "Urdu"])
st.sidebar.markdown("---")
st.sidebar.markdown("<span style='color:#fff;'>Powered by <b>BLACKBOX.AI</b></span>", unsafe_allow_html=True)
239
+
240
tabs = st.tabs(["🧠 Full AI Workflow", "🔍 Semantic Search"])
# --- Tab 1: Full AI Workflow ---
with tabs[0]:
    st.title("🧠 Full AI Workflow")
    # Accepted upload extensions for each selectable language.
    file_types = {
        "Python": ["py"],
        "JavaScript": ["js"],
        "C++": ["cpp", "h", "hpp"],
        "Java": ["java"],
        "C#": ["cs"],
        "TypeScript": ["ts"],
    }

    uploaded_file = st.file_uploader(
        f"Upload {', '.join(file_types.get(lang, []))} file(s)",
        type=file_types.get(lang, None)
    )
    if uploaded_file:
        try:
            # getvalue() returns the whole buffer regardless of the current
            # read position, so repeated script reruns cannot see an
            # already-consumed stream.
            code_input = uploaded_file.getvalue().decode("utf-8")
        except UnicodeDecodeError:
            st.error("The uploaded file is not valid UTF-8 text. Please upload a UTF-8 encoded source file.")
            code_input = ""
    else:
        code_input = st.text_area("Your Code", height=300, placeholder="Paste your code here...")

    if code_input:
        st.markdown(f"<b>Complexity:</b> {code_complexity(code_input)}", unsafe_allow_html=True)

    if st.button("Run AI Workflow"):
        if not code_input.strip():
            st.warning("Please paste or upload your code.")
        elif not code_matches_language(code_input, lang):
            st.error(f"The pasted code doesn’t look like valid {lang} code. Please check your code or select the correct language.")
        else:
            # Detected code type -> (role that must be selected, error shown otherwise).
            role_requirements = {
                "data_science": ("Data Scientist", "Data science code detected. Please select 'Data Scientist' role."),
                "frontend": ("Frontend Developer", "Frontend code detected. Please select 'Frontend Developer' role."),
                "backend": ("Backend Developer", "Backend code detected. Please select 'Backend Developer' role."),
            }
            code_type = detect_code_type(code_input, lang)
            required_role, mismatch_message = role_requirements.get(code_type, (None, None))

            # "Student" is exempt from the role/code-type match: every language
            # except C++ is always classified into one of the three developer
            # categories, so a strict check would reject students every time.
            if required_role is not None and role not in (required_role, "Student"):
                st.error(mismatch_message)
            else:
                with st.spinner("Running agentic workflow..."):
                    timeline, suggestions = agentic_workflow(code_input, skill, lang, explain_lang, role)

                # Show each step in an expander
                for step in timeline:
                    with st.expander(f"✅ {step['step']} - {step['description']}"):
                        if step['step'] == "Refactor":
                            diff_html = get_inline_diff_html(code_input, step['code'])
                            st.markdown(diff_html, unsafe_allow_html=True)
                            st.code(step['output'], language=lang.lower())
                        else:
                            st.markdown(step['output'])

                st.markdown("#### Agent Suggestions")
                for s in suggestions:
                    st.markdown(f"- {s}")

                # Download buttons after suggestions
                st.markdown("---")
                st.markdown("### 📥 Download Results")

                report_text = "".join(
                    f"## {step['step']}\n{step['description']}\n\n{step['output']}\n\n"
                    for step in timeline
                )

                st.download_button(
                    label="📄 Download Full Workflow Report",
                    data=report_text,
                    file_name="ai_workflow_report.txt",
                    mime="text/plain",
                )
310
+
311
+
312
+
313
# --- Tab 2: Semantic Search ---
with tabs[1]:
    st.title("🔍 Semantic Search")
    sem_code = st.text_area("Your Code", height=300, placeholder="Paste your code...")
    sem_q = st.text_input("Your Question", placeholder="E.g., What does this function do?")
    if st.button("Run Semantic Search"):
        # Both inputs are required; whitespace-only input counts as empty.
        if not sem_code.strip() or not sem_q.strip():
            st.warning("Code and question required.")
        else:
            with st.spinner("Running semantic search..."):
                answer = semantic_search_improved(sem_code, sem_q, lang, skill, role, explain_lang)
            st.markdown("### 📌 Answer")
            st.markdown(answer)

    st.markdown("---")