updated completely with open source code
app.py CHANGED
@@ -1,485 +1,206 @@
-# ==============================================================================
-# PitchPerfect AI: Enterprise-Grade Sales Coach (Single File Application)
-#
-# This single file contains the complete application code, enhanced with
-# YouTube support, JAX-based quantitative analysis, and a more robust
-# agentic architecture.
-# ==============================================================================
-
-# ==============================================================================
-# File: README.md (Instructions)
-# ==============================================================================
-"""
[...]
-This advanced version includes:
-- Support for local video uploads and YouTube URLs.
-- Quantitative vocal analysis powered by JAX for high performance.
-- An agentic architecture where specialized tools (YouTube Downloader, JAX Analyzer) work in concert with the Gemini 1.5 Pro model.
-
-## Prerequisites
-
-1. A Google Cloud Platform (GCP) project with billing enabled.
-2. The Vertex AI API and Cloud Storage API enabled in your GCP project.
-3. The `gcloud` CLI installed and authenticated on your local machine.
[...]
-   * **Example name:** `your-project-id-pitch-videos`
-
-2. **Authenticate with Google Cloud:**
-   Run the following command in your terminal and follow the prompts. This sets up Application Default Credentials (ADC).
-   ```bash
-   gcloud auth application-default login
-   ```
-   *Note: The user/principal needs `Storage Object Admin` and `Vertex AI User` roles.*
-
-3. **Install Dependencies:**
-   Create a `requirements.txt` file with the content below and run `pip install -r requirements.txt`.
-   ```
-   gradio
-   google-cloud-aiplatform
-   google-cloud-storage
-   moviepy
-   # For JAX and Quantitative Analysis
-   jax
-   jaxlib
-   librosa
-   speechrecognition
-   openai-whisper
-   # For YouTube support
-   yt-dlp
-   ```
-
-4. **Configure Project Details:**
-   * In this file, scroll down to the "CONFIGURATION" section.
-   * Set your `GCP_PROJECT_ID`, `GCP_LOCATION`, and `GCS_BUCKET_NAME`.
-
-5. **Run the Application:**
-   ```bash
-   python app.py
-   ```
-   This will launch a Gradio web server. **Look for a public URL ending in `.gradio.live` in the output and open it in your browser.**
-"""
-
-# ==============================================================================
-# IMPORTS
-# ==============================================================================
-import logging
-import json
-import uuid
-import os
-import re
-import sys
-import subprocess
-from typing import Dict, Any, List
-
-# Setup Logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-def check_gcloud_auth():
-    """Checks for GCP authentication and provides instructions if missing."""
-    try:
-        # The 'gcloud auth print-access-token' command is a reliable way to check for active credentials.
-        # We capture stderr to suppress the verbose output on success.
-        subprocess.check_output("gcloud auth application-default print-access-token", shell=True, stderr=subprocess.DEVNULL)
-        logging.info("GCP authentication credentials found.")
-        return True
-    except subprocess.CalledProcessError:
-        error_message = """
-        ================================================================================
-        CRITICAL ERROR: Google Cloud Authentication Not Found!
-        ================================================================================
-        This application requires authentication to access Google Cloud services (Vertex AI, Cloud Storage).
-
-        To fix this, please run the following command in your terminal and follow the prompts to log in with your Google account:
-
-            gcloud auth application-default login
-
-        After authenticating, please restart this Python script.
-        ================================================================================
-        """
-        print(error_message, file=sys.stderr)
-        return False
-
-# Run the auth check before trying to import heavy libraries
-if not check_gcloud_auth():
-    sys.exit(1)
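Editor's note: shelling out to `gcloud` works, but the same check can be done in-process with `google-auth`, which the Google Cloud client libraries listed above already pull in. A minimal sketch, not part of this commit:

```python
# Sketch only: in-process Application Default Credentials check,
# assuming google-auth is installed (it ships with the GCP client libraries).
import logging
import google.auth
from google.auth.exceptions import DefaultCredentialsError

def check_adc() -> bool:
    """Returns True if Application Default Credentials can be resolved."""
    try:
        credentials, project_id = google.auth.default()
        logging.info(f"ADC found for project: {project_id}")
        return True
    except DefaultCredentialsError:
        logging.error("No ADC found; run 'gcloud auth application-default login'.")
        return False
```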
-
-from typing import Dict, Any
-import gradio as gr
[... remaining imports, the CONFIGURATION constants, and the response-schema definitions were not recoverable from this view of the diff ...]
-PROMPT_HOLISTIC_VIDEO_ANALYSIS = """
-You are an expert sales coach. Analyze the provided video and the supplementary quantitative metrics to generate a structured, holistic feedback report. Your output MUST strictly conform to the provided JSON schema, including the 1-10 score range.
-
-**Quantitative Metrics (for additional context):**
-{quantitative_metrics_json}
-
-**Evaluation Framework (Analyze the video directly):**
-1. **Content & Structure:** Analyze clarity, flow, value proposition, and the call to action.
-2. **Vocal Delivery:** Analyze pacing, vocal variety, confidence, energy, and enunciation. Use the quantitative metrics to inform your qualitative assessment.
-3. **Visual Delivery:** Analyze eye contact, body language, and facial expressions.
-
-Provide specific examples from the video to support your points.
-"""
-
-PROMPT_FINAL_SYNTHESIS = """
-You are a senior executive coach. Synthesize the provided detailed analysis data into a high-level summary. Your output MUST strictly conform to the provided JSON schema.
[...]
----
-{full_analysis_json}
----
-"""
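The schema constants these prompts rely on (`HOLISTIC_ANALYSIS_SCHEMA`, `FINAL_SYNTHESIS_SCHEMA`) fall inside the unrecoverable region above. For orientation only, a hypothetical shape consistent with how `format_feedback_markdown` below consumes the data (score/feedback pairs on a 1-10 scale) might look like this; field names are inferred, not the committed definition:

```python
# Hypothetical reconstruction only: the committed schema was not recoverable.
# Inferred from the report formatter, which reads score/feedback pairs.
HOLISTIC_ANALYSIS_SCHEMA = {
    "type": "object",
    "properties": {
        "content_and_structure": {
            "type": "object",
            "properties": {
                "clarity": {
                    "type": "object",
                    "properties": {
                        "score": {"type": "integer"},      # 1-10, enforced in the prompt
                        "feedback": {"type": "string"},
                    },
                },
                # structure, value_proposition, and cta would follow the same shape
            },
        },
        # vocal_delivery and visual_delivery sections would follow the same shape
    },
}
```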
[... class YouTubeDownloaderTool: declaration and the start of its run() method were not recoverable ...]
-        if not os.path.exists(output_dir):
-            os.makedirs(output_dir)
[...]
-        }
-        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
-            ydl.download([url])
-        return filepath
-
-class QuantitativeAudioTool:
-    """A tool for performing objective, numerical analysis on an audio track."""
-    class JAXAudioProcessor:
-        """A nested class demonstrating JAX for high-performance audio processing."""
-        def __init__(self):
-            self.jit_rms_energy = jax.jit(self._calculate_rms_energy)
-        @staticmethod
-        @jax.jit
-        def _calculate_rms_energy(waveform: jnp.ndarray) -> jnp.ndarray:
-            return jnp.sqrt(jnp.mean(jnp.square(waveform)))
-        def analyze_energy_variation(self, waveform_np):
-            if waveform_np is None or waveform_np.size == 0: return 0.0
-            waveform_jnp = jnp.asarray(waveform_np)
-            frame_length, hop_length = 2048, 512
-            num_frames = (waveform_jnp.shape[0] - frame_length) // hop_length
-            start_positions = jnp.arange(num_frames) * hop_length
-            offsets = jnp.arange(frame_length)
-            frame_indices = start_positions[:, None] + offsets[None, :]
-            frames = waveform_jnp[frame_indices]
-            frame_energies = jax.vmap(self.jit_rms_energy)(frames)
-            return float(jnp.std(frame_energies))
-
-    def __init__(self):
-        self.jax_processor = self.JAXAudioProcessor()
-        self.whisper_model = whisper.load_model("base.en")
-
-    def run(self, video_path: str, output_dir: str = "temp_output"):
-        if not os.path.exists(output_dir): os.makedirs(output_dir)
-        video = None
-        try:
-            video = VideoFileClip(video_path)
-            if video.audio is None:
-                raise ValueError("The provided video file does not contain an audio track, or it could not be decoded. Analysis cannot proceed.")
-            audio_path = os.path.join(output_dir, f"audio_{uuid.uuid4()}.wav")
-            video.audio.write_audiofile(audio_path, codec='pcm_s16le', fps=16000)
-            transcript_result = self.whisper_model.transcribe(audio_path, fp16=False)
-            word_count = len(transcript_result['text'].split())
-            duration = video.duration
-            pace = (word_count / duration) * 60 if duration > 0 else 0
-            y, sr = librosa.load(audio_path, sr=16000)
-            energy_variation = self.jax_processor.analyze_energy_variation(y)
-            os.remove(audio_path)
-            return {
-                "speaking_pace_wpm": round(pace, 2),
-                "vocal_energy_variation": round(energy_variation, 4),
-            }
-        finally:
-            if video:
-                video.close()
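Editor's note: the frame-indexing trick in `analyze_energy_variation` (building a `(num_frames, frame_length)` index matrix and vmapping an RMS kernel over it) is the core of the removed JAX tool. A self-contained sketch of the same idea on a synthetic signal, useful for sanity-checking it in isolation; not part of the commit:

```python
# Standalone sketch of the framed-RMS computation used by JAXAudioProcessor.
# A 440 Hz tone with a loudness ramp should show nonzero energy variation.
import jax
import jax.numpy as jnp

@jax.jit
def rms(frame: jnp.ndarray) -> jnp.ndarray:
    return jnp.sqrt(jnp.mean(jnp.square(frame)))

def energy_std(waveform: jnp.ndarray, frame_length: int = 2048, hop_length: int = 512) -> float:
    num_frames = (waveform.shape[0] - frame_length) // hop_length
    # (num_frames, frame_length) index matrix; gathering with it frames the signal
    idx = jnp.arange(num_frames)[:, None] * hop_length + jnp.arange(frame_length)[None, :]
    return float(jnp.std(jax.vmap(rms)(waveform[idx])))

sr = 16000
t = jnp.linspace(0.0, 2.0, 2 * sr)
tone = jnp.sin(2 * jnp.pi * 440.0 * t) * jnp.linspace(0.2, 1.0, t.shape[0])
print(energy_std(tone))  # > 0: the loudness ramp spreads frame energies
```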
-
-# ==============================================================================
-# VERTEX AI MANAGER CLASS
-# ==============================================================================
-class VertexAIManager:
-    def __init__(self):
-        vertexai.init(project=GCP_PROJECT_ID, location=GCP_LOCATION)
-        self.model = GenerativeModel(MODEL_GEMINI_PRO)
-
-    def run_multimodal_analysis(self, video_gcs_uri: str, prompt: str) -> dict:
-        video_part = Part.from_uri(uri=video_gcs_uri, mime_type="video/mp4")
-        contents = [video_part, prompt]
-        config = GenerationConfig(response_schema=HOLISTIC_ANALYSIS_SCHEMA, temperature=0.2, response_mime_type="application/json")
-        response = self.model.generate_content(contents, generation_config=config)
-        return json.loads(response.text)
-
-    def run_synthesis(self, prompt: str) -> dict:
-        config = GenerationConfig(response_schema=FINAL_SYNTHESIS_SCHEMA, temperature=0.3, response_mime_type="application/json")
-        response = self.model.generate_content(prompt, generation_config=config)
-        return json.loads(response.text)
-
-# ==============================================================================
-# AGENT CLASS
-# ==============================================================================
-class PitchAnalyzerAgent:
-    def __init__(self):
-        self.vertex_manager = VertexAIManager()
-        self.storage_client = storage.Client(project=GCP_PROJECT_ID)
-        self.youtube_tool = YouTubeDownloaderTool()
-        self.quant_tool = QuantitativeAudioTool()
-        self._check_bucket()
-
-    def _check_bucket(self):
-        self.storage_client.get_bucket(GCS_BUCKET_NAME)
-
-    def _upload_to_gcs(self, path: str) -> str:
-        bucket = self.storage_client.bucket(GCS_BUCKET_NAME)
-        blob_name = f"pitch-videos/{uuid.uuid4()}.mp4"
-        blob = bucket.blob(blob_name)
-        blob.upload_from_filename(path)
-        return f"gs://{GCS_BUCKET_NAME}/{blob_name}"
-
-    def _delete_from_gcs(self, gcs_uri: str):
-        bucket_name, blob_name = gcs_uri.replace("gs://", "").split("/", 1)
-        self.storage_client.bucket(bucket_name).blob(blob_name).delete()
-
-    def run_analysis_pipeline(self, video_path_or_url: str, progress_callback):
-        local_video_path = None
-        video_gcs_uri = None
-        try:
-            if re.match(r"^(https?://)?(www\.)?(youtube\.com|youtu\.?be)/.+$", video_path_or_url):
-                progress_callback(0.1, "Downloading video from YouTube...")
-                local_video_path = self.youtube_tool.run(video_path_or_url)
-            else:
-                local_video_path = video_path_or_url
-
-            progress_callback(0.3, "Performing JAX-based quantitative analysis...")
-            quant_metrics = self.quant_tool.run(local_video_path)
-
-            progress_callback(0.5, "Uploading video to secure Cloud Storage...")
-            video_gcs_uri = self._upload_to_gcs(local_video_path)
-
-            progress_callback(0.7, "Gemini 1.5 Pro is analyzing the video...")
-            analysis_prompt = PROMPT_HOLISTIC_VIDEO_ANALYSIS.format(quantitative_metrics_json=json.dumps(quant_metrics, indent=2))
-            multimodal_analysis = self.vertex_manager.run_multimodal_analysis(video_gcs_uri, analysis_prompt)
-
-            progress_callback(0.9, "Synthesizing final report...")
-            synthesis_prompt = PROMPT_FINAL_SYNTHESIS.format(full_analysis_json=json.dumps(multimodal_analysis, indent=2))
-            final_summary = self.vertex_manager.run_synthesis(synthesis_prompt)
-
-            return {"quantitative_metrics": quant_metrics, "multimodal_analysis": multimodal_analysis, "executive_summary": final_summary}
-        except Exception as e:
-            logging.error(f"Analysis pipeline failed: {e}", exc_info=True)
-            return {"error": str(e)}
-        finally:
-            if video_gcs_uri:
-                try: self._delete_from_gcs(video_gcs_uri)
-                except Exception as e: logging.warning(f"Failed to delete GCS object {video_gcs_uri}: {e}")
-            if local_video_path and video_path_or_url != local_video_path:
-                if os.path.exists(local_video_path): os.remove(local_video_path)
-
-# ==============================================================================
-# UI FORMATTING HELPER
-# ==============================================================================
-def format_feedback_markdown(analysis: dict) -> str:
-    if not analysis or "error" in analysis:
-        return f"## Analysis Failed\n\n**Reason:** {analysis.get('error', 'Unknown error.')}"
-
-    metrics = analysis.get('quantitative_metrics', {})
-    ai_analysis = analysis.get('multimodal_analysis', {})
[... the definitions of get_pace_rating and the summary/content/vocal/visual sub-dicts were not recoverable ...]
-    def get_energy_rating(variation):
-        if variation == 0: return "N/A"
-        if variation < 0.02: return "Consistent / Monotonous"
-        if variation <= 0.05: return "Moderately Dynamic"
-        return "Highly Dynamic & Engaging"
-
-    wpm = metrics.get('speaking_pace_wpm', 0)
-    energy_var = metrics.get('vocal_energy_variation', 0)
-    pace_rating = get_pace_rating(wpm)
-    energy_rating = get_energy_rating(energy_var)
-
-    metrics_md = f"""
-    - **Speaking Pace:** **{wpm} WPM** *(Rating: {pace_rating})*
-      - *This measures the number of words spoken per minute. A typical conversational pace is between 120-160 WPM.*
-    - **Vocal Energy Variation:** **{energy_var:.4f}** *(Rating: {energy_rating})*
-      - *This measures the standard deviation of your vocal loudness. A higher value indicates a more dynamic and engaging vocal range, while a very low value suggests a monotonous delivery.*
-    """
-
-    # --- FIX: Revert to using bold text instead of headers for consistency ---
-    def format_ai_item(title, data):
-        if not data or "score" not in data: return f"**{title}:**\n> Analysis not available.\n\n"
-        raw_score = data.get('score', 0); score = max(1, min(10, raw_score))
-        stars = "🟢" * score + "⚪" * (10 - score)
-        feedback = data.get('feedback', 'No feedback.').replace('\n', '\n> ')
-        return f"**{title}:** `{stars} [{score}/10]`\n\n> {feedback}\n\n"
-
-    # --- FIX: Use a more consistent structure for the final report ---
-    return f"""
-# PitchPerfect AI Analysis Report
-## Executive Summary
-### Key Strengths
-{summary.get('key_strengths', '- N/A')}
-### High-Leverage Growth Opportunities
-{summary.get('growth_opportunities', '- N/A')}
-### Final Verdict
-> {summary.get('executive_summary', 'N/A')}
----
-## Quantitative Metrics Explained (via JAX)
-{metrics_md}
----
-## AI Multimodal Analysis (via Gemini 1.5 Pro)
-### I. Content & Structure
-{format_ai_item("Clarity", content.get('clarity'))}
-{format_ai_item("Structure & Flow", content.get('structure'))}
-{format_ai_item("Value Proposition", content.get('value_proposition'))}
-{format_ai_item("Call to Action (CTA)", content.get('cta'))}
-<hr style="border:1px solid #ddd">
[... the Vocal Delivery section of the report template was not recoverable ...]
-<hr style="border:1px solid #ddd">
[...]
-{format_ai_item("Eye Contact", visual.get('eye_contact'))}
-{format_ai_item("Body Language", visual.get('body_language'))}
-{format_ai_item("Facial Expressions", visual.get('facial_expressions'))}
-"""
-
[...]
-try:
-    pitch_agent = PitchAnalyzerAgent()
-except Exception as e:
-    logging.fatal(f"Failed to initialize agent during startup: {e}", exc_info=True)
[... the removed Gradio UI definition and launch block (old lines 461-485) were not recoverable ...]
+"""
+The Content Creator's Journey (AI-Powered Edition)
+==================================================
+This version replaces all mocked functions with real generative AI models
+for image analysis, content generation, and translation.
+
+- Stage 1: Inspiration Hub (Summarization AI)
+- Stage 2: Creative Studio (Image-to-Text and Text Generation AI)
+- Stage 3: Globalization Suite (Translation AI)
+
+Author: Gemini
+Date: July 4, 2025
+"""
+
+import os
+import re
+import gradio as gr
+import arxiv
+import nltk
+from transformers import pipeline
+from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound
+from youtube_transcript_api.formatters import TextFormatter
+from PIL import Image
+
+# --- Model & Pipeline Setup (Lazy Loading) ---
+
+# Dictionary to hold our models, loaded only when needed.
+models = {}
+
+def get_pipeline(task, model_name):
+    """Initializes and returns a pipeline, caching it for reuse."""
+    if model_name not in models:
+        print(f"Initializing {task} pipeline with model {model_name}...")
+        models[model_name] = pipeline(task, model=model_name)
+        print(f"✅ {model_name} loaded.")
+    return models[model_name]
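A note on the caching behavior: the dictionary is keyed by model name alone, so each checkpoint is loaded once per process, and if the same checkpoint were ever requested for two different tasks, the first-registered pipeline would be returned for both. A quick illustration using a model name that appears later in this file:

```python
# First call downloads/loads the model; the second returns the cached object.
# Note the cache key is model_name only, not (task, model_name).
summarizer_a = get_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
summarizer_b = get_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
assert summarizer_a is summarizer_b  # same cached pipeline instance
```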
+
+# --- Stage 1: The Spark (Inspiration Hub) ---
+# This section already uses a real summarization model, so no changes are needed here.
+# (Functions search_arxiv_papers and summarize_youtube_from_url are defined further below.)
+
+# ... (Previous code for Stage 1 remains here) ...
+
+# --- Stage 2: The Craft (Creative Studio) ---
+
+def analyze_image_with_ai(image: Image.Image) -> tuple[str, dict]:
+    """Uses a real AI model to generate a description of the image."""
+    captioner = get_pipeline("image-to-text", "Salesforce/blip-image-captioning-large")
+    description = captioner(image)[0]['generated_text']
+
+    analysis = {"description": description}
+    report = (
+        f"**AI Vision Analysis:**\n\n"
+        f"- **Image Content:** {description}"
+    )
+    return report, analysis
+
+def generate_creative_content_with_ai(style: str, audience: str, image_analysis: dict, custom_prompt: str) -> tuple[str, str]:
+    """Uses a real LLM to generate content based on a detailed prompt."""
+    generator = get_pipeline("text-generation", "gpt2")
+    image_desc = image_analysis.get("description", "a visual scene")
+
+    # Create a detailed prompt for the LLM
+    prompt = (
+        f"Create a '{style}' for a '{audience}' audience. "
+        f"The content should be inspired by the following scene: '{image_desc}'. "
+        f"Follow this specific instruction: '{custom_prompt if custom_prompt else 'Be creative and engaging'}'.\n\n"
+        f"Here is the content:"
+    )
+
+    # Generate text and clean it up
+    generated_outputs = generator(prompt, max_length=150, num_return_sequences=1, pad_token_id=generator.tokenizer.eos_token_id)
+    generated_text = generated_outputs[0]['generated_text']
+
+    # Clean the output by removing the initial prompt
+    clean_text = generated_text.replace(prompt, "").strip()
+
+    # The analytics are now informational rather than predictive
+    analytics_report = (
+        f"**Generation Details:**\n\n"
+        f"- **Model Used:** gpt2\n"
+        f"- **Core Prompt:** Based on a photo of '{image_desc[:40]}...'"
+    )
+
+    return clean_text, analytics_report
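Worth noting: gpt2 is a base completion model, so the prompt-echo cleanup above is doing real work, and outputs tend to read as free association rather than instruction following. A hedged sketch of a drop-in alternative through the same `get_pipeline` cache; the model choice here is an assumption, not what the commit ships:

```python
# Sketch: swap gpt2 for an instruction-tuned seq2seq model. flan-t5 returns
# only the completion, so the prompt-stripping step becomes unnecessary.
def generate_with_flan(style: str, audience: str, image_desc: str, custom_prompt: str) -> str:
    generator = get_pipeline("text2text-generation", "google/flan-t5-base")
    prompt = (
        f"Write a {style} for a {audience} audience, inspired by this scene: "
        f"{image_desc}. Instruction: {custom_prompt or 'Be creative and engaging'}."
    )
    return generator(prompt, max_length=150)[0]['generated_text']
```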
+
+def run_creative_studio(uploaded_image, style, audience, custom_prompt):
+    """Interface function to run the full AI-powered 'Craft' stage."""
+    if uploaded_image is None:
+        return "❌ Please upload an image.", "", ""
+    try:
+        image = uploaded_image
+        analysis_report, image_analysis = analyze_image_with_ai(image)
+        generated_text, analytics = generate_creative_content_with_ai(style, audience, image_analysis, custom_prompt)
+        return analysis_report, generated_text, analytics
+    except Exception as e:
+        return f"⚠️ Error: {e}", "", ""
+
+# --- Stage 3: The Reach (Globalization Suite) ---
+
+def translate_content_with_ai(text: str, languages: list) -> str:
+    """Translates content using real AI models."""
+    if not text:
+        return "❌ Please provide text to translate."
+    if not languages:
+        return "❌ Please select at least one language."
+
+    lang_model_map = {
+        "German 🇩🇪": "Helsinki-NLP/opus-mt-en-de",
+        "Spanish 🇪🇸": "Helsinki-NLP/opus-mt-en-es",
+        "Japanese 🇯🇵": "Helsinki-NLP/opus-mt-en-jap",
+    }
+
+    translations = ["### Translated Content\n"]
+
+    for lang_name in languages:
+        model_name = lang_model_map.get(lang_name)
+        if model_name:
+            translator = get_pipeline("translation", model_name)
+            translated_text = translator(text)[0]['translation_text']
+            translations.append(f"**{lang_name.upper()} VERSION:**\n\n{translated_text}")
+
+    return "\n\n---\n\n".join(translations)
+
+# --- Full Gradio UI ---
+# The UI structure remains the same, but the functions it calls are now AI-powered.
+
+# --- Helper functions from the previous version to make the file runnable ---
+
+def search_arxiv_papers(topic: str) -> str:
+    if not topic: return "❌ Please enter a topic to search."
+    summarizer = get_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
+    search = arxiv.Search(query=topic, max_results=3, sort_by=arxiv.SortCriterion.Relevance)
+    results = []
+    for res in search.results():
+        clean_summary = res.summary.replace("\n", " ")
+        summary_text = summarizer(clean_summary, max_length=80, min_length=20, do_sample=False)[0]['summary_text']
+        results.append(f"**{res.title}**\n\n**Summary:** {summary_text}\n\n**[Read Paper]({res.pdf_url})**")
+    return "\n\n---\n\n".join(results) if results else "No papers found."
+
+def summarize_youtube_from_url(video_url: str) -> str:
+    if not video_url: return "❌ Please enter a YouTube URL."
+    video_id_match = re.search(r"(?:v=|\/)([0-9A-Za-z_-]{11}).*", video_url)
+    if not video_id_match: return "❌ Invalid YouTube URL."
+    video_id = video_id_match.group(1)
+    try:
+        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
+        transcript_text = " ".join([d['text'] for d in transcript_list])
+        if len(transcript_text) < 200: return "Transcript too short."
+        summarizer = get_pipeline("summarization", "sshleifer/distilbart-cnn-12-6")
+        summary = summarizer(transcript_text, max_length=100, min_length=30, do_sample=False)
+        return f"**✅ Summary:**\n\n{summary[0]['summary_text']}"
+    except NoTranscriptFound: return "❌ No transcript available."
+    except Exception as e: return f"⚠️ Error: {e}"
+
+def create_ui():
+    css = """.gradio-container { font-family: 'Inter', sans-serif; background: #f5f7fa; } .tab-item { background: white; border-radius: 12px; padding: 25px; border: 1px solid #e0e0e0; } footer { display: none !important }"""
+    with gr.Blocks(theme=gr.themes.Base(), css=css, title="The Content Creator's Journey") as app:
+        gr.Markdown("""<div style="text-align: center; padding: 20px; background: #1f2937; color: white; border-radius: 12px;"><h1 style="font-size: 2.5em; margin: 0; font-weight: 700;">The Content Creator's Journey</h1><p style="font-size: 1.2em; margin-top: 5px;">From a spark of an idea to a global message, in three stages.</p></div>""")
+        with gr.Tabs() as tabs:
+            with gr.TabItem("1. The Spark: Inspiration Hub", id=0, elem_classes=["tab-item"]):
+                gr.Markdown("### Every great creation starts with an idea. Research any topic to get summarized insights from academia and popular culture.")
+                with gr.Row(variant="panel"):
+                    with gr.Column(min_width=400):
+                        gr.Markdown("#### Academic Insights (from arXiv)")
+                        inspire_topic = gr.Textbox(label="Enter a Topic to Search Papers", placeholder="e.g., 'sustainable technology'")
+                        arxiv_btn = gr.Button("Search arXiv")
+                        inspire_arxiv_output = gr.Markdown()
+                    with gr.Column(min_width=400):
+                        gr.Markdown("#### Video Insights (from YouTube URL)")
+                        inspire_yt_url = gr.Textbox(label="Paste a YouTube Video URL", placeholder="e.g., 'https://www.youtube.com/watch?v=...'")
+                        yt_btn = gr.Button("Summarize Video")
+                        inspire_yt_output = gr.Markdown()
+                arxiv_btn.click(fn=search_arxiv_papers, inputs=inspire_topic, outputs=inspire_arxiv_output)
+                yt_btn.click(fn=summarize_youtube_from_url, inputs=inspire_yt_url, outputs=inspire_yt_output)
+            with gr.TabItem("2. The Craft: Creative Studio", id=1, elem_classes=["tab-item"]):
+                gr.Markdown("### Transform your idea into a polished piece of content. Upload a visual anchor and let the AI help you write.")
+                with gr.Row(variant="panel"):
+                    with gr.Column(scale=1):
+                        craft_image = gr.Image(label="Upload a Visual Anchor", type="pil")
+                        craft_style = gr.Dropdown(choices=["Blog Post", "Social Media Caption", "Video Script Hook"], value="Blog Post", label="Content Format")
+                        craft_audience = gr.Dropdown(choices=["Experts", "General Audience", "Tech Enthusiasts"], value="General Audience", label="Target Audience")
+                        craft_prompt = gr.Textbox(label="Key Message or Note", placeholder="e.g., 'Focus on the human element...'")
+                        craft_btn = gr.Button("Craft My Content")
+                    with gr.Column(scale=2):
+                        craft_analysis_output = gr.Markdown(label="AI Vision Analysis")
+                        craft_text_output = gr.Textbox(label="Generated Content", lines=10)
+                        craft_analytics_output = gr.Markdown(label="Performance Analytics")
+                craft_btn.click(fn=run_creative_studio, inputs=[craft_image, craft_style, craft_audience, craft_prompt], outputs=[craft_analysis_output, craft_text_output, craft_analytics_output])
+            with gr.TabItem("3. The Reach: Globalization Suite", id=2, elem_classes=["tab-item"]):
+                gr.Markdown("### Your masterpiece is ready. Now, adapt it for a global audience with our translation suite.")
+                with gr.Row(variant="panel"):
+                    with gr.Column(scale=2):
+                        reach_text_input = gr.Textbox(label="Paste Content Here (from Stage 2)", lines=8)
+                        reach_lang_select = gr.CheckboxGroup(choices=["German 🇩🇪", "Spanish 🇪🇸", "Japanese 🇯🇵"], label="Select Languages")
+                        reach_btn = gr.Button("Globalize My Content")
+                    with gr.Column(scale=3):
+                        reach_output = gr.Markdown(label="Adapted for Global Audiences")
+                reach_btn.click(fn=translate_content_with_ai, inputs=[reach_text_input, reach_lang_select], outputs=reach_output)
+    return app
+
+if __name__ == "__main__":
+    # To enable lazy loading of models, the UI is created first.
+    # The get_pipeline function will handle model loading upon first use.
+    app = create_ui()
+    app.launch(debug=True)