Spaces:

RollAI
/

ChatWithTranscriptDev

Running

App Files Files Community

AhmadMustafa committed on Jan 9

Commit

8fbf2ce

1 Parent(s): 170358b

update: reasoning prompt

Browse files

Files changed (1) hide show

app.py +59 -60

app.py CHANGED Viewed

@@ -336,6 +336,7 @@ class TranscriptProcessor:
         """Return the raw transcript data."""
         return self.transcript_data
 def setup_openai_key() -> None:
     """Set up OpenAI API key from file."""
     try:
@@ -488,76 +489,73 @@ CRITICAL: When analyzing timestamps, you must verify that in the duration specif
 2. The speaker talks continuously for at least 20 seconds
 3. The clip ends BEFORE any interruption or speaker change
 """
-            reasoning_prompt = f"""For each Speaker {" , ".join(speaker_mapping.values())}
-in the transcript: {transcript}
-Your job is to generate the thinking about the short social media clips for each speaker where they discuss. Think step by step and return a JSON at the end of the thinking.
-You can skip a speaker if they don't have enough content to generate the clips. Goal of the reasoning is to find the sentence of a specific speaker where they talk about that topic Since I need to make seperate clips of each speaker. SO all the sentences between the start and end of the topic must be from the same speaker.
-Return Format:
-- Name of the Speaker
-- Detailed Step by Step Thinking for each speaker from thier content and the topic they are talking about
-After you have completed the thinking, give me a JSON of the thinking.
 ```json
-[
-    {{
-        "Speaker Name1": [
-            {{
-                "Topic Title": "...",
-                "Starting Sentence of that speaker": "...",
-                "Ending Sentence where the topic ends": "...."
-            }},
-            {{
-                "Topic Title": "...",
-                "Starting Sentence of that speaker": "....",
-                "Ending Sentence of that speaker where the topic ends": "....."
-            }}
-        ]
-    }},
-    {{
-        "Speaker Name2": [
-            {{
-                "Topic Title": "....",
-                "Starting Sentence of that speaker": ".....",
-                "Ending Sentence of that speaker": "....."
-            }},
-            {{
-                "Topic Title": "......",
-                "Starting Sentence of that speaker": "....",
-                "Ending Sentence of that speaker": "....."
-            }}
-        ]
-    }},
-    ....
-]
-```
 """
-            thinking_completion = client.chat.completions.create(
                 model="gpt-4o",
                 messages=[
-                    {"role": "system", "content": reasoning_prompt},
                 ],
                 stream=False,
-                temperature=0.4,
             )
-            thinking = thinking_completion.choices[0].message.content
-            print("Thinking is:\n", thinking)
-            thinking_json = thinking[thinking.find("{") : thinking.rfind("}") + 1]
-            user_prompt = f"""User ID: {uid}
-Intelligent Thinking Context: {thinking_json}
-Your task is to generate the social media clips following these strict rules:
-1. TIMESTAMP SELECTION:
-- Make sure that a selected timestamp range of a clip has a single speaker only. We need to isolate each speaker's CLIPs.
-2. CLIP REQUIREMENTS:
-- Minimum 20 seconds of CONTINUOUS speech
-- Maximum 100 seconds
-- Single speaker only
-- Must end before any interruption
-- Complete thoughts/topics only
 Return Format requirements:
 SPEAKER FORMAT:
@@ -1054,6 +1052,7 @@ def create_chat_interface():
         # Handle initial loading with streaming
         def on_app_load(request: gr.Request):
             cid = request.query_params.get("cid", None)
             rsid = request.query_params.get("rsid", None)
             origin = request.query_params.get("origin", None)

         """Return the raw transcript data."""
         return self.transcript_data
 def setup_openai_key() -> None:
     """Set up OpenAI API key from file."""
     try:
 2. The speaker talks continuously for at least 20 seconds
 3. The clip ends BEFORE any interruption or speaker change
 """
+            start_end_sentence_prompt = f"""Given a transcript with speakers {" , ".join(speaker_mapping.values())}, analyze the content and identify segments that would make compelling social media clips. For each speaker, find complete topics that meet the following criteria:
+Key Requirements:
+1. Speaker Isolation
+- Each clip must contain only ONE speaker
+- No interruptions from other speakers allowed within the clip
+- Once another speaker interrupts, the previous speaker's clip must end
+2. Duration Guidelines
+- Minimum: 20 seconds of continuous speech
+- Maximum: 100 seconds
+- Must capture complete thoughts/topics
+3. Content Selection
+- Focus on interesting or noteworthy content
+- Topics should be self-contained and coherent
+- Must include both the starting and ending sentences that bound the topic
+- You can do 2 or 3 topics per speaker if there is more content for that speaker.
+Expected Output Format:
 ```json
+{{
+    "Speaker_Name": [
+        {{
+            "Topic_Title": "<descriptive title of the topic>",
+            "Starting_Sentence": "<exact first sentence of the topic>",
+            "Ending_Sentence": "<exact last sentence before any interruption or topic change>"
+        }},
+        // Additional topics for this speaker...
+    ],
+    // Additional speakers...
+}}
+Example:
+If a transcript contains:
+[10:13] Speaker1: "First sentence..."
+[10:15] Speaker1: "Second sentence..."
+[10:17] Speaker2: "Interruption..."
+[10:19] Speaker1: "Later sentence..."
+The valid clip for Speaker1 would only include the first two sentences, ending before Speaker2's interruption.
+Important:
+- Ensure each clip represents a single, uninterrupted segment from one speaker
+- Include only complete thoughts/statements
+- Verify that no other speakers appear between the selected start and end sentences
 """
+            sentence_finding_completion = client.chat.completions.create(
                 model="gpt-4o",
                 messages=[
+                    {"role": "system", "content": start_end_sentence_prompt},
                 ],
                 stream=False,
+                temperature=0.2,
             )
+            sentence_finding = sentence_finding_completion.choices[0].message.content
+            sentence_finding_json = sentence_finding[
+                sentence_finding.find("{") : sentence_finding.rfind("}") + 1
+            ]
+            print(sentence_finding_json)
+            user_prompt = f"""User ID: {uid}
+Short Listed Topics and Sentences: {sentence_finding_json}
+Your task is to find the starting time, ending time, and the duration for the each topic in the above Short Listed Topics. You need to return the answer in the following format.
 Return Format requirements:
 SPEAKER FORMAT:
         # Handle initial loading with streaming
         def on_app_load(request: gr.Request):
+            # print("App loaded")
             cid = request.query_params.get("cid", None)
             rsid = request.query_params.get("rsid", None)
             origin = request.query_params.get("origin", None)