AhmadMustafa committed on
Commit
8fbf2ce
·
1 Parent(s): 170358b

update: reasoning prompt

Browse files
Files changed (1) hide show
  1. app.py +59 -60
app.py CHANGED
@@ -336,6 +336,7 @@ class TranscriptProcessor:
336
  """Return the raw transcript data."""
337
  return self.transcript_data
338
 
 
339
  def setup_openai_key() -> None:
340
  """Set up OpenAI API key from file."""
341
  try:
@@ -488,76 +489,73 @@ CRITICAL: When analyzing timestamps, you must verify that in the duration specif
488
  2. The speaker talks continuously for at least 20 seconds
489
  3. The clip ends BEFORE any interruption or speaker change
490
  """
491
- reasoning_prompt = f"""For each Speaker {" , ".join(speaker_mapping.values())}
492
- in the transcript: {transcript}
493
-
494
- Your job is to generate the thinking about the short social media clips for each speaker where they discuss. Think step by step and return a JSON at the end of the thinking.
495
- You can skip a speaker if they don't have enough content to generate the clips. Goal of the reasoning is to find the sentence of a specific speaker where they talk about that topic Since I need to make seperate clips of each speaker. SO all the sentences between the start and end of the topic must be from the same speaker.
496
- Return Format:
497
- - Name of the Speaker
498
- - Detailed Step by Step Thinking for each speaker from thier content and the topic they are talking about
499
- After you have completed the thinking, give me a JSON of the thinking.
 
 
 
 
 
 
 
 
 
 
 
500
  ```json
501
- [
502
- {{
503
- "Speaker Name1": [
504
- {{
505
- "Topic Title": "...",
506
- "Starting Sentence of that speaker": "...",
507
- "Ending Sentence where the topic ends": "...."
508
- }},
509
- {{
510
- "Topic Title": "...",
511
- "Starting Sentence of that speaker": "....",
512
- "Ending Sentence of that speaker where the topic ends": "....."
513
- }}
514
- ]
515
- }},
516
- {{
517
- "Speaker Name2": [
518
- {{
519
- "Topic Title": "....",
520
- "Starting Sentence of that speaker": ".....",
521
- "Ending Sentence of that speaker": "....."
522
- }},
523
- {{
524
- "Topic Title": "......",
525
- "Starting Sentence of that speaker": "....",
526
- "Ending Sentence of that speaker": "....."
527
- }}
528
- ]
529
- }},
530
- ....
531
- ]
532
- ```
533
  """
534
- thinking_completion = client.chat.completions.create(
 
535
  model="gpt-4o",
536
  messages=[
537
- {"role": "system", "content": reasoning_prompt},
538
  ],
539
  stream=False,
540
- temperature=0.4,
541
  )
542
- thinking = thinking_completion.choices[0].message.content
543
- print("Thinking is:\n", thinking)
544
- thinking_json = thinking[thinking.find("{") : thinking.rfind("}") + 1]
545
-
546
- user_prompt = f"""User ID: {uid}
547
- Intelligent Thinking Context: {thinking_json}
548
-
549
- Your task is to generate the social media clips following these strict rules:
550
 
551
- 1. TIMESTAMP SELECTION:
552
- - Make sure that a selected timestamp range of a clip has a single speaker only. We need to isolate each speaker's CLIPs.
553
 
554
- 2. CLIP REQUIREMENTS:
555
- - Minimum 20 seconds of CONTINUOUS speech
556
- - Maximum 100 seconds
557
- - Single speaker only
558
- - Must end before any interruption
559
- - Complete thoughts/topics only
560
 
 
561
 
562
  Return Format requirements:
563
  SPEAKER FORMAT:
@@ -1054,6 +1052,7 @@ def create_chat_interface():
1054
 
1055
  # Handle initial loading with streaming
1056
  def on_app_load(request: gr.Request):
 
1057
  cid = request.query_params.get("cid", None)
1058
  rsid = request.query_params.get("rsid", None)
1059
  origin = request.query_params.get("origin", None)
 
336
  """Return the raw transcript data."""
337
  return self.transcript_data
338
 
339
+
340
  def setup_openai_key() -> None:
341
  """Set up OpenAI API key from file."""
342
  try:
 
489
  2. The speaker talks continuously for at least 20 seconds
490
  3. The clip ends BEFORE any interruption or speaker change
491
  """
492
+ start_end_sentence_prompt = f"""Given a transcript with speakers {" , ".join(speaker_mapping.values())}, analyze the content and identify segments that would make compelling social media clips. For each speaker, find complete topics that meet the following criteria:
493
+
494
+ Key Requirements:
495
+ 1. Speaker Isolation
496
+ - Each clip must contain only ONE speaker
497
+ - No interruptions from other speakers allowed within the clip
498
+ - Once another speaker interrupts, the previous speaker's clip must end
499
+
500
+ 2. Duration Guidelines
501
+ - Minimum: 20 seconds of continuous speech
502
+ - Maximum: 100 seconds
503
+ - Must capture complete thoughts/topics
504
+
505
+ 3. Content Selection
506
+ - Focus on interesting or noteworthy content
507
+ - Topics should be self-contained and coherent
508
+ - Must include both the starting and ending sentences that bound the topic
509
+ - You can do 2 or 3 topics per speaker if there is more content for that speaker.
510
+
511
+ Expected Output Format:
512
  ```json
513
+ {{
514
+ "Speaker_Name": [
515
+ {{
516
+ "Topic_Title": "<descriptive title of the topic>",
517
+ "Starting_Sentence": "<exact first sentence of the topic>",
518
+ "Ending_Sentence": "<exact last sentence before any interruption or topic change>"
519
+ }},
520
+ // Additional topics for this speaker...
521
+ ],
522
+ // Additional speakers...
523
+ }}
524
+
525
+ Example:
526
+ If a transcript contains:
527
+ [10:13] Speaker1: "First sentence..."
528
+ [10:15] Speaker1: "Second sentence..."
529
+ [10:17] Speaker2: "Interruption..."
530
+ [10:19] Speaker1: "Later sentence..."
531
+
532
+ The valid clip for Speaker1 would only include the first two sentences, ending before Speaker2's interruption.
533
+
534
+ Important:
535
+ - Ensure each clip represents a single, uninterrupted segment from one speaker
536
+ - Include only complete thoughts/statements
537
+ - Verify that no other speakers appear between the selected start and end sentences
 
 
 
 
 
 
 
538
  """
539
+
540
+ sentence_finding_completion = client.chat.completions.create(
541
  model="gpt-4o",
542
  messages=[
543
+ {"role": "system", "content": start_end_sentence_prompt},
544
  ],
545
  stream=False,
546
+ temperature=0.2,
547
  )
548
+ sentence_finding = sentence_finding_completion.choices[0].message.content
549
+ sentence_finding_json = sentence_finding[
550
+ sentence_finding.find("{") : sentence_finding.rfind("}") + 1
551
+ ]
 
 
 
 
552
 
553
+ print(sentence_finding_json)
 
554
 
555
+ user_prompt = f"""User ID: {uid}
556
+ Short Listed Topics and Sentences: {sentence_finding_json}
 
 
 
 
557
 
558
+ Your task is to find the starting time, ending time, and the duration for the each topic in the above Short Listed Topics. You need to return the answer in the following format.
559
 
560
  Return Format requirements:
561
  SPEAKER FORMAT:
 
1052
 
1053
  # Handle initial loading with streaming
1054
  def on_app_load(request: gr.Request):
1055
+ # print("App loaded")
1056
  cid = request.query_params.get("cid", None)
1057
  rsid = request.query_params.get("rsid", None)
1058
  origin = request.query_params.get("origin", None)