bluenevus committed on
Commit
ca79387
·
verified ·
1 Parent(s): 1f274e9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +38 -58
app.py CHANGED
@@ -16,47 +16,45 @@ import logging
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
 
 
 
 
19
  # Set up device
20
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
21
 
22
- # Model name
 
23
  model_name = "canopylabs/orpheus-3b-0.1-ft"
24
 
25
- def load_model(hf_token):
26
- login(token=hf_token)
27
-
28
- print("Loading Orpheus model...")
29
- snapshot_download(
30
- repo_id=model_name,
31
- use_auth_token=hf_token,
32
- allow_patterns=[
33
- "config.json",
34
- "*.safetensors",
35
- "model.safetensors.index.json",
36
- ],
37
- ignore_patterns=[
38
- "optimizer.pt",
39
- "pytorch_model.bin",
40
- "training_args.bin",
41
- "scheduler.pt",
42
- "tokenizer.json",
43
- "tokenizer_config.json",
44
- "special_tokens_map.json",
45
- "vocab.json",
46
- "merges.txt",
47
- "tokenizer.*"
48
- ]
49
- )
50
-
51
- model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
52
- model.to(device)
53
- tokenizer = AutoTokenizer.from_pretrained(model_name)
54
- print(f"Orpheus model loaded to {device}")
55
- return model, tokenizer
56
-
57
- # Initialize as None, will be loaded when HF token is provided
58
- model = None
59
- tokenizer = None
60
 
61
  def generate_podcast_script(api_key, content, duration, num_hosts):
62
  genai.configure(api_key=api_key)
@@ -75,10 +73,6 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
75
  Do not use any special characters or markdown. Only include the monologue with proper punctuation.
76
  Ensure the content flows naturally and stays relevant to the topic.
77
  Limit the script length to match the requested duration of {duration}.
78
- Use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
79
- Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
80
- For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
81
- Remember, use tags judiciously to maintain a natural flow of conversation.
82
  """
83
  else:
84
  prompt = f"""
@@ -93,10 +87,6 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
93
  Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
94
  Ensure the conversation flows naturally and stays relevant to the topic.
95
  Limit the script length to match the requested duration of {duration}.
96
- Use emotion tags naturally in generative AI speech, incorporate them sparingly at key moments to enhance the dialogue's emotional context.
97
- Place tags like <laugh> for joy, <sigh> for frustration or relief, <chuckle> for mild amusement, <cough> or <sniffle> for discomfort, <groan> for displeasure, <yawn> for tiredness, and <gasp> for surprise.
98
- For example: "I can't believe I stayed up all night <yawn> only to find out the meeting was canceled <groan>. Oh well, at least I finished the project <chuckle>."
99
- Remember, use tags judiciously to maintain a natural flow of conversation.
100
  """
101
 
102
  response = model.generate_content(prompt)
@@ -104,7 +94,6 @@ def generate_podcast_script(api_key, content, duration, num_hosts):
104
  return clean_text
105
 
106
  def text_to_speech(text, voice):
107
- global model, tokenizer
108
  inputs = tokenizer(text, return_tensors="pt").to(device)
109
  with torch.no_grad():
110
  output = model.generate(**inputs, max_new_tokens=256)
@@ -146,10 +135,6 @@ def render_podcast(api_key, script, voice1, voice2, num_hosts):
146
  with gr.Blocks() as demo:
147
  gr.Markdown("# AI Podcast Generator")
148
 
149
- hf_token_input = gr.Textbox(label="Enter your Hugging Face API Token", type="password")
150
- load_model_btn = gr.Button("Load Orpheus Model")
151
- model_status = gr.Markdown("Model not loaded")
152
-
153
  api_key_input = gr.Textbox(label="Enter your Gemini API Key", type="password")
154
 
155
  with gr.Row():
@@ -160,11 +145,13 @@ with gr.Blocks() as demo:
160
 
161
  num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
162
 
 
 
163
  with gr.Row():
164
- voice1_select = gr.Dropdown(label="Select Voice 1", choices=["Voice 1", "Voice 2", "Voice 3"], value="Voice 1")
165
 
166
  with gr.Row():
167
- voice2_select = gr.Dropdown(label="Select Voice 2", choices=["Voice 1", "Voice 2", "Voice 3"], value="Voice 2")
168
 
169
  generate_btn = gr.Button("Generate Script")
170
  script_output = gr.Textbox(label="Generated Script", lines=10)
@@ -172,13 +159,6 @@ with gr.Blocks() as demo:
172
  render_btn = gr.Button("Render Podcast")
173
  audio_output = gr.Audio(label="Generated Podcast")
174
 
175
- def load_model_wrapper(hf_token):
176
- global model, tokenizer
177
- model, tokenizer = load_model(hf_token)
178
- return "Model loaded successfully"
179
-
180
- load_model_btn.click(load_model_wrapper, inputs=[hf_token_input], outputs=[model_status])
181
-
182
  def generate_script_wrapper(api_key, content, duration, num_hosts):
183
  return generate_podcast_script(api_key, content, duration, num_hosts)
184
 
 
16
  logging.basicConfig(level=logging.INFO)
17
  logger = logging.getLogger(__name__)
18
 
19
+ # Initialize Gemini AI
20
+ genai.configure(api_key='YOUR_GEMINI_API_KEY')
21
+
22
  # Set up device
23
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
24
 
25
+ # Load Orpheus model
26
+ print("Loading Orpheus model...")
27
  model_name = "canopylabs/orpheus-3b-0.1-ft"
28
 
29
+ HF_TOKEN = "YOUR_HUGGINGFACE_TOKEN"
30
+ login(token=HF_TOKEN)
31
+
32
+ snapshot_download(
33
+ repo_id=model_name,
34
+ use_auth_token=HF_TOKEN,
35
+ allow_patterns=[
36
+ "config.json",
37
+ "*.safetensors",
38
+ "model.safetensors.index.json",
39
+ ],
40
+ ignore_patterns=[
41
+ "optimizer.pt",
42
+ "pytorch_model.bin",
43
+ "training_args.bin",
44
+ "scheduler.pt",
45
+ "tokenizer.json",
46
+ "tokenizer_config.json",
47
+ "special_tokens_map.json",
48
+ "vocab.json",
49
+ "merges.txt",
50
+ "tokenizer.*"
51
+ ]
52
+ )
53
+
54
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16)
55
+ model.to(device)
56
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
57
+ print(f"Orpheus model loaded to {device}")
 
 
 
 
 
 
58
 
59
  def generate_podcast_script(api_key, content, duration, num_hosts):
60
  genai.configure(api_key=api_key)
 
73
  Do not use any special characters or markdown. Only include the monologue with proper punctuation.
74
  Ensure the content flows naturally and stays relevant to the topic.
75
  Limit the script length to match the requested duration of {duration}.
 
 
 
 
76
  """
77
  else:
78
  prompt = f"""
 
87
  Do not use any special characters or markdown. Only include the alternating dialogue lines with proper punctuation.
88
  Ensure the conversation flows naturally and stays relevant to the topic.
89
  Limit the script length to match the requested duration of {duration}.
 
 
 
 
90
  """
91
 
92
  response = model.generate_content(prompt)
 
94
  return clean_text
95
 
96
  def text_to_speech(text, voice):
 
97
  inputs = tokenizer(text, return_tensors="pt").to(device)
98
  with torch.no_grad():
99
  output = model.generate(**inputs, max_new_tokens=256)
 
135
  with gr.Blocks() as demo:
136
  gr.Markdown("# AI Podcast Generator")
137
 
 
 
 
 
138
  api_key_input = gr.Textbox(label="Enter your Gemini API Key", type="password")
139
 
140
  with gr.Row():
 
145
 
146
  num_hosts = gr.Radio([1, 2], label="Number of podcast hosts", value=2)
147
 
148
+ voice_options = ["tara", "leah", "jess", "leo", "dan", "mia", "zac", "zoe"]
149
+
150
  with gr.Row():
151
+ voice1_select = gr.Dropdown(label="Select Voice 1", choices=voice_options, value="tara")
152
 
153
  with gr.Row():
154
+ voice2_select = gr.Dropdown(label="Select Voice 2", choices=voice_options, value="leo")
155
 
156
  generate_btn = gr.Button("Generate Script")
157
  script_output = gr.Textbox(label="Generated Script", lines=10)
 
159
  render_btn = gr.Button("Render Podcast")
160
  audio_output = gr.Audio(label="Generated Podcast")
161
 
 
 
 
 
 
 
 
162
  def generate_script_wrapper(api_key, content, duration, num_hosts):
163
  return generate_podcast_script(api_key, content, duration, num_hosts)
164