TDN-M committed on
Commit
c60ab48
·
verified ·
1 Parent(s): 0b45667

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -15,8 +15,7 @@ from TTS.tts.models.xtts import Xtts
15
  from vinorm import TTSnorm
16
  from langchain_community.llms import HuggingFacePipeline
17
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
18
- from diffusers import StableDiffusionPipeline
19
- from PIL import Image
20
  import cv2
21
  from moviepy.editor import AudioFileClip, ImageSequenceClip
22
  import gc
@@ -73,12 +72,6 @@ llm_chain = caption_chain.chain(llm=local_llm)
73
  sum_llm_chain = tag_chain.chain(llm=local_llm)
74
  pexels_api_key = os.getenv('pexels_api_key')
75
 
76
- # Initialize Stable Diffusion Pipeline with TDN-M/East-asian-beauty
77
- image_gen_model_id = "TDN-M/East-asian-beauty"
78
- device = "cuda" if torch.cuda.is_available() else "cpu"
79
- image_generator = StableDiffusionPipeline.from_pretrained(image_gen_model_id, torch_dtype=torch.float16)
80
- image_generator = image_generator.to(device)
81
-
82
  def normalize_vietnamese_text(text):
83
  text = (
84
  TTSnorm(text, unknown=False, lower=False, rule=True)
@@ -134,13 +127,22 @@ def truncate_prompt(prompt, tokenizer, max_length=512):
134
  prompt = tokenizer.convert_tokens_to_string(tokens)
135
  return prompt
136
 
137
- def generate_images_from_sentences(sentences, image_generator, folder_path):
138
  try:
 
139
  for i, sentence in enumerate(sentences):
140
  print(f"Generating image for sentence {i + 1}: {sentence}")
141
- image = image_generator(sentence, guidance_scale=7.5).images[0]
 
 
 
 
 
 
 
 
142
  image_path = os.path.join(folder_path, f"image_{i + 1}.png")
143
- image.save(image_path)
144
  print(f"Saved image at {image_path}")
145
  except Exception as e:
146
  print("Error! Failed generating images")
@@ -238,7 +240,7 @@ def predict(
238
  sentences = [x.strip() for x in re.split(r'[.!?]', prompt) if len(x.strip()) > 6]
239
 
240
  # Tạo ảnh minh họa cho từng câu
241
- images = generate_images_from_sentences(sentences, image_generator, folder_path)
242
 
243
  # Tạo video từ file audio và các ảnh
244
  video_path = os.path.join(folder_name, "Final_Ad_Video.mp4")
 
15
  from vinorm import TTSnorm
16
  from langchain_community.llms import HuggingFacePipeline
17
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
18
+ from gradio_client import Client
 
19
  import cv2
20
  from moviepy.editor import AudioFileClip, ImageSequenceClip
21
  import gc
 
72
  sum_llm_chain = tag_chain.chain(llm=local_llm)
73
  pexels_api_key = os.getenv('pexels_api_key')
74
 
 
 
 
 
 
 
75
  def normalize_vietnamese_text(text):
76
  text = (
77
  TTSnorm(text, unknown=False, lower=False, rule=True)
 
127
  prompt = tokenizer.convert_tokens_to_string(tokens)
128
  return prompt
129
 
130
+ def generate_images_from_sentences(sentences):
131
  try:
132
+ client = Client("ByteDance/Hyper-FLUX-8Steps-LoRA")
133
  for i, sentence in enumerate(sentences):
134
  print(f"Generating image for sentence {i + 1}: {sentence}")
135
+ result = client.predict(
136
+ height=1024,
137
+ width=1024,
138
+ steps=8,
139
+ scales=3.5,
140
+ prompt=sentence,
141
+ seed=3413,
142
+ api_name="/process_image"
143
+ )
144
  image_path = os.path.join(folder_path, f"image_{i + 1}.png")
145
+ result.save(image_path)
146
  print(f"Saved image at {image_path}")
147
  except Exception as e:
148
  print("Error! Failed generating images")
 
240
  sentences = [x.strip() for x in re.split(r'[.!?]', prompt) if len(x.strip()) > 6]
241
 
242
  # Tạo ảnh minh họa cho từng câu
243
+ images = generate_images_from_sentences(sentences)
244
 
245
  # Tạo video từ file audio và các ảnh
246
  video_path = os.path.join(folder_name, "Final_Ad_Video.mp4")