preston-cell committed on
Commit
5a007ee
·
verified ·
1 Parent(s): f9a1f04

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -6
app.py CHANGED
@@ -19,8 +19,8 @@ ocr_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
19
  ocr_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", torch_dtype=ocr_dtype, trust_remote_code=True).to(ocr_device)
20
  ocr_processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
21
 
22
- # Load GPT-2 XL model for text generation
23
- gpt2_generator = pipeline('text-generation', model='gpt2-xl')
24
 
25
  # Load speaker embedding
26
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
@@ -43,9 +43,9 @@ def process_image(image):
43
  )
44
  extracted_text = ocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
45
 
46
- # Generate context using GPT-2 XL
47
  prompt = f"Determine the context of this image based on the caption and extracted text. Caption: {caption}. Extracted text: {extracted_text}. Context:"
48
- context_output = gpt2_generator(prompt, max_length=150, num_return_sequences=1)
49
  context = context_output[0]['generated_text']
50
 
51
  # Convert context to speech
@@ -75,8 +75,8 @@ iface = gr.Interface(
75
  gr.Textbox(label="Extracted Text (OCR)"),
76
  gr.Textbox(label="Generated Context")
77
  ],
78
- title="SeeSay Contextualizer with GPT-2 XL",
79
- description="Upload an image to generate a caption, extract text, create audio from context, and determine the context using GPT-2 XL."
80
  )
81
 
82
  iface.launch()
 
19
  ocr_model = AutoModelForCausalLM.from_pretrained("microsoft/Florence-2-large", torch_dtype=ocr_dtype, trust_remote_code=True).to(ocr_device)
20
  ocr_processor = AutoProcessor.from_pretrained("microsoft/Florence-2-large", trust_remote_code=True)
21
 
22
+ # Load GPT-2 Large model for text generation
23
+ gpt2_generator = pipeline('text-generation', model='gpt2-large')
24
 
25
  # Load speaker embedding
26
  embeddings_dataset = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
 
43
  )
44
  extracted_text = ocr_processor.batch_decode(generated_ids, skip_special_tokens=True)[0]
45
 
46
+ # Generate context using GPT-2 Large
47
  prompt = f"Determine the context of this image based on the caption and extracted text. Caption: {caption}. Extracted text: {extracted_text}. Context:"
48
+ context_output = gpt2_generator(prompt, max_length=100, num_return_sequences=1)
49
  context = context_output[0]['generated_text']
50
 
51
  # Convert context to speech
 
75
  gr.Textbox(label="Extracted Text (OCR)"),
76
  gr.Textbox(label="Generated Context")
77
  ],
78
+ title="SeeSay Contextualizer with GPT-2 Large",
79
+ description="Upload an image to generate a caption, extract text, create audio from context, and determine the context using GPT-2 Large."
80
  )
81
 
82
  iface.launch()