Spaces:
Paused
Paused
add logging
Browse files
app.py
CHANGED
|
@@ -12,10 +12,13 @@ def process_filename(filename, question):
|
|
| 12 |
|
| 13 |
|
| 14 |
def process_image(image, question):
|
| 15 |
-
|
| 16 |
-
|
|
|
|
|
|
|
| 17 |
|
| 18 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
|
| 19 |
model.to(device)
|
| 20 |
|
| 21 |
# prepare decoder inputs
|
|
@@ -35,6 +38,9 @@ def process_image(image, question):
|
|
| 35 |
return_dict_in_generate=True,
|
| 36 |
)
|
| 37 |
|
|
|
|
|
|
|
|
|
|
| 38 |
sequence = processor.batch_decode(outputs.sequences)[0]
|
| 39 |
sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
|
| 40 |
sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
|
|
|
|
| 12 |
|
| 13 |
|
| 14 |
def process_image(image, question):
|
| 15 |
+
repo_id = "naver-clova-ix/donut-base-finetuned-docvqa"
|
| 16 |
+
print(f"Model repo: {repo_id}")
|
| 17 |
+
processor = DonutProcessor.from_pretrained(repo_id)
|
| 18 |
+
model = VisionEncoderDecoderModel.from_pretrained(repo_id)
|
| 19 |
|
| 20 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
| 21 |
+
print(f"Device used: {device}")
|
| 22 |
model.to(device)
|
| 23 |
|
| 24 |
# prepare decoder inputs
|
|
|
|
| 38 |
return_dict_in_generate=True,
|
| 39 |
)
|
| 40 |
|
| 41 |
+
print(outputs)
|
| 42 |
+
print(outputs.sequences)
|
| 43 |
+
|
| 44 |
sequence = processor.batch_decode(outputs.sequences)[0]
|
| 45 |
sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
|
| 46 |
sequence = re.sub(r"<.*?>", "", sequence, count=1).strip() # remove first task start token
|