Spaces:
Runtime error
Runtime error
File size: 2,266 Bytes
568ea9a 29cd488 568ea9a 23d75e8 c5be12e 23d75e8 29cd488 c5be12e 29cd488 23d75e8 568ea9a 3b8e1b8 568ea9a 3b8e1b8 568ea9a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import gradio as gr
from setfit import SetFitModel
def cortar_en_bloques(texto, longitud_bloque):
    """Split ``texto`` into blocks of at most ``longitud_bloque`` words.

    Words are whatever ``str.split()`` yields, so any run of whitespace
    (spaces, tabs, newlines) is a separator and is normalised to a single
    space inside each returned block.

    Args:
        texto: The text to split.
        longitud_bloque: Maximum number of words per block (an integer).

    Returns:
        A list of strings. Every block holds exactly ``longitud_bloque``
        words except possibly the last one, which holds the remainder.
        An empty or whitespace-only ``texto`` yields an empty list.
    """
    palabras = texto.split()
    if longitud_bloque < 1:
        # A non-positive size can never be "filled", so all the words stay
        # together in a single block (nothing at all for empty input).
        return [" ".join(palabras)] if palabras else []
    return [
        " ".join(palabras[inicio:inicio + longitud_bloque])
        for inicio in range(0, len(palabras), longitud_bloque)
    ]
# Load the pretrained SetFit model identified by the repo id / path below.
# This runs once, at import time; the resulting object is kept at module level.
model = SetFitModel.from_pretrained("desarrolloasesoreslocales/SetFitPruebaRecorte")
# Map the labels
# NOTE(review): no label mapping is defined in the visible code; this comment
# looks stale -- confirm whether a mapping was meant to live here.
def _indices_conservados(chunks, fraccion_margen):
    """Return the indices of the chunks that survive one trimming pass.

    Only the first and the last ``int(len(chunks) * fraccion_margen)`` chunks
    are candidates for removal. A candidate is kept only when the
    module-level ``model`` predicts ``1`` for it; every non-candidate chunk
    is always kept. Progress is printed to stdout for each chunk.

    Args:
        chunks: List of text chunks, in document order.
        fraccion_margen: Share of the chunks (per side) that may be trimmed.

    Returns:
        The indices of the kept chunks, in ascending order.
    """
    total = len(chunks)
    margen = int(total * fraccion_margen)
    conservados = []
    for i, chunk in enumerate(chunks):
        print('Recortando -', round((i / total) * 100), '%')
        # Decide by position rather than by value: a slice such as
        # ``chunks[-0:]`` would select *every* chunk when the margin is 0,
        # and a value lookup would misclassify repeated chunks.
        es_recortable = i < margen or i >= total - margen
        # Short-circuit so the model is only consulted for margin chunks.
        if not es_recortable or model.predict([chunk]).item() == 1:
            conservados.append(i)
    return conservados


# Define the prediction function
def predict(payload):
    """Trim the leading and trailing parts of a text using the SetFit model.

    The text is trimmed in two passes:

    1. Coarse pass: 150-word chunks, where the first and last 25% of the
       chunks are candidates for removal. The chunk immediately before the
       first kept chunk is also retained.
    2. Fine pass: the result is re-split into 80-word chunks, where the first
       and last 10% of the chunks are candidates for removal.

    In both passes a candidate chunk is kept only if ``model.predict``
    returns ``1`` for it.

    Args:
        payload: The input text (the value of the Gradio textbox).
            ``None`` is treated as empty text.

    Returns:
        The trimmed text, with the kept chunks joined by single spaces.
    """
    # Coarse pass over 150-word chunks.
    chunks = cortar_en_bloques(payload or "", 150)
    conservados = _indices_conservados(chunks, 0.25)
    if conservados and conservados[0] > 0:
        # Keep one extra chunk before the first kept one.
        conservados.insert(0, conservados[0] - 1)
    # Join with spaces so the extra leading chunk does not fuse its last
    # word with the first word of the next chunk.
    recorte_general = " ".join(chunks[i] for i in conservados)
    print(100, '%')

    # Fine pass over 80-word chunks of the coarse result.
    chunks2 = cortar_en_bloques(recorte_general, 80)
    recorte_final = " ".join(
        chunks2[i] for i in _indices_conservados(chunks2, 0.1)
    )
    print(100, '%')
    return recorte_final
# Create the Gradio interface: one text box in, one text box out, wired to
# predict(). live=False means predict() runs on submit, not on every edit.
iface = gr.Interface(
    fn=predict,
    inputs=gr.Textbox(),
    outputs=gr.Textbox(),
    live=False,
    title="Recortador de Texto",
)
# Launch the Gradio interface
iface.launch()