Update app.py
app.py CHANGED
@@ -133,7 +133,7 @@ class CustomDataset(torch.utils.data.Dataset):
 
     def __getitem__(self, idx):
         try:
-            text = self.data[idx]['content']
+            text = self.data[idx]['content ']
             label = self.data[idx].get('label', 0)
 
             encoding = self.tokenizer.encode_plus(
@@ -219,10 +219,8 @@ def deploy_model(model, tokenizer):
     deployment_script = f'''
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
 model = AutoModelForSequenceClassification.from_pretrained('./model')
 tokenizer = AutoTokenizer.from_pretrained('./model')
-
 def predict(text):
     encoding = tokenizer.encode_plus(
         text,
@@ -232,7 +230,6 @@ def deploy_model(model, tokenizer):
         return_attention_mask=True,
         return_tensors='pt',
     )
-
     input_ids = encoding['input_ids'].to('cuda' if torch.cuda.is_available() else 'cpu')
     attention_mask = encoding['attention_mask'].to('cuda' if torch.cuda.is_available() else 'cpu')
     outputs = model(input_ids, attention_mask=attention_mask)
@@ -266,13 +263,13 @@ iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(lines=5, label="Enter comma-separated URLs"),
-        gr.File(label="Upload file (including zip files)", type="filepath"),
+        gr.File(label="Upload file (including zip files)", type="filepath"),
         gr.Textbox(lines=10, label="Enter or paste large text"),
         gr.Textbox(label="Model name", value="distilbert-base-uncased"),
         gr.Number(label="Batch size", value=8),
         gr.Number(label="Epochs", value=3),
     ],
-
+    outputs=gr.File(label="Download Combined Dataset"),
     title="Dataset Creation and Model Training",
     description="Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.",
 )
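
For context, the deploy_model hunks only show the top of the generated deployment_script, so the predict function is cut off by the diff. A minimal sketch of how such a script is typically completed is below; the max_length, truncation, and padding arguments, the .to(device) on the model, and the argmax post-processing are assumptions, not code taken from app.py.

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The diff loads the model without moving it to a device; doing so here is an
# assumed fix so the inputs and weights end up on the same device.
model = AutoModelForSequenceClassification.from_pretrained('./model').to(device)
tokenizer = AutoTokenizer.from_pretrained('./model')

def predict(text):
    # Mirrors the encode_plus call shown in the diff; the length/padding
    # settings are assumptions.
    encoding = tokenizer.encode_plus(
        text,
        max_length=512,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    # Assumed post-processing: return the predicted class index.
    return int(torch.argmax(outputs.logits, dim=-1).item())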
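
The last hunk adds outputs=gr.File(label="Download Combined Dataset") to the Interface, so the gradio_interface callback is expected to take the six declared inputs in order and return a path to the combined dataset file. A hedged sketch of a compatible callback follows; the body, the record layout, and the temporary-file handling are illustrative assumptions, not the actual implementation in app.py, and model training is omitted.

import json
import tempfile

def gradio_interface(urls, uploaded_file, pasted_text, model_name, batch_size, epochs):
    # Hypothetical callback matching the Interface signature: six inputs, one file output.
    records = []
    if urls:
        records.extend({"content": u.strip(), "label": 0}
                       for u in urls.split(",") if u.strip())
    if pasted_text:
        records.append({"content": pasted_text, "label": 0})
    if uploaded_file:
        # type="filepath" means Gradio passes a path string, not a file object.
        with open(uploaded_file, "r", encoding="utf-8", errors="ignore") as f:
            records.append({"content": f.read(), "label": 0})

    # model_name, batch_size, and epochs would feed the training step, which
    # this sketch leaves out.
    out = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
    json.dump(records, out)
    out.close()
    # Returning a file path is what the new outputs=gr.File(...) component expects.
    return out.name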
|