acecalisto3 committed on
Commit
98cf6a3
·
verified ·
1 Parent(s): 15f4f9d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +3 -6
app.py CHANGED
@@ -133,7 +133,7 @@ class CustomDataset(torch.utils.data.Dataset):
133
 
134
  def __getitem__(self, idx):
135
  try:
136
- text = self.data[idx]['content']
137
  label = self.data[idx].get('label', 0)
138
 
139
  encoding = self.tokenizer.encode_plus(
@@ -219,10 +219,8 @@ def deploy_model(model, tokenizer):
219
  deployment_script = f'''
220
  import torch
221
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
222
-
223
  model = AutoModelForSequenceClassification.from_pretrained('./model')
224
  tokenizer = AutoTokenizer.from_pretrained('./model')
225
-
226
  def predict(text):
227
  encoding = tokenizer.encode_plus(
228
  text,
@@ -232,7 +230,6 @@ def deploy_model(model, tokenizer):
232
  return_attention_mask=True,
233
  return_tensors='pt',
234
  )
235
-
236
  input_ids = encoding['input_ids'].to('cuda' if torch.cuda.is_available() else 'cpu')
237
  attention_mask = encoding['attention_mask'].to('cuda' if torch.cuda.is_available() else 'cpu')
238
  outputs = model(input_ids, attention_mask=attention_mask)
@@ -266,13 +263,13 @@ iface = gr.Interface(
266
  fn=gradio_interface,
267
  inputs=[
268
  gr.Textbox(lines=5, label="Enter comma-separated URLs"),
269
- gr.File(label="Upload file (including zip files)", type="filepath"), # Removed max_size
270
  gr.Textbox(lines=10, label="Enter or paste large text"),
271
  gr.Textbox(label="Model name", value="distilbert-base-uncased"),
272
  gr.Number(label="Batch size", value=8),
273
  gr.Number(label="Epochs", value=3),
274
  ],
275
- outputs=gr.File(label="Download Combined Dataset"),
276
  title="Dataset Creation and Model Training",
277
  description="Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.",
278
  )
 
133
 
134
  def __getitem__(self, idx):
135
  try:
136
+ text = self.data[idx]['content ']
137
  label = self.data[idx].get('label', 0)
138
 
139
  encoding = self.tokenizer.encode_plus(
 
219
  deployment_script = f'''
220
  import torch
221
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
 
222
  model = AutoModelForSequenceClassification.from_pretrained('./model')
223
  tokenizer = AutoTokenizer.from_pretrained('./model')
 
224
  def predict(text):
225
  encoding = tokenizer.encode_plus(
226
  text,
 
230
  return_attention_mask=True,
231
  return_tensors='pt',
232
  )
 
233
  input_ids = encoding['input_ids'].to('cuda' if torch.cuda.is_available() else 'cpu')
234
  attention_mask = encoding['attention_mask'].to('cuda' if torch.cuda.is_available() else 'cpu')
235
  outputs = model(input_ids, attention_mask=attention_mask)
 
263
  fn=gradio_interface,
264
  inputs=[
265
  gr.Textbox(lines=5, label="Enter comma-separated URLs"),
266
+ gr.File(label="Upload file (including zip files)", type="filepath"),
267
  gr.Textbox(lines=10, label="Enter or paste large text"),
268
  gr.Textbox(label="Model name", value="distilbert-base-uncased"),
269
  gr.Number(label="Batch size", value=8),
270
  gr.Number(label="Epochs", value=3),
271
  ],
272
+ outputs=gr.File(label="Download Combined Dataset"),
273
  title="Dataset Creation and Model Training",
274
  description="Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.",
275
  )