Update app.py
app.py CHANGED
@@ -133,7 +133,7 @@ class CustomDataset(torch.utils.data.Dataset):
 
     def __getitem__(self, idx):
         try:
-            text = self.data[idx]['content']
+            text = self.data[idx]['content ']
             label = self.data[idx].get('label', 0)
 
             encoding = self.tokenizer.encode_plus(
@@ -219,10 +219,8 @@ def deploy_model(model, tokenizer):
     deployment_script = f'''
 import torch
 from transformers import AutoModelForSequenceClassification, AutoTokenizer
-
 model = AutoModelForSequenceClassification.from_pretrained('./model')
 tokenizer = AutoTokenizer.from_pretrained('./model')
-
 def predict(text):
     encoding = tokenizer.encode_plus(
         text,
@@ -232,7 +230,6 @@ def deploy_model(model, tokenizer):
         return_attention_mask=True,
         return_tensors='pt',
     )
-
     input_ids = encoding['input_ids'].to('cuda' if torch.cuda.is_available() else 'cpu')
     attention_mask = encoding['attention_mask'].to('cuda' if torch.cuda.is_available() else 'cpu')
     outputs = model(input_ids, attention_mask=attention_mask)
@@ -266,13 +263,13 @@ iface = gr.Interface(
     fn=gradio_interface,
     inputs=[
         gr.Textbox(lines=5, label="Enter comma-separated URLs"),
-        gr.File(label="Upload file (including zip files)", type="filepath"),
+        gr.File(label="Upload file (including zip files)", type="filepath"),
         gr.Textbox(lines=10, label="Enter or paste large text"),
         gr.Textbox(label="Model name", value="distilbert-base-uncased"),
         gr.Number(label="Batch size", value=8),
         gr.Number(label="Epochs", value=3),
     ],
-
+    outputs=gr.File(label="Download Combined Dataset"),
     title="Dataset Creation and Model Training",
     description="Enter URLs, upload files (including zip files), and/or paste text to create a dataset and train a model.",
 )
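
For context, the deploy_model hunks only show the top of the generated deployment_script, so the predict function is cut off by the diff. A minimal sketch of how such a script is typically completed is below; the max_length, truncation, and padding arguments, the .to(device) on the model, and the argmax post-processing are assumptions, not code taken from app.py.

import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

device = 'cuda' if torch.cuda.is_available() else 'cpu'
# The diff loads the model without moving it to a device; doing so here is an
# assumed fix so the inputs and weights end up on the same device.
model = AutoModelForSequenceClassification.from_pretrained('./model').to(device)
tokenizer = AutoTokenizer.from_pretrained('./model')

def predict(text):
    # Mirrors the encode_plus call shown in the diff; the length/padding
    # settings are assumptions.
    encoding = tokenizer.encode_plus(
        text,
        max_length=512,
        truncation=True,
        padding='max_length',
        return_attention_mask=True,
        return_tensors='pt',
    )
    input_ids = encoding['input_ids'].to(device)
    attention_mask = encoding['attention_mask'].to(device)
    with torch.no_grad():
        outputs = model(input_ids, attention_mask=attention_mask)
    # Assumed post-processing: return the predicted class index.
    return int(torch.argmax(outputs.logits, dim=-1).item())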
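
The last hunk adds outputs=gr.File(label="Download Combined Dataset") to the Interface, so the gradio_interface callback is expected to take the six declared inputs in order and return a path to the combined dataset file. A hedged sketch of a compatible callback follows; the body, the record layout, and the temporary-file handling are illustrative assumptions, not the actual implementation in app.py, and model training is omitted.

import json
import tempfile

def gradio_interface(urls, uploaded_file, pasted_text, model_name, batch_size, epochs):
    # Hypothetical callback matching the Interface signature: six inputs, one file output.
    records = []
    if urls:
        records.extend({"content": u.strip(), "label": 0}
                       for u in urls.split(",") if u.strip())
    if pasted_text:
        records.append({"content": pasted_text, "label": 0})
    if uploaded_file:
        # type="filepath" means Gradio passes a path string, not a file object.
        with open(uploaded_file, "r", encoding="utf-8", errors="ignore") as f:
            records.append({"content": f.read(), "label": 0})

    # model_name, batch_size, and epochs would feed the training step, which
    # this sketch leaves out.
    out = tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False)
    json.dump(records, out)
    out.close()
    # Returning a file path is what the new outputs=gr.File(...) component expects.
    return out.name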
|