Spaces: Build error
Commit a3ff196 · Parent(s): 6e1398d
Update app.py

app.py CHANGED
@@ -4,23 +4,19 @@ import json
 
 # Read requirements.txt file
 with open('requirements.txt', 'r') as req_file:
-
+    requirements = req_file.read().splitlines()
 
-# Install the required libraries
-for requirement in requirements:
-    # Use your preferred method to install the libraries
-    # e.g., subprocess, pip, etc.
 
 # Load and preprocess the IMDB dataset in JSON format
 with open('IMDB Dataset.json', 'r') as json_file:
-
+    imdb_data = json.load(json_file)
 
 # Select only 30 words from the dataset
 preprocessed_data = []
 for entry in imdb_data:
-
-
-
+    text = entry['text']
+    words = text.split()[:30]
+    preprocessed_entry = {
         'text': ' '.join(words),
         'label': entry['label']
     }

@@ -32,7 +28,7 @@ dataset = load_dataset('json', data=preprocessed_data)
 # Tokenize the dataset
 tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")
 def tokenize_function(examples):
-
+    return tokenizer(examples["text"], padding="max_length", truncation=True)
 
 tokenized_datasets = dataset.map(tokenize_function, batched=True)
 
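Note on the first hunk: in the old file, the with open('requirements.txt', ...) block and the "install the required libraries" loop had no statements in their bodies (only comments and blank lines), so app.py could not even be parsed. This commit fills in the file read and drops the install loop; on Spaces, requirements.txt is normally installed automatically during the build, so no runtime install is needed. For reference only, the subprocess/pip approach the removed comments hinted at would look roughly like the sketch below; this is illustrative and not code from the commit.

import subprocess
import sys

# Read requirements.txt, then install each entry with pip in the current interpreter.
with open('requirements.txt', 'r') as req_file:
    requirements = req_file.read().splitlines()

for requirement in requirements:
    if requirement.strip():  # skip blank lines
        subprocess.check_call([sys.executable, "-m", "pip", "install", requirement])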
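Note on the second hunk: the context line dataset = load_dataset('json', data=preprocessed_data) is left unchanged by this commit. As far as I can tell, the 'json' builder in the datasets library expects file paths via data_files rather than an in-memory list, so this call is likely to fail at startup. A minimal sketch of the in-memory route using Dataset.from_list, reusing the commit's tokenization, is shown below; the two sample entries are placeholders, not data from IMDB Dataset.json.

from datasets import Dataset
from transformers import AutoTokenizer

# Placeholder entries standing in for the {'text', 'label'} dicts built in the loop above.
preprocessed_data = [
    {"text": "a placeholder review truncated to thirty words", "label": "positive"},
    {"text": "another placeholder review", "label": "negative"},
]

# Build the Dataset directly from the in-memory list instead of load_dataset('json', data=...).
dataset = Dataset.from_list(preprocessed_data)

tokenizer = AutoTokenizer.from_pretrained("bigscience/bloom-560m")

def tokenize_function(examples):
    # Same call as the commit; an explicit max_length keeps padding="max_length"
    # well defined whatever the tokenizer's default maximum is.
    return tokenizer(examples["text"], padding="max_length", truncation=True, max_length=64)

tokenized_datasets = dataset.map(tokenize_function, batched=True)
print(tokenized_datasets)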