Spaces:

awacke1
/

TorchMLTrainerStreamlit

Runtime error

App Files Files Community

awacke1 committed on Mar 2, 2023

Commit

76df0cd

1 Parent(s): 0498318

Create app.py

Browse files

Files changed (1) hide show

app.py +69 -0

app.py ADDED Viewed

	@@ -0,0 +1,69 @@

"""Streamlit app: fine-tune DistilBERT on a tiny health care sentiment sample.

The script fine-tunes a pre-trained sentiment model on a handful of labelled
health care reviews and exposes a text box that classifies user input as
positive or negative.
"""
import pandas as pd
import torch
import streamlit as st
from transformers import DistilBertForSequenceClassification, DistilBertTokenizerFast, Trainer, TrainingArguments

# Longest token sequence passed to the tokenizer (training and inference).
MAX_LENGTH = 512

# Define the health care sentiment classification data (1 = positive, 0 = negative)
data = [
    {"text": "The health care services were excellent and the staff was very friendly.", "label": 1},
    {"text": "I had a bad experience with the health care services. The doctors were not knowledgeable and the staff was rude.", "label": 0},
    {"text": "The health care services were okay, but the waiting time was too long.", "label": 1},
    {"text": "I was very satisfied with the health care services. The doctors were very professional and the staff was helpful.", "label": 1},
    {"text": "The health care services were average. The doctors were not exceptional and the staff was not very friendly.", "label": 0}
]
# Convert the data to a pandas dataframe
df = pd.DataFrame(data)


class _SentimentDataset(torch.utils.data.Dataset):
    """Pair tokenizer encodings with labels in the per-example form Trainer expects."""

    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # Trainer's default collator stacks these per-example tensors; the
        # model computes the loss itself when a "labels" entry is present.
        item = {key: torch.tensor(values[idx]) for key, values in self.encodings.items()}
        item["labels"] = torch.tensor(self.labels[idx])
        return item


@st.cache_resource
def load_trained_model():
    """Load the pre-trained model and tokenizer, fine-tune once, and return them.

    Streamlit re-executes the whole script on every widget interaction;
    caching keeps the download and the training run from repeating each time.

    Returns:
        A ``(model, tokenizer)`` tuple with the model switched to eval mode.
    """
    model = DistilBertForSequenceClassification.from_pretrained('distilbert-base-uncased-finetuned-sst-2-english')
    tokenizer = DistilBertTokenizerFast.from_pretrained('distilbert-base-uncased')

    # Tokenize the text and wrap it together with the labels
    encodings = tokenizer(df["text"].tolist(), padding=True, truncation=True, max_length=MAX_LENGTH)
    train_dataset = _SentimentDataset(encodings, df["label"].tolist())

    # No evaluation options are set: there is no eval dataset or metric
    # function, so step-wise evaluation / best-model selection cannot run.
    # Warm-up is disabled because the run is only a few optimizer steps long.
    training_args = TrainingArguments(
        output_dir='./results',
        num_train_epochs=3,
        per_device_train_batch_size=16,
        per_device_eval_batch_size=64,
        warmup_steps=0,
        weight_decay=0.01,
        logging_dir='./logs',
        logging_steps=10,
    )
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_dataset,
    )
    trainer.train()

    # Training leaves dropout enabled; switch it off for stable predictions.
    model.eval()
    return model, tokenizer


def classify_sentiment(text, model, tokenizer):
    """Return 'positive' or 'negative' for ``text`` using the given model."""
    encoded = tokenizer(text, return_tensors='pt', truncation=True, max_length=MAX_LENGTH)
    # Trainer may have moved the model to an accelerator; inputs must follow it.
    encoded = {key: tensor.to(model.device) for key, tensor in encoded.items()}
    with torch.no_grad():
        logits = model(**encoded)[0]
    probabilities = logits.softmax(dim=1)
    return 'positive' if probabilities[0][1] > probabilities[0][0] else 'negative'


model, tokenizer = load_trained_model()

# Evaluate the model on a sample text
sample_text = "I had a great experience with the health care services. The doctors were very knowledgeable and the staff was friendly."
# Kept in its own variable so classifying user input never overwrites it.
sample_sentiment = classify_sentiment(sample_text, model, tokenizer)

# Create the Streamlit app
st.title("Health Care Sentiment Classifier")
text_input = st.text_input("Enter some text to classify:")
if st.button("Classify"):
    if text_input.strip():
        sentiment = classify_sentiment(text_input, model, tokenizer)
        st.write(f"The sentiment of the text is {sentiment}.")
    else:
        st.warning("Please enter some text to classify.")
st.write(f"For example, the sentiment of '{sample_text}' is {sample_sentiment}.")