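"""Gradio demo: predict the complexity level of a target word in context.

The app embeds the full sentence and the target word with XLNet (mean-pooled
last hidden states), concatenates the two vectors, and classifies the result
with a pre-trained scikit-learn random forest loaded from random_forest_model.pkl.
"""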
import subprocess
import sys

# sentencepiece is required by the XLNet tokenizer; install it on the fly if missing.
try:
    import sentencepiece
except ImportError:
    subprocess.check_call([sys.executable, "-m", "pip", "install", "sentencepiece"])
    import sentencepiece

import gradio as gr
import torch
from transformers import XLNetTokenizer, XLNetModel
import numpy as np
import joblib

# Run on GPU when available.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

tokenizer = XLNetTokenizer.from_pretrained('xlnet-base-cased')
xlnet_model = XLNetModel.from_pretrained('xlnet-base-cased').to(device)
xlnet_model.eval()  # inference only: disable dropout

# Pre-trained scikit-learn random forest, expected to take the concatenated
# [sentence_embedding, word_embedding] vector built in predict_complexity().
random_forest_classifier = joblib.load("random_forest_model.pkl")

def get_embedding(text):
    """Return a fixed-size vector for `text`: XLNet last hidden states, mean-pooled over tokens."""
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512).to(device)
    with torch.no_grad():
        outputs = xlnet_model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).squeeze().cpu().numpy()

def predict_complexity(sentence, target_word):
    """Embed the sentence and the target word, concatenate, and classify."""
    try:
        sentence_embedding = get_embedding(sentence)
        word_embedding = get_embedding(target_word)
        combined_embedding = np.concatenate([sentence_embedding, word_embedding]).reshape(1, -1)
        prediction = random_forest_classifier.predict(combined_embedding)[0]
        return f"Predicted Complexity Level: **{prediction}**"
    except Exception as e:
        return f"Error: {str(e)}"

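# The random forest is loaded from disk above; this script does not include its
# training code. The helper below is a hypothetical sketch (the CSV name, column
# names, and hyperparameters are assumptions, not the original pipeline) of how
# such a model could be fit on concatenated sentence + word XLNet embeddings
# (2 x 768 = 1536 features for xlnet-base-cased) and saved with joblib.
def train_classifier_sketch(csv_path="complexity_train.csv"):
    """Hypothetical training sketch; not the original code behind random_forest_model.pkl."""
    import pandas as pd
    from sklearn.ensemble import RandomForestClassifier

    df = pd.read_csv(csv_path)  # assumed columns: sentence, word, label
    # Build one feature row per example: [sentence embedding ++ word embedding].
    X = np.vstack([
        np.concatenate([get_embedding(s), get_embedding(w)])
        for s, w in zip(df["sentence"], df["word"])
    ])
    clf = RandomForestClassifier(n_estimators=200, random_state=42)
    clf.fit(X, df["label"])
    joblib.dump(clf, "random_forest_model.pkl")
    return clf
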
# Minimal Gradio UI: two text inputs and a markdown output for the prediction.
with gr.Blocks() as demo:
    gr.Markdown("## ✨ Word Complexity Predictor")
    with gr.Row():
        sentence_input = gr.Textbox(label="Full Sentence", placeholder="Type a full sentence...")
        word_input = gr.Textbox(label="Target Word", placeholder="Type the target word...")
    output = gr.Markdown()
    gr.Button("Predict Complexity").click(predict_complexity, [sentence_input, word_input], output)

demo.launch()
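# demo.launch() serves the app locally; when running in a hosted notebook
# (e.g. Colab), launch(share=True) can be used to get a temporary public link.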