# Features: 1. ignore strange symbols by deleting them outright; 2. how to reference a fine-tuned model

import streamlit as st
from transformers import pipeline
import re

# Matches any single character that is NOT an ASCII letter, a digit,
# whitespace, or one of the punctuation marks . , ! ? '
_DISALLOWED_CHARS = re.compile(r"[^a-zA-Z0-9\s.,!?']")


def clean_text(text):
    """Return *text* with every disallowed character deleted.

    Only ASCII letters, digits, whitespace and the punctuation marks
    ``. , ! ? '`` survive; everything else is removed without leaving a
    replacement character behind.
    """
    return _DISALLOWED_CHARS.sub("", text)

@st.cache_resource
def _load_pipeline(task, model, **pipeline_kwargs):
    """Create a transformers pipeline and reuse it across Streamlit reruns.

    Streamlit re-executes this script on every widget interaction; without
    the cache, each click would instantiate the models again.

    Args:
        task: Task name forwarded to ``pipeline`` as its first argument.
        model: Model identifier forwarded to ``pipeline`` as ``model=``.
        **pipeline_kwargs: Extra keyword arguments forwarded to ``pipeline``.

    Returns:
        The object returned by ``pipeline``.
    """
    return pipeline(task, model=model, **pipeline_kwargs)


# Load the text summarization pipeline.
# Any failure here (bad task/model pairing, model not found or not
# downloadable, missing backend dependency, ...) is reported in the UI and
# recorded in the *_loaded flag instead of crashing the app with a traceback.
try:
    summarizer = _load_pipeline("summarization", "syndi-models/titlewave-t5-base")
    summarizer_loaded = True
except Exception as e:
    st.error(f"Error loading summarization model: {e}")
    summarizer = None  # keep the name bound even when loading failed
    summarizer_loaded = False

# Load the Question classification pipeline
model_name = "elozano/bert-base-cased-news-category"
try:
    # return_all_scores=True is kept as-is: downstream code indexes the
    # result with [0] and then scans the per-label score dicts.
    classifier = _load_pipeline("text-classification", model_name, return_all_scores=True)
    classifier_loaded = True
except Exception as e:
    st.error(f"Error loading classification model: {e}")
    classifier = None  # keep the name bound even when loading failed
    classifier_loaded = False

# Streamlit app title
st.title("Question Rephrase and Classification")

# Input text for summarization and classification
text_input = st.text_area("Enter long question to rephrase and classify:", "")


def _rephrase_and_classify(raw_text):
    """Clean *raw_text*, rephrase it, classify the rephrasing, and render it.

    Uses the module-level ``summarizer`` and ``classifier`` pipelines, so it
    must only be called once both have loaded. Every outcome (results,
    warnings, errors) is reported through Streamlit; nothing is returned.

    Args:
        raw_text: The text exactly as typed by the user.
    """
    cleaned_text = clean_text(raw_text)
    # Cleaning can delete everything (e.g. input made only of symbols or
    # non-ASCII characters); do not send blank text to the model.
    if not cleaned_text.strip():
        st.warning(
            "Nothing left to process after removing unsupported characters. "
            "Please enter text using letters, digits, or basic punctuation."
        )
        return

    # Perform text summarization
    try:
        summary = summarizer(cleaned_text, max_length=130, min_length=30, do_sample=False)
        summarized_text = summary[0]['summary_text']
    except Exception as e:
        st.error(f"Error during summarization: {e}")
        return

    if not summarized_text:
        st.warning("The summarization model returned an empty result.")
        return

    # Show the rephrasing before classifying so that it is not lost when the
    # classifier fails.
    st.write("Rephrased Text:", summarized_text)

    # Perform question classification on the summarized text
    try:
        # [0] selects the per-label list of {'label', 'score'} dicts for the
        # single input string.
        results = classifier(summarized_text)[0]
        # Find the category with the highest score
        best = max(results, key=lambda x: x['score'])
        label, score = best['label'], best['score']
    except Exception as e:
        st.error(f"Error during classification: {e}")
        return

    st.write("Category:", label)
    st.write("Score:", score)


if st.button("Process"):
    if summarizer_loaded and classifier_loaded and text_input.strip():
        _rephrase_and_classify(text_input)
    else:
        st.warning("Please enter text to process and ensure both models are loaded.")