Spaces:
Build error
Build error
| # Install necessary libraries | |
| import os | |
| import subprocess | |
# Function to install a package with pip
def install(package):
    """Install ``package`` with pip, using the interpreter running this script.

    pip is invoked as ``<python> -m pip install <package>`` so the package
    lands in the same environment the script runs in.

    Args:
        package: Requirement specifier handed to ``pip install``.

    Raises:
        subprocess.CalledProcessError: If pip exits with a non-zero status.
    """
    # Imported locally so the block is self-contained. The previous
    # `os.sys.executable` only worked because the os module happens to import
    # sys internally; `os.sys` is not a documented attribute.
    import sys

    subprocess.check_call([sys.executable, "-m", "pip", "install", package])
# Make sure every third-party dependency is present before importing it below.
# The order matches the original sequence of individual install() calls.
for _package in ("transformers", "torch", "pandas", "scikit-learn", "gradio"):
    install(_package)
| import os | |
| import pandas as pd | |
| import gradio as gr | |
| from transformers import AutoModel, AutoTokenizer | |
| import torch | |
| from sklearn.model_selection import train_test_split | |
# Load your dataset
def load_dataset(file_path="Valid-part-2.xlsx"):
    """Read the Excel dataset at ``file_path`` into a DataFrame.

    Args:
        file_path: Path of the Excel workbook to read. Defaults to
            "Valid-part-2.xlsx"; a relative path is resolved against the
            current working directory.

    Returns:
        The loaded ``pandas.DataFrame``, or ``None`` when the file exists but
        reading it fails (the error is printed instead of raised).

    Raises:
        FileNotFoundError: If ``file_path`` does not exist.
    """
    # Printed to help diagnose "file not found" problems with relative paths.
    print(f"Current working directory: {os.getcwd()}")
    if not os.path.exists(file_path):
        raise FileNotFoundError(
            f"Dataset not found. Please ensure that '{file_path}' exists."
        )

    # Only the read itself is guarded. The catch is deliberately broad and
    # best-effort: callers treat a None result as "dataset unavailable".
    try:
        df = pd.read_excel(file_path)
    except Exception as e:
        print(f"Error loading dataset: {e}")
        return None

    print("Columns in the dataset:", df.columns.tolist())
    return df
# Preprocess the data
def preprocess_data(df):
    """Placeholder preprocessing hook that hands ``df`` back untouched.

    Args:
        df: The dataset to preprocess.

    Returns:
        The very same object that was passed in; no copy is made and no
        transformation is applied yet.
    """
    # Cleaning / feature-engineering steps belong here once they exist.
    processed = df
    return processed
# Train your model
def train_model(df):
    """Split ``df`` and load the pre-trained model (no fine-tuning yet).

    The train/test split and the tokenizer are produced but not used
    further: they are scaffolding for a fine-tuning step that has not been
    written. The model is returned exactly as downloaded.

    Args:
        df: Dataset to split; passed straight to ``train_test_split``.

    Returns:
        The ``AutoModel`` instance for "Alibaba-NLP/gte-multilingual-base".
    """
    model_name = "Alibaba-NLP/gte-multilingual-base"

    # Split the dataset into training and testing sets (80/20, fixed seed).
    # Underscore-prefixed because nothing consumes them yet.
    _train_df, _test_df = train_test_split(df, test_size=0.2, random_state=42)

    # Load the pre-trained tokenizer and model from Hugging Face.
    _tokenizer = AutoTokenizer.from_pretrained(model_name)
    # This checkpoint ships its own modelling code on the Hub, so loading it
    # through AutoModel requires trust_remote_code=True.
    # NOTE(review): this executes code from the model repository; consider
    # pinning a `revision` once a vetted commit is chosen.
    model = AutoModel.from_pretrained(model_name, trust_remote_code=True)

    # Normally the model would be fine-tuned here with the training split.
    return model
# Checkpoint served by predict(), plus a one-slot cache so the tokenizer and
# model are loaded once per process instead of once per request.
_PREDICT_MODEL_NAME = "Alibaba-NLP/gte-multilingual-base"
_PREDICT_RESOURCES = {}


def _load_predict_resources():
    """Return the ``(tokenizer, model)`` pair, loading it on first use only."""
    if "pair" not in _PREDICT_RESOURCES:
        tokenizer = AutoTokenizer.from_pretrained(_PREDICT_MODEL_NAME)
        # This checkpoint ships its own modelling code on the Hub, so loading
        # it through AutoModel requires trust_remote_code=True.
        # NOTE(review): this executes code from the model repository; consider
        # pinning a `revision` once a vetted commit is chosen.
        model = AutoModel.from_pretrained(
            _PREDICT_MODEL_NAME, trust_remote_code=True
        )
        _PREDICT_RESOURCES["pair"] = (tokenizer, model)
    return _PREDICT_RESOURCES["pair"]


# Define the Gradio interface function
def predict(input_text):
    """Run ``input_text`` through the pre-trained model.

    Args:
        input_text: Text to encode; passed to the tokenizer as-is.

    Returns:
        ``outputs.last_hidden_state`` from the model's forward pass, as a
        torch tensor.
    """
    # Reuse the cached tokenizer/model; reloading them on every call made
    # each request pay the full model-loading cost.
    tokenizer, model = _load_predict_resources()

    # Tokenize input and run a forward pass without tracking gradients.
    inputs = tokenizer(input_text, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Process the outputs as needed (e.g., extracting relevant information).
    return outputs.last_hidden_state
# Build the Gradio interface
def build_interface():
    """Load the data, run the training step, and assemble the Gradio UI.

    Returns:
        A ``gr.Interface`` wired to ``predict``, or ``None`` when
        ``load_dataset`` returned ``None`` (the dataset could not be read).

    Raises:
        FileNotFoundError: Propagated from ``load_dataset`` when the dataset
            file does not exist.
    """
    df = load_dataset()  # Load your dataset
    if df is None:
        return None

    df = preprocess_data(df)  # Preprocess the dataset
    # Train your model. The returned model is not bound to a name because
    # nothing here consumes it: predict() obtains its own model.
    train_model(df)

    # `gr.inputs.Textbox` belonged to the legacy `gr.inputs` namespace, which
    # was removed in Gradio 4; the component now lives at `gr.Textbox`.
    iface = gr.Interface(
        fn=predict,
        inputs=gr.Textbox(lines=2, placeholder="Enter text here..."),
        outputs="text",
    )
    return iface
# Script entry point: build the Gradio interface and serve it, or report why
# nothing was launched.
if __name__ == "__main__":
    iface = build_interface()
    if not iface:
        print("Failed to build the Gradio interface. Please check the dataset and model.")
    else:
        iface.launch()