import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import huggingface_hub
import os
import torch
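# Likely runtime dependencies (requirements.txt): gradio, transformers, torch,
# plus accelerate, which device_map="auto" below relies on.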
# --- Configuration ---
MODEL_ID = "Fastweb/FastwebMIIA-7B"
HF_TOKEN = os.getenv("HF_TOKEN") # For Hugging Face Spaces, set this as a Secret
# Global variable to store the pipeline
text_generator_pipeline = None
model_load_error = None
# --- Hugging Face Login and Model Loading ---
def load_model_and_pipeline():
    global text_generator_pipeline, model_load_error
    if text_generator_pipeline is not None:
        return True  # Already loaded
    if not HF_TOKEN:
        model_load_error = "Hugging Face token (HF_TOKEN) not found in Space secrets. Please add it."
        print(f"ERROR: {model_load_error}")
        return False
    try:
        print("Attempting to login to Hugging Face Hub with token...")
        huggingface_hub.login(token=HF_TOKEN)
        print("Login successful.")
        print(f"Loading tokenizer for {MODEL_ID}...")
        # trust_remote_code is necessary for some models that define custom architectures/code
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        print("Tokenizer loaded.")
        print(f"Loading model {MODEL_ID}...")
        # For large models, specify dtype and device_map:
        # device_map="auto" uses the GPU if available, otherwise falls back to CPU.
        # torch_dtype="auto" or torch.bfloat16 (if supported by the hardware) saves memory.
        # On CPU Spaces (free tier) a 7B model will be VERY slow or may run out of memory.
        # Quantization (e.g., bitsandbytes, which requires a GPU) can reduce memory use, but adds complexity.
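        # Hedged sketch (not enabled here): 4-bit loading via bitsandbytes, assuming a CUDA GPU
        # and the `bitsandbytes` package are available. It would replace the call below:
        #
        # from transformers import BitsAndBytesConfig
        # quant_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_compute_dtype=torch.bfloat16)
        # model = AutoModelForCausalLM.from_pretrained(
        #     MODEL_ID, trust_remote_code=True, quantization_config=quant_config, device_map="auto"
        # )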
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype="auto",  # or torch.bfloat16 if on an A10G or similar GPU
            device_map="auto"    # "auto" is good for single/multi GPU or CPU fallback
        )
        print("Model loaded.")
        # MIIA is an instruct/chat model, so text-generation is the appropriate task
        text_generator_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            # device=0 if torch.cuda.is_available() else -1  # device_map handles this
        )
        print("Text generation pipeline created successfully.")
        model_load_error = None
        return True
    except Exception as e:
        model_load_error = f"Error loading model/pipeline: {str(e)}. Check model name, token, and Space resources (RAM/GPU)."
        print(f"ERROR: {model_load_error}")
        text_generator_pipeline = None  # Ensure it's None on error
        return False

# --- Text Analysis Function ---
def analyze_text(text_input, file_upload, custom_instruction, max_new_tokens, temperature, top_p):
    global text_generator_pipeline, model_load_error
    if text_generator_pipeline is None:
        if model_load_error:
            return f"Model not loaded. Error: {model_load_error}"
        else:
            return "Model is not loaded. Please ensure HF_TOKEN is set and the Space has enough resources."
    content_to_analyze = ""
    if file_upload is not None:
        try:
            # Depending on the Gradio version, file_upload is either a filepath string
            # or a temporary-file object whose .name attribute holds the path.
            file_path = file_upload if isinstance(file_upload, str) else file_upload.name
            with open(file_path, 'r', encoding='utf-8') as f:
                content_to_analyze = f.read()
            if not content_to_analyze.strip() and not text_input.strip():
                # File is empty and no direct text input was provided
                return "Uploaded file is empty and no direct text input provided. Please provide some text."
            elif not content_to_analyze.strip() and text_input.strip():
                # File is empty but the text box has content
                content_to_analyze = text_input
            # If the file has content, it is used; if the user also typed text, the file
            # content takes precedence. Concatenating or choosing would be possible, but this is simpler.
        except Exception as e:
            return f"Error reading uploaded file: {str(e)}"
    elif text_input:
        content_to_analyze = text_input
    else:
        return "Please provide text directly or upload a document."
    if not content_to_analyze.strip():
        return "Input text is empty."
    # FastwebMIIA is an instruct model and expects Alpaca-style prompts:
    #
    # Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
    # ### Instruction:
    # {your instruction}
    # ### Input:
    # {your text}
    # ### Response:
    # {model generates this}
    prompt = f"""Di seguito è riportata un'istruzione che descrive un task, abbinata a un input che fornisce un contesto più ampio. Scrivi una risposta che completi la richiesta in modo appropriato.
### Istruzione:
{custom_instruction}
### Input:
{content_to_analyze}
### Risposta:"""
    # For English input, the preamble could be swapped:
    # prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
    # ### Instruction:
    # {custom_instruction}
    # ### Input:
    # {content_to_analyze}
    # ### Response:"""
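    # Hedged alternative (assumption: the model repo ships a chat template): the prompt
    # could instead be built with the tokenizer's chat template, e.g.:
    #
    # messages = [{"role": "user", "content": f"{custom_instruction}\n\n{content_to_analyze}"}]
    # prompt = text_generator_pipeline.tokenizer.apply_chat_template(
    #     messages, tokenize=False, add_generation_prompt=True
    # )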
    print("\n--- Sending to Model ---")
    print(f"Prompt:\n{prompt}")
    print(f"Max New Tokens: {max_new_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print("------------------------\n")
    try:
        # Note: text-generation pipelines return the prompt + completion by default,
        # so the prompt is stripped from the output below.
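        # Alternative: passing return_full_text=False to the pipeline call below would
        # return only the completion, making the marker-splitting afterwards unnecessary.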
        generated_outputs = text_generator_pipeline(
            prompt,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature) if float(temperature) > 0 else 0.7,  # sampling requires temperature > 0
            top_p=float(top_p),
            num_return_sequences=1
        )
        response = generated_outputs[0]['generated_text']
        # The response usually includes the prompt, so return only the newly generated part
        # after the "### Risposta:" marker.
        answer_marker = "### Risposta:"
        if answer_marker in response:
            return response.split(answer_marker, 1)[1].strip()
        else:
            # Fallback if the marker isn't found (shouldn't happen with good prompting);
            # alternatively, the original prompt string could be stripped from the response.
            return response
    except Exception as e:
        return f"Error during text generation: {str(e)}"

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
    # 📝 Text Analysis with {MODEL_ID}
    Test the capabilities of the `{MODEL_ID}` model for text analysis tasks on Italian or English texts.
    Provide an instruction and your text (directly or via upload).
    **Important:** Model loading can take a few minutes, especially on the first run or on CPU.
    This app is best run on a Hugging Face Space with GPU resources for this model size.
    """)
    with gr.Row():
        status_textbox = gr.Textbox(label="Model Status", value="Attempting to load model...", interactive=False)
    with gr.Tab("Text Input & Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                instruction_prompt = gr.Textbox(
                    label="Instruction for the Model (e.g., 'Riassumi questo testo', 'Identify main topics', 'Translate to English')",
                    value="Riassumi questo testo in 3 frasi concise.",
                    lines=3
                )
                text_area_input = gr.Textbox(label="Enter Text Directly", lines=10, placeholder="Paste your text here...")
                file_input = gr.File(label="Or Upload a Document (.txt)", file_types=['.txt'])
            with gr.Column(scale=3):
                output_text = gr.Textbox(label="Model Output", lines=20, interactive=False)
        with gr.Accordion("Advanced Generation Parameters", open=False):
            max_new_tokens_slider = gr.Slider(minimum=50, maximum=1024, value=256, step=10, label="Max New Tokens")
            temperature_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (higher is more creative)")
            top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P (nucleus sampling)")
        analyze_button = gr.Button("🧠 Analyze Text", variant="primary")
        analyze_button.click(
            fn=analyze_text,
            inputs=[text_area_input, file_input, instruction_prompt, max_new_tokens_slider, temperature_slider, top_p_slider],
            outputs=output_text
        )
    # Load the model when the app starts.
    # This will update the status_textbox after attempting to load.
    def startup_load_model():
        if load_model_and_pipeline():
            return "Model loaded successfully and ready."
        else:
            return f"Failed to load model. Error: {model_load_error or 'Unknown error during startup.'}"

    demo.load(startup_load_model, outputs=status_textbox)

if __name__ == "__main__":
    # For local testing, set HF_TOKEN as an environment variable or log in via the CLI:
    #   HF_TOKEN="your_hf_token_here" python app.py
    # If it isn't set, model access will fail unless you've run `huggingface-cli login`.
    if not HF_TOKEN:
        print("WARNING: HF_TOKEN environment variable not set.")
        print("For local execution, either set HF_TOKEN or ensure you are logged in via 'huggingface-cli login'.")
        # Fall back to the token cached by `huggingface-cli login`, if any
        try:
            HF_TOKEN = huggingface_hub.get_token()
            if HF_TOKEN:
                print("Using token from huggingface-cli login.")
            else:
                print("Could not retrieve token from CLI login. Model access might fail.")
        except Exception as e:
            print(f"Could not check CLI login status: {e}. Model access might fail.")

    demo.queue().launch(debug=True, share=False)  # share=True for a public link when running locally