import gradio as gr
from transformers import pipeline, AutoModelForCausalLM, AutoTokenizer
import huggingface_hub
import os
import torch
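
# Expected dependencies (e.g. in requirements.txt): gradio, transformers, torch,
# accelerate (needed for device_map="auto"), and huggingface_hub.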

# --- Configuration ---
MODEL_ID = "Fastweb/FastwebMIIA-7B"
HF_TOKEN = os.getenv("HF_TOKEN")  # For Hugging Face Spaces, set this as a Secret

# Global variable to store the pipeline
text_generator_pipeline = None
model_load_error = None

# --- Hugging Face Login and Model Loading ---
def load_model_and_pipeline():
    global text_generator_pipeline, model_load_error
    if text_generator_pipeline is not None:
        return True # Already loaded

    if not HF_TOKEN:
        model_load_error = "Hugging Face token (HF_TOKEN) not found in Space secrets. Please add it."
        print(f"ERROR: {model_load_error}")
        return False

    try:
        print(f"Attempting to login to Hugging Face Hub with token...")
        huggingface_hub.login(token=HF_TOKEN)
        print("Login successful.")

        print(f"Loading tokenizer for {MODEL_ID}...")
        # trust_remote_code is necessary for some models that define custom architectures/code
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)
        print("Tokenizer loaded.")

        print(f"Loading model {MODEL_ID}...")
        # For large models, specify dtype and device_map
        # device_map="auto" will try to use GPU if available, otherwise CPU
        # torch_dtype="auto" or torch.bfloat16 (if supported by hardware) can save memory
        # On CPU Spaces (free tier), this will be VERY slow or may run out of memory.
        # Quantization (e.g. bitsandbytes) can shrink the memory footprint, but it is
        # more complex and generally requires a CUDA GPU; see the commented-out sketch below.
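        # A minimal sketch of 4-bit quantized loading, assuming a CUDA GPU and that the
        # `bitsandbytes` package is installed (untested with this particular model):
        #
        #   from transformers import BitsAndBytesConfig
        #   bnb_config = BitsAndBytesConfig(
        #       load_in_4bit=True,
        #       bnb_4bit_compute_dtype=torch.bfloat16,
        #   )
        #   model = AutoModelForCausalLM.from_pretrained(
        #       MODEL_ID,
        #       trust_remote_code=True,
        #       quantization_config=bnb_config,
        #       device_map="auto",
        #   )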
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype="auto", # or torch.bfloat16 if on A10G or similar
            device_map="auto" # "auto" is good for single/multi GPU or CPU fallback
        )
        print("Model loaded.")

        # MIIA is an instruct/chat model, so text-generation is the appropriate task
        text_generator_pipeline = pipeline(
            "text-generation",
            model=model,
            tokenizer=tokenizer,
            # device=0 if torch.cuda.is_available() else -1 # device_map handles this
        )
        print("Text generation pipeline created successfully.")
        model_load_error = None
        return True
    except Exception as e:
        model_load_error = f"Error loading model/pipeline: {str(e)}. Check model name, token, and Space resources (RAM/GPU)."
        print(f"ERROR: {model_load_error}")
        text_generator_pipeline = None # Ensure it's None on error
        return False

# --- Text Analysis Function ---
def analyze_text(text_input, file_upload, custom_instruction, max_new_tokens, temperature, top_p):
    global text_generator_pipeline, model_load_error

    if text_generator_pipeline is None:
        if model_load_error:
            return f"Model not loaded. Error: {model_load_error}"
        else:
            return "Model is not loaded. Please ensure HF_TOKEN is set and the Space has enough resources."

    content_to_analyze = ""
    if file_upload is not None:
        try:
            # Depending on the Gradio version, gr.File passes either a file path (str)
            # or a tempfile-like object whose .name attribute holds the path.
            file_path = file_upload if isinstance(file_upload, str) else file_upload.name
            with open(file_path, 'r', encoding='utf-8') as f:
                content_to_analyze = f.read()
            if not content_to_analyze.strip() and not text_input.strip():
                # File is empty and no direct text input was provided either.
                return "Uploaded file is empty and no direct text input provided. Please provide some text."
            elif not content_to_analyze.strip() and text_input.strip():
                # File is empty but the text box has content, so fall back to it.
                content_to_analyze = text_input
            # If the file has content it is used; if the user also typed text, the file
            # content takes precedence. Concatenating or letting the user choose would
            # also be possible, but this keeps the logic simple.

        except Exception as e:
            return f"Error reading uploaded file: {str(e)}"
    elif text_input:
        content_to_analyze = text_input
    else:
        return "Please provide text directly or upload a document."

    if not content_to_analyze.strip():
        return "Input text is empty."

    # FastwebMIIA is an instruct model. It expects prompts like Alpaca.
    # Structure:
    # Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
    # ### Instruction:
    # {your instruction}
    # ### Input:
    # {your text}
    # ### Response:
    # {model generates this}

    prompt = f"""Di seguito è riportata un'istruzione che descrive un task, abbinata a un input che fornisce un contesto più ampio. Scrivi una risposta che completi la richiesta in modo appropriato.

### Istruzione:
{custom_instruction}

### Input:
{content_to_analyze}

### Risposta:"""

    # For English, you might change the preamble:
    # prompt = f"""Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
    # ### Instruction:
    # {custom_instruction}
    # ### Input:
    # {content_to_analyze}
    # ### Response:"""
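    # If the tokenizer ships a chat template, the prompt could instead be built with
    # tokenizer.apply_chat_template (an untested assumption for this particular model):
    #
    #   messages = [{"role": "user",
    #                "content": f"{custom_instruction}\n\n{content_to_analyze}"}]
    #   prompt = text_generator_pipeline.tokenizer.apply_chat_template(
    #       messages, tokenize=False, add_generation_prompt=True
    #   )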


    print(f"\n--- Sending to Model ---")
    print(f"Prompt:\n{prompt}")
    print(f"Max New Tokens: {max_new_tokens}, Temperature: {temperature}, Top P: {top_p}")
    print("------------------------\n")

    try:
        # Note: text-generation pipelines often return the prompt + completion.
        # We might need to strip the prompt from the output if desired.
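        # An alternative (not used here, to keep the marker-based stripping below) is to
        # request only the completion from the pipeline via return_full_text=False, e.g.:
        #
        #   generated_outputs = text_generator_pipeline(
        #       prompt,
        #       max_new_tokens=int(max_new_tokens),
        #       do_sample=True,
        #       return_full_text=False,
        #   )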
        generated_outputs = text_generator_pipeline(
            prompt,
            max_new_tokens=int(max_new_tokens),
            do_sample=True,
            temperature=float(temperature) if float(temperature) > 0 else 0.7,  # temperature must be > 0 when do_sample=True; fall back to 0.7
            top_p=float(top_p),
            num_return_sequences=1
        )
        response = generated_outputs[0]['generated_text']

        # Often, the response includes the prompt. Let's try to return only the new part.
        # The model should generate text after "### Risposta:"
        answer_marker = "### Risposta:"
        if answer_marker in response:
            return response.split(answer_marker, 1)[1].strip()
        else:
            # Fallback if the marker isn't found (shouldn't happen with good prompting)
            return response # Or you could try to remove the original prompt string

    except Exception as e:
        return f"Error during text generation: {str(e)}"

# --- Gradio Interface ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(f"""
    # 📝 Text Analysis with {MODEL_ID}
    Test the capabilities of the `{MODEL_ID}` model for text analysis tasks on Italian or English texts.
    Provide an instruction and your text (directly or via upload).
    **Important:** Model loading can take a few minutes, especially on the first run or on CPU.
    This app is best run on a Hugging Face Space with GPU resources for this model size.
    """)

    with gr.Row():
        status_textbox = gr.Textbox(label="Model Status", value="Attempting to load model...", interactive=False)

    with gr.Tab("Text Input & Analysis"):
        with gr.Row():
            with gr.Column(scale=2):
                instruction_prompt = gr.Textbox(
                    label="Instruction for the Model (e.g., 'Riassumi questo testo', 'Identify main topics', 'Translate to English')",
                    value="Riassumi questo testo in 3 frasi concise.",
                    lines=3
                )
                text_area_input = gr.Textbox(label="Enter Text Directly", lines=10, placeholder="Paste your text here...")
                file_input = gr.File(label="Or Upload a Document (.txt)", file_types=['.txt'])
            with gr.Column(scale=3):
                output_text = gr.Textbox(label="Model Output", lines=20, interactive=False)

        with gr.Accordion("Advanced Generation Parameters", open=False):
            max_new_tokens_slider = gr.Slider(minimum=50, maximum=1024, value=256, step=10, label="Max New Tokens")
            temperature_slider = gr.Slider(minimum=0.1, maximum=1.5, value=0.7, step=0.1, label="Temperature (higher is more creative)")
            top_p_slider = gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top P (nucleus sampling)")

        analyze_button = gr.Button("🧠 Analyze Text", variant="primary")

    analyze_button.click(
        fn=analyze_text,
        inputs=[text_area_input, file_input, instruction_prompt, max_new_tokens_slider, temperature_slider, top_p_slider],
        outputs=output_text
    )

    # Attempt to load the model when the app's page is opened (demo.load fires on
    # page load) and report the outcome in status_textbox.
    def startup_load_model():
        if load_model_and_pipeline():
            return "Model loaded successfully and ready."
        else:
            return f"Failed to load model. Error: {model_load_error or 'Unknown error during startup.'}"

    demo.load(startup_load_model, outputs=status_textbox)


if __name__ == "__main__":
    # For local testing (ensure HF_TOKEN is set as an environment variable or you're logged in via CLI)
    # You would run: HF_TOKEN="your_hf_token_here" python app.py
    # If not set, it will fail unless you've done `huggingface-cli login`
    if not HF_TOKEN:
        print("WARNING: HF_TOKEN environment variable not set.")
        print("For local execution, either set HF_TOKEN or ensure you are logged in via 'huggingface-cli login'.")
        # Fall back to any token stored by `huggingface-cli login`, if available.
        try:
            HF_TOKEN = huggingface_hub.get_token()
            if HF_TOKEN:
                print("Using token from huggingface-cli login.")
            else:
                print("Could not retrieve token from CLI login. Model access might fail.")
        except Exception as e:
            print(f"Could not check CLI login status: {e}. Model access might fail.")

    demo.queue().launch(debug=True, share=False) # share=True for public link if local