import json
import re
from itertools import chain

import gradio as gr
from huggingface_hub import hf_hub_download
from llama_cpp import Llama


def load_model():
    """Download the GGUF model from the Hugging Face Hub and load it with llama.cpp.

    Returns:
        Llama: a loaded model ready for ``create_chat_completion`` calls.
    """
    repo_id = "forestav/gguf_lora_model"
    model_file = "unsloth.F16.gguf"

    local_path = hf_hub_download(
        repo_id=repo_id,
        filename=model_file
    )
    print(f"Loading model from: {local_path}")

    model = Llama(
        model_path=local_path,
        n_ctx=2048,   # context window (tokens)
        n_threads=8,  # CPU threads used for inference
    )
    return model


# System prompts per generation mode; unknown modes fall back to the default.
_SYSTEM_PROMPTS = {
    "code": "You are an expert coding assistant. Provide clean, efficient code solutions.",
    "creative": "You are a creative writing assistant. Generate imaginative and engaging content.",
    "analytical": "You are an analytical assistant. Provide deep, structured insights and reasoning.",
}
_DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant."


# Enhanced generation with multiple modes
def generate_response(message, history, mode='chat'):
    """Generate a chat completion for *message*, conditioned on *history* and *mode*.

    Args:
        message: The latest user message.
        history: Chat history as a list of [user, assistant] message pairs
            (Gradio ChatInterface tuple format — TODO confirm against the
            installed Gradio version, newer ones default to dict messages).
        mode: One of 'chat', 'code', 'creative', 'analytical'; selects the
            system prompt. Anything else behaves like 'chat'.

    Returns:
        str: The assistant's reply text.
    """
    system_prompt = _SYSTEM_PROMPTS.get(mode, _DEFAULT_SYSTEM_PROMPT)

    # Flatten the [user, assistant] pairs into one alternating sequence.
    # chain.from_iterable avoids the quadratic cost of sum(history, []).
    messages = [
        {"role": "system", "content": system_prompt},
        *[
            {"role": "user" if i % 2 == 0 else "assistant", "content": msg}
            for i, msg in enumerate(chain.from_iterable(history))
        ],
        {"role": "user", "content": message},
    ]

    # Generate response
    response = model.create_chat_completion(
        messages=messages,
        max_tokens=512,
        temperature=0.7,
        top_p=0.95,
    )
    return response['choices'][0]['message']['content']


# Extract structured data from text
def extract_structured_data(text):
    """Extract structured data from free-form text.

    Tries to parse an embedded JSON object first; if that fails, falls back
    to collecting ``key: value`` lines into a dict.

    Returns:
        dict: Parsed data (possibly empty), or ``{"error": ...}`` on an
        unexpected failure.
    """
    try:
        # Try to extract a JSON-like structure first.
        json_match = re.search(r'\{.*\}', text, re.DOTALL)
        if json_match:
            try:
                return json.loads(json_match.group(0))
            except json.JSONDecodeError:
                pass  # not valid JSON — fall through to key/value parsing

        # Fall back to custom parsing for key-value pairs.
        data = {}
        for line in text.split('\n'):
            if ':' in line:
                key, value = line.split(':', 1)
                data[key.strip()] = value.strip()
        return data
    except Exception as e:
        # Best-effort API: surface the failure as data instead of raising.
        return {"error": str(e)}


# Create Gradio interface with multiple tabs
def create_interface():
    """Build the multi-tab Gradio UI: one chat tab per generation mode plus a
    data-extraction tab.

    Returns:
        gr.Blocks: the assembled (not yet launched) demo.
    """
    with gr.Blocks() as demo:
        gr.Markdown("# Multi-Mode AI Assistant")

        with gr.Tabs():
            # Chat Interface
            with gr.TabItem("Conversational Chat"):
                gr.ChatInterface(
                    fn=lambda message, history: generate_response(message, history, 'chat'),
                    title="Conversational AI",
                    description="General-purpose conversation mode",
                )

            # Code Generation Tab
            with gr.TabItem("Code Assistant"):
                gr.ChatInterface(
                    fn=lambda message, history: generate_response(message, history, 'code'),
                    title="AI Code Generator",
                    description="Generate code snippets and solve programming challenges",
                )

            # Creative Writing Tab
            with gr.TabItem("Creative Writing"):
                gr.ChatInterface(
                    fn=lambda message, history: generate_response(message, history, 'creative'),
                    title="Creative Writing Assistant",
                    description="Generate stories, poems, and creative content",
                )

            # Analytical Tab — generate_response supports this mode but the
            # original UI never exposed it.
            with gr.TabItem("Analytical Reasoning"):
                gr.ChatInterface(
                    fn=lambda message, history: generate_response(message, history, 'analytical'),
                    title="Analytical Assistant",
                    description="Structured analysis and in-depth reasoning",
                )

            # Data Extraction Tab
            with gr.TabItem("Data Extractor"):
                with gr.Row():
                    text_input = gr.Textbox(label="Input Text")
                    extract_btn = gr.Button("Extract Structured Data")
                json_output = gr.JSON(label="Extracted Data")
                extract_btn.click(
                    fn=extract_structured_data,
                    inputs=text_input,
                    outputs=json_output,
                )

    return demo


# Load model globally so every tab shares one instance.
print("Starting model loading...")
model = load_model()
print("Model loaded successfully!")

# Create and launch the interface
demo = create_interface()
demo.launch(
    server_name="0.0.0.0",  # Necessary for Spaces
    server_port=7860,       # Standard port for Spaces
    share=False             # Don't need share link in Spaces
)