Spaces:

ankanpy
/

Qwen3xOllama

Runtime error

App Files Files Community

ankanpy committed on 21 days ago

Commit

fffe03d

verified ·

1 Parent(s): 31ed265

Upload 4 files

Browse files

Files changed (4) hide show

Dockerfile +48 -0
app.py +280 -0
requirements.txt +1 -0
startup.sh +65 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,48 @@

+# 1. Base Image
+FROM python:3.11-slim
+# Set the volume for Ollama data
+# This is where Ollama will store its models and data
+# VOLUME /root/.ollama
+# 2. Set Environment Variables
+# PYTHONUNBUFFERED=1 makes Python write its output unbuffered so logs show up immediately.
+# GRADIO_SERVER_NAME and OLLAMA_HOST bind both servers to all interfaces.
+ENV PYTHONUNBUFFERED=1
+ENV GRADIO_SERVER_NAME="0.0.0.0"
+ENV OLLAMA_HOST="0.0.0.0:11434"
+# 3. Set Working Directory
+WORKDIR /app
+# 4. Install System Dependencies
+# curl and ca-certificates are needed to download the Ollama installer and
+# for the readiness probe in startup.sh.
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+    curl \
+    ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+# 5. Install Ollama
+RUN curl -fsSL https://ollama.com/install.sh | sh
+# 6. Copy Application Requirements
+COPY requirements.txt .
+# 7. Install Python Dependencies
+RUN pip install --no-cache-dir -r requirements.txt
+# 8. Copy Your Application Code
+COPY app.py .
+COPY startup.sh .
+# COPY keeps the permission bits of the build context, so make sure the
+# entrypoint script is executable even if the uploaded file lost its +x bit.
+RUN chmod +x ./startup.sh
+# 9. Define Models to Pull (as an Argument with a default list)
+# Default models if not overridden (e.g. --build-arg OLLAMA_PULL_MODELS="...").
+# Dockerfile comments must sit on their own line: a "#" after an instruction
+# is treated as part of that instruction's arguments, not as a comment.
+ARG OLLAMA_PULL_MODELS="qwen3:4b qwen3:1.7b qwen3:0.6b"
+# Make the ARG available as an ENVironment variable for startup.sh
+ENV OLLAMA_PULL_MODELS=${OLLAMA_PULL_MODELS}
+# 10. Expose Ports
+# 11434: Ollama API, 7860: Gradio UI
+EXPOSE 11434
+EXPOSE 7860
+# 11. Entrypoint/Startup Script (exec form, so startup.sh is started directly
+# without a wrapping "sh -c" and receives container signals itself)
+ENTRYPOINT ["./startup.sh"]

app.py ADDED Viewed

	@@ -0,0 +1,280 @@

+import gradio as gr
+import subprocess
+import time
+# import os # Not strictly needed in *this* version of app.py as no env vars are read
+# --- Ollama Helper Functions ---
+def check_ollama_running():
+    """Checks if the Ollama service is accessible."""
+    try:
+        subprocess.run(["ollama", "ps"], check=True, capture_output=True, timeout=5)
+        return True
+    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired):
+        return False
+def get_ollama_models():
+    """Gets a list of locally available Ollama models."""
+    # Removed the 'if not check_ollama_running(): return []'
+    # because it's called after AVAILABLE_MODELS is determined,
+    # and check_ollama_running is implicitly done by the initial AVAILABLE_MODELS load.
+    # However, in a container, Ollama should be running.
+    try:
+        result = subprocess.run(["ollama", "list"], check=True, capture_output=True, text=True, timeout=10)
+        models = []
+        lines = result.stdout.strip().split("\n")
+        if len(lines) > 1:
+            for line in lines[1:]:
+                parts = line.split()
+                if parts:
+                    models.append(parts[0])
+        # Ensure models are sorted and unique for consistent dropdown
+        return sorted(list(set(models)))
+    except (subprocess.CalledProcessError, FileNotFoundError, subprocess.TimeoutExpired) as e:
+        print(f"Error in get_ollama_models: {e}")  # Added a print for debugging
+        return []
+# --- Core Logic ---
+# Typing speed simulation
+CHAR_DELAY = 0.02  # Adjust for desired speed (0.01 is fast, 0.05 is slower)
+def reasoning_ollama_stream(model_name, prompt, mode):  # Renamed prompt_text back to prompt
+    """
+    Streams response from an Ollama model with simulated typing speed.
+    """
+    if not model_name:
+        yield "Error: No model selected. Please choose a model."
+        return
+    if not prompt.strip():  # Using original 'prompt' variable name
+        yield "Error: Prompt cannot be empty."
+        return
+    # This check is good for robustness, even in Docker.
+    if not check_ollama_running():
+        yield "Error: Ollama service does not seem to be running or accessible. Please start Ollama."
+        return
+    # This is a runtime check. The Dockerfile aims to pull models, but this confirms.
+    available_models_runtime = get_ollama_models()
+    if model_name not in available_models_runtime:
+        yield f"Error: Model '{model_name}' selected, but not found by Ollama at runtime. Available: {available_models_runtime}. Please ensure it was pulled."
+        return
+    # Using original 'prompt' and 'mode'
+    prompt_with_mode = f"{prompt.strip()} /{mode}"
+    command = ["ollama", "run", model_name]
+    displayed_response = ""
+    try:
+        process = subprocess.Popen(
+            command,
+            stdin=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.PIPE,
+            text=True,
+            bufsize=1,
+            universal_newlines=True,
+        )
+        process.stdin.write(prompt_with_mode + "\n")
+        process.stdin.close()
+        for line_chunk in iter(process.stdout.readline, ""):
+            if not line_chunk and process.poll() is not None:  # Check if process ended
+                break
+            for char in line_chunk:
+                displayed_response += char
+                yield displayed_response
+                if char.strip():  # Only sleep for non-whitespace characters
+                    time.sleep(CHAR_DELAY)
+        process.stdout.close()
+        return_code = process.wait(timeout=10)  # Added timeout to wait
+        if return_code != 0:
+            error_output = process.stderr.read()
+            error_message = f"\n\n--- Ollama Error (code {return_code}) ---\n{error_output.strip()}"
+            if displayed_response and not displayed_response.endswith(error_message):
+                displayed_response += error_message
+            elif not displayed_response:
+                displayed_response = error_message.strip()
+            yield displayed_response
+            return
+        if not displayed_response.strip() and return_code == 0:
+            yield "Model returned an empty response."
+        elif displayed_response:
+            yield displayed_response
+    except FileNotFoundError:
+        yield "Error: 'ollama' command not found. Please ensure Ollama is installed and in your PATH (or Dockerfile is correct)."
+    except subprocess.TimeoutExpired:  # Catch timeout from process.wait()
+        yield "Error: Ollama process timed out while waiting for completion."
+        if displayed_response:
+            yield displayed_response
+    except Exception as e:
+        yield f"An unexpected error occurred: {str(e)}"
+        if displayed_response:
+            yield displayed_response
+# --- Gradio UI ---
+# This runs once when the script starts.
+# In Docker, this will query the Ollama instance inside the container AFTER models are pulled by CMD.
+AVAILABLE_MODELS = get_ollama_models()
+QWEN_MODELS = [m for m in AVAILABLE_MODELS if "qwen" in m.lower()]
+INITIAL_MODEL = None
+# Prioritize qwen3:4b if available - This logic is from your original app.py
+if "qwen3:4b" in AVAILABLE_MODELS:
+    INITIAL_MODEL = "qwen3:4b"
+elif QWEN_MODELS:
+    INITIAL_MODEL = QWEN_MODELS[0]
+elif AVAILABLE_MODELS:
+    INITIAL_MODEL = AVAILABLE_MODELS[0]
+# If no models, INITIAL_MODEL remains None, and dropdown will show "No models found..."
+# Build the page. Components are created in declaration order inside the
+# Blocks/Row/Column context managers, so statement order here defines the layout.
+with gr.Blocks(title="Qwen3 x Ollama", theme=gr.themes.Soft()) as demo:
+    # Page title
+    gr.HTML(
+        """
+        <h1 style='text-align: center'>
+        Qwen3 Reasoning with Ollama
+        </h1>
+    """
+    )
+    # Header links
+    gr.HTML(
+        """
+        <h3 style='text-align: center'>
+        <a href='https://opencv.org/university/' target='_blank'>OpenCV Courses</a> | <a href='https://github.com/OpenCV-University' target='_blank'>Github</a>
+        </h3>
+        """
+    )
+    # Short usage notes
+    gr.Markdown(
+        """
+        - Interact with a Qwen3 model hosted on Ollama.
+        - Switch between `/think` and `/no_think` modes to explore the thinking process.
+        - The response will stream with a simulated typing effect.
+        """
+    )
+    with gr.Row():
+        # First column (scale=1): model choice, prompt, reasoning mode and buttons.
+        with gr.Column(scale=1):
+            model_selector = gr.Dropdown(
+                label="Select Model",
+                # When no model was found, a single placeholder entry is offered instead.
+                choices=AVAILABLE_MODELS if AVAILABLE_MODELS else ["No models found - check Ollama setup"],
+                value=INITIAL_MODEL,
+                interactive=True,
+            )
+            prompt_input = gr.Textbox(
+                label="Enter your prompt",
+                placeholder="e.g., Explain quantum entanglement in simple terms.",
+                lines=5,
+                elem_id="prompt-input",
+            )
+            # The selected value is appended to the prompt as "/<mode>" by reasoning_ollama_stream.
+            mode_radio = gr.Radio(
+                ["think", "no_think"],  # Kept original modes from your app.py
+                label="Reasoning Mode",
+                value="think",
+                info="`/think` encourages step-by-step reasoning. `/no_think` aims for a direct answer.",
+            )
+            with gr.Row():
+                submit_button = gr.Button("Generate Response", variant="primary")
+                clear_button = gr.ClearButton()
+        # Second column (scale=2): one-line status plus the streamed model response.
+        with gr.Column(scale=2):
+            status_output = gr.Textbox(
+                label="Status",
+                interactive=False,
+                lines=1,
+                placeholder="Awaiting submission...",
+                elem_id="status-output",
+            )
+            response_output = gr.Textbox(  # Kept as gr.Textbox as requested
+                label="Model Response", lines=20, interactive=False, show_copy_button=True, elem_id="response-output"
+            )
+    def handle_submit_wrapper(model, prompt, mode):
+        yield {status_output: "Processing... Preparing to stream response.", response_output: ""}
+        final_chunk = ""
+        # Using original variable names 'prompt' and 'mode' for reasoning_ollama_stream
+        for chunk in reasoning_ollama_stream(model, prompt, mode):
+            final_chunk = chunk
+            yield {status_output: "Streaming response...", response_output: chunk}
+        if "Error:" in final_chunk or "--- Ollama Error ---" in final_chunk:
+            yield {status_output: "Completed with issues.", response_output: final_chunk}
+        elif "Model returned an empty response." in final_chunk:
+            yield {status_output: "Model returned an empty response.", response_output: final_chunk}
+        elif not final_chunk.strip() and ("Error:" not in final_chunk and "--- Ollama Error ---" not in final_chunk):
+            yield {status_output: "Completed, but no substantive output received.", response_output: ""}
+        else:
+            yield {status_output: "Response generated successfully!", response_output: final_chunk}
+    # Run the streaming handler when the submit button is clicked; each dict it
+    # yields updates the status and response textboxes.
+    submit_button.click(
+        fn=handle_submit_wrapper,
+        inputs=[model_selector, prompt_input, mode_radio],
+        outputs=[status_output, response_output],
+    )
+    # Attach the prompt, response and status boxes to the clear button.
+    clear_button.add([prompt_input, response_output, status_output])
+    # Pick the model name shown in the example rows: INITIAL_MODEL when set,
+    # else the first available model, else the hard-coded "qwen3:4b".
+    # Note: the hard-coded fallback may name a model that is not actually installed.
+    example_model_for_ui = INITIAL_MODEL
+    if not example_model_for_ui and AVAILABLE_MODELS:
+        example_model_for_ui = AVAILABLE_MODELS[0]
+    elif not example_model_for_ui:  # Fallback if no models and INITIAL_MODEL is None
+        example_model_for_ui = "qwen3:4b"  # Default example model
+    gr.Examples(
+        examples=[
+            [example_model_for_ui, "What are the main pros and cons of using nuclear energy?", "think"],
+            # Second example: use a different model when the example model is "qwen3:4b".
+            # In that case INITIAL_MODEL is either "qwen3:4b" or None, so the inner
+            # expression resolves to "qwen3:1.7b" - which is not checked against AVAILABLE_MODELS.
+            [
+                (
+                    example_model_for_ui
+                    if example_model_for_ui != "qwen3:4b"
+                    else (INITIAL_MODEL if INITIAL_MODEL and INITIAL_MODEL != "qwen3:4b" else "qwen3:1.7b")
+                ),
+                "Write a short poem about a rainy day.",
+                "no_think",
+            ],
+            [example_model_for_ui, "Plan a 3-day trip to Paris, focusing on historical sites.", "think"],
+        ],
+        inputs=[model_selector, prompt_input, mode_radio],
+        outputs=[status_output, response_output],
+        fn=handle_submit_wrapper,
+        cache_examples=False,  # Cache examples can be True if inputs are static and fn is pure
+    )
+    # Footer
+    gr.HTML(
+        """
+        <h3 style='text-align: center'>
+        Developed with ❤️ by OpenCV
+        </h3>
+        """
+    )
+if __name__ == "__main__":
+    print("--- Gradio App Starting ---")  # Simplified print
+    print(f"Attempting to fetch Ollama models (initial load)... Result: {AVAILABLE_MODELS}")
+    print(f"Initial model for UI (if any): {INITIAL_MODEL}")
+    print(f"Gradio version: {gr.__version__}")
+    print(f"---------------------------")
+    # For local Docker testing, server_name="0.0.0.0" is important.
+    # For Hugging Face Spaces, demo.launch() is usually enough as it handles proxying.
+    demo.queue().launch(
+        server_name="0.0.0.0",
+        server_port=7860,
+        share=False,  # Set to True if you need a public link for local testing (requires internet)
+        # share=os.getenv("GRADIO_SHARE", "False").lower() == "true" # If using env var for share
+    )

requirements.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ gradio==5.31.0

startup.sh ADDED Viewed

	@@ -0,0 +1,65 @@

+#!/bin/bash
+# startup.sh
+# Starts the Ollama server in the background, waits until its HTTP endpoint
+# answers, then pulls every model listed in OLLAMA_PULL_MODELS.
+set -e # Exit immediately if a command exits with a non-zero status.
+echo "Starting Ollama server in the background..."
+ollama serve > /tmp/ollama.log 2>&1 &
+OLLAMA_PID=$! # Get PID of the backgrounded ollama serve
+echo "Waiting for Ollama to be ready (http://127.0.0.1:11434)..."
+timeout_seconds=120
+start_time=$(date +%s)
+while ! curl -s --fail -o /dev/null http://127.0.0.1:11434; do
+    # Fail fast when the server process has already died instead of polling
+    # until the timeout expires.
+    if ! kill -0 "$OLLAMA_PID" > /dev/null 2>&1; then
+        echo ""
+        echo "Ollama server process exited during startup. Check /tmp/ollama.log."
+        cat /tmp/ollama.log
+        exit 1
+    fi
+    current_time=$(date +%s)
+    elapsed_time=$((current_time - start_time))
+    if [ "$elapsed_time" -ge "$timeout_seconds" ]; then
+        echo "Ollama failed to start within $timeout_seconds seconds. Check /tmp/ollama.log."
+        cat /tmp/ollama.log
+        exit 1
+    fi
+    echo -n "."
+    sleep 2
+done
+echo ""
+echo "Ollama server started successfully."
+# OLLAMA_PULL_MODELS will be passed as an environment variable from Dockerfile
+echo "Models to pull from ENV: ${OLLAMA_PULL_MODELS}"
+# Intentionally unquoted: the variable is a space-separated list of model names.
+# shellcheck disable=SC2086
+for model_name in ${OLLAMA_PULL_MODELS}; do
+    echo "Pulling model: ${model_name} (this may take several minutes)..."
+    # Test the command directly: with `set -e`, a separate `$?` check after a
+    # failing pull would never run because the script would already have exited.
+    if ollama pull "${model_name}"; then
+        echo "Model ${model_name} pulled successfully."
+    else
+        echo "Failed to pull model ${model_name}. Check logs or model name."
+    fi
+done
+# Stop the background Ollama server if it is still running.
+# Globals:   OLLAMA_PID (read)
+# Outputs:   progress messages on stdout
+# Called both from the signal trap and at normal script exit.
+cleanup() {
+    echo "Shutting down Ollama (PID: $OLLAMA_PID)..."
+    if kill -0 "$OLLAMA_PID" > /dev/null 2>&1; then # Check if process exists
+        # `|| true`: under `set -e` a non-zero status from kill (process already
+        # gone) or wait (e.g. the server was terminated by the signal) would
+        # otherwise abort the script in the middle of the cleanup.
+        kill "$OLLAMA_PID" || true
+        wait "$OLLAMA_PID" || true # Wait for Ollama to actually terminate
+        echo "Ollama shut down."
+    else
+        echo "Ollama process (PID: $OLLAMA_PID) not found or already stopped."
+    fi
+}
+# Trap signals to call the cleanup function
+# SIGINT is Ctrl+C, SIGTERM is `docker stop`
+trap cleanup SIGINT SIGTERM
+echo "Starting Gradio application (python app.py)..."
+# Run python app.py in the foreground. It will now be PID 1 (or close to it)
+# relative to this script, and signals will be handled by this script.
+python app.py &
+PYTHON_APP_PID=$!
+wait $PYTHON_APP_PID # Wait for the python app to exit
+# After python app exits, perform cleanup (this will also be called by trap)
+cleanup
+echo "Gradio application exited."