Spaces:

MicoGuild
/

Olubakka

Sleeping

App Files Files Community

Sachi Wagaarachchi committed 29 days ago

Commit

8483978

1 Parent(s): 24e6560

refactor update

Browse files

Files changed (15) hide show

app.py +0 -64
pyproject.toml +19 -0
requirements.txt +0 -1
run.py +10 -0
src/__init__.py +0 -0
src/app.py +72 -0
src/chat_logic.py +56 -0
src/gradio_qwen_app.egg-info/PKG-INFO +10 -0
src/gradio_qwen_app.egg-info/SOURCES.txt +7 -0
src/gradio_qwen_app.egg-info/dependency_links.txt +1 -0
src/gradio_qwen_app.egg-info/requires.txt +5 -0
src/gradio_qwen_app.egg-info/top_level.txt +1 -0
src/models.py +30 -0
src/utils.py +13 -0
src/vector_db.py +9 -0

app.py DELETED Viewed

@@ -1,64 +0,0 @@
-import gradio as gr
-from huggingface_hub import InferenceClient
-"""
-For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
-"""
-client = InferenceClient("HuggingFaceH4/zephyr-7b-beta")
-def respond(
-    message,
-    history: list[tuple[str, str]],
-    system_message,
-    max_tokens,
-    temperature,
-    top_p,
-):
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-    response = ""
-    for message in client.chat_completion(
-        messages,
-        max_tokens=max_tokens,
-        stream=True,
-        temperature=temperature,
-        top_p=top_p,
-    ):
-        token = message.choices[0].delta.content
-        response += token
-        yield response
-"""
-For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
-"""
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
-if __name__ == "__main__":
-    demo.launch()

pyproject.toml ADDED Viewed

	@@ -0,0 +1,19 @@

+[build-system]
+requires = ["setuptools", "wheel"]
+build-backend = "setuptools.build_meta"
+[project]
+name = "gradio-qwen-app"
+version = "0.1.0"
+description = "Gradio app with Qwen models"
+requires-python = ">=3.12"
+dependencies = [
+    "gradio>=4.0.0",
+    "transformers>=4.38.0",
+    "torch>=2.0.0",
+    "accelerate>=0.25.0",
+    "huggingface_hub==0.25.2",
+]
+[tool.setuptools]
+packages.find.where = ["src"]

requirements.txt DELETED Viewed

	@@ -1 +0,0 @@
1	- huggingface_hub==0.25.2

run.py ADDED Viewed

	@@ -0,0 +1,10 @@

+#!/usr/bin/env python3
+"""
+Run script for the Gradio Qwen application.
+This script imports and launches the Gradio app from the src package.
+"""
+from src.app import demo
+if __name__ == "__main__":
+    demo.launch()

src/__init__.py ADDED Viewed

File without changes

src/app.py ADDED Viewed

	@@ -0,0 +1,72 @@

+import gradio as gr
+from src.models import ModelManager
+from src.chat_logic import ChatProcessor
+from src.vector_db import VectorDBHandler
+import logging
+# Initialize components
+model_manager = ModelManager()
+vector_db = VectorDBHandler()
+chat_processor = ChatProcessor(model_manager, vector_db)
+# Configure logging
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+def respond(
+    message,
+    history: list[tuple[str, str]],
+    model_name: str,
+    system_message: str = "You are a Qwen3 assistant.",
+    max_new_tokens: int = 512,
+    temperature: float = 0.7,
+    top_p: float = 0.9,
+    top_k: int = 50,
+    repetition_penalty: float = 1.2
+):
+    """Stream a chat reply for the Gradio UI.
+
+    Delegates generation to the module-level ``chat_processor`` and yields
+    the reply accumulated so far after every streamed chunk, so each yielded
+    value is the full text produced up to that point.
+
+    Args:
+        message: The new user message.
+        history: Previous conversation turns, forwarded unchanged.
+        model_name: Key of the model to use, forwarded unchanged.
+        system_message: Accepted for the UI textbox but currently NOT
+            forwarded to the chat processor, so it has no effect on the reply.
+        max_new_tokens: Forwarded to the chat processor.
+        temperature: Forwarded to the chat processor.
+        top_p: Forwarded to the chat processor.
+        top_k: Forwarded to the chat processor.
+        repetition_penalty: Forwarded to the chat processor.
+
+    Yields:
+        The cumulative response text; on failure, a single ``"Error: ..."``
+        string instead of raising.
+    """
+    try:
+        # Process chat through ChatProcessor
+        response_generator = chat_processor.process_chat(
+            message=message,
+            history=history,
+            model_name=model_name,
+            temperature=temperature,
+            max_new_tokens=max_new_tokens,
+            top_p=top_p,
+            top_k=top_k,
+            repetition_penalty=repetition_penalty,
+        )
+        # Stream response tokens: the UI expects the whole text each time,
+        # not just the newest chunk.
+        response = ""
+        for token in response_generator:
+            response += token
+            yield response
+    except Exception as e:
+        # logger.exception keeps the traceback, which logger.error discarded.
+        logger.exception("Chat response error: %s", e)
+        yield f"Error: {str(e)}"
+# Create Gradio interface
+# NOTE(review): the additional inputs below are listed in the same order as
+# respond()'s parameters after (message, history): model_name, system_message,
+# max_new_tokens, temperature, top_p, top_k, repetition_penalty. Presumably
+# Gradio passes them positionally, so keep the two lists in sync — confirm
+# against the ChatInterface documentation before reordering either one.
+demo = gr.ChatInterface(
+    respond,
+    additional_inputs=[
+        # Choices must match the keys the model manager knows about.
+        gr.Dropdown(
+            choices=["Qwen3-14B", "Qwen3-7B"],
+            value="Qwen3-7B",
+            label="Model Selection"
+        ),
+        gr.Textbox(value="You are a Qwen3 assistant.", label="System message"),
+        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+        gr.Slider(minimum=0.1, maximum=1.0, value=0.9, step=0.05, label="Top-p"),
+        gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Top-k"),
+        gr.Slider(minimum=1.0, maximum=2.0, value=1.2, step=0.1, label="Repetition penalty")
+    ],
+)
+# Launch only when this module is executed directly, not when it is imported.
+if __name__ == "__main__":
+    demo.launch()

src/chat_logic.py ADDED Viewed

	@@ -0,0 +1,56 @@

+from transformers import TextIteratorStreamer
+import threading
+from src.utils import format_prompt
+import logging
+class ChatProcessor:
+    """Processes chat interactions using Qwen models.
+
+    Builds a prompt from the conversation, runs generation on a background
+    thread, and streams the decoded text back to the caller chunk by chunk.
+    """
+    def __init__(self, model_manager, vector_db):
+        """Store the collaborators used during chat processing.
+
+        Args:
+            model_manager: Object exposing ``get_pipeline(model_name)``.
+            vector_db: Kept on the instance; not consulted by
+                ``process_chat`` at present.
+        """
+        self.model_manager = model_manager
+        self.vector_db = vector_db
+        self.logger = logging.getLogger(__name__)
+    def process_chat(self, message, history, model_name, temperature=0.7,
+                    max_new_tokens=512, top_p=0.9, top_k=50, repetition_penalty=1.2):
+        """Process chat input and generate a streaming response.
+
+        Args:
+            message: The new user message.
+            history: Previous turns, passed to ``format_prompt``.
+            model_name: Key understood by the model manager.
+            temperature: Sampling temperature.
+            max_new_tokens: Upper bound on generated tokens.
+            top_p: Nucleus-sampling threshold.
+            top_k: Top-k sampling cutoff.
+            repetition_penalty: Penalty applied to repeated tokens.
+
+        Yields:
+            Each newly decoded chunk of text (not the cumulative text). On
+            failure a single ``"Error: ..."`` chunk is yielded instead of
+            raising.
+
+        Returns:
+            The full response text, available only through
+            ``StopIteration.value`` / ``yield from``.
+        """
+        try:
+            # Format prompt with history
+            prompt = format_prompt(message, history)
+            # Get model pipeline
+            pipe = self.model_manager.get_pipeline(model_name)
+            # Set up streamer
+            streamer = TextIteratorStreamer(
+                pipe.tokenizer,
+                skip_prompt=True,
+                skip_special_tokens=True
+            )
+            # Tokenize and move the tensors to the model's device; the model
+            # may live on an accelerator, and CPU inputs would then fail.
+            encoded = pipe.tokenizer(prompt, return_tensors="pt").to(pipe.model.device)
+            # Prepare generation kwargs. Spreading the encoding passes the
+            # attention mask along with the input ids.
+            generate_kwargs = {
+                **encoded,
+                "max_new_tokens": max_new_tokens,
+                # Without do_sample the sampling knobs below can be ignored.
+                "do_sample": True,
+                "temperature": temperature,
+                "top_p": top_p,
+                "top_k": top_k,
+                "repetition_penalty": repetition_penalty,
+                "streamer": streamer
+            }
+            worker_errors = []
+            def _generate():
+                # Runs on the worker thread. If generation fails, record the
+                # error and close the stream so the consumer loop below does
+                # not block forever waiting for tokens that never arrive.
+                try:
+                    pipe.model.generate(**generate_kwargs)
+                except Exception as exc:
+                    worker_errors.append(exc)
+                    streamer.end()
+            # Start generation thread; daemon so an abandoned generation
+            # cannot keep the process alive at shutdown.
+            thread = threading.Thread(target=_generate, daemon=True)
+            thread.start()
+            # Stream response
+            response = ""
+            for token in streamer:
+                response += token
+                yield token
+            thread.join()
+            if worker_errors:
+                # Surface the worker failure through the handler below.
+                raise worker_errors[0]
+            # Update history (handled by Gradio UI)
+            return response
+        except Exception as e:
+            self.logger.exception("Chat processing error: %s", e)
+            yield f"Error: {str(e)}"

src/gradio_qwen_app.egg-info/PKG-INFO ADDED Viewed

	@@ -0,0 +1,10 @@

+Metadata-Version: 2.4
+Name: gradio-qwen-app
+Version: 0.1.0
+Summary: Gradio app with Qwen models
+Requires-Python: >=3.12
+Requires-Dist: gradio>=4.0.0
+Requires-Dist: transformers>=4.38.0
+Requires-Dist: torch>=2.0.0
+Requires-Dist: accelerate>=0.25.0
+Requires-Dist: huggingface_hub==0.25.2

src/gradio_qwen_app.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,7 @@

+README.md
+pyproject.toml
+src/gradio_qwen_app.egg-info/PKG-INFO
+src/gradio_qwen_app.egg-info/SOURCES.txt
+src/gradio_qwen_app.egg-info/dependency_links.txt
+src/gradio_qwen_app.egg-info/requires.txt
+src/gradio_qwen_app.egg-info/top_level.txt

src/gradio_qwen_app.egg-info/dependency_links.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

src/gradio_qwen_app.egg-info/requires.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio>=4.0.0
+transformers>=4.38.0
+torch>=2.0.0
+accelerate>=0.25.0
+huggingface_hub==0.25.2

src/gradio_qwen_app.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+

src/models.py ADDED Viewed

	@@ -0,0 +1,30 @@

+from transformers import pipeline
+import logging
+class ModelManager:
+    """Manages loading and caching of Qwen models.
+
+    Pipelines are created lazily on first request and then reused for the
+    lifetime of the instance.
+    """
+    def __init__(self):
+        """Set up the model registry and an empty pipeline cache."""
+        # Display name -> Hugging Face model id.
+        # NOTE(review): confirm that "Qwen/Qwen3-7B" exists on the Hub; the
+        # Qwen3 release appears to ship an 8B rather than a 7B checkpoint.
+        self.models = {
+            "Qwen3-14B": "Qwen/Qwen3-14B",
+            "Qwen3-7B": "Qwen/Qwen3-7B"
+        }
+        self._pipelines = {}
+        self.logger = logging.getLogger(__name__)
+    def get_pipeline(self, model_name):
+        """Get or create a model pipeline.
+
+        Args:
+            model_name: A key of ``self.models``.
+
+        Returns:
+            The cached pipeline for ``model_name``, created on first use.
+
+        Raises:
+            ValueError: If ``model_name`` is not a registered model.
+        """
+        if model_name in self._pipelines:
+            return self._pipelines[model_name]
+        # Only the registry lookup is guarded: a KeyError raised while the
+        # model is loading must not be reported as "model not found".
+        try:
+            model_id = self.models[model_name]
+        except KeyError:
+            raise ValueError(f"Model {model_name} not found in available models") from None
+        self.logger.info("Loading model: %s", model_id)
+        pipe = pipeline(
+            "text-generation",
+            model=model_id,
+            device_map="auto"
+        )
+        self._pipelines[model_name] = pipe
+        return pipe

src/utils.py ADDED Viewed

	@@ -0,0 +1,13 @@

+def format_prompt(message, history):
+    """Format message and history into a prompt for Qwen models.
+
+    Args:
+        message: The new user message.
+        history: Previous turns, either as ``(user, assistant)`` pairs or as
+            ``{"role": ..., "content": ...}`` dicts. May be empty or ``None``.
+
+    Returns:
+        ``message`` unchanged when there is no history; otherwise every past
+        turn on its own ``<|User|>:`` / ``<|Assistant|>:`` line, followed by
+        the new message and an open ``<|Assistant|>:`` cue.
+    """
+    if not history:
+        return message
+    # Collect pieces and join once instead of growing a string in a loop.
+    parts = []
+    for entry in history:
+        if isinstance(entry, dict):
+            # Messages-style history: one dict per turn. Unpacking a dict
+            # like a pair would silently yield its keys, not its content.
+            role = entry.get("role")
+            content = entry.get("content")
+            if content is None:
+                continue
+            if role == "user":
+                parts.append(f"<|User|>: {content}\n")
+            elif role == "assistant":
+                parts.append(f"<|Assistant|>: {content}\n")
+            continue
+        user_msg, assistant_msg = entry
+        # A missing half of a pair is skipped so the literal text "None"
+        # never reaches the model.
+        if user_msg is not None:
+            parts.append(f"<|User|>: {user_msg}\n")
+        if assistant_msg is not None:
+            parts.append(f"<|Assistant|>: {assistant_msg}\n")
+    # Add current message
+    parts.append(f"<|User|>: {message}\n<|Assistant|>:")
+    return "".join(parts)

src/vector_db.py ADDED Viewed

	@@ -0,0 +1,9 @@

+class VectorDBHandler:
+    """Stub standing in for a future vector-store backend."""
+    def __init__(self):
+        # Nothing to set up until a real backend is wired in.
+        return None
+    def retrieve(self, query, k=5):
+        """Return documents relevant to *query*; the stub never finds any."""
+        # Placeholder: both arguments are ignored and a fresh empty list is
+        # handed back on every call.
+        no_matches = []
+        return no_matches