yagpt

Sleeping

App Files Files Community

kopilk committed on May 13

Commit

1b8cff4

verified ·

1 Parent(s): 91ff806

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -14

app.py CHANGED Viewed

@@ -1,19 +1,55 @@
-import gradio as gr
-import ollama
-# Function to generate responses using the Deepseek-r1 model
-def generate_response(question):
-    response = ollama.chat(model='YandexGPT-5-Lite-8B-instruct-GGUF', messages=[{'role': 'user', 'content': question}])
-    return response['message']['content']
-# Gradio interface
 iface = gr.Interface(
-    fn=generate_response,
-    inputs="text",
-    outputs="text",
-    title="Ask Yandex Anything",
-    description="Type your question and get answers directly from the Deepseek-r1 model."
-)
 # Launch the app
 if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860, pwa=True)

+from langchain_community.llms import Ollama
+from langchain.callbacks.manager import CallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+def get_llm():
+    callback_manager = CallbackManager([StreamingStdOutCallbackHandler()])
+    return Ollama(model='YandexGPT-5-Lite-8B-instruct-GGUF', callback_manager=callback_manager)
+# Interface Gradio
+def gradio_interface(input, history):
+    # Ajouter la requête de l'utilisateur à l'historique une seule fois
+    history.append((input, ""))
+    yield history, history
+    def worker():
+        llm = get_llm()
+        response = ""
+        for token in llm.stream(input, max_tokens=2048, temperature=0.7):
+            response += token
+            history[-1] = (input, response)
+            yield history, history
+    # Relay each streamed update from the worker() generator (no separate thread is used)
+    for result in worker():
+        yield result
 iface = gr.Interface(
+    fn=gradio_interface,
+    inputs=[
+        gr.Textbox(label="Input"),
+        gr.State(value=[])  # Ajout de l'historique
+    ],
+    outputs=[
+        gr.Chatbot(label="History"),  # Utilisation de Chatbot pour l'historique
+        gr.State()  # Ajout de l'historique
+    ],
+    title="Ollama Demo 🐳 🦙 🤗",
+    description="""
+    Bienvenue sur Docker_Ollama, un espace dédié à l'exploration et au test des modèles Ollama.
+    Cette Démo permet aux utilisateurs de tester tous les modèles Ollama dont la taille est inférieure à 10 milliards de paramètres directement depuis cette interface.
+    L'Application tourne sur une machine Hugging Face Free Space : 2 CPU - 16Gb RAM
+    Soyez patient. Chaque Nouveau Modèle Selectionner Devra Etre Télécharger Avant de Répondre.
+    Il faut compter 2 a 3 minutes pour la réponse d'un modèle 7b alors que quelques dizaines de secondes suffisent pour un modèle 1b.
+    """)
 # Launch the app
 if __name__ == "__main__":
     iface.launch(server_name="0.0.0.0", server_port=7860, pwa=True)