YOUSEF2434 committed on
Commit e355996 · verified · 1 Parent(s): 8c83dc0

Update app.py

Files changed (1): app.py +41 -40
app.py CHANGED
@@ -1,42 +1,43 @@
  import os
  import requests
- from transformers import pipeline
- from flask import Flask, render_template, request, jsonify
-
- app = Flask(__name__)
-
- # Define model URL and local path
- MODEL_URL = "https://huggingface.co/unsloth/Qwen3-4B-128K-GGUF/resolve/main/Qwen3-4B-128K-UD-IQ1_M.gguf"
- MODEL_PATH = "Qwen3-4B-128K-UD-IQ1_M.gguf"
-
- # Function to download the model
- def download_model():
-     if not os.path.exists(MODEL_PATH):
-         print("Downloading the model...")
-         response = requests.get(MODEL_URL, stream=True)
-         with open(MODEL_PATH, 'wb') as model_file:
-             for chunk in response.iter_content(chunk_size=128):
-                 model_file.write(chunk)
-         print("Model downloaded successfully.")
-
- # Load the model with Hugging Face Transformers pipeline
- def load_model():
-     download_model()
-     model = pipeline("text-generation", model=MODEL_PATH)
-     return model
-
- # Global model object
- model = load_model()
-
- @app.route('/')
- def index():
-     return render_template('index.html')
-
- @app.route('/generate', methods=['POST'])
- def generate():
-     user_input = request.form['message']
-     response = model(user_input, max_length=100)
-     return jsonify({"response": response[0]['generated_text']})
-
- if __name__ == '__main__':
-     app.run(debug=True)
 
 
+ import gradio as gr
+ from llama_cpp import Llama
+
+ MODEL_URL = "https://cas-bridge.xethub.hf.co/xet-bridge-us/680f85f7f3cf2673404f897f/d6cd862a0b513dd35067121f0413e369b37aa9cacde526e31fc016c68ce0d305?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Content-Sha256=UNSIGNED-PAYLOAD&X-Amz-Credential=cas%2F20250503%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250503T191219Z&X-Amz-Expires=3600&X-Amz-Signature=2721d6c78a7db9b4d9b0ed1452ce3017d82b3392a190ae72b3755d873bc6c3df&X-Amz-SignedHeaders=host&X-Xet-Cas-Uid=67e6d6e1648b9f55980fa892&response-content-disposition=attachment%3B+filename*%3DUTF-8%27%27Qwen3-4B-128K-Q4_K_M.gguf%3B+filename%3D%22Qwen3-4B-128K-Q4_K_M.gguf%22%3B&x-id=GetObject&Expires=1746303139&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0NjMwMzEzOX19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2FzLWJyaWRnZS54ZXRodWIuaGYuY28veGV0LWJyaWRnZS11cy82ODBmODVmN2YzY2YyNjczNDA0Zjg5N2YvZDZjZDg2MmEwYjUxM2RkMzUwNjcxMjFmMDQxM2UzNjliMzdhYTljYWNkZTUyNmUzMWZjMDE2YzY4Y2UwZDMwNSoifV19&Signature=lxo1PdbqSDERIivvWyQ88rq6oOx0DF5aefiy1LClNYXtBexSV476eNjAEl0wwE1nf7rW%7EcAIctjqzl%7ElVTEFSCosTEACuRrgqtGjyP94xL0xBFMDv1lqJ6E5UFokq0FTRNNd84Xt3GthKDMYanseX9kGpHBNpCwvlx-BRrNF6cbd2XWcCcmetP-NUrHtmoEHY89LVAFb72EXx7edlsOIMOWGzOYcgHI-IfuM2U4m%7Epquxo429CjrEbUi9xpQIQFVBvWHThIJN2LT5NqXpUpPyXswEJyuQDZ0sDTgBEh5gdHtwAWf0cF0i285VgitHIty8eYLEeYNu7J%7EqEDBu7RrYQ__&Key-Pair-Id=K2L8F4GPSG1IFC" # truncated for clarity
+ MODEL_PATH = "Qwen3-4B-128K-Q4_K_M.gguf"
+
+ # Download model if not already downloaded
+ if not os.path.exists(MODEL_PATH):
+     print("Downloading model...")
+     with requests.get(MODEL_URL, stream=True) as r:
+         r.raise_for_status()
+         with open(MODEL_PATH, 'wb') as f:
+             for chunk in r.iter_content(chunk_size=8192):
+                 f.write(chunk)
+     print("Model downloaded.")
+
+ # Load the model
+ llm = Llama(
+     model_path=MODEL_PATH,
+     n_ctx=8192,
+     n_threads=4,
+     n_gpu_layers=20,  # Adjust for HF GPU environment
+     chat_format="chatml"
+ )
+
+ def chat_interface(message, history):
+     if history is None:
+         history = []
+
+     chat_prompt = []
+     for user_msg, bot_msg in history:
+         chat_prompt.append({"role": "user", "content": user_msg})
+         chat_prompt.append({"role": "assistant", "content": bot_msg})
+     chat_prompt.append({"role": "user", "content": message})
+
+     response = llm.create_chat_completion(messages=chat_prompt, stream=False)
+     reply = response["choices"][0]["message"]["content"]
+     history.append((message, reply))
+     return reply, history
+
+ gr.ChatInterface(fn=chat_interface, title="Qwen3-4B Chat").launch()
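One thing worth flagging in the new version: MODEL_URL is a pre-signed cas-bridge/S3 link with `X-Amz-Expires=3600`, so it stops working roughly an hour after it was generated, and any Space restart after that will fail the download. A more durable option is the stable `resolve/main` URL the old version used, or the Hub client. A minimal sketch, assuming the file is still published in the `unsloth/Qwen3-4B-128K-GGUF` repo (the repo the old code pointed at):

```python
# Minimal sketch: fetch the GGUF through huggingface_hub instead of a
# signed URL, so the link never expires. Assumes the file is still hosted
# in unsloth/Qwen3-4B-128K-GGUF; hf_hub_download returns the local path
# and skips the download when the file is already cached.
from huggingface_hub import hf_hub_download

MODEL_PATH = hf_hub_download(
    repo_id="unsloth/Qwen3-4B-128K-GGUF",
    filename="Qwen3-4B-128K-Q4_K_M.gguf",
)
```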
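Separately, the new `chat_interface` returns `(reply, history)`, but `gr.ChatInterface` expects its fn to return just the reply; it tracks conversation history itself, so the tuple would show up literally in the chat window. A sketch of a handler matching that contract, assuming Gradio's tuple-style history format:

```python
# Sketch of a handler matching gr.ChatInterface's contract:
# fn(message, history) -> reply string; Gradio manages history itself.
# Assumes tuple-style history ([(user_msg, bot_msg), ...]).
def chat_interface(message, history):
    messages = []
    for user_msg, bot_msg in history or []:
        messages.append({"role": "user", "content": user_msg})
        messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    response = llm.create_chat_completion(messages=messages, stream=False)
    return response["choices"][0]["message"]["content"]
```

Since `gr.ChatInterface` also accepts generator functions, passing `stream=True` to `create_chat_completion` and yielding the accumulating text would give token-by-token streaming with the same interface.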