rahul7star committed · verified · Commit d20382c · 1 Parent(s): a905a0e

Update app.py

Files changed (1):
  1. app.py +33 -32
app.py CHANGED
@@ -1,16 +1,25 @@
 import gradio as gr
 import torch
-import asyncio
+import logging
 from transformers import AutoTokenizer, AutoModelForCausalLM
+import time
 
 # ---------------- CONFIG ----------------
 REPO_ID = "goonsai-com/civitaiprompts"
 SUBFOLDER = "gemma3-1B-goonsai-nsfw-100k"
 MODEL_NAME = "Qwen3-1.7B-CivitAI"
 
-# ---------------- LOAD TOKENIZER & MODEL ----------------
+# ---------------- LOGGING ----------------
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
+logger = logging.getLogger(__name__)
+logger.info("Starting Gradio chatbot...")
+
+# ---------------- LOAD MODEL ----------------
+logger.info(f"Loading tokenizer from {REPO_ID}/{SUBFOLDER}")
 tokenizer = AutoTokenizer.from_pretrained(REPO_ID, subfolder=SUBFOLDER, trust_remote_code=True)
+
 dtype = torch.bfloat16 if torch.cuda.is_available() else torch.float32
+logger.info(f"Loading model with dtype {dtype}")
 model = AutoModelForCausalLM.from_pretrained(
     REPO_ID,
     subfolder=SUBFOLDER,
@@ -18,21 +27,23 @@ model = AutoModelForCausalLM.from_pretrained(
     device_map="auto",
     trust_remote_code=True
 )
+logger.info("Model loaded successfully.")
 
 # ---------------- CHAT FUNCTION ----------------
-def chat_fn_sync(message, history=None):
-    if history is None:
-        history = []
-
-    # Convert history tuples to lists for Gradio Chatbot
-    history = [list(turn) for turn in history]
-
-    chat_history = ""
-    for turn in history:
-        chat_history += f"User: {turn[0]}\nAssistant: {turn[1]}\n"
-    full_text = f"{chat_history}User: {message}\nAssistant:"
+def chat_fn(message):
+    logger.info(f"Received message: {message}")
+
+    # Build prompt directly from user input
+    full_text = f"User: {message}\nAssistant:"
+    logger.info(f"Full prompt for generation:\n{full_text}")
 
+    start_time = time.time()
+    # Tokenize input
     inputs = tokenizer([full_text], return_tensors="pt", truncation=True, max_length=1024).to(model.device)
+    logger.info("Tokenized input.")
+
+    # Generate response
+    logger.info("Generating response...")
     reply_ids = model.generate(
         **inputs,
         max_new_tokens=512,
@@ -42,35 +53,25 @@ def chat_fn_sync(message, history=None):
     )
     response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
     assistant_reply = response.split("Assistant:")[-1].strip()
+    logger.info(f"Assistant reply: {assistant_reply}")
+    logger.info(f"Generation time: {time.time() - start_time:.2f}s")
 
-    # Append as a list (not tuple)
-    history.append([message, assistant_reply])
-    return assistant_reply, history
-
-# Async wrapper for Gradio 5.x
-async def chat_fn(message, history=None):
-    return await asyncio.to_thread(chat_fn_sync, message, history)
+    return assistant_reply
 
 # ---------------- GRADIO BLOCKS UI ----------------
 with gr.Blocks() as demo:
-    gr.Markdown(f"# 🤖 {MODEL_NAME}")
+    gr.Markdown(f"# 🤖 {MODEL_NAME} (Stateless)")
 
     with gr.Row():
         with gr.Column():
             message = gr.Textbox(label="Type your message...", placeholder="Hello!")
             send_btn = gr.Button("Send")
-            clear_btn = gr.Button("Clear Chat")
         with gr.Column():
-            # Use 'tuples' for legacy list-of-lists format
-            chatbot = gr.Chatbot(type="tuples")
-
-    state = gr.State([])
-
-    # Send button / Enter key triggers async chat
-    send_btn.click(chat_fn, inputs=[message, state], outputs=[chatbot, message])
-    message.submit(chat_fn, inputs=[message, state], outputs=[chatbot, message])
+            output = gr.Textbox(label="Assistant Response", lines=10)
 
-    # Clear chat
-    clear_btn.click(lambda: ([], ""), None, [state, chatbot])
+    # Connect button
+    send_btn.click(chat_fn, inputs=[message], outputs=[output])
+    message.submit(chat_fn, inputs=[message], outputs=[output])
 
+logger.info("Launching Gradio app...")
 demo.launch()
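Note that this commit drops both the asyncio wrapper and all conversation history, so every request is now a single stateless turn. If multi-turn context were ever needed again, it could be layered back on top of the same "User: ...\nAssistant:" prompt format without reintroducing Gradio state. A minimal sketch, assuming the module-level tokenizer and model from app.py; the chat_with_history helper is hypothetical and not part of this commit:

# Hypothetical helper: rebuilds the multi-turn prompt the old
# chat_fn_sync produced, on top of the new stateless pipeline.
def chat_with_history(message, history):
    # history is a list of (user_text, assistant_text) pairs
    context = "".join(f"User: {u}\nAssistant: {a}\n" for u, a in history)
    full_text = f"{context}User: {message}\nAssistant:"
    inputs = tokenizer([full_text], return_tensors="pt",
                       truncation=True, max_length=1024).to(model.device)
    reply_ids = model.generate(**inputs, max_new_tokens=512)
    reply = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    reply = reply.split("Assistant:")[-1].strip()
    history.append((message, reply))
    return reply, history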
 
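The new chat_fn also calls model.generate with autograd still enabled; torch.inference_mode() is the standard PyTorch context for disabling gradient tracking during inference and trims some memory and latency. A sketch of the same tokenize/generate/decode pipeline with that guard added, again assuming app.py's tokenizer and model globals (generate_reply is a hypothetical name, not part of the commit):

import torch

def generate_reply(full_text):
    # Same pipeline as chat_fn, with gradient tracking disabled
    # around the generate call.
    inputs = tokenizer([full_text], return_tensors="pt",
                       truncation=True, max_length=1024).to(model.device)
    with torch.inference_mode():
        reply_ids = model.generate(**inputs, max_new_tokens=512)
    response = tokenizer.batch_decode(reply_ids, skip_special_tokens=True)[0]
    return response.split("Assistant:")[-1].strip()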