	Update app.py
app.py CHANGED
@@ -57,13 +57,18 @@ tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
 
 @spaces.GPU
 def stream_chat(message: str, history: list, temperature: float, max_new_tokens: int):
+    print("message:"+ message)
+    print("history:"+ history)
     conversation = []
     for prompt, answer in history:
         conversation.extend([{"role": "user", "content": prompt}, {"role": "assistant", "content": answer}])
 
     conversation.append({"role": "user", "content": message})
 
+    print("conversation:"+ conversation)
+
     input_ids = tokenizer.apply_chat_template(conversation, tokenize=True, add_generation_prompt=True, return_tensors="pt").to(model.device)
+
     prompt_padded_len = len(input_ids[0])
 
     gen_tokens= model.generate(
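A note on the debug lines added in this commit: the first print works because message is a str, but history is a list of (prompt, answer) pairs and conversation is a list of dicts, so "history:" + history and "conversation:" + conversation will raise TypeError: can only concatenate str (not "list") to str the first time stream_chat runs. A minimal standalone sketch of the same debug output using f-strings instead (the sample values are hypothetical, only there to make the snippet runnable on its own):

    # The committed form fails at runtime for non-str values:
    #   print("history:" + history)  # TypeError: can only concatenate str (not "list") to str
    # f-strings call str() on their arguments, so lists print cleanly.
    message = "Hello"                                   # hypothetical sample input
    history = [("Hi", "Hi there!")]                     # (prompt, answer) pairs, as stream_chat iterates them
    conversation = [{"role": "user", "content": "Hi"}]  # chat-template style messages

    print(f"message: {message}")
    print(f"history: {history}")
    print(f"conversation: {conversation}")

The same change would apply inside stream_chat itself; only the three added print lines need to switch to f-strings (or wrap the values in str()).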