import gradio as gr
from huggingface_hub import InferenceClient
import pandas as pd
import os

# Streaming chat client for the Mixtral instruct model; the API token is read
# from the HF_TOKEN environment variable.
client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1", token=os.getenv("HF_TOKEN"))
def load_prompts():
    # Load the prompt presets bundled with the app.
    prompts = pd.read_csv("prompts.csv")
    return prompts
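# prompts.csv itself isn't shown here, so its schema is an assumption; a single
# "prompt" column is a common layout. One quick way to inspect what loaded:
#
#     print(load_prompts().head())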
# Called by gr.ChatInterface as fn(message, history, *additional_inputs); the
# parameters after `history` must match the order of additional_inputs below.
def respond(
    message,
    history,
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Rebuild the full conversation: system prompt first, then the prior
    # user/assistant turns, then the new user message.
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
| response = "" | |
| for message in client.chatcompletion( | |
| messages, | |
| maxtokens=maxtokens, | |
| stream=rue, | |
| temperature=temperature, | |
| topp=topp, | |
| ): | |
| token = message.choices[0].delta.content | |
| response += token | |
| yield response | |
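# A minimal usage sketch: respond() is a generator, so iterating it yields the
# growing partial reply that gr.ChatInterface streams into the chat window.
# The message and sampling values below are illustrative only.
#
#     for partial in respond("안녕하세요", [], "You are a helpful assistant.",
#                            max_tokens=128, temperature=0.7, top_p=0.95):
#         print(partial)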
# Prompt presets are loaded once at startup; the DataFrame is not wired into
# the UI below.
prompts = load_prompts()
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        # Default system prompt (Korean): always answer in Korean, use the name
        # '한글로', format output as markdown, translate output into Korean when
        # needed, answer kindly and in detail, ask the user's name at the start
        # and address them as '친구' (friend), and use informal speech ('반말').
        # The prompt is cut off mid-sentence in the source.
        gr.Textbox(
            value="반드시 한글로 답변하라. 너의 이름은 '한글로'입니다. 출력은 markdown 형식으로 출력하며 한글(한국어)로 출력되게 하고 필요하면 출력문을 한글로 번역하여 출력하라. 너는 항상 친절하고 상세하게 답변을 하라. 너는 대화 시작시 상대방의 이름을 물어보고 호칭은 '친구'을 사용할 것. 반드시 한글로 또 '반말'로 답변할 것. 너는 Assistant 역할에 충실하여야 한다. 너",
            label="System message",
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)
if __name__ == "__main__":
    demo.launch()
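# When running outside Hugging Face Spaces, a temporary public URL can be
# requested instead, assuming outbound network access is allowed:
#
#     demo.launch(share=True)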