Spaces:
Sleeping
Sleeping
| import os | |
| os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "0" | |
| import torch | |
| from fastapi import FastAPI | |
| from pydantic import BaseModel | |
| from transformers import AutoTokenizer, AutoModelForCausalLM | |
| import gradio as gr | |
# --- Model ---
model_id = "sberbank-ai/rugpt3medium_based_on_gpt2"

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Tokenizer and causal LM are loaded once at import time and shared by
# every request handled by `respond`.
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id).to(device)

# Fixed passage (in Russian) that is inserted into every prompt.
context = (
    "Университет Иннополис был основан в 2012 году. "
    "Это современный вуз в России, "
    "специализирующийся на IT и робототехнике, "
    "расположенный в городе Иннополис, Татарстан.\n"
)
def respond(message, history=None):
    """Generate an answer to ``message`` from the fixed ``context`` passage.

    The question is embedded into a reading-comprehension prompt and the
    model samples a continuation of at most 100 new tokens.

    Args:
        message: The user's question.
        history: Unused. Accepted so the function can be passed to
            ``gr.ChatInterface``, which this file does.

    Returns:
        The text generated after the prompt, with surrounding whitespace
        stripped.
    """
    prompt = f"Прочитай текст и ответь на вопрос:\n\n{context}\n\nВопрос: {message}\nОтвет:"
    encoded = tokenizer(prompt, return_tensors="pt")
    input_ids = encoded.input_ids.to(device)
    # Pass the mask explicitly: pad_token_id is set to the EOS id below, so
    # generate() cannot reliably infer the mask from the ids alone.
    attention_mask = encoded.attention_mask.to(device)

    with torch.no_grad():
        output_ids = model.generate(
            input_ids,
            attention_mask=attention_mask,
            max_new_tokens=100,
            temperature=0.8,
            top_p=0.9,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The returned sequence starts with the prompt tokens. Slice them off by
    # length instead of searching the decoded text for "Ответ:" -- the marker
    # can reappear in the generated text (or in the user's message), which
    # made the previous split(...)[-1] return the wrong segment.
    prompt_length = input_ids.shape[-1]
    generated_ids = output_ids[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True).strip()
# --- Gradio UI ---
# Chat front end titled "Иннополис Бот"; each user message is answered by `respond`.
chat = gr.ChatInterface(
    respond,
    title="Иннополис Бот",
)
# --- FastAPI for API access ---
app = FastAPI()


class QuestionRequest(BaseModel):
    # Request body for the question endpoint: the question text to answer.
    question: str


# Register the handler on the app; without a route decorator the function
# was defined but never reachable over HTTP.
@app.post("/ask")
def ask(request: QuestionRequest):
    """Answer the question carried in the request body.

    Args:
        request: Parsed request body holding the ``question`` string.

    Returns:
        A dict of the form ``{"answer": <generated text>}``.
    """
    answer = respond(request.question)
    return {"answer": answer}
# --- Attach the Gradio UI to the FastAPI app ---
# The chat interface is served from the root path of `app`.
app = gr.mount_gradio_app(app, chat, path="/")

# No iface.launch() call here: per the original author, Hugging Face starts
# Uvicorn itself.
# NOTE(review): that depends on how the Space is configured (SDK / Dockerfile) -- confirm.