from flask import Flask, render_template, request, redirect, url_for, send_file, jsonify
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import os
import torch
import zipfile
import pandas as pd

from utils import preprocess_data, train_model

app = Flask(__name__)
app.config["UPLOAD_FOLDER"] = "uploads"
app.config["MODEL_FOLDER"] = "models"

# Initialize device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load tokenizer and add a padding token (GPT-2 has none by default)
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
tokenizer.add_special_tokens({'pad_token': '[PAD]'})

# Cache for loaded models to avoid redundant loading
loaded_models = {}
@app.route("/")
def home():
    # List available fine-tuned models (one sub-directory per model)
    models = [model for model in os.listdir(app.config["MODEL_FOLDER"])
              if os.path.isdir(os.path.join(app.config["MODEL_FOLDER"], model))]
    return render_template("home.html", models=models)
@app.route("/upload", methods=["POST"])
def upload_file():
    if "file" not in request.files or "model_name" not in request.form:
        return redirect(request.url)

    file = request.files["file"]
    model_name = request.form["model_name"]
    if not file.filename or not model_name:
        return redirect(request.url)

    # Prepare directories and paths
    model_path = os.path.join(app.config["MODEL_FOLDER"], model_name)
    os.makedirs(model_path, exist_ok=True)
    filepath = os.path.join(app.config["UPLOAD_FOLDER"], file.filename)
    file.save(filepath)

    # Load and preprocess data
    try:
        df = pd.read_csv(filepath)
        dataset = preprocess_data(df, tokenizer)
    except Exception as e:
        return f"Data processing error: {e}", 500

    # Train and save model
    try:
        # Clear any previous GPU memory allocation
        torch.cuda.empty_cache()
        model = GPT2LMHeadModel.from_pretrained("gpt2")
        model.resize_token_embeddings(len(tokenizer))
        model.to(device)

        # Train the model
        train_model(model, tokenizer, dataset, model_path)

        # Clear GPU memory right after training
        del model
        torch.cuda.empty_cache()
    except torch.cuda.OutOfMemoryError:
        # Clear memory in case of OOM error and return an appropriate message
        torch.cuda.empty_cache()
        return "CUDA out of memory error. Try a smaller model or reduce batch size.", 500
    except Exception as e:
        return f"Model training error: {e}", 500

    # Zip the model files for download, skipping the archive itself
    model_zip_path = os.path.join(model_path, f"{model_name}.zip")
    with zipfile.ZipFile(model_zip_path, 'w') as model_zip:
        for folder, _, files in os.walk(model_path):
            for file_name in files:
                file_path = os.path.join(folder, file_name)
                if file_path == model_zip_path:
                    continue
                model_zip.write(file_path, os.path.relpath(file_path, app.config["MODEL_FOLDER"]))

    return redirect(url_for("home"))
@app.route("/download/<model_name>")
def download_model(model_name):
    model_path = os.path.join(app.config["MODEL_FOLDER"], model_name, f"{model_name}.zip")
    if os.path.exists(model_path):
        return send_file(model_path, as_attachment=True)
    else:
        return "Model not found", 404
@app.route("/chat/<model_name>")
def chat(model_name):
    return render_template("chat.html", model_name=model_name)
@app.route("/generate/<model_name>", methods=["POST"])
def generate_response(model_name):
    prompt = request.json.get("prompt")
    if not prompt:
        return jsonify({"error": "No prompt provided"}), 400

    # Load the model if not already in cache
    if model_name not in loaded_models:
        model_path = os.path.join(app.config["MODEL_FOLDER"], model_name)
        if not os.path.exists(model_path):
            return jsonify({"error": f"Model '{model_name}' not found"}), 404
        try:
            # Clear GPU memory and load the model
            torch.cuda.empty_cache()
            model = GPT2LMHeadModel.from_pretrained(model_path)
            model.to(device)
            model.eval()
            loaded_models[model_name] = model
        except Exception as e:
            return jsonify({"error": f"Failed to load model '{model_name}': {str(e)}"}), 500

    # Generate response
    model = loaded_models[model_name]
    try:
        inputs = tokenizer.encode(prompt, return_tensors="pt").to(device)
        with torch.no_grad():
            outputs = model.generate(
                inputs,
                max_length=50,
                num_return_sequences=1,
                no_repeat_ngram_size=2,
                pad_token_id=tokenizer.eos_token_id
            )
        response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    except torch.cuda.OutOfMemoryError:
        torch.cuda.empty_cache()
        return jsonify({"error": "Out of memory. Try a smaller model or shorter prompt."}), 500
    except Exception as e:
        return jsonify({"error": str(e)}), 500
    finally:
        # Clear GPU memory after generation to avoid leaks
        torch.cuda.empty_cache()

    return jsonify({"response": response})
if __name__ == "__main__":
    os.makedirs(app.config["UPLOAD_FOLDER"], exist_ok=True)
    os.makedirs(app.config["MODEL_FOLDER"], exist_ok=True)
    app.run(debug=True)
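
# ---------------------------------------------------------------------------
# Note: `preprocess_data` and `train_model` are imported from a local `utils`
# module that is not shown here. The commented-out sketch below is only an
# illustration of what that module might look like, assuming the uploaded CSV
# has a "text" column and that training uses the Hugging Face `datasets` and
# `Trainer` APIs; the function bodies and hyperparameters are assumptions,
# not this app's actual implementation.
#
# # utils.py (sketch)
# from datasets import Dataset
# from transformers import (DataCollatorForLanguageModeling, Trainer,
#                           TrainingArguments)
#
# def preprocess_data(df, tokenizer, max_length=128):
#     """Tokenize the 'text' column of the uploaded DataFrame."""
#     dataset = Dataset.from_pandas(df[["text"]])
#     return dataset.map(
#         lambda batch: tokenizer(batch["text"], truncation=True, max_length=max_length),
#         batched=True,
#         remove_columns=["text"],
#     )
#
# def train_model(model, tokenizer, dataset, output_dir):
#     """Fine-tune the model on the tokenized dataset and save it to output_dir."""
#     args = TrainingArguments(
#         output_dir=output_dir,
#         num_train_epochs=1,
#         per_device_train_batch_size=2,
#         save_strategy="no",
#     )
#     collator = DataCollatorForLanguageModeling(tokenizer, mlm=False)
#     Trainer(model=model, args=args, data_collator=collator,
#             train_dataset=dataset).train()
#     model.save_pretrained(output_dir)
#     tokenizer.save_pretrained(output_dir)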