# import gradio as gr
import os
import torch
from transformers import AutoProcessor, MllamaForConditionalGeneration, TextIteratorStreamer
from PIL import Image
import spaces
import tempfile
import requests
from PyPDF2 import PdfReader
from threading import Thread
from flask import Flask, request, jsonify
# Check if we're running in a Hugging Face Space and if SPACES_ZERO_GPU is enabled
# IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
# IS_SPACE = os.environ.get("SPACE_ID", None) is not None

# Determine the device (GPU if available, else CPU)
device = "cuda" if torch.cuda.is_available() else "cpu"
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
print(f"Using device: {device}")
print(f"Low memory mode: {LOW_MEMORY}")

app = Flask(__name__)
# Get the Hugging Face token from environment variables
HF_TOKEN = os.environ.get("HF_TOKEN")

# Load the model and processor
model_name = "meta-llama/Llama-3.2-11B-Vision-Instruct"
model = MllamaForConditionalGeneration.from_pretrained(
    model_name,
    token=HF_TOKEN,  # `token` replaces the deprecated `use_auth_token` argument
    torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,  # Use device mapping if CUDA is available
)
# device_map="auto" already places the weights, so an explicit move is not needed
# model.to(device)
processor = AutoProcessor.from_pretrained(model_name, token=HF_TOKEN)
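
# Note (approximate): the 11B model needs on the order of 20+ GB of memory just for
# bfloat16 weights, so the float32 CPU fallback above will be very slow for generation.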
# @spaces.GPU  # Use the free GPU provided by Hugging Face Spaces
# def predict(image, text):
#     # Prepare the input messages
#     messages = [
#         {"role": "user", "content": [
#             {"type": "image"},  # Specify that an image is provided
#             {"type": "text", "text": text}  # Add the user-provided text input
#         ]}
#     ]
#     # Create the input text using the processor's chat template
#     input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
#     # Process the inputs and move to the appropriate device
#     inputs = processor(image, input_text, return_tensors="pt").to(device)
#     # Generate a response from the model
#     outputs = model.generate(**inputs, max_new_tokens=100)
#     # Decode the output to return the final response
#     response = processor.decode(outputs[0], skip_special_tokens=True)
#     return response
def extract_text_from_pdf(pdf_url):
    try:
        response = requests.get(pdf_url)
        response.raise_for_status()
        # Write the downloaded PDF to a temporary file so PdfReader can open it
        with tempfile.NamedTemporaryFile(delete=False) as temp_pdf:
            temp_pdf.write(response.content)
            temp_pdf_path = temp_pdf.name
        reader = PdfReader(temp_pdf_path)
        text = ""
        for page in reader.pages:
            text += page.extract_text() or ""  # extract_text() may yield nothing for image-only pages
        os.remove(temp_pdf_path)
        return text
    except Exception as e:
        raise ValueError(f"Error extracting text from PDF: {str(e)}")
        # raise HTTPException(status_code=400, detail=f"Error extracting text from PDF: {str(e)}")
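
# Example (illustrative; the URL below is a placeholder, not a real document):
#   extract_text_from_pdf("https://example.com/syllabus.pdf")
# returns the concatenated text of every page, or raises ValueError if the download or parse fails.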
def predict_text(text):
    # pdf_text = extract_text_from_pdf('https://arinsight.co/2024_FA_AEC_1200_GR1_GR2.pdf')
    text_combined = text  # + "\n\nExtracted Text from PDF:\n" + pdf_text
    # Prepare the input messages
    messages = [{"role": "user", "content": [{"type": "text", "text": text_combined}]}]
    # Create the input text using the processor's chat template
    input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
    # Process the inputs and move them to the selected device (GPU or CPU)
    # inputs = processor(image, input_text, return_tensors="pt").to(device)
    inputs = processor(text=input_text, return_tensors="pt").to(device)
    # Generate a response from the model
    # outputs = model.generate(**inputs, max_new_tokens=1024)
    # # Decode the output to return the final response
    # response = processor.decode(outputs[0], skip_special_tokens=True, skip_prompt=True)
    # Stream tokens from a background generation thread and accumulate them
    streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
    generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
    thread = Thread(target=model.generate, kwargs=generation_kwargs)
    thread.start()
    buffer = ""
    for new_text in streamer:
        buffer += new_text
        # generated_text_without_prompt = buffer
        # # time.sleep(0.01)
        # yield buffer
    thread.join()
    return buffer
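
# Example (illustrative): predict_text("List the course code and credits in this text: ...")
# blocks until generation completes and returns the full decoded response as a single string.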
PROMPT = (
    "Extract the following information from the provided text ONLY: "
    "Course Code, Course Name, Credit, Delivery method, Course description, and Topical outline. "
    "Do not add anything else except the information available in this text."
)
@app.route("/")
def home():
    return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})

@app.route("/favicon.ico")
def favicon():
    return "", 204

@app.route("/extract", methods=["POST"])
def extract_info():
    data = request.json
    if not data or "url" not in data:
        return jsonify({"error": "Please provide a PDF URL in the request body."}), 400
    pdf_url = data["url"]
    try:
        pdf_text = extract_text_from_pdf(pdf_url)
        prompt = f"{PROMPT}\n\n{pdf_text}"
        response = predict_text(prompt)
        return jsonify({"extracted_info": response})
    except Exception as e:
        return jsonify({"error": str(e)}), 500
if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)
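
# Example request (illustrative; assumes the server is running locally on port 7860
# and the PDF URL is a placeholder):
#   curl -X POST http://localhost:7860/extract \
#        -H "Content-Type: application/json" \
#        -d '{"url": "https://example.com/syllabus.pdf"}'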
# # Define the Gradio interface
# interface = gr.Interface(
#     fn=predict_text,
#     inputs=[
#         # gr.Image(type="pil", label="Image Input"),  # Image input with label
#         gr.Textbox(label="Text Input")  # Textbox input with label
#     ],
#     outputs=gr.Textbox(label="Generated Response"),  # Output with a more descriptive label
#     title="Llama 3.2 11B Vision Instruct Demo",  # Title of the interface
#     description="This demo uses Meta's Llama 3.2 11B Vision model to generate responses based on an image and text input.",  # Short description
#     theme="compact"  # Using a compact theme for a cleaner look
# )
# # Launch the interface
# interface.launch(debug=True)