Image added
app.py
CHANGED
@@ -62,6 +62,56 @@ processor = AutoProcessor.from_pretrained(model_name, use_auth_token=HF_TOKEN)
     # response = processor.decode(outputs[0], skip_special_tokens=True)
     # return response
 
+
+def predict_image(image_url, text):
+    try:
+        # Download the image from the URL
+        response = requests.get(image_url)
+        response.raise_for_status()  # Raise an error for invalid responses
+        image = Image.open(io.BytesIO(response.content)).convert("RGB")
+
+        # Prepare the input messages
+        messages = [
+            {"role": "user", "content": [
+                {"type": "image"},              # Specify that an image is provided
+                {"type": "text", "text": text}  # Add the user-provided text input
+            ]}
+        ]
+
+        # Create the input text using the processor's chat template
+        input_text = processor.apply_chat_template(messages, add_generation_prompt=True)
+
+        # Process the inputs and move to the appropriate device
+        inputs = processor(images=image, text=input_text, return_tensors="pt").to("cuda")
+
+        # Generate a response from the model
+        # outputs = model.generate(**inputs, max_new_tokens=100)
+
+        # # Decode the output to return the final response
+        # response = processor.decode(outputs[0], skip_special_tokens=True)
+
+        streamer = TextIteratorStreamer(processor, skip_special_tokens=True, skip_prompt=True)
+
+        generation_kwargs = dict(inputs, streamer=streamer, max_new_tokens=2048)
+        generated_text = ""
+
+        thread = Thread(target=model.generate, kwargs=generation_kwargs)
+        thread.start()
+        buffer = ""
+
+        for new_text in streamer:
+            buffer += new_text
+            # generated_text_without_prompt = buffer
+            # # time.sleep(0.01)
+            # yield buffer
+
+        return buffer
+        # return response
+
+    except Exception as e:
+        raise ValueError(f"Error during prediction: {str(e)}")
+
+
 def extract_text_from_pdf(pdf_url):
     try:
         response = requests.get(pdf_url)
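For a quick sanity check outside the Flask routes, the new predict_image helper could be exercised on its own, roughly as sketched below. This is not part of the commit: it assumes app.py's model, processor, and imports (requests, PIL, io, Thread, TextIteratorStreamer) are already initialised, and the image URL is only a placeholder.

# Hypothetical smoke test for predict_image (not part of app.py).
if __name__ == "__main__":
    sample_url = "https://example.com/sample-transcript.png"  # placeholder URL
    try:
        result = predict_image(sample_url, "Extract the Student Name and transferred credits from this image ")
        print(result)
    except ValueError as err:
        print(f"Prediction failed: {err}")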
@@ -128,6 +178,11 @@ PROMPT = (
 PROMPT_SKILLS = (
     "Extract the Course name and Primary Skills from this text. "
 )
+
+PROMPT_IMAGE = (
+    "Extract the Student Name and transferred credits from this image "
+)
+
 @app.route("/", methods=["GET"])
 def home():
     return jsonify({"message": "Welcome to the PDF Extraction API. Use the /extract endpoint to extract information."})
@@ -147,10 +202,18 @@ def extract_info():
         pdf_text = extract_text_from_pdf(pdf_url)
         prompt = f"{PROMPT}\n\n{pdf_text}"
         response = predict_text(prompt)
+        response_skills = ''
         if data["skills"] == True:
             prompt_skills = f"{PROMPT_SKILLS}\n\n{pdf_text}"
             response_skills = predict_text(prompt_skills)
-
+
+        if data.get("img_url") is not None:
+            prompt_image = f"{PROMPT_IMAGE}\n"
+            response_image = predict_image(data["img_url"], prompt_image)
+        else:
+            response_image = ''
+
+        return jsonify({"extracted_info": response + response_skills + response_image})
     except Exception as e:
         return jsonify({"error": str(e)}), 500
 
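A minimal client-side sketch of how the updated /extract endpoint might be called once this change is deployed. The skills and img_url fields come from the handler above; the pdf_url field name, the POST method, and the Space URL are assumptions for illustration only.

import requests

payload = {
    "pdf_url": "https://example.com/transcript.pdf",   # assumed field name
    "skills": True,                                     # enables the PROMPT_SKILLS pass
    "img_url": "https://example.com/transcript.png",   # enables the new predict_image pass
}
resp = requests.post("https://<your-space>.hf.space/extract", json=payload, timeout=300)
print(resp.json())  # {"extracted_info": "..."} on success, {"error": "..."} with HTTP 500 on failure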