Nutri-Label / app.py
YourAIEngineer's picture
Upload 4 files
7037b42 verified
raw
history blame
11.4 kB
import streamlit as st
import cv2
import numpy as np
import re
import os
import pandas as pd
from PIL import Image
import time
from ultralytics import YOLO
from paddleocr import PaddleOCR, draw_ocr
st.title("Nutri-Grade Label Detection & Grade Calculator")

# -----------------------------------------------
# Info & usage instructions
# -----------------------------------------------
# Markdown body of the "Info & Petunjuk Penggunaan" panel: app description,
# feature list, grading thresholds and step-by-step usage.
_USAGE_MARKDOWN = """
**Deskripsi Aplikasi:**
Aplikasi ini membantu Anda mendeteksi dan mengekstrak informasi tabel gizi dari gambar label nutrisi, melakukan normalisasi nilai nutrisi per 100 g/ml, dan menghitung Nutri-Grade sesuai dengan standar resmi (Rev. Juni 2023).
**Fitur Utama:**
- Deteksi objek label nutrisi dengan YOLO.
- Ekstraksi teks dengan PaddleOCR, mendukung format "key: value".
- Normalisasi nilai nutrisi (Gula dan Lemak Jenuh) per 100 g/ml.
- Perhitungan grade berdasarkan threshold:
• Gula: Grade A ≤ 1g, B: >1-5g, C: >5-10g, D: >10g per 100 ml.
• Lemak Jenuh: Grade A ≤ 0.7g, B: >0.7-1.2g, C: >1.2-2.8g, D: >2.8g per 100 ml.
• **Grade akhir diambil dari nilai terburuk antara gula dan lemak jenuh.**
**Cara Penggunaan:**
1. Upload gambar label nutrisi (JPG/PNG).
2. Sistem mendeteksi objek dan mengekstrak nilai nutrisi.
3. Periksa dan koreksi nilai secara manual jika diperlukan.
4. Klik *Hitung* untuk melihat tabel normalisasi dan grade.
"""

# Markdown body of the disclaimer panel: accuracy caveat and planned work.
_DISCLAIMER_MARKDOWN = """
Labelisasi di bawah hanya sebagai gambaran umum. Perlu riset lebih lanjut untuk akurasi.
**Pengembangan:**
- Konsultasi dengan nutritionist untuk parameter yang lebih tepat.
- Integrasi informasi halal, kalori, dan fitur interaktif (misal: chatbot).
"""

# Both panels are collapsible so the instructions do not push the uploader
# off screen.
with st.expander("Info & Petunjuk Penggunaan"):
    st.markdown(_USAGE_MARKDOWN)

with st.expander("!! Tolong Diperhatikan !!"):
    st.markdown(_DISCLAIMER_MARKDOWN)
# Extract a numeric value from label text (e.g. "15g" -> 15.0).
def parse_numeric_value(text):
    """Return the first number found in *text* as a float.

    Only the first numeric token is used, so trailing figures such as a
    daily-value percentage ("12g (4%)") are not glued onto the amount.
    A comma between digits is read as a decimal separator ("1,5 g" -> 1.5).

    Args:
        text: Raw value text; ``None`` and non-string values are accepted.

    Returns:
        The parsed number, or ``0.0`` when *text* contains no number.
    """
    if text is None:
        return 0.0
    # Optional sign, then either "digits[.,digits]" or a bare ".digits".
    match = re.search(r"[-+]?(?:\d+(?:[.,]\d+)?|[.,]\d+)", str(text))
    if match is None:
        return 0.0
    return float(match.group().replace(",", "."))
# Initialise the YOLO detector and the PaddleOCR engine.
trained_model_path = "best.pt"  # the YOLO weights file must be in the working directory


@st.cache_resource
def _load_models(weights_path):
    """Build the YOLO and PaddleOCR models once per server process.

    Streamlit re-executes the whole script on every widget interaction;
    caching keeps the models from being reloaded on each rerun.

    Args:
        weights_path: Path of the trained YOLO weights file.

    Returns:
        A ``(yolo_model, ocr_model)`` tuple.
    """
    detector = YOLO(weights_path)
    # The angle classifier is enabled at construction time with
    # `use_angle_cls`; `cls` is the per-call switch of `.ocr()` and has no
    # effect when passed to the constructor.
    # NOTE(review): use_gpu=True is kept from the original; confirm the
    # deployment host actually has a CUDA device.
    reader = PaddleOCR(use_angle_cls=True, use_gpu=True, lang='en')
    return detector, reader


yolo_model, ocr_model = _load_models(trained_model_path)
# --- STEP 1: Upload the image ---
uploaded_file = st.file_uploader("Upload Gambar (JPG/PNG)", type=["jpg", "jpeg", "png"])
if uploaded_file is None:
    # Nothing to process until the user picks a file.
    st.stop()

# getvalue() returns the whole buffer regardless of the stream position.
file_bytes = np.asarray(bytearray(uploaded_file.getvalue()), dtype=np.uint8)
# imdecode raises on an empty buffer and returns None for undecodable data.
img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR) if file_bytes.size else None
if img is None:
    st.error("Gambar tidak dapat dibaca. Pastikan file JPG/PNG valid.")
    st.stop()
st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), caption="Gambar yang diupload", use_column_width=True)
img_path = "uploaded_image.jpg"
cv2.imwrite(img_path, img)

# --- STEP 2: Object detection and cropping with YOLO ---
st.write("Melakukan object detection dengan YOLO dan crop region...")
yolo_results = yolo_model.predict(source=img_path, conf=0.5)
crop_images = []
boxes = yolo_results[0].boxes
for i, box in enumerate(boxes):
    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
    cropped = img[y1:y2, x1:x2]
    if cropped.size == 0:
        # A degenerate box yields an empty array, which imwrite rejects.
        continue
    crop_filename = f"crop_{i}.jpg"
    cv2.imwrite(crop_filename, cropped)
    crop_images.append((crop_filename, cropped))
st.success("Proses crop bounding box selesai!")
st.write("Jumlah crop yang ditemukan:", len(crop_images))
for crop_filename, cropped in crop_images:
    st.image(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB), caption=f"Crop: {crop_filename}", use_column_width=True)

# --- STEP 3: OCR on the full image ---
st.write("Melakukan OCR pada gambar penuh dengan PaddleOCR...")
start_time = time.time()
ocr_result = ocr_model.ocr(img_path, cls=True)
ocr_time = time.time() - start_time
st.write(f"Waktu pemrosesan OCR: {ocr_time:.2f} detik")
# The per-image entry may be an empty list or None when no text is found,
# so test truthiness instead of calling len() on it.
if not ocr_result or not ocr_result[0]:
    st.error("OCR tidak menemukan teks pada gambar!")
    st.stop()

# Flatten the OCR output into dicts carrying the geometry used for matching.
ocr_list = []
for box, (text, score) in ocr_result[0]:
    xs = [pt[0] for pt in box]
    ys = [pt[1] for pt in box]
    ocr_list.append({
        "text": text,
        "box": box,
        "score": score,
        "center_x": sum(xs) / len(xs),
        "center_y": sum(ys) / len(ys),
        "height": max(ys) - min(ys),
    })
# Sort top-to-bottom by vertical position.
ocr_list.sort(key=lambda entry: entry["center_y"])

# Only sugar, serving size and saturated fat are extracted.
target_keys = {
    "gula": ["gula"],
    "takaran saji": ["takaran saji", "serving size"],
    "lemak jenuh": ["lemak jenuh"]
}
# Values are stored under the capitalised key ("Gula", "Takaran saji", ...).
extracted = {}


def _first_number(raw_text):
    """Return the first numeric token of *raw_text* as a dot-decimal string, or ""."""
    match = re.search(r"[-+]?(?:\d+(?:[.,]\d+)?|[.,]\d+)", raw_text)
    return match.group().replace(",", ".") if match else ""


def _nearest_right_neighbour(anchor, candidates):
    """Return the closest OCR item to the right of *anchor* on the same row, or None."""
    same_row = [
        other for other in candidates
        if other is not anchor
        and other["center_x"] > anchor["center_x"]
        and abs(other["center_y"] - anchor["center_y"]) < 0.5 * anchor["height"]
    ]
    return min(same_row, key=lambda other: other["center_x"], default=None)


# Pass 1: text boxes that already contain "key: value".
for item in ocr_list:
    txt_lower = item["text"].lower()
    if ":" not in txt_lower:
        continue
    parts = txt_lower.split(":")
    key_candidate = parts[0].strip()
    value_candidate = parts[-1].strip()
    for canonical, variants in target_keys.items():
        label = canonical.capitalize()
        # Check the key exactly as it is stored, so the first match wins.
        if label in extracted:
            continue
        if any(variant in key_candidate for variant in variants):
            clean_value = _first_number(value_candidate)
            if clean_value:
                extracted[label] = clean_value

# Pass 2: fallback for keys still missing - take the value from the nearest
# text box to the right of the key on the same row.
for item in ocr_list:
    txt_lower = item["text"].lower()
    for canonical, variants in target_keys.items():
        label = canonical.capitalize()
        if label in extracted:
            continue
        if not any(variant in txt_lower for variant in variants):
            continue
        best_candidate = _nearest_right_neighbour(item, ocr_list)
        if best_candidate is not None:
            clean_value = _first_number(best_candidate["text"])
            if clean_value:
                extracted[label] = clean_value

if extracted:
    st.write("**Hasil Ekstraksi Key-Value:**")
    for k, v in extracted.items():
        st.write(f"{k}: {v}")
else:
    st.warning("Tidak ditemukan pasangan key-value yang cocok.")

# Show the OCR result with bounding boxes for reference.
boxes_ocr = [entry["box"] for entry in ocr_list]
texts_ocr = [entry["text"] for entry in ocr_list]
scores_ocr = [entry["score"] for entry in ocr_list]
# Close the image file as soon as the RGB copy has been made.
with Image.open(img_path) as source_image:
    rgb_image = source_image.convert("RGB")
im_show = draw_ocr(rgb_image, boxes_ocr, texts_ocr, scores_ocr, font_path="simfang.ttf")
im_show = Image.fromarray(im_show)
st.image(im_show, caption="Hasil OCR dengan Bounding Boxes", use_column_width=True)

# --- Manual correction through st.form ---
with st.form("correction_form"):
    st.write("Silakan koreksi nilai jika diperlukan (hanya angka, tanpa satuan):")
    corrected_data = {}
    for key in target_keys:
        key_cap = key.capitalize()
        # Pre-fill with the extracted number; leave blank when nothing was found.
        current_val = str(parse_numeric_value(extracted[key_cap])) if key_cap in extracted else ""
        new_val = st.text_input(f"{key_cap}", value=current_val)
        corrected_data[key_cap] = new_val
    submit_button = st.form_submit_button("Hitung")

if not submit_button:
    # The calculation only runs after the user presses "Hitung".
    st.stop()

serving_size = parse_numeric_value(corrected_data.get("Takaran saji", "100"))
# Nutrient values (sugar and saturated fat only).
sugar_value = parse_numeric_value(corrected_data.get("Gula", "0"))
fat_value = parse_numeric_value(corrected_data.get("Lemak jenuh", "0"))
if serving_size > 0:
    sugar_norm = (sugar_value / serving_size) * 100
    fat_norm = (fat_value / serving_size) * 100
else:
    # Without a usable serving size the raw values are shown unnormalised.
    st.error("Takaran saji tidak valid untuk normalisasi.")
    sugar_norm, fat_norm = sugar_value, fat_value
st.write("**Tabel Hasil Normalisasi per 100 g/ml**")
data_tabel = {
    "Nutrisi": ["Gula", "Lemak jenuh"],
    "Nilai (per 100 g/ml)": [sugar_norm, fat_norm]
}
df_tabel = pd.DataFrame(data_tabel)
st.table(df_tabel)
# Map a nutrient amount onto a grade label using ascending thresholds.
def grade_from_value(value, thresholds):
    """Return the grade label for *value*.

    Args:
        value: Nutrient amount to grade.
        thresholds: Mapping with the inclusive upper bounds of grades
            "A", "B" and "C"; bounds are looked up in that order and only
            as far as needed.

    Returns:
        "Grade A", "Grade B" or "Grade C" for the first bound that *value*
        does not exceed, otherwise "Grade D".
    """
    ladder = (("A", "Grade A"), ("B", "Grade B"), ("C", "Grade C"))
    for bound_key, label in ladder:
        if value <= thresholds[bound_key]:
            return label
    return "Grade D"
# Nutri-Grade thresholds (g per 100 ml) for sugar and saturated fat.
thresholds_sugar = {"A": 1.0, "B": 5.0, "C": 10.0}
thresholds_fat = {"A": 0.7, "B": 1.2, "C": 2.8}
sugar_grade = grade_from_value(sugar_norm, thresholds_sugar)
fat_grade = grade_from_value(fat_norm, thresholds_fat)

# The final grade is the worse of the two, i.e. the one with the higher score.
grade_scores = {"Grade A": 1, "Grade B": 2, "Grade C": 3, "Grade D": 4}
inverse_scores = dict(zip(grade_scores.values(), grade_scores.keys()))
worst_score = max(grade_scores[grade] for grade in (sugar_grade, fat_grade))
final_grade = inverse_scores[worst_score]

for heading, grade in (
    ("Grade Gula", sugar_grade),
    ("Grade Lemak Jenuh", fat_grade),
    ("Grade Akhir", final_grade),
):
    st.write(f"**{heading}:** {grade}")
def color_grade(grade_text):
    """Return an HTML ``<div>`` banner showing *grade_text* on a grade colour.

    "Grade A", "Grade B" and "Grade C" get green, yellow and orange
    backgrounds; any other text falls back to red.
    """
    palette = {
        "Grade A": "#2ecc71",
        "Grade B": "#f1c40f",
        "Grade C": "#e67e22",
    }
    bg_color = palette.get(grade_text, "#e74c3c")
    return f"""
<div style="
background-color: {bg_color};
padding: 10px;
border-radius: 5px;
margin-top: 10px;
font-weight: bold;
color: white;
text-align: center;
">
{grade_text}
</div>
"""
# Render the final grade as a coloured banner; unsafe_allow_html is needed
# because color_grade returns a raw HTML <div>.
st.markdown(color_grade(final_grade), unsafe_allow_html=True)