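# Streamlit app: Nutri-Grade label detection and grade calculator.
# (Hosting-page residue "Spaces: Running" removed here; it was not program text.)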
import streamlit as st | |
import cv2 | |
import numpy as np | |
import re | |
import os | |
import pandas as pd | |
from PIL import Image | |
import time | |
from ultralytics import YOLO | |
from paddleocr import PaddleOCR, draw_ocr | |
st.title("Nutri-Grade Label Detection & Grade Calculator")

# -----------------------------------------------
# App information & usage instructions
# -----------------------------------------------
with st.expander("Info & Petunjuk Penggunaan"):
    st.markdown("""
    **Deskripsi Aplikasi:**
    Aplikasi ini membantu Anda mendeteksi dan mengekstrak informasi tabel gizi dari gambar label nutrisi, melakukan normalisasi nilai nutrisi per 100 g/ml, dan menghitung Nutri-Grade sesuai dengan standar resmi (Rev. Juni 2023).

    **Fitur Utama:**
    - Deteksi objek label nutrisi dengan YOLO.
    - Ekstraksi teks dengan PaddleOCR, mendukung format "key: value".
    - Normalisasi nilai nutrisi (Gula dan Lemak Jenuh) per 100 g/ml.
    - Perhitungan grade berdasarkan threshold:
      • Gula: Grade A ≤ 1g, B: >1-5g, C: >5-10g, D: >10g per 100 ml.
      • Lemak Jenuh: Grade A ≤ 0.7g, B: >0.7-1.2g, C: >1.2-2.8g, D: >2.8g per 100 ml.
      • **Grade akhir diambil dari nilai terburuk antara gula dan lemak jenuh.**

    **Cara Penggunaan:**
    1. Upload gambar label nutrisi (JPG/PNG).
    2. Sistem mendeteksi objek dan mengekstrak nilai nutrisi.
    3. Periksa dan koreksi nilai secara manual jika diperlukan.
    4. Klik *Hitung* untuk melihat tabel normalisasi dan grade.
    """)

# Disclaimer: the grading shown by the app is indicative only.
with st.expander("!! Tolong Diperhatikan !!"):
    st.markdown("""
    Labelisasi di bawah hanya sebagai gambaran umum. Perlu riset lebih lanjut untuk akurasi.

    **Pengembangan:**
    - Konsultasi dengan nutritionist untuk parameter yang lebih tepat.
    - Integrasi informasi halal, kalori, dan fitur interaktif (misal: chatbot).
    """)
# Pull a numeric value out of free text (e.g. "15g" -> 15.0).
def parse_numeric_value(text):
    """Return the first number found in *text* as a float.

    Only the first numeric token is used, so trailing annotations such as
    the "(5%)" in "15g (5%)" are not glued onto the value. A comma is
    treated as a decimal separator ("2,5 g" -> 2.5).

    Args:
        text: Raw text to parse. ``None`` and non-string values are
            tolerated (non-strings are converted with ``str``).

    Returns:
        The parsed number, or ``0.0`` when *text* contains no number.
    """
    if text is None:
        return 0.0
    # Optional sign, then either "digits[.digits]" or ".digits"; "," counts as ".".
    match = re.search(r"-?(?:\d+(?:[.,]\d*)?|[.,]\d+)", str(text))
    if match is None:
        return 0.0
    return float(match.group(0).replace(",", "."))
# Initialise the YOLO detector and the PaddleOCR reader.
trained_model_path = "best.pt"  # the YOLO weights file must be in the working directory


@st.cache_resource
def _load_yolo_model(weights_path):
    """Build the YOLO detector from *weights_path*; cached so reruns reuse it."""
    return YOLO(weights_path)


@st.cache_resource
def _load_ocr_model():
    """Build the PaddleOCR reader; cached so reruns reuse it.

    The angle classifier is switched on with ``use_angle_cls`` because the
    script later calls ``ocr(..., cls=True)``; ``cls`` itself is an argument
    of ``ocr()``, not of the constructor.
    """
    return PaddleOCR(use_gpu=True, lang='en', use_angle_cls=True)


yolo_model = _load_yolo_model(trained_model_path)
ocr_model = _load_ocr_model()
# --- STEP 1: Upload the image ---
uploaded_file = st.file_uploader("Upload Gambar (JPG/PNG)", type=["jpg", "jpeg", "png"])
if uploaded_file is None:
    # Nothing to process yet: end this script run until a file is uploaded.
    st.stop()

file_bytes = np.frombuffer(uploaded_file.getvalue(), dtype=np.uint8)
img = cv2.imdecode(file_bytes, cv2.IMREAD_COLOR)
if img is None:
    # imdecode signals an undecodable buffer by returning None instead of raising.
    st.error("Gambar tidak dapat dibaca. Silakan upload file JPG/PNG yang valid.")
    st.stop()

st.image(cv2.cvtColor(img, cv2.COLOR_BGR2RGB), caption="Gambar yang diupload", use_column_width=True)

# The detector and the OCR step read the image back from this path.
img_path = "uploaded_image.jpg"
cv2.imwrite(img_path, img)
# --- STEP 2: Object detection & cropping with YOLO ---
st.write("Melakukan object detection dengan YOLO dan crop region...")
yolo_results = yolo_model.predict(source=img_path, conf=0.5)

img_height, img_width = img.shape[:2]
crop_images = []
boxes = yolo_results[0].boxes
for i, box in enumerate(boxes):
    x1, y1, x2, y2 = box.xyxy[0].cpu().numpy().astype(int)
    # Clamp to the image: a negative start would otherwise be read as a
    # "from the end" slice index and select the wrong region.
    x1, y1 = max(x1, 0), max(y1, 0)
    x2, y2 = min(x2, img_width), min(y2, img_height)
    if x2 <= x1 or y2 <= y1:
        # Degenerate box: the crop would be empty and cannot be written to disk.
        continue
    cropped = img[y1:y2, x1:x2]
    crop_filename = f"crop_{i}.jpg"
    cv2.imwrite(crop_filename, cropped)
    crop_images.append((crop_filename, cropped))

st.success("Proses crop bounding box selesai!")
st.write("Jumlah crop yang ditemukan:", len(crop_images))
for crop_filename, cropped in crop_images:
    st.image(cv2.cvtColor(cropped, cv2.COLOR_BGR2RGB), caption=f"Crop: {crop_filename}", use_column_width=True)
# --- STEP 3: OCR on the full image ---
st.write("Melakukan OCR pada gambar penuh dengan PaddleOCR...")
start_time = time.perf_counter()
ocr_result = ocr_model.ocr(img_path, cls=True)
ocr_time = time.perf_counter() - start_time
st.write(f"Waktu pemrosesan OCR: {ocr_time:.2f} detik")

# Truthiness covers both an empty list and a None entry for the page
# (calling len() on None would raise TypeError).
if not ocr_result or not ocr_result[0]:
    st.error("OCR tidak menemukan teks pada gambar!")
    st.stop()

# Flatten each OCR line into a dict carrying its text, box and geometry.
ocr_data = ocr_result[0]
ocr_list = []
for box, (text, score) in ocr_data:
    xs = [pt[0] for pt in box]
    ys = [pt[1] for pt in box]
    ocr_list.append({
        "text": text,
        "box": box,
        "score": score,
        "center_x": sum(xs) / len(xs),
        "center_y": sum(ys) / len(ys),
        "height": max(ys) - min(ys),
    })

# Order the lines top-to-bottom.
ocr_list.sort(key=lambda entry: entry["center_y"])
# Extract key-value pairs. Only sugar ("gula"), serving size ("takaran saji")
# and saturated fat ("lemak jenuh") are looked for.
target_keys = {
    "gula": ["gula"],
    "takaran saji": ["takaran saji", "serving size"],
    "lemak jenuh": ["lemak jenuh"],
}


def _first_number(raw_text):
    """Return the first numeric token of *raw_text* as a dot-decimal string, or ""."""
    match = re.search(r"-?(?:\d+(?:[.,]\d*)?|[.,]\d+)", raw_text)
    return match.group(0).replace(",", ".") if match else ""


def _nearest_right_neighbour(key_item, items):
    """Return the item closest to the right of *key_item* on the same row, or None.

    "Same row" means the vertical centres differ by less than half the
    height of *key_item*.
    """
    row_tolerance = 0.5 * key_item["height"]
    candidates = [
        other for other in items
        if other is not key_item
        and other["center_x"] > key_item["center_x"]
        and abs(other["center_y"] - key_item["center_y"]) < row_tolerance
    ]
    return min(candidates, key=lambda other: other["center_x"], default=None)


def _extract_key_values(items, keys):
    """Map each capitalized canonical key to the numeric string found for it.

    Pass 1 reads "key: value" texts. Pass 2 handles the keys still missing
    by taking the nearest text to the right of the key on the same row.
    A key that already has a value is never overwritten.
    """
    found = {}

    # Pass 1: texts containing a colon.
    for item in items:
        lowered = item["text"].lower()
        if ":" not in lowered:
            continue
        parts = lowered.split(":")
        key_candidate = parts[0].strip()
        value_candidate = parts[-1].strip()
        for canonical, variants in keys.items():
            label = canonical.capitalize()
            # Results are stored under the capitalized label, so the
            # "already found" check must use that same label.
            if label in found:
                continue
            if any(variant in key_candidate for variant in variants):
                value = _first_number(value_candidate)
                if value:
                    found[label] = value

    # Pass 2: positional fallback for keys that pass 1 did not fill.
    for item in items:
        lowered = item["text"].lower()
        for canonical, variants in keys.items():
            label = canonical.capitalize()
            if label in found:
                continue
            if not any(variant in lowered for variant in variants):
                continue
            neighbour = _nearest_right_neighbour(item, items)
            if neighbour is None:
                continue
            value = _first_number(neighbour["text"])
            if value:
                found[label] = value

    return found


extracted = _extract_key_values(ocr_list, target_keys)

if extracted:
    st.write("**Hasil Ekstraksi Key-Value:**")
    for k, v in extracted.items():
        st.write(f"{k}: {v}")
else:
    st.warning("Tidak ditemukan pasangan key-value yang cocok.")
# Show the OCR result with bounding boxes for reference.
boxes_ocr = [entry["box"] for entry in ocr_list]
texts_ocr = [entry["text"] for entry in ocr_list]
scores_ocr = [entry["score"] for entry in ocr_list]
try:
    # The context manager closes the image file handle once drawing is done.
    with Image.open(img_path) as source_image:
        annotated = draw_ocr(
            source_image.convert("RGB"),
            boxes_ocr,
            texts_ocr,
            scores_ocr,
            font_path="simfang.ttf",
        )
except OSError as exc:
    # A missing font or image file should not abort the run: this picture
    # is only a visual aid, the extracted values do not depend on it.
    st.warning(f"Visualisasi OCR dilewati: {exc}")
else:
    im_show = Image.fromarray(annotated)
    st.image(im_show, caption="Hasil OCR dengan Bounding Boxes", use_column_width=True)
# --- Manual correction via st.form ---
with st.form("correction_form"):
    st.write("Silakan koreksi nilai jika diperlukan (hanya angka, tanpa satuan):")
    corrected_data = {}
    for key in target_keys:
        key_cap = key.capitalize()
        # Pre-fill with the extracted number; leave the field blank when
        # nothing was extracted for this key.
        if key_cap in extracted:
            current_val = str(parse_numeric_value(extracted[key_cap]))
        else:
            current_val = ""
        corrected_data[key_cap] = st.text_input(key_cap, value=current_val)
    submit_button = st.form_submit_button("Hitung")
if not submit_button:
    # Results are only computed after the form has been submitted.
    st.stop()

try:
    serving_size = parse_numeric_value(corrected_data.get("Takaran saji", "100"))
except (TypeError, ValueError):
    serving_size = 0.0

# Read the nutrient values (sugar and saturated fat only).
sugar_value = parse_numeric_value(corrected_data.get("Gula", "0"))
fat_value = parse_numeric_value(corrected_data.get("Lemak jenuh", "0"))

if serving_size > 0:
    # Scale the per-serving amounts to a 100 g/ml basis.
    sugar_norm = (sugar_value / serving_size) * 100
    fat_norm = (fat_value / serving_size) * 100
else:
    st.error("Takaran saji tidak valid untuk normalisasi.")
    # Best effort: fall back to the raw, un-normalized values.
    sugar_norm, fat_norm = sugar_value, fat_value

st.write("**Tabel Hasil Normalisasi per 100 g/ml**")
data_tabel = {
    "Nutrisi": ["Gula", "Lemak jenuh"],
    "Nilai (per 100 g/ml)": [sugar_norm, fat_norm],
}
df_tabel = pd.DataFrame(data_tabel)
st.table(df_tabel)
# Compute a grade from a value and a set of thresholds.
def grade_from_value(value, thresholds):
    """Return the grade label for *value*.

    Args:
        value: The amount to grade.
        thresholds: Mapping with keys ``"A"``, ``"B"`` and ``"C"`` holding
            the inclusive upper bound of each grade.

    Returns:
        ``"Grade A"``, ``"Grade B"`` or ``"Grade C"`` for the first bound
        that *value* does not exceed, otherwise ``"Grade D"``.
    """
    for letter in ("A", "B", "C"):
        if value <= thresholds[letter]:
            return f"Grade {letter}"
    return "Grade D"
# Thresholds following the Nutri-Grade guide (g/100ml).
thresholds_sugar = {"A": 1.0, "B": 5.0, "C": 10.0}
thresholds_fat = {"A": 0.7, "B": 1.2, "C": 2.8}
sugar_grade = grade_from_value(sugar_norm, thresholds_sugar)
fat_grade = grade_from_value(fat_norm, thresholds_fat)

# The final grade is the worse of the two, i.e. the one with the higher score.
grade_scores = {"Grade A": 1, "Grade B": 2, "Grade C": 3, "Grade D": 4}
worst_score = max(grade_scores[sugar_grade], grade_scores[fat_grade])
inverse_scores = {score: label for label, score in grade_scores.items()}
final_grade = inverse_scores[worst_score]

st.write(f"**Grade Gula:** {sugar_grade}")
st.write(f"**Grade Lemak Jenuh:** {fat_grade}")
st.write(f"**Grade Akhir:** {final_grade}")
def color_grade(grade_text):
    """Return an HTML ``<div>`` badge showing *grade_text* on a coloured background.

    Args:
        grade_text: The grade label to display.

    Returns:
        An HTML snippet. "Grade A", "Grade B" and "Grade C" each have their
        own background colour; any other text gets the "Grade D" red.
    """
    palette = {
        "Grade A": "#2ecc71",
        "Grade B": "#f1c40f",
        "Grade C": "#e67e22",
    }
    bg_color = palette.get(grade_text, "#e74c3c")
    return f"""
    <div style="
        background-color: {bg_color};
        padding: 10px;
        border-radius: 5px;
        margin-top: 10px;
        font-weight: bold;
        color: white;
        text-align: center;
    ">
        {grade_text}
    </div>
    """
# Render the final grade as a coloured badge (raw HTML must be explicitly allowed).
st.markdown(color_grade(final_grade), unsafe_allow_html=True)