Upload 2 files
llm.py
ADDED
@@ -0,0 +1,126 @@
from typing import List

from transformers import pipeline


class StyleSavvy:
    def __init__(
        self,
        model_name: str = "google/flan-t5-large",
        device: int = -1,  # -1 = CPU, or a GPU index
        max_length: int = 150,
    ):
        # A local instruction-tuned T5 model
        self.pipe = pipeline(
            "text2text-generation",
            model=model_name,
            tokenizer=model_name,
            device=device,
        )
        self.max_length = max_length

    # advise() takes the user's body type, face shape, and occasion and
    # generates style tips accordingly.
    # TODO: add more prompt variants to make the tips more specific
    # to the user's needs.
    def advise(self, items: List[dict], body_type: str, face_shape: str, occasion: str) -> str:
        prompt = (
            f"The user is {body_type}-shaped with a {face_shape} face, "
            f"attending a {occasion}. They are wearing: "
            + ", ".join(i["label"] for i in items)
            + ".\n\nPlease list 5 concise style tips as bullet points:"
        )

        # Generate with supported args only (beam search, no sampling)
        result = self.pipe(
            prompt,
            max_length=self.max_length,
            num_beams=4,
            early_stopping=True,
            do_sample=False,
        )[0]["generated_text"].strip()

        return result


# --- Earlier draft (causal LM, OpenLLaMA-3B-v2), kept commented out for reference ---

# import os
# from typing import List
# from transformers import pipeline, Pipeline

# # Force CPU mode (avoid any MPS/CUDA issues on macOS)
# os.environ["CUDA_VISIBLE_DEVICES"] = ""
# os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "0"

# class StyleSavvy:
#     def __init__(
#         self,
#         model_name: str = "openlm-research/open_llama_3b_v2",
#         device: int = -1,  # -1 = CPU
#         max_new_tokens: int = 100,
#         temperature: float = 0.7,
#         top_p: float = 0.9,
#     ):
#         """
#         Uses OpenLLaMA-3B-v2 (≈3B params) for fast, local inference.
#         """
#         # Set up a causal text-generation pipeline
#         self.pipe: Pipeline = pipeline(
#             "text-generation",
#             model=model_name,
#             tokenizer=model_name,
#             device=device,
#         )
#         # GPT-style models need a pad token to avoid warnings
#         if self.pipe.tokenizer.pad_token_id is None:
#             self.pipe.tokenizer.pad_token = self.pipe.tokenizer.eos_token

#         self.max_new_tokens = max_new_tokens
#         self.temperature = temperature
#         self.top_p = top_p

#     def advise(
#         self,
#         items: List[str],
#         body_type: str,
#         face_shape: str,
#         occasion: str,
#     ) -> List[str]:
#         """
#         Builds a strict instruction prompt and returns exactly five "- " bullets.
#         """
#         labels = ", ".join(items) if items else "an outfit"
#         prompt = (
#             "You are a professional fashion consultant.\n"
#             f"The user is {body_type}-shaped with a {face_shape} face, attending {occasion}.\n"
#             f"They are wearing: {labels}.\n\n"
#             "Please provide exactly five concise style tips, each on its own line, "
#             "and starting with \"- \". No extra text."
#         )

#         # Generate
#         output = self.pipe(
#             prompt,
#             max_new_tokens=self.max_new_tokens,
#             do_sample=True,
#             temperature=self.temperature,
#             top_p=self.top_p,
#             return_full_text=False,
#         )[0]["generated_text"]

#         # Extract bullets
#         tips = [ln.strip() for ln in output.splitlines() if ln.strip().startswith("- ")]
#         # Fallback: split on sentences if fewer than 5 bullets
#         if len(tips) < 5:
#             candidates = [s.strip() for s in output.replace("\n", " ").split(".") if s.strip()]
#             tips = [f"- {candidates[i]}" for i in range(min(5, len(candidates)))]

#         return tips[:5]
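A minimal usage sketch of the active class, assuming items shaped like vision.py's detect() output (dicts with a "label" key); the garment labels, body type, face shape, and occasion below are made-up placeholders:

from llm import StyleSavvy

advisor = StyleSavvy()  # loads google/flan-t5-large; the first run downloads the weights
items = [{"label": "shirt, blouse"}, {"label": "skirt"}]  # placeholder detections
tips = advisor.advise(items, body_type="pear", face_shape="oval", occasion="wedding")
print(tips)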
vision.py
ADDED
@@ -0,0 +1,50 @@
# models/vision.py -- working

from typing import List

from PIL import Image
from transformers import pipeline


class VisionModel:
    def __init__(
        self,
        model_name: str = "valentinafeve/yolos-fashionpedia",
        threshold: float = 0.7,
    ):
        self.pipe = pipeline("object-detection", model=model_name)
        self.threshold = threshold

    def detect(self, image: Image.Image) -> List[dict]:
        # 1) Ensure RGB
        if image.mode != "RGB":
            image = image.convert("RGB")

        # 2) Run detection
        results = self.pipe(image)

        # 3) Process & filter
        processed = []
        for r in results:
            score = float(r["score"])
            if score < self.threshold:
                continue

            # r["box"] is a dict: {"xmin": ..., "ymin": ..., "xmax": ..., "ymax": ...}
            box = r["box"]
            coords = [
                float(box["xmin"]),
                float(box["ymin"]),
                float(box["xmax"]),
                float(box["ymax"]),
            ]

            processed.append({
                "label": r["label"],
                "score": score,
                "box": coords,
            })

        return processed
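For context, a sketch of how the two files could be wired together end to end; the image path is a hypothetical placeholder, and both modules are assumed importable from the same directory:

from PIL import Image

from vision import VisionModel
from llm import StyleSavvy

detector = VisionModel()  # YOLOS fine-tuned on Fashionpedia
advisor = StyleSavvy()

image = Image.open("outfit.jpg")  # hypothetical input path
items = detector.detect(image)  # [{"label": ..., "score": ..., "box": [x0, y0, x1, y1]}, ...]
tips = advisor.advise(items, body_type="athletic", face_shape="round", occasion="job interview")
print(tips)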