TBMOPS_GENAI / app.py
VishalD1234's picture
Update app.py
f325eee verified
raw
history blame
9.83 kB
import gradio as gr
import io
import numpy as np
import torch
from decord import cpu, VideoReader, bridge
from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
# Hugging Face model identifier handed to the `from_pretrained` loaders.
MODEL_PATH = "THUDM/cogvlm2-llama3-caption"

# Target device string: CUDA when torch reports it as available, else CPU.
DEVICE = "cpu"
if torch.cuda.is_available():
    DEVICE = "cuda"

# Compute dtype: bfloat16 only when CUDA is available and the device's
# compute-capability major version is at least 8; float16 otherwise.
TORCH_TYPE = torch.float16
if torch.cuda.is_available() and torch.cuda.get_device_capability()[0] >= 8:
    TORCH_TYPE = torch.bfloat16
# Candidate delay causes offered to the model, keyed by the step label shown
# in the UI dropdown. Insertion order is the order the dropdown lists them.
DELAY_REASONS = {
    "Step 1": [
        "Delay in Bead Insertion",
        "Lack of raw material",
    ],
    "Step 2": [
        "Inner Liner Adjustment by Technician",
        "Person rebuilding defective Tire Sections",
    ],
    "Step 3": [
        "Manual Adjustment in Ply1 apply",
        "Technician repairing defective Tire Sections",
    ],
    "Step 4": [
        "Delay in Bead set",
        "Lack of raw material",
    ],
    "Step 5": [
        "Delay in Turnup",
        "Lack of raw material",
    ],
    "Step 6": [
        "Person Repairing sidewall",
        "Person rebuilding defective Tire Sections",
    ],
    "Step 7": [
        "Delay in sidewall stitching",
        "Lack of raw material",
    ],
    "Step 8": [
        "No person available to load Carcass",
        "No person available to collect tire",
    ],
}
def load_video(video_data, strategy='chat'):
    """Decode a video and sample about one frame per second for the model.

    For each whole second of the video (starting at 0) the frame whose
    timestamp is closest to that second is selected, up to 24 frames.

    Args:
        video_data: Either a filesystem path (``str``) or the raw encoded
            video bytes.
        strategy: Accepted for interface compatibility; it is currently not
            consulted — the per-second sampling above is always used.

    Returns:
        The batch returned by ``VideoReader.get_batch`` for the selected
        frames after ``permute(3, 0, 1, 2)`` (presumably channels-first
        ``(C, T, H, W)`` — TODO confirm against decord's batch layout).

    Raises:
        ValueError: If the decoded video contains no frames.
    """
    bridge.set_bridge('torch')
    num_frames = 24  # upper bound on the number of sampled frames

    # A string is treated as a path; anything else is wrapped as a byte stream.
    source = video_data if isinstance(video_data, str) else io.BytesIO(video_data)
    decord_vr = VideoReader(source, ctx=cpu(0))

    total_frames = len(decord_vr)
    if total_frames == 0:
        # Fail with a clear message instead of an opaque error from max().
        raise ValueError("Video contains no frames.")

    # Column 0 of every timestamp entry is used as that frame's time.
    frame_times = np.asarray(
        decord_vr.get_frame_timestamp(np.arange(total_frames))
    )[:, 0]
    max_second = round(float(frame_times.max())) + 1

    # argmin returns the first closest frame, matching a linear
    # "min then index" search, without a Python-level scan per second.
    frame_id_list = [
        int(np.abs(frame_times - second).argmin())
        for second in range(min(max_second, num_frames))
    ]

    frames = decord_vr.get_batch(frame_id_list)
    return frames.permute(3, 0, 1, 2)
def load_model():
    """Build the 4-bit quantized model and its tokenizer from ``MODEL_PATH``.

    Returns:
        tuple: ``(model, tokenizer)`` where ``model`` is the result of
        calling ``.eval()`` on the loaded causal-LM.
    """
    # NF4 4-bit quantization with double quantization; compute runs in TORCH_TYPE.
    bnb_settings = {
        "load_in_4bit": True,
        "bnb_4bit_compute_dtype": TORCH_TYPE,
        "bnb_4bit_use_double_quant": True,
        "bnb_4bit_quant_type": "nf4",
    }
    quantization_config = BitsAndBytesConfig(**bnb_settings)

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH, trust_remote_code=True)

    loaded = AutoModelForCausalLM.from_pretrained(
        MODEL_PATH,
        torch_dtype=TORCH_TYPE,
        trust_remote_code=True,
        quantization_config=quantization_config,
        device_map="auto",
    )
    model = loaded.eval()
    return model, tokenizer
def predict(prompt, video_data, temperature, model, tokenizer):
    """Run the model on a video plus text prompt and return the decoded reply.

    Args:
        prompt: Text query passed to ``model.build_conversation_input_ids``.
        video_data: Video path or bytes, forwarded to ``load_video``.
        temperature: Value forwarded to ``model.generate`` as ``temperature``.
        model: Model exposing ``build_conversation_input_ids`` and ``generate``.
        tokenizer: Tokenizer used to build inputs and decode the output.

    Returns:
        str: The decoded text of the newly generated tokens only.
    """
    frames = load_video(video_data, strategy='chat')
    encoded = model.build_conversation_input_ids(
        tokenizer=tokenizer,
        query=prompt,
        images=[frames],
        history=[],
        template_version='chat',
    )

    # Add a leading batch dimension to each text tensor and move it to DEVICE.
    model_inputs = {
        key: encoded[key].unsqueeze(0).to(DEVICE)
        for key in ('input_ids', 'token_type_ids', 'attention_mask')
    }
    # The video tensor is additionally cast to the compute dtype.
    model_inputs['images'] = [[encoded['images'][0].to(DEVICE).to(TORCH_TYPE)]]

    # NOTE(review): do_sample is False while top_k/top_p/temperature are also
    # passed; presumably the sampling knobs have no effect in that mode —
    # confirm against the transformers generation documentation.
    generation_options = dict(
        max_new_tokens=2048,
        pad_token_id=128002,
        top_k=1,
        do_sample=False,
        top_p=0.1,
        temperature=temperature,
    )

    with torch.no_grad():
        generated = model.generate(**model_inputs, **generation_options)
        # Drop the prompt tokens so only the continuation is decoded.
        prompt_length = model_inputs['input_ids'].shape[1]
        completion = generated[:, prompt_length:]
        return tokenizer.decode(completion[0], skip_special_tokens=True)
def get_analysis_prompt(step_number, possible_reasons):
    """
    Constructs the prompt for analyzing delay reasons based on the selected step.

    Args:
        step_number (int | str): The manufacturing step being analyzed. Either a
            bare number (``2``) or a label such as ``"Step 2"``; a leading
            ``"Step "`` prefix is removed so the prompt does not read
            "Step Step 2".
        possible_reasons (iterable): The possible delay reasons for this step;
            each entry is converted with ``str()`` before being joined.

    Returns:
        str: A detailed analysis prompt tailored to the given step and reasons.
    """
    step_label = str(step_number).strip()
    prefix, _, remainder = step_label.partition(" ")
    # The template already supplies the word "Step", so strip it from labels
    # like "Step 2" (only when something actually follows the prefix).
    if prefix.lower() == "step" and remainder.strip():
        step_label = remainder.strip()

    reasons_text = ', '.join(str(reason) for reason in possible_reasons)

    return f"""
You are a highly advanced AI expert system specialized in analyzing manufacturing processes to diagnose production delays. Your task is to review video footage from Step {step_label} of a tire manufacturing process, where a delay has been identified. Based on visual evidence, your objective is to determine the most accurate reason for the delay from the provided options.
### Task Context:
- Manufacturing Step: {step_label}
- Delay Detected: Yes
- Possible Reasons for Delay: {reasons_text}
### Required Analysis:
Carefully observe the video footage frame by frame, focusing on the following key areas to determine the cause of the delay:
1. **Technician Presence and Activity:**
- Verify whether a technician or worker is visible in the footage.
- If no technician is observed, the delay may be due to their absence.
- If a technician is visible, analyze their actions:
- If they are collecting or loading a carcass, ensure the process is efficient and within standard time limits.
- If they are observed repairing the inner liner or sidewall, this indicates material or application issues as the cause of the delay.
2. **Material or Process Anomalies:**
- Look for visible defects, such as misaligned layers, improperly applied materials, or damaged components.
- Check for any signs of manual intervention, such as a technician adjusting or repatching layers.
- Identify issues with the machine operation, such as pauses, misfeeds, or alignment problems.
3. **Equipment Functionality:**
- Detect if machinery is operating below standard speed, stopping unexpectedly, or failing to perform its task (e.g., applying materials, stitching).
4. **Process-Specific Observations:**
- Determine if the technician is waiting for materials, which may indicate supply chain interruptions.
- Check for excessive manual handling, which could signal inadequate automation or equipment failure.
### Output Requirements:
Provide your analysis in the following structured format:
1. **Selected Reason**: [Clearly state the most likely reason for the delay from the given options.]
2. **Visual Evidence**: [Detail specific observations that support your conclusion, such as technician actions, material issues, or machine behavior.]
3. **Reasoning**: [Explain why this reason is most consistent with the evidence. Include links between specific observations and the identified delay reason.]
4. **Alternative Analysis**: [Summarize why other possible reasons are less likely, referencing specific observations or lack of evidence.]
5. **Recommendations**: [Provide actionable suggestions to resolve the delay cause, such as adjustments to machinery, improved training, or material quality checks.]
### Important Considerations:
- Base your analysis strictly on observable evidence from the video.
- Do not make assumptions not supported by visual data.
- If the evidence is inconclusive, state this explicitly and suggest further investigative actions.
Note: Pay particular attention to technician interactions with the inner liner repairing, sidewall repairing, and carcass handling, as these are critical indicators of delay causes.
"""
# Load the model and tokenizer once, at import time; `inference` reads these
# module-level globals on every request instead of reloading them.
model, tokenizer = load_model()
def inference(video, step_number):
    """Analyze a video and report the most likely delay cause for a step.

    Args:
        video: The uploaded video as provided by the UI; any falsy value
            (``None``, empty string) is treated as "no video uploaded".
        step_number: A key of ``DELAY_REASONS`` (e.g. ``"Step 2"``).

    Returns:
        str: The model's analysis, or a human-readable message when the
        input is incomplete or the analysis fails. This function does not
        raise for model errors; they are reported in the returned text so
        the UI always has something to display.
    """
    if not video:
        return "Please upload a video first."

    # Validate the step explicitly: an unselected dropdown would otherwise
    # surface as the unhelpful "An error occurred during analysis: None".
    possible_reasons = DELAY_REASONS.get(step_number)
    if possible_reasons is None:
        return "Please select a manufacturing step."

    prompt = get_analysis_prompt(step_number, possible_reasons)
    temperature = 0.8
    try:
        return predict(prompt, video, temperature, model, tokenizer)
    except Exception as e:
        # UI boundary: report the failure as text instead of crashing the app.
        return f"An error occurred during analysis: {str(e)}"
def create_interface():
    """Assemble the Gradio Blocks UI for the delay-analysis app.

    The layout is a header, a row with the inputs (video upload, step
    dropdown, analyze button) next to the result textbox, followed by a set
    of clickable examples. Clicking the button calls ``inference``.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    header_markdown = (
        "\n"
        "# Manufacturing Delay Analysis System\n"
        "Upload a video of the manufacturing step and select the step number.\n"
        "The system will analyze the video and determine the most likely cause of delay.\n"
    )

    # Example (video file, step label) pairs offered below the main row.
    example_rows = [
        ["7838_step2_2_eval.mp4", "Step 2"],
        ["7838_step6_2_eval.mp4", "Step 6"],
        ["7838_step8_1_eval.mp4", "Step 8"],
        ["7993_step6_3_eval.mp4", "Step 6"],
        ["7993_step8_3_eval.mp4", "Step 8"],
    ]

    with gr.Blocks() as demo:
        gr.Markdown(header_markdown)

        with gr.Row():
            with gr.Column():
                video_input = gr.Video(
                    label="Upload Manufacturing Video",
                    sources=["upload"],
                )
                step_input = gr.Dropdown(
                    choices=list(DELAY_REASONS),
                    label="Manufacturing Step",
                )
                run_button = gr.Button("Analyze Delay", variant="primary")
            with gr.Column():
                result_box = gr.Textbox(label="Analysis Result", lines=10)

        gr.Examples(
            examples=example_rows,
            inputs=[video_input, step_input],
            cache_examples=False,
        )

        run_button.click(
            fn=inference,
            inputs=[video_input, step_input],
            outputs=[result_box],
        )

    return demo
if __name__ == "__main__":
    # Build the UI, enable request queuing, then start the server with
    # share=True.
    demo = create_interface()
    queued_demo = demo.queue()
    queued_demo.launch(share=True)