changed shape
Browse files
app.py
CHANGED
@@ -11,7 +11,6 @@ from tensorflow.keras.preprocessing.sequence import pad_sequences
|
|
# Feature extractor: MobileNetV2 pretrained on ImageNet with the
# classification head removed (include_top=False); pooling='avg'
# collapses the spatial feature map into one pooled vector per image.
mobilenet_model = MobileNetV2(weights="imagenet", include_top=False, pooling='avg')
# Re-wrapping in Model() keeps the same inputs/outputs — effectively a
# no-op re-export, preserved as written.
mobilenet_model = Model(inputs=mobilenet_model.inputs, outputs=mobilenet_model.output)

# Load the trained captioning model
# NOTE(review): model_9.h5 must accept [image_features, sequence] inputs —
# verify its expected image-feature width matches this extractor's output.
model = tf.keras.models.load_model("model_9.h5")
|
17 |
|
@@ -33,23 +32,28 @@ def get_word_from_index(index, tokenizer):
|
|
33 |
|
34 |
# Preprocess image and extract features
def preprocess_image(image):
    """Encode a PIL image as a MobileNetV2 feature vector.

    The image is resized to the 224x224 input size the backbone expects,
    turned into a single-image batch, normalised with Keras'
    preprocess_input, and pushed through the feature extractor.
    """
    resized = image.resize((224, 224))
    batch = np.expand_dims(np.array(resized), axis=0)
    normalised = preprocess_input(batch)
    return mobilenet_model.predict(normalised, verbose=0)
|
41 |
|
42 |
# Generate caption from the image features
|
43 |
def generate_caption(image):
|
|
|
44 |
image_features = preprocess_image(image)
|
45 |
|
|
|
|
|
|
|
46 |
caption = start_token
|
47 |
for _ in range(max_caption_length):
|
48 |
-
sequence = tokenizer.texts_to_sequences([caption])[0]
|
49 |
-
sequence = pad_sequences([sequence], maxlen=max_caption_length)
|
50 |
|
|
|
51 |
yhat = model.predict([image_features, sequence], verbose=0)
|
52 |
-
predicted_index = np.argmax(yhat)
|
53 |
predicted_word = get_word_from_index(predicted_index, tokenizer)
|
54 |
|
55 |
# If no valid word or end token is predicted, stop generation
|
|
|
# Feature extractor: MobileNetV2 pretrained on ImageNet, headless
# (include_top=False) with global average pooling so each image maps to
# a single pooled feature vector.
mobilenet_model = MobileNetV2(weights="imagenet", include_top=False, pooling='avg')
# Wrapping in Model() re-exports the same inputs/outputs unchanged;
# kept as-is to preserve the original graph construction.
mobilenet_model = Model(inputs=mobilenet_model.inputs, outputs=mobilenet_model.output)

# Load the trained captioning model
# NOTE(review): the caption model is fed [image_features, sequence] —
# confirm the feature width it was trained with matches this extractor.
model = tf.keras.models.load_model("model_9.h5")
|
16 |
|
|
|
32 |
|
33 |
# Preprocess image and extract features
def preprocess_image(image):
    """Encode a PIL image as a MobileNetV2 feature vector.

    Resizes to the 224x224 input MobileNetV2 expects, adds a batch
    dimension, applies the backbone's own preprocess_input
    normalisation, and returns the extracted features.
    """
    # Force 3-channel RGB first: grayscale, RGBA, or palette uploads
    # would otherwise produce an array with the wrong channel count and
    # crash the (224, 224, 3) input contract of MobileNetV2.
    image = image.convert("RGB")
    image = image.resize((224, 224))  # Resize image to 224x224 for MobileNetV2
    image_array = np.array(image)
    image_array = np.expand_dims(image_array, axis=0)  # Add batch dimension
    image_array = preprocess_input(image_array)  # Normalize image for MobileNetV2
    return mobilenet_model.predict(image_array, verbose=0)  # Extract features
|
40 |
|
41 |
# Generate caption from the image features
|
42 |
def generate_caption(image):
|
43 |
+
# Extract image features using MobileNetV2
|
44 |
image_features = preprocess_image(image)
|
45 |
|
46 |
+
# Reshape to match the expected input shape for the captioning model (1, 2048)
|
47 |
+
image_features = image_features.reshape((1, 2048))
|
48 |
+
|
49 |
caption = start_token
|
50 |
for _ in range(max_caption_length):
|
51 |
+
sequence = tokenizer.texts_to_sequences([caption])[0] # Convert caption to sequence
|
52 |
+
sequence = pad_sequences([sequence], maxlen=max_caption_length) # Pad sequence
|
53 |
|
54 |
+
# Predict the next word in the sequence
|
55 |
yhat = model.predict([image_features, sequence], verbose=0)
|
56 |
+
predicted_index = np.argmax(yhat) # Get the index of the predicted word
|
57 |
predicted_word = get_word_from_index(predicted_index, tokenizer)
|
58 |
|
59 |
# If no valid word or end token is predicted, stop generation
|