summaryrefslogtreecommitdiff
path: root/cv/holiday_similarity/generate_image_features.py
diff options
context:
space:
mode:
authorzhang <zch921005@126.com>2020-08-08 20:21:47 +0800
committerzhang <zch921005@126.com>2020-08-08 20:21:47 +0800
commit2816f0ecda446dbd902bfab4a13d7bc95b0a5d33 (patch)
tree0aac7bda9692de91327231fa58a4540126548d3d /cv/holiday_similarity/generate_image_features.py
parent8ebc34e31433d73d630d1431acd80ce2e922395b (diff)
holiday similarity update
Diffstat (limited to 'cv/holiday_similarity/generate_image_features.py')
-rw-r--r--cv/holiday_similarity/generate_image_features.py65
1 files changed, 65 insertions, 0 deletions
diff --git a/cv/holiday_similarity/generate_image_features.py b/cv/holiday_similarity/generate_image_features.py
new file mode 100644
index 0000000..6c26995
--- /dev/null
+++ b/cv/holiday_similarity/generate_image_features.py
@@ -0,0 +1,65 @@
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+from PIL import Image
+from keras.applications import imagenet_utils
+from keras.applications import resnet50
+from keras.models import Model
+
# Directory scanned for input images. The built-in default is the original
# author's local path; set the HOLIDAYS_IMAGE_DIR environment variable to
# point the script at a different copy of the dataset.
# NOTE(review): the space before '/jpg' is kept exactly as committed --
# confirm it matches the directory name on disk.
IMAGE_DIR = os.environ.get(
    'HOLIDAYS_IMAGE_DIR',
    '/Users/chunhuizhang/workspaces/00_datasets/images/INRIA Holidays dataset /jpg',
)
# Directory that receives the generated vector files.
DATA_DIR = './data'
+
+
def image_batch_generator(image_names, batch_size):
    """Yield consecutive slices of ``image_names`` holding at most ``batch_size`` items.

    Every yielded batch is non-empty; only the last one may be shorter than
    ``batch_size``. An empty ``image_names`` yields nothing.

    Args:
        image_names: Sliceable sequence, e.g. a list of file names.
        batch_size: Maximum number of items per batch; must be >= 1.

    Yields:
        Slices of ``image_names`` in their original order.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1 (raised when
            iteration starts, because this is a generator).
    """
    if batch_size < 1:
        raise ValueError("batch_size must be >= 1, got {!r}".format(batch_size))
    # Stepping by batch_size covers the partial tail batch without ever
    # producing an empty one, and also works when there are fewer items than
    # batch_size (i.e. zero full batches).
    for start in range(0, len(image_names), batch_size):
        yield image_names[start:start + batch_size]
+
+
def vectorize_images(image_dir, image_size, preprocessor,
                     model, vector_file, batch_size=32):
    """Write one feature vector per file in ``image_dir`` to a text file.

    Each file is opened as an image, converted to RGB, resized to
    ``image_size`` x ``image_size``, stacked into a float32 batch, passed
    through ``preprocessor`` and then through ``model.predict``. Every output
    line holds the file name, a tab, and the vector values formatted with
    ``'{:.5e}'`` and joined by commas.

    Args:
        image_dir: Directory whose entries are all treated as images.
        image_size: Edge length, in pixels, that images are resized to.
        preprocessor: Callable applied to the float32 batch array before
            prediction.
        model: Object exposing ``predict(batch)`` that returns one vector per
            input image.
        vector_file: Path of the output file; it is overwritten.
        batch_size: Number of images sent to ``model.predict`` at once.
    """
    # os.listdir returns entries in arbitrary order; sorting makes the output
    # file reproducible between runs.
    image_names = sorted(os.listdir(image_dir))
    total = len(image_names)

    # Create the output directory up front so a fresh checkout does not fail
    # on open() below.
    out_dir = os.path.dirname(vector_file)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    num_vecs = 0
    # The context manager closes the file even if loading or prediction raises.
    with open(vector_file, "w", encoding="utf-8") as fvec:
        for image_batch in image_batch_generator(image_names, batch_size):
            if not image_batch:
                # Nothing to predict for an empty batch.
                continue
            batched_images = []
            for image_name in image_batch:
                with Image.open(os.path.join(image_dir, image_name)) as image:
                    # Forcing RGB gives every array the same (H, W, 3) shape,
                    # so grayscale/CMYK/RGBA files can share a batch.
                    rgb = image.convert("RGB")
                    batched_images.append(
                        np.asarray(rgb.resize((image_size, image_size))))
            X = preprocessor(np.array(batched_images, dtype="float32"))
            vectors = model.predict(X)
            for image_name, vector in zip(image_batch, vectors):
                if num_vecs % 100 == 0:
                    # Report progress against the total image count, not the
                    # size of the current batch.
                    print("{:d}/{:d} vectors generated".format(num_vecs, total))
                # Flatten in case the model emits extra singleton dimensions;
                # the format spec below only accepts scalars.
                values = np.asarray(vector).ravel().tolist()
                image_vector = ",".join("{:.5e}".format(v) for v in values)
                fvec.write("{:s}\t{:s}\n".format(image_name, image_vector))
                num_vecs += 1
    print("{:d} vectors generated".format(num_vecs))
+
+
def generate_features(model, image_size, vector_file, image_dir=None):
    """Extract "avg_pool" activations for every image and save them.

    Builds a sub-model that maps ``model``'s input to the output of its layer
    named ``"avg_pool"`` and hands it to ``vectorize_images`` together with
    ``imagenet_utils.preprocess_input``.

    Args:
        model: Keras model that contains a layer named ``"avg_pool"``.
        image_size: Edge length, in pixels, that images are resized to.
        vector_file: Path of the output file.
        image_dir: Directory holding the images; defaults to the module-level
            ``IMAGE_DIR`` when omitted.
    """
    if image_dir is None:
        image_dir = IMAGE_DIR
    # `inputs`/`outputs` are the Keras 2 keyword names; the Keras 1 spellings
    # `input`/`output` are not accepted by current releases.
    feature_model = Model(inputs=model.input,
                          outputs=model.get_layer("avg_pool").output)
    preprocessor = imagenet_utils.preprocess_input

    vectorize_images(image_dir, image_size, preprocessor, feature_model,
                     vector_file)
+
+
if __name__ == '__main__':
    # Script entry point: extract ResNet50 features for the whole image
    # directory and store them under DATA_DIR.
    IMAGE_SIZE = 224

    # NOTE: generate_features taps the layer named "avg_pool"; a backbone
    # without such a layer (an earlier VGG16 experiment lived here as
    # commented-out code) needs a different layer lookup first.

    # Make sure the output directory exists before anything is written.
    os.makedirs(DATA_DIR, exist_ok=True)
    VECTOR_FILE = os.path.join(DATA_DIR, "resnet-vectors.tsv")
    resnet_model = resnet50.ResNet50(weights="imagenet", include_top=True)
    generate_features(resnet_model, IMAGE_SIZE, VECTOR_FILE)