diff options
| -rw-r--r-- | cv/holiday_similarity/__init__.py | 0 | ||||
| -rw-r--r-- | cv/holiday_similarity/data_utils.py | 183 | ||||
| -rw-r--r-- | cv/holiday_similarity/eval_utils.py | 24 | ||||
| -rw-r--r-- | cv/holiday_similarity/generate_image_features.py | 65 | ||||
| -rw-r--r-- | cv/holiday_similarity/image_vectors_utils.py | 125 | ||||
| -rw-r--r-- | cv/holiday_similarity/main.py | 28 | ||||
| -rw-r--r-- | cv/holiday_similarity/model_utils.py | 0 | ||||
| -rw-r--r-- | cv/holiday_similarity/prepare_models.py | 107 | ||||
| -rw-r--r-- | cv/holiday_similarity/pretrained-vec-nn-classifier.py | 171 | ||||
| -rw-r--r-- | cv/holiday_similarity/siamese-finetune.py | 98 | ||||
| -rw-r--r-- | cv/holiday_similarity/vis_utils.py | 19 |
11 files changed, 820 insertions, 0 deletions
diff --git a/cv/holiday_similarity/__init__.py b/cv/holiday_similarity/__init__.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/cv/holiday_similarity/__init__.py diff --git a/cv/holiday_similarity/data_utils.py b/cv/holiday_similarity/data_utils.py new file mode 100644 index 0000000..1601706 --- /dev/null +++ b/cv/holiday_similarity/data_utils.py @@ -0,0 +1,183 @@ +import matplotlib.pyplot as plt +import numpy as np +import os +from PIL import Image +import itertools +from random import shuffle +from keras.utils import np_utils +from keras.applications import imagenet_utils +import itertools +import os +from random import shuffle + +import matplotlib.pyplot as plt +import numpy as np +from PIL import Image +from keras.applications import imagenet_utils +from keras.utils import np_utils + +IMAGE_DIR = '/Users/chunhuizhang/workspaces/00_datasets/images/INRIA Holidays dataset /jpg' +# IMAGE_DIR = os.path.join(DATA_DIR, "holiday-photos") + +image_cache = {} + + +def pair_generator(triples, image_cache, datagens, batch_size=32): + while True: + # shuffle once per batch + indices = np.random.permutation(np.arange(len(triples))) + num_batches = len(triples) // batch_size + for bid in range(num_batches): + batch_indices = indices[bid * batch_size: (bid + 1) * batch_size] + batch = [triples[i] for i in batch_indices] + X1 = np.zeros((batch_size, 224, 224, 3)) + X2 = np.zeros((batch_size, 224, 224, 3)) + Y = np.zeros((batch_size, 2)) + for i, (image_filename_l, image_filename_r, label) in enumerate(batch): + if datagens is None or len(datagens) == 0: + X1[i] = image_cache[image_filename_l] + X2[i] = image_cache[image_filename_r] + else: + X1[i] = datagens[0].random_transform(image_cache[image_filename_l]) + X2[i] = datagens[1].random_transform(image_cache[image_filename_r]) + Y[i] = [1, 0] if label == 0 else [0, 1] + yield [X1, X2], Y + + +def image_batch_generator(image_names, batch_size): + num_batches = len(image_names) // batch_size + for i in range(num_batches): + batch = image_names[i * batch_size: (i + 1) * batch_size] + yield batch + batch = image_names[(i + 1) * batch_size:] + yield batch + + +def show_img(sid, img_file, img_title): + plt.subplot(sid) + plt.title(img_title) + plt.xticks([]) + plt.yticks([]) + img = np.asarray(Image.fromarray(plt.imread(img_file)).resize((512, 512))) + plt.imshow(img) + + +def get_random_image(img_groups, group_names, gid): + gname = group_names[gid] + photos = img_groups[gname] + pid = np.random.choice(np.arange(len(photos)), size=1)[0] + pname = photos[pid] + return gname + pname + ".jpg" + + +def create_triples(image_dir): + img_groups = {} + for img_file in os.listdir(image_dir): + prefix, suffix = img_file.split(".") + gid, pid = prefix[0:4], prefix[4:] + if gid in img_groups: + img_groups[gid].append(pid) + else: + img_groups[gid] = [pid] + pos_triples, neg_triples = [], [] + # positive pairs are any combination of images in same group + for key in img_groups.keys(): + triples = [(key + x[0] + ".jpg", key + x[1] + ".jpg", 1) + for x in itertools.combinations(img_groups[key], 2)] + pos_triples.extend(triples) + # need equal number of negative examples + group_names = list(img_groups.keys()) + # pos:neg == 1:1 + for i in range(len(pos_triples)): + g1, g2 = np.random.choice(np.arange(len(group_names)), size=2, replace=False) + left = get_random_image(img_groups, group_names, g1) + right = get_random_image(img_groups, group_names, g2) + neg_triples.append((left, right, 0)) + pos_triples.extend(neg_triples) + shuffle(pos_triples) + return pos_triples + + +def load_image(image_name, imagenet=False): + if image_name not in image_cache: + image = plt.imread(os.path.join(IMAGE_DIR, image_name)).astype(np.float32) + image = image.astype(np.uint8) + image = np.asarray(Image.fromarray(image).resize((224, 224))) + if imagenet: + image = imagenet_utils.preprocess_input(image) + else: + image = np.divide(image, 256) + image_cache[image_name] = image + return image_cache[image_name] + + +def generate_image_triples_batch(image_triples, batch_size, shuffle=False): + while True: + # loop once per epoch + if shuffle: + indices = np.random.permutation(np.arange(len(image_triples))) + else: + indices = np.arange(len(image_triples)) + shuffled_triples = [image_triples[ix] for ix in indices] + num_batches = len(shuffled_triples) // batch_size + for bid in range(num_batches): + # loop once per batch + images_left, images_right, labels = [], [], [] + batch = shuffled_triples[bid * batch_size: (bid + 1) * batch_size] + for i in range(batch_size): + lhs, rhs, label = batch[i] + images_left.append(load_image(lhs, imagenet=True)) + images_right.append(load_image(rhs)) + labels.append(label) + Xlhs = np.array(images_left) + Xrhs = np.array(images_right) + Y = np_utils.to_categorical(np.array(labels), num_classes=2) + yield ([Xlhs, Xrhs], Y) + + +def train_test_split(triples, splits): + assert sum(splits) == 1.0 + split_pts = np.cumsum(np.array([0.] + splits)) + indices = np.random.permutation(np.arange(len(triples))) + shuffled_triples = [triples[i] for i in indices] + data_splits = [] + for sid in range(len(splits)): + start = int(split_pts[sid] * len(triples)) + end = int(split_pts[sid + 1] * len(triples)) + data_splits.append(shuffled_triples[start:end]) + return data_splits + + +def batch_to_vectors(batch, vec_size, vec_dict): + X1 = np.zeros((len(batch), vec_size)) + X2 = np.zeros((len(batch), vec_size)) + Y = np.zeros((len(batch), 2)) + for tid in range(len(batch)): + X1[tid] = vec_dict[batch[tid][0]] + X2[tid] = vec_dict[batch[tid][1]] + Y[tid] = [1, 0] if batch[tid][2] == 0 else [0, 1] + return ([X1, X2], Y) + + +def data_generator(triples, vec_size, vec_dict, batch_size=32): + while True: + # shuffle once per batch + indices = np.random.permutation(np.arange(len(triples))) + num_batches = len(triples) // batch_size + for bid in range(num_batches): + batch_indices = indices[bid * batch_size: (bid + 1) * batch_size] + batch = [triples[i] for i in batch_indices] + yield batch_to_vectors(batch, vec_size, vec_dict) + + +if __name__ == '__main__': + show_img(131, os.path.join(IMAGE_DIR, "115200.jpg"), "original") + show_img(132, os.path.join(IMAGE_DIR, "115201.jpg"), "similar") + show_img(133, os.path.join(IMAGE_DIR, "123700.jpg"), "different") + plt.tight_layout() + plt.show() + + triples_data = create_triples(IMAGE_DIR) + + print("# image triples:", len(triples_data)) + print([x for x in triples_data[0:5]]) diff --git a/cv/holiday_similarity/eval_utils.py b/cv/holiday_similarity/eval_utils.py new file mode 100644 index 0000000..b83125e --- /dev/null +++ b/cv/holiday_similarity/eval_utils.py @@ -0,0 +1,24 @@ +import numpy as np +from sklearn.metrics import accuracy_score, confusion_matrix, classification_report + + +def evaluate_model(model, test_gen, test_triples, batch_size): + # model_name = os.path.basename(model_file) + # model = load_model(model_file) + # print("=== Evaluating model: {:s} ===".format(model_name)) + print("=== Evaluating model") + ytrue, ypred = [], [] + num_test_steps = len(test_triples) // batch_size + for i in range(num_test_steps): + # (X1, X2), Y = test_gen.next() + (X1, X2), Y = next(test_gen) + Y_ = model.predict([X1, X2]) + ytrue.extend(np.argmax(Y, axis=1).tolist()) + ypred.extend(np.argmax(Y_, axis=1).tolist()) + accuracy = accuracy_score(ytrue, ypred) + print("\nAccuracy: {:.3f}".format(accuracy)) + print("\nConfusion Matrix") + print(confusion_matrix(ytrue, ypred)) + print("\nClassification Report") + print(classification_report(ytrue, ypred)) + return accuracy diff --git a/cv/holiday_similarity/generate_image_features.py b/cv/holiday_similarity/generate_image_features.py new file mode 100644 index 0000000..6c26995 --- /dev/null +++ b/cv/holiday_similarity/generate_image_features.py @@ -0,0 +1,65 @@ +import os + +import matplotlib.pyplot as plt +import numpy as np +from PIL import Image +from keras.applications import imagenet_utils +from keras.applications import resnet50 +from keras.models import Model + +IMAGE_DIR = '/Users/chunhuizhang/workspaces/00_datasets/images/INRIA Holidays dataset /jpg' +DATA_DIR = './data' + + +def image_batch_generator(image_names, batch_size): + num_batches = len(image_names) // batch_size + for i in range(num_batches): + batch = image_names[i * batch_size: (i + 1) * batch_size] + yield batch + batch = image_names[(i + 1) * batch_size:] + yield batch + + +def vectorize_images(image_dir, image_size, preprocessor, + model, vector_file, batch_size=32): + image_names = os.listdir(image_dir) + num_vecs = 0 + fvec = open(vector_file, "w") + for image_batch in image_batch_generator(image_names, batch_size): + batched_images = [] + for image_name in image_batch: + image = plt.imread(os.path.join(image_dir, image_name)) + # image = imresize(image, (image_size, image_size)) + image = np.asarray(Image.fromarray(image).resize((image_size, image_size))) + batched_images.append(image) + X = preprocessor(np.array(batched_images, dtype="float32")) + vectors = model.predict(X) + for i in range(vectors.shape[0]): + if num_vecs % 100 == 0: + print("{:d}/{:d} vectors generated".format(num_vecs, vectors.shape[0])) + image_vector = ",".join(["{:.5e}".format(v) for v in vectors[i].tolist()]) + # print(image_batch[i], image_vector) + # print(type(image_batch[i]), type(image_vector)) + fvec.write("{:s}\t{:s}\n".format(image_batch[i], image_vector)) + num_vecs += 1 + print("{:d} vectors generated".format(num_vecs)) + fvec.close() + + +def generate_features(model, image_size, vector_file): + model = Model(input=model.input, + output=model.get_layer("avg_pool").output) + preprocessor = imagenet_utils.preprocess_input + + vectorize_images(IMAGE_DIR, image_size, preprocessor, model, vector_file) + + +if __name__ == '__main__': + IMAGE_SIZE = 224 + # vgg16_model = vgg16.VGG16(weights="imagenet", include_top=True) + # VECTOR_FILE = os.path.join(DATA_DIR, "vgg19-vectors.tsv") + # generate_features(vgg16_model, IMAGE_SIZE, VECTOR_FILE) + + VECTOR_FILE = os.path.join(DATA_DIR, "resnet-vectors.tsv") + resnet_model = resnet50.ResNet50(weights="imagenet", include_top=True) + generate_features(resnet_model, IMAGE_SIZE, VECTOR_FILE) diff --git a/cv/holiday_similarity/image_vectors_utils.py b/cv/holiday_similarity/image_vectors_utils.py new file mode 100644 index 0000000..1883ac0 --- /dev/null +++ b/cv/holiday_similarity/image_vectors_utils.py @@ -0,0 +1,125 @@ +from sklearn.externals import joblib +import itertools +import os + +import numpy as np +from sklearn import model_selection +from sklearn.externals import joblib +from sklearn.metrics import accuracy_score, confusion_matrix, classification_report +from sklearn.model_selection import KFold + +DATA_DIR = './data' +IMAGE_DIR = '/Users/chunhuizhang/workspaces/00_datasets/images/INRIA Holidays dataset /jpg' + + +def get_holiday_triples(image_dir): + image_groups = {} + for image_name in os.listdir(image_dir): + base_name = image_name[0:-4] + group_name = base_name[0:4] + if group_name in image_groups: + image_groups[group_name].append(image_name) + else: + image_groups[group_name] = [image_name] + num_sims = 0 + image_triples = [] + group_list = sorted(list(image_groups.keys())) + for i, g in enumerate(group_list): + if num_sims % 100 == 0: + print("Generated {:d} pos + {:d} neg = {:d} total image triples" + .format(num_sims, num_sims, 2 * num_sims)) + images_in_group = image_groups[g] + sim_pairs_it = itertools.combinations(images_in_group, 2) + # for each similar pair, generate a corresponding different pair + for ref_image, sim_image in sim_pairs_it: + image_triples.append((ref_image, sim_image, 1)) + num_sims += 1 + while True: + j = np.random.randint(low=0, high=len(group_list), size=1)[0] + if j != i: + break + dif_image_candidates = image_groups[group_list[j]] + k = np.random.randint(low=0, high=len(dif_image_candidates), size=1)[0] + dif_image = dif_image_candidates[k] + image_triples.append((ref_image, dif_image, 0)) + print("Generated {:d} pos + {:d} neg = {:d} total image triples" + .format(num_sims, num_sims, 2 * num_sims)) + return image_triples + + +def load_vectors(vector_file): + vec_dict = {} + fvec = open(vector_file, "r") + for line in fvec: + image_name, image_vec = line.strip().split("\t") + vec = np.array([float(v) for v in image_vec.split(",")]) + vec_dict[image_name] = vec + fvec.close() + return vec_dict + + +def preprocess_data(vector_file, train_size=0.7): + xdata, ydata = [], [] + vec_dict = load_vectors(vector_file) + for image_triple in image_triples: + X1 = vec_dict[image_triple[0]] + X2 = vec_dict[image_triple[1]] + # xdata.append(np.multiply(X1, X2) / (np.linalg.norm(X1, 2) * np.linalg.norm(X2, 2))) + # xdata.append(np.power(np.subtract(X1, X2), 2)) + xdata.append(np.abs(np.subtract(X1, X2))) + ydata.append(image_triple[2]) + X, y = np.array(xdata), np.array(ydata) + Xtrain, Xtest, ytrain, ytest = model_selection.train_test_split(X, y, train_size=train_size) + return Xtrain, Xtest, ytrain, ytest + + +def cross_validate(X, y, clf, k=10): + best_score, best_clf = 0.0, None + kfold = KFold(k) + for kid, (train, test) in enumerate(kfold.split(X, y)): + Xtrain, Xtest, ytrain, ytest = X[train], X[test], y[train], y[test] + clf.fit(Xtrain, ytrain) + ytest_ = clf.predict(Xtest) + score = accuracy_score(ytest_, ytest) + print("fold {:d}, score: {:.3f}".format(kid, score)) + if score > best_score: + best_score = score + best_clf = clf + return best_clf, best_score + + +def test_report(clf, Xtest, ytest): + ytest_ = clf.predict(Xtest) + print("\nAccuracy Score: {:.3f}".format(accuracy_score(ytest_, ytest))) + print("\nConfusion Matrix") + print(confusion_matrix(ytest_, ytest)) + print("\nClassification Report") + print(classification_report(ytest_, ytest)) + + +# def get_model_file(data_dir, vec_name, clf_name): +# return os.path.join(data_dir, "models", "{:s}-{:s}-dot.pkl" +# .format(vec_name, clf_name)) + +def get_model_file(data_dir, vector_name, merge_mode, borf): + return os.path.join(data_dir, "models", "{:s}-{:s}-{:s}.h5" + .format(vector_name, merge_mode, borf)) + + +def save_model(model, model_file): + joblib.dump(model, model_file) + +# image_triples = get_holiday_triples(IMAGE_DIR) +# +# NUM_VECTORIZERS = 5 +# NUM_CLASSIFIERS = 4 +# scores = np.zeros((NUM_VECTORIZERS, NUM_CLASSIFIERS)) +# +# VECTOR_FILE = os.path.join(DATA_DIR, "vgg19-vectors.tsv") +# Xtrain, Xtest, ytrain, ytest = preprocess_data(VECTOR_FILE) +# print(Xtrain.shape, Xtest.shape, ytrain.shape, ytest.shape) +# clf = XGBClassifier() +# best_clf, best_score = cross_validate(Xtrain, ytrain, clf) +# scores[0, 2] = best_score +# test_report(best_clf, Xtest, ytest) +# save_model(best_clf, get_model_file(DATA_DIR, "vgg19", "xgb")) diff --git a/cv/holiday_similarity/main.py b/cv/holiday_similarity/main.py new file mode 100644 index 0000000..a292aab --- /dev/null +++ b/cv/holiday_similarity/main.py @@ -0,0 +1,28 @@ +from cv.holiday_similarity import vis_utils +from cv.holiday_similarity.data_utils import * +from cv.holiday_similarity.prepare_models import e2e_pretrained_network + +BATCH_SIZE = 64 + +triples_data = create_triples(IMAGE_DIR) +split_point = int(len(triples_data) * 0.7) +triples_train, triples_test = triples_data[0:split_point], triples_data[split_point:] + +NUM_EPOCHS = 10 + +image_cache = {} +train_gen = generate_image_triples_batch(triples_train, BATCH_SIZE, shuffle=True) +val_gen = generate_image_triples_batch(triples_test, BATCH_SIZE, shuffle=False) + +num_train_steps = len(triples_train) // BATCH_SIZE +num_val_steps = len(triples_test) // BATCH_SIZE + +# nn = e2e_network() +nn = e2e_pretrained_network() +history = nn.fit_generator(train_gen, + steps_per_epoch=num_train_steps, + epochs=NUM_EPOCHS, + validation_data=val_gen, + validation_steps=num_val_steps) + +vis_utils.plot_training_curve(history) diff --git a/cv/holiday_similarity/model_utils.py b/cv/holiday_similarity/model_utils.py new file mode 100644 index 0000000..e69de29 --- /dev/null +++ b/cv/holiday_similarity/model_utils.py diff --git a/cv/holiday_similarity/prepare_models.py b/cv/holiday_similarity/prepare_models.py new file mode 100644 index 0000000..22f4958 --- /dev/null +++ b/cv/holiday_similarity/prepare_models.py @@ -0,0 +1,107 @@ +import keras.backend as K +from keras import layers, models +from keras.applications import vgg16, vgg19, inception_v3 +from keras.models import Sequential +from keras.utils import plot_model + + +def create_base_network(input_shape): + seq = Sequential() + # CONV => RELU => POOL + # 224, ==> ceil(224/1) == 224 + seq.add(layers.Conv2D(20, kernel_size=5, padding="same", input_shape=input_shape)) + seq.add(layers.Activation("relu")) + # 45 ==> ceil((224-(2-1))/2) ==> 112 + seq.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + # CONV => RELU => POOL + seq.add(layers.Conv2D(50, kernel_size=5, padding="same")) + seq.add(layers.Activation("relu")) + # (112-(2-1))/2 ==> 56 + seq.add(layers.MaxPooling2D(pool_size=(2, 2), strides=(2, 2))) + # Flatten => RELU + seq.add(layers.Flatten()) + # (None, 500) + seq.add(layers.Dense(500)) + + return seq + + +def pretrained_network(): + pass + + +def cosine_distance(vecs, normalize=False): + x, y = vecs + if normalize: + x = K.l2_normalize(x, axis=0) + y = K.l2_normalize(y, axis=0) + return K.prod(K.stack([x, y], axis=1), axis=1) + + +def cosine_distance_output_shape(shapes): + return shapes[0] + + +def e2e_network(nn=create_base_network): + input_shape = (224, 224, 3) + base_network = nn(input_shape) + + image_left = layers.Input(shape=input_shape) + image_right = layers.Input(shape=input_shape) + + left_vec = base_network(image_left) + right_vec = base_network(image_right) + + dist = layers.Lambda(cosine_distance, output_shape=cosine_distance_output_shape)([left_vec, right_vec]) + + fc1 = layers.Dense(128, kernel_initializer="glorot_uniform")(dist) + fc1 = layers.Dropout(0.2)(fc1) + fc1 = layers.Activation("relu")(fc1) + + pred = layers.Dense(2, kernel_initializer="glorot_uniform")(fc1) + pred = layers.Activation("softmax")(pred) + + model = models.Model(inputs=[image_left, image_right], outputs=pred) + + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + return model + + +def e2e_pretrained_network(): + vgg16_model = vgg16.VGG16(weights="imagenet", include_top=True) + + # vgg19_model = vgg19.VGG19(weights="imagenet", include_top=True) + inception_model = inception_v3.InceptionV3(weights="imagenet", include_top=True) + + input_shape = (224, 224, 3) + image_left = layers.Input(shape=input_shape) + image_right = layers.Input(shape=input_shape) + + base_model = models.Model(input=vgg16_model.input, output=vgg16_model.get_layer('fc2').output) + left_vec = base_model(image_left) + right_vec = base_model(image_right) + dist = layers.Lambda(cosine_distance, output_shape=cosine_distance_output_shape)([left_vec, right_vec]) + fc1 = layers.Dense(128, kernel_initializer="glorot_uniform")(dist) + fc1 = layers.Dropout(0.2)(fc1) + fc1 = layers.Activation("relu")(fc1) + + pred = layers.Dense(2, kernel_initializer="glorot_uniform")(fc1) + pred = layers.Activation("softmax")(pred) + + model = models.Model(inputs=[image_left, image_right], outputs=pred) + + model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"]) + return model + + +def concat_vectors(vector_size, ): + input_1 = layers.Input(shape=(vector_size,)) + input_2 = layers.Input(shape=(vector_size,)) + layers.merge.Concatenate(input_1) + + +if __name__ == '__main__': + vgg19_model = vgg19.VGG19(weights="imagenet", include_top=True) + inception_model = inception_v3.InceptionV3(weights="imagenet", include_top=True) + model = e2e_network() + plot_model(inception_model, to_file='inception_model.png', show_layer_names=True, show_shapes=True) diff --git a/cv/holiday_similarity/pretrained-vec-nn-classifier.py b/cv/holiday_similarity/pretrained-vec-nn-classifier.py new file mode 100644 index 0000000..e8fff09 --- /dev/null +++ b/cv/holiday_similarity/pretrained-vec-nn-classifier.py @@ -0,0 +1,171 @@ +from keras import backend as K +from keras import layers, models, callbacks, utils + +from cv.holiday_similarity.data_utils import * +from cv.holiday_similarity.image_vectors_utils import * + +DATA_DIR = "./data" +IMAGE_DIR = '/Users/chunhuizhang/workspaces/00_datasets/images/INRIA Holidays dataset /jpg' + +BATCH_SIZE = 32 +NUM_EPOCHS = 10 + + +def e2e_model_concat(vec_size): + input_1 = layers.Input(shape=(vec_size,)) + input_2 = layers.Input(shape=(vec_size,)) + + merged = layers.merge.Concatenate(axis=-1)([input_1, input_2]) + + fc1 = layers.Dense(512, kernel_initializer="glorot_uniform")(merged) + fc1 = layers.Dropout(0.2)(fc1) + fc1 = layers.Activation("relu")(fc1) + + fc2 = layers.Dense(128, kernel_initializer="glorot_uniform")(fc1) + fc2 = layers.Dropout(0.2)(fc2) + fc2 = layers.Activation("relu")(fc2) + + pred = layers.Dense(2, kernel_initializer="glorot_uniform")(fc2) + pred = layers.Activation("softmax")(pred) + model = models.Model(inputs=[input_1, input_2], outputs=pred) + model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) + utils.plot_model(model, to_file='./data/images/concat.png', show_shapes=True, show_layer_names=True) + return model + + +def e2e_model_dot(vec_size): + def cosine_distance(vecs, normalize=False): + x, y = vecs + if normalize: + x = K.l2_normalize(x, axis=0) + y = K.l2_normalize(x, axis=0) + return K.prod(K.stack([x, y], axis=1), axis=1) + + def cosine_distance_output_shape(shapes): + return shapes[0] + + input_1 = layers.Input(shape=(vec_size,)) + input_2 = layers.Input(shape=(vec_size,)) + + merged = layers.Lambda(cosine_distance, + output_shape=cosine_distance_output_shape)([input_1, input_2]) + + fc1 = layers.Dense(512, kernel_initializer="glorot_uniform")(merged) + fc1 = layers.Dropout(0.2)(fc1) + fc1 = layers.Activation("relu")(fc1) + + fc2 = layers.Dense(128, kernel_initializer="glorot_uniform")(fc1) + fc2 = layers.Dropout(0.2)(fc2) + fc2 = layers.Activation("relu")(fc2) + + pred = layers.Dense(2, kernel_initializer="glorot_uniform")(fc2) + pred = layers.Activation("softmax")(pred) + model = models.Model(inputs=[input_1, input_2], outputs=pred) + model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) + utils.plot_model(model, to_file='./data/images/dot.png', show_shapes=True, show_layer_names=True) + return model + + +def e2e_model_l1(vec_size): + def absdiff(vecs): + x, y = vecs + return K.abs(K.sum(K.stack([x, -y], axis=1), axis=1)) + + def absdiff_output_shape(shapes): + return shapes[0] + + input_1 = layers.Input(shape=(vec_size,)) + input_2 = layers.Input(shape=(vec_size,)) + merged = layers.Lambda(absdiff, output_shape=absdiff_output_shape)([input_1, input_2]) + + fc1 = layers.Dense(512, kernel_initializer="glorot_uniform")(merged) + fc1 = layers.Dropout(0.2)(fc1) + fc1 = layers.Activation("relu")(fc1) + + fc2 = layers.Dense(128, kernel_initializer="glorot_uniform")(fc1) + fc2 = layers.Dropout(0.2)(fc2) + fc2 = layers.Activation("relu")(fc2) + + pred = layers.Dense(2, kernel_initializer="glorot_uniform")(fc2) + pred = layers.Activation("softmax")(pred) + model = models.Model(inputs=[input_1, input_2], outputs=pred) + model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) + utils.plot_model(model, to_file='./data/images/l1.png', show_shapes=True, show_layer_names=True) + return model + + +def e2e_model_l2(vec_size): + def euclidean_distance(vecs): + x, y = vecs + return K.sqrt(K.sum(K.stack([K.square(x), -K.square(y)], axis=1), axis=1)) + + def euclidean_distance_output_shape(shapes): + xshape, yshape = shapes + return xshape + + input_1 = layers.Input(shape=(vec_size,)) + input_2 = layers.Input(shape=(vec_size,)) + merged = layers.Lambda(euclidean_distance, + output_shape=euclidean_distance_output_shape)([input_1, input_2]) + + fc1 = layers.Dense(512, kernel_initializer="glorot_uniform")(merged) + fc1 = layers.Dropout(0.2)(fc1) + fc1 = layers.Activation("relu")(fc1) + + fc2 = layers.Dense(128, kernel_initializer="glorot_uniform")(fc1) + fc2 = layers.Dropout(0.2)(fc2) + fc2 = layers.Activation("relu")(fc2) + + pred = layers.Dense(2, kernel_initializer="glorot_uniform")(fc2) + pred = layers.Activation("softmax")(pred) + + model = models.Model(inputs=[input_1, input_2], outputs=pred) + model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) + utils.plot_model(model, to_file='./data/images/l2.png', show_shapes=True, show_layer_names=True) + return model + + +if __name__ == '__main__': + image_triples = get_holiday_triples(IMAGE_DIR) + train_triples, val_triples, test_triples = train_test_split(image_triples, + splits=[0.7, 0.1, 0.2]) + print(len(train_triples), len(val_triples), len(test_triples)) + + VECTOR_SIZE = 2048 + VECTOR_FILE = os.path.join(DATA_DIR, "resnet-vectors.tsv") + + vec_dict = load_vectors(VECTOR_FILE) + + train_gen = data_generator(train_triples, VECTOR_SIZE, vec_dict, BATCH_SIZE) + val_gen = data_generator(val_triples, VECTOR_SIZE, vec_dict, BATCH_SIZE) + + # model_name = get_model_file(DATA_DIR, 'resnet', 'dot', 'best') + # model_name = get_model_file(DATA_DIR, 'resnet', 'concat', 'best') + model_name = get_model_file(DATA_DIR, 'resnet', 'l2', 'best') + + checkpoint = callbacks.ModelCheckpoint(model_name, save_best_only=True) + train_steps_per_epoch = len(train_triples) // BATCH_SIZE + val_steps_per_epoch = len(val_triples) // BATCH_SIZE + + # model = e2e_model_concat(VECTOR_SIZE) + # model = e2e_model_dot(VECTOR_SIZE) + # model = e2e_model_l1(VECTOR_SIZE) + e2e_model_l2(VECTOR_SIZE) + + # history = model.fit_generator(train_gen, steps_per_epoch=train_steps_per_epoch, + # epochs=NUM_EPOCHS, + # validation_data=val_gen, validation_steps=val_steps_per_epoch, + # callbacks=[checkpoint]) + # + # plot_training_curve(history) + # + # final_model_name = get_model_file(DATA_DIR, "resnet", "l2", 'final') + # model.save(final_model_name) + # + # test_gen = data_generator(test_triples, VECTOR_SIZE, vec_dict, BATCH_SIZE) + # final_accuracy = evaluate_model(models.load_model(final_model_name), test_gen, test_triples, BATCH_SIZE) + # + # test_gen = data_generator(test_triples, VECTOR_SIZE, vec_dict, BATCH_SIZE) + # best_accuracy = evaluate_model(models.load_model(model_name), test_gen, test_triples, BATCH_SIZE) + # + # scores = best_accuracy if best_accuracy > final_accuracy else final_accuracy diff --git a/cv/holiday_similarity/siamese-finetune.py b/cv/holiday_similarity/siamese-finetune.py new file mode 100644 index 0000000..9cd3dbb --- /dev/null +++ b/cv/holiday_similarity/siamese-finetune.py @@ -0,0 +1,98 @@ +from keras.applications import resnet50 +from PIL import Image +from keras import models, callbacks +from keras.applications import imagenet_utils +from keras.applications import resnet50 +from keras.preprocessing.image import ImageDataGenerator +from sklearn import model_selection + +from cv.holiday_similarity.data_utils import * +from cv.holiday_similarity.vis_utils import * + + +def e2e_network(): + inception_1 = resnet50.ResNet50(weights="imagenet", include_top=True) + inception_2 = resnet50.ResNet50(weights="imagenet", include_top=True) + + for layer in inception_1.layers: + layer.trainable = False + layer.name = layer.name + "_1" + for layer in inception_2.layers: + layer.trainable = False + layer.name = layer.name + "_2" + + vector_1 = inception_1.get_layer("avg_pool_1").output + vector_2 = inception_2.get_layer("avg_pool_2").output + + sim_head = models.load_model(os.path.join(DATA_DIR, "models", "resnet-dot-best.h5")) + for layer in sim_head.layers: + print(layer.name, layer.input_shape, layer.output_shape) + + prediction = sim_head([vector_1, vector_2]) + + model = models.Model(inputs=[inception_1.input, inception_2.input], outputs=prediction) + model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"]) + + return model + + +def load_image_cache(image_cache, image_filename): + image = plt.imread(os.path.join(IMAGE_DIR, image_filename)) + image = np.asarray(Image.fromarray(image).resize((224, 224))) + image = image.astype("float32") + image = imagenet_utils.preprocess_input(image) + image_cache[image_filename] = image + + +def generate_image_cache(triples_data): + image_cache = {} + num_pairs = len(triples_data) + for i, (image_filename_l, image_filename_r, _) in enumerate(triples_data): + if i % 1000 == 0: + print("images from {:d}/{:d} pairs loaded to cache".format(i, num_pairs)) + if image_filename_l not in image_cache: + load_image_cache(image_cache, image_filename_l) + if image_filename_r not in image_cache: + load_image_cache(image_cache, image_filename_r) + return image_cache + + +if __name__ == '__main__': + DATA_DIR = './data' + IMAGE_DIR = '/Users/chunhuizhang/workspaces/00_datasets/images/INRIA Holidays dataset /jpg' + + BATCH_SIZE = 32 + NUM_EPOCHS = 5 + BEST_MODEL_FILE = os.path.join(DATA_DIR, "models", "resnet-ft-best.h5") + FINAL_MODEL_FILE = os.path.join(DATA_DIR, "models", "resnet-ft-final.h5") + + triples_data = create_triples(IMAGE_DIR) + triples_data_trainval, triples_data_test = model_selection.train_test_split(triples_data, train_size=0.8) + triples_data_train, triples_data_val = model_selection.train_test_split(triples_data_trainval, train_size=0.9) + print(len(triples_data_train), len(triples_data_val), len(triples_data_test)) + + datagen_args = dict(rotation_range=10, + width_shift_range=0.2, + height_shift_range=0.2, + zoom_range=0.2) + datagens = [ImageDataGenerator(**datagen_args), + ImageDataGenerator(**datagen_args)] + + image_cache = generate_image_cache(triples_data) + + train_pair_gen = pair_generator(triples_data_train, image_cache, datagens, BATCH_SIZE) + val_pair_gen = pair_generator(triples_data_val, image_cache, None, BATCH_SIZE) + + num_train_steps = len(triples_data_train) // BATCH_SIZE + num_val_steps = len(triples_data_val) // BATCH_SIZE + + model = e2e_network() + checkpoint = callbacks.ModelCheckpoint(filepath=BEST_MODEL_FILE, save_best_only=True) + history = model.fit_generator(train_pair_gen, + steps_per_epoch=num_train_steps, + epochs=NUM_EPOCHS, + validation_data=val_pair_gen, + validation_steps=num_val_steps, + callbacks=[checkpoint]) + + plot_training_curve(history) diff --git a/cv/holiday_similarity/vis_utils.py b/cv/holiday_similarity/vis_utils.py new file mode 100644 index 0000000..0910364 --- /dev/null +++ b/cv/holiday_similarity/vis_utils.py @@ -0,0 +1,19 @@ +import matplotlib.pyplot as plt + + +def plot_training_curve(history): + # print(history.history.keys()) + plt.subplot(211) + plt.title("Loss") + plt.plot(history.history["loss"], color="r", label="train") + plt.plot(history.history["val_loss"], color="b", label="validation") + plt.legend(loc="best") + + plt.subplot(212) + plt.title("Accuracy") + plt.plot(history.history["accuracy"], color="r", label="train") + plt.plot(history.history["val_accuracy"], color="b", label="validation") + plt.legend(loc="best") + + plt.tight_layout() + plt.show() |
