```python
# for loading/processing the images
# tensorflow 2.x
from tensorflow.keras.utils import load_img
from tensorflow.keras.utils import img_to_array
from tensorflow.keras.applications.vgg16 import preprocess_input

# models
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.models import Model

# clustering and dimension reduction
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

# for everything else
import os
import numpy as np
import matplotlib.pyplot as plt
import pickle

path = r"FILE PATH"
# change the working directory to the path where the images are located
os.chdir(path)

# this list holds all the image filenames
images = []

# creates a ScandirIterator aliased as files
with os.scandir(path) as files:
    # loops through each file in the directory
    for file in files:
        # adds only the image files to the images list
        if file.name.endswith(('.png', '.jpeg', '.jpg')):
            images.append(file.name)

print("# of images:", len(images))

# drop the final classification layer so the model outputs
# the 4096-dimensional fc2 feature vector instead of class probabilities
model = VGG16()
model = Model(inputs=model.inputs, outputs=model.layers[-2].output)
model.summary()


def extract_features(file, model):
    # load the image as a 224x224 array
    img = load_img(file, target_size=(224, 224))
    img = img_to_array(img)
    # add a batch dimension: (1, 224, 224, 3)
    reshaped_img = img.reshape(1, 224, 224, 3)
    # prepare the image for VGG16
    imgx = preprocess_input(reshaped_img)
    # get the feature vector
    features = model.predict(imgx)
    return features


data = {}
p = r"PATH TO SAVE FEATURES"

# loop through each image in the dataset
for image in images:
    # try to extract the features and update the dictionary
    try:
        feat = extract_features(image, model)
        data[image] = feat
    # if something fails, save the features extracted so far as a pickle file (optional)
    except Exception:
        with open(p, 'wb') as file:
            pickle.dump(data, file)


# Clustering
def clustering(n, data):
    # get a list of the filenames
    filenames = np.array(list(data.keys()))
    # get a list of just the features
    feat = np.array(list(data.values()))
    # flatten so that each sample is a 4096-dimensional vector
    feat = feat.reshape(-1, 4096)

    # reduce the dimensionality of the feature vectors
    pca = PCA(n_components=100, random_state=22)
    pca.fit(feat)
    x = pca.transform(feat)

    # cluster the reduced feature vectors
    kmeans = KMeans(n_clusters=n, random_state=22)
    kmeans.fit(x)

    groups = {}
    for file, cluster in zip(filenames, kmeans.labels_):
        if cluster not in groups:
            groups[cluster] = []
        groups[cluster].append(file)

    print("Number of clusters:", len(groups.keys()))
    return groups


# function that lets you view a cluster (based on its identifier)
def view_cluster(groups, cluster):
    plt.figure(figsize=(40, 40))
    # gets the list of filenames for a cluster
    files = groups[cluster]
    # only allow up to 50 images to be shown at a time
    if len(files) > 50:
        print(f"Clipping cluster size from {len(files)} to 50")
        files = files[:50]
    # plot each image in the cluster
    for index, file in enumerate(files):
        plt.subplot(8, 8, index + 1)
        img = load_img(file)
        img = np.array(img)
        plt.imshow(img)
        plt.axis('off')
    plt.show()


groups = clustering(15, data)
for i in range(len(groups.keys())):
    print("Cluster", i + 1, "Images")
    view_cluster(groups, i)
```