初めに

ある画像から近い色同士で色をまとめてほしい時があります。この際に使用できる画像処理やクラスタリングの手法を試してみました。

使用例として、ゲーム開発における地面のマテリアル(どのような地面の種別なのか)判定として使用できそうです

上記は以下のUnity Assetを使用しています。わかりやすくするために地面より高いものは一部非表示にしています。

リポジトリは以下で公開しています

github.com

開発環境

Python 3.11
Windows 11

アプローチの方針

今回は以下の手法を用いて画像の単色化を行っていきます。詳しい記事はリンクを貼っているので、そちらを参照してください

ライブラリのインストール

pip install opencv-python-headless==4.9.0.80 numpy==1.26.4 matplotlib==3.9.0 scikit-learn==1.4.2

もしくはリポジトリを使用する場合は以下です

pip install -r requirements.txt

k-means法

K-meansクラスタリングを1回だけ実行した場合、初期値によっては求めていたものと違う結果になることがあったため、複数回実行してその中で最も良い結果（inertiaが最小のもの）を採用するようにしました。また、引数から単色化する際の色の数を指定できます。

以下で実行することができます

python K-means-clustering.py --image_path test.png --num_colors 5 --attempts 10

デモ

実行した際の結果は以下になります

* num_colors(単色にする色の数) = 5
* attempts(試行回数) = 10

左が元の画像で、右側が処理後の画像です

コード

以下が実際のコードになります

import cv2
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

def apply_kmeans(image, k, attempts):
    """Cluster the pixels of ``image`` into ``k`` colour groups with K-means.

    K-means is restarted ``attempts`` times from different k-means++ seeds
    and the run with the lowest inertia is kept.

    Args:
        image: Array that can be reshaped into rows of 3 values
            (for example an H x W x 3 colour image).
        k: Number of clusters (colours) to produce.
        attempts: Number of independent K-means restarts; must be >= 1.

    Returns:
        A ``(labels, centers)`` tuple: the cluster index of every pixel in
        flattened order, and the cluster centers as rows of 3 floats.

    Raises:
        ValueError: If ``attempts`` is smaller than 1.
    """
    # Without at least one run there is no result to return; fail here with
    # a clear message instead of handing (None, None) back to the caller.
    if attempts < 1:
        raise ValueError(f"attempts must be at least 1, got {attempts}")

    # Flatten the image to one row per pixel
    data = np.float32(image.reshape((-1, 3)))

    # n_init makes KMeans itself repeat the clustering and keep the
    # lowest-inertia run, which is what the restarts are for.
    kmeans = KMeans(n_clusters=k, init='k-means++', n_init=attempts, max_iter=300)
    kmeans.fit(data)

    return kmeans.labels_, kmeans.cluster_centers_

def main(image_path, k, attempts):
    """Quantize an image to ``k`` colours with K-means, then show and save it.

    The original and the quantized image are displayed side by side with
    matplotlib, and the quantized image is written to
    ``segmented_image.png``.

    Args:
        image_path: Path of the image to load.
        k: Number of colours to reduce the image to.
        attempts: Number of K-means attempts passed to ``apply_kmeans``.

    Raises:
        OSError: If the image at ``image_path`` cannot be read.
    """
    # Load the image; cv2.imread reports failure by returning None
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(
            f'Could not read image (missing file or unsupported format): {image_path}'
        )

    # Convert BGR to RGB (matplotlib expects RGB)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run the clustering
    labels, centers = apply_kmeans(image_rgb, k, attempts)

    # Map every pixel to its cluster center and restore the image shape
    centers = np.uint8(centers)
    segmented_image = centers[labels.flatten()]
    segmented_image = segmented_image.reshape(image_rgb.shape)

    # Repaint each cluster with the mean colour of its original pixels
    label_map = labels.reshape(image_rgb.shape[:2])  # reshape once, not per label
    for label in np.unique(labels):
        mask = label_map == label
        segmented_image[mask] = np.mean(image_rgb[mask], axis=0)

    # Show the original and the quantized image side by side
    plt.figure(figsize=(8, 4))  # window size
    plt.subplot(1, 2, 1)
    plt.title('Original Image')
    plt.imshow(image_rgb)
    plt.subplot(1, 2, 2)
    plt.title(f'Segmented Image with {k} Colors')
    plt.imshow(segmented_image)
    plt.tight_layout()  # let matplotlib adjust the layout
    plt.show()

    # Save the result (OpenCV writes BGR, so convert back)
    output_path = 'segmented_image.png'
    cv2.imwrite(output_path, cv2.cvtColor(segmented_image, cv2.COLOR_RGB2BGR))

    print(f'Segmented image saved to: {output_path}')

if __name__ == "__main__":
    import argparse

    # Command-line entry point: read the image path, the number of colours
    # and the number of K-means attempts, then run the quantization.
    cli = argparse.ArgumentParser(
        description="K-means clustering color quantization with parameter tuning"
    )
    cli.add_argument(
        '--image_path', default='test.png', type=str,
        help='Path to the input image',
    )
    cli.add_argument(
        '--num_colors', default=5, type=int,
        help='Number of colors for quantization',
    )
    cli.add_argument(
        '--attempts', default=10, type=int,
        help='Number of attempts for K-means clustering',
    )

    options = cli.parse_args()

    main(options.image_path, options.num_colors, options.attempts)

DBSCAN法

処理をする中で画像に対して、データの2次元配列変換・全データポイント間の距離計算・一時的なデータ構造の作成をすることで、それなりのメモリを使用します。そのため、今回は画像のスケールを指定できるようにしています。

以下で実行することができます

python DBSCAN-clustering.py --image_path test.png --eps 10.0 --min_samples 10 --scale_factor 0.1

デモ

実行した際の結果は以下になります

* scale_factor(スケール値) = 0.1

左が元の画像で、右側が処理後の画像です

コード

以下が実際のコードになります

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.cluster import DBSCAN


def apply_dbscan(image, eps, min_samples):
    """Recolour ``image`` so that every DBSCAN cluster gets its mean colour.

    Pixels that DBSCAN marks as noise (label ``-1``) are painted black.

    Args:
        image: Array that can be reshaped into rows of 3 values
            (for example an H x W x 3 colour image).
        eps: ``eps`` parameter passed to ``DBSCAN``.
        min_samples: ``min_samples`` parameter passed to ``DBSCAN``.

    Returns:
        A ``uint8`` array with the same shape as ``image``.
    """
    # Flatten the image to one row per pixel
    data = np.float32(image.reshape((-1, 3)))

    # Run DBSCAN on the pixel colours
    labels = DBSCAN(eps=eps, min_samples=min_samples).fit(data).labels_

    # The palette has one row per distinct label, in np.unique order. Raw
    # labels must not be used as palette indices: when noise is present the
    # palette starts with the noise entry, so -1 would wrap around to the
    # last cluster and every real cluster would be shifted by one.
    # return_inverse gives each pixel's position in unique_labels instead.
    unique_labels, palette_index = np.unique(labels, return_inverse=True)

    palette = np.zeros((len(unique_labels), 3), dtype=np.float32)
    for position, label in enumerate(unique_labels):
        if label != -1:  # noise keeps the black default
            palette[position] = np.mean(data[labels == label], axis=0)

    palette = np.uint8(palette)
    segmented_image = palette[palette_index.reshape(-1)]
    return segmented_image.reshape(image.shape)

def main(image_path, eps, min_samples, scale_factor):
    """Quantize a downscaled image with DBSCAN, then show and save it.

    The image is resized by ``scale_factor`` before clustering. The resized
    original and the clustered image are displayed side by side with
    matplotlib, and the clustered image is written to
    ``segmented_image_dbscan.png``.

    Args:
        image_path: Path of the image to load.
        eps: ``eps`` value forwarded to ``apply_dbscan``.
        min_samples: ``min_samples`` value forwarded to ``apply_dbscan``.
        scale_factor: Multiplier applied to the image width and height;
            must be greater than 0.

    Raises:
        OSError: If the image at ``image_path`` cannot be read.
        ValueError: If ``scale_factor`` is not positive.
    """
    if scale_factor <= 0:
        raise ValueError(f'scale_factor must be positive, got {scale_factor}')

    # Load the image; cv2.imread reports failure by returning None
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(
            f'Could not read image (missing file or unsupported format): {image_path}'
        )

    # Shrink the image; keep at least one pixel per axis so that a small
    # image combined with a small scale factor never asks for a 0-sized result
    height, width = image.shape[:2]
    new_height = max(1, int(height * scale_factor))
    new_width = max(1, int(width * scale_factor))
    image_resized = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_LINEAR)

    # Convert BGR to RGB (matplotlib expects RGB)
    image_rgb = cv2.cvtColor(image_resized, cv2.COLOR_BGR2RGB)

    # Run the DBSCAN clustering
    segmented_image = apply_dbscan(image_rgb, eps, min_samples)

    # Show the resized original and the clustered image side by side
    plt.figure(figsize=(8, 4))  # window size
    plt.subplot(1, 2, 1)
    plt.title('Original Image')
    plt.imshow(image_rgb)  # already resized and in RGB; no second resize needed
    plt.subplot(1, 2, 2)
    plt.title('Segmented Image using DBSCAN')
    plt.imshow(segmented_image)
    plt.tight_layout()  # let matplotlib adjust the layout
    plt.show()

    # Save the result (OpenCV writes BGR, so convert back)
    output_path = 'segmented_image_dbscan.png'
    cv2.imwrite(output_path, cv2.cvtColor(segmented_image, cv2.COLOR_RGB2BGR))

    print(f'Segmented image saved to: {output_path}')

if __name__ == "__main__":
    import argparse

    # Command-line entry point: read the image path, the DBSCAN parameters
    # and the scale factor, then run the clustering.
    cli = argparse.ArgumentParser(
        description="DBSCAN clustering for image color quantization"
    )
    cli.add_argument(
        '--image_path', default='test.png', type=str,
        help='Path to the input image',
    )
    cli.add_argument(
        '--eps', default=10.0, type=float,
        help='The maximum distance between two samples for one to be considered as in the neighborhood of the other.',
    )
    cli.add_argument(
        '--min_samples', default=10, type=int,
        help='The number of samples (or total weight) in a neighborhood for a point to be considered as a core point.',
    )
    cli.add_argument(
        '--scale_factor', default=0.1, type=float,
        help='Factor to scale the image down by.',
    )

    options = cli.parse_args()

    main(options.image_path, options.eps, options.min_samples, options.scale_factor)

GMM法

画像の単色化にはあまり使用されない手法のようですが、念のため試してみます。

デモ

実行した際の結果は以下になります

* n_components = 5

左が元の画像で、右側が処理後の画像です

コード

以下が実際のコードになります

import cv2
import matplotlib.pyplot as plt
import numpy as np
from sklearn.mixture import GaussianMixture


def apply_gmm(image, n_components):
    """Recolour ``image`` using the component means of a Gaussian mixture.

    A ``GaussianMixture`` with ``n_components`` components is fitted to the
    pixel colours; every pixel is then replaced by the mean of the
    component predicted for it.

    Args:
        image: Array that can be reshaped into rows of 3 values
            (for example an H x W x 3 colour image).
        n_components: Number of mixture components (output colours).

    Returns:
        A ``uint8`` array with the same shape as ``image``.
    """
    # One row per pixel, as float32
    pixels = image.reshape((-1, 3)).astype(np.float32)

    # Fit the mixture, then assign each pixel to a component
    model = GaussianMixture(n_components=n_components)
    model.fit(pixels)
    assignments = model.predict(pixels)

    # Component means act as the colour palette
    palette = model.means_.astype(np.uint8)

    return palette[assignments].reshape(image.shape)

def main(image_path, n_components):
    """Quantize an image with a Gaussian mixture model, then show and save it.

    The original and the quantized image are displayed side by side with
    matplotlib, and the quantized image is written to
    ``segmented_image_gmm.png``.

    Args:
        image_path: Path of the image to load.
        n_components: Number of mixture components forwarded to
            ``apply_gmm``.

    Raises:
        OSError: If the image at ``image_path`` cannot be read.
    """
    # Load the image; cv2.imread reports failure by returning None
    image = cv2.imread(image_path)
    if image is None:
        raise OSError(
            f'Could not read image (missing file or unsupported format): {image_path}'
        )

    # Convert BGR to RGB (matplotlib expects RGB)
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    # Run the GMM clustering
    segmented_image = apply_gmm(image_rgb, n_components)

    # Show the original and the quantized image side by side
    plt.figure(figsize=(8, 4))  # window size
    plt.subplot(1, 2, 1)
    plt.title('Original Image')
    plt.imshow(image_rgb)
    plt.subplot(1, 2, 2)
    plt.title('Segmented Image using GMM')
    plt.imshow(segmented_image)
    plt.tight_layout()  # let matplotlib adjust the layout
    plt.show()

    # Save the result (OpenCV writes BGR, so convert back)
    output_path = 'segmented_image_gmm.png'
    cv2.imwrite(output_path, cv2.cvtColor(segmented_image, cv2.COLOR_RGB2BGR))

    print(f'Segmented image saved to: {output_path}')

if __name__ == "__main__":
    import argparse

    # Command-line entry point: read the image path and the number of
    # mixture components, then run the clustering.
    cli = argparse.ArgumentParser(
        description="GMM clustering for image color quantization"
    )
    cli.add_argument(
        '--image_path', default='test.png', type=str,
        help='Path to the input image',
    )
    cli.add_argument(
        '--n_components', default=5, type=int,
        help='Number of components for GMM.',
    )

    options = cli.parse_args()

    main(options.image_path, options.n_components)