Source code for evclust.mecmdd_rwl

# -*- coding: utf-8 -*-
# This file as well as the whole evclust package are licenced under the MIT licence (see the LICENCE.txt)
# Armel SOUBEIGA (armelsoubeiga.github.io), France, 2024

"""
This module contains the main function for Multi-View Evidential C-Medoid clustering (MECMdd) with adaptive weightings with Relevance Weight for each dissimilarity matrix
estimated Locally (MECMdd-RWL), i.e for each cluster, using weight Sum (MECMdd-RWL-S) constraint or weight Product (MECMdd-RWL-P) constraint.

    Armel Soubeiga, Violaine Antoine and Sylvain Moreno. "Multi-View Relational Evidential C-Medoid Clustering with Adaptive Weighted" 
    2024 IEEE 11th International Conference on Data Science and Advanced Analytics (DSAA)
"""

#---------------------- Packges------------------------------------------------
from evclust.utils import makeF, extractMass
import numpy as np




#---------------------- MECMdd-RWL------------------------------------------------
[docs] def mecmdd_rwl(Xlist, c, type='full', alpha=1, beta=1.5, delta=9, epsi=1e-4, disp=True, gamma=0.5, eta=1, weight='sum', s=None): """ Multi-View Evidential C-Medoid clustering (MECMdd) with adaptive weightings with Relevance Weight for each dissimilarity matrix estimated Locally (MECMdd-RWL), i.e for each cluster, using weight Sum (MECMdd-RWL-S) constraint or weight Product (MECMdd-RWL-P) constraint. Parameters: ----------- Xlist (list of np.array): A list of square symmetric dissimilarity matrices. c (int): Number of clusters to create. type (str): Type of structure ('full' by default). alpha (float): Weighting parameter for cardinalities. beta (float): uncertainty exponent for membership degrees. delta (float or None): Precision parameter (used to calculate delta2). If None, distribution of matrix are considere. epsi (float): Convergence threshold. disp (bool): Whether to display the objective function value during iteration. gamma (float): Coefficient to adjust the imprecise contribution. eta (float): Coefficient for outlier identification. weight (str): Type of constraint function using to learn weight ('sum' or 'prod'). s (float or None): Additional weight parameter. Returns: -------- The credal partition (an object of class "credpart"). Example: -------- .. highlight:: python .. code-block:: python from evclust.mecmdd_rwl import mecmdd_rwl matrix1 = np.random.rand(5, 5) matrix1 = (matrix1 + matrix1.T) / 2 matrix2 = np.random.rand(5, 5) matrix2 = (matrix2 + matrix2.T) / 2 Xlist = [matrix1, matrix2] clus = mecmdd_rwl(Xlist, c=2, type='simple', alpha=1, beta=1.5, delta=9, epsi=1e-4, disp=True, gamma=0.5, eta=1, weight='prod', s=None) clus['param']['lambda'] # View weight References: ----------- Armel Soubeiga, Violaine Antoine and Sylvain Moreno. "Multi-View Relational Evidential C-Medoid Clustering with Adaptive Weighted" 2024 IEEE 11th International Conference on Data Science and Advanced Analytics (DSAA). .. seealso:: :func:`~extractMass`, :func:`~makeF`, :func:`~lambdaInit_local` .. note:: Keywords : Evidential clustering, credal partition, relational multi-view, belief function MECMdd-RWL is able to address the uncertainty and imprecision of multi-view relational data clustering, and provides a credible partition, which extends fuzzy, possibilistic and rough partitions. It can automatically lern the importance of each views by estimated a weight locally for each cluster in a collaborative learning framework. """ if not Xlist or not isinstance(Xlist, list) or len(Xlist) <= 1: raise ValueError("The data set (Xlist) must be given, must not be empty, must be a list, and must contain more than one matrix") tailles = [x.shape for x in Xlist] if not all(t == tailles[0] for t in tailles): raise ValueError("All matrices in Xlist must have the same size") # Determine s based on weight type if weight == 'prod': s = 1 elif weight == 'sum': if s is None: s = 2 # Delta calculation pmatrix = len(Xlist) if delta is None: delta2 = sum(np.quantile(mat[np.triu_indices_from(mat, k=1)], 0.95) for mat in Xlist) elif isinstance(delta, (int, float)): delta2 = sum([delta ** 2] * pmatrix) elif len(delta) == pmatrix: delta2 = sum([d ** 2 for d in delta]) else: raise ValueError("Delta must be None, a scalar, or a list of length equal to the number of matrices.") # Generate focal set matrix F and initialize parameters F = makeF(c=c, type=type) nF = F.shape[0] card = np.sum(F[1:], axis=1) n = Xlist[0].shape[0] medoids = np.random.choice(n, c, replace=False) prototype = np.zeros(nF-1, dtype=int) lambda_ = lambdaInit_local(weight, pmatrix, nF-1) pasfini = True Jold = float('inf') iter = 0 while pasfini: iter += 1 # Update singleton and imprecise (medoid) for j in range(nF-1): fj = F[j+1] medoidsj = medoids[fj != 0] l_i = np.zeros(n) if np.sum(fj) == 1: prototype[j] = medoidsj[0] else: for i in range(n): var_ij = (1/card[j]) * np.sum([(np.sum([Xlist[l][i, medoidsj[k]] for l in range(pmatrix)]) - (1/card[j]) * np.sum([np.sum([Xlist[l][i, medoidsj[kk]] for l in range(pmatrix)]) for kk in range(len(medoidsj))]))**2 for k in range(len(medoidsj))]) l_i[i] = var_ij + eta * (1/card[j]) * np.sum([np.sum([Xlist[l][i, medoidsj[kk]] for l in range(pmatrix)]) for kk in range(len(medoidsj))]) prototype[j] = np.argmin(l_i) # Calculation of distances to centers D = [np.zeros((n, nF-1)) for _ in range(pmatrix)] for l in range(pmatrix): for j in range(nF-1): fj = F[j+1] medoidsj = medoids[fj != 0] if np.sum(fj) == 1: D[l][:, j] = Xlist[l][:, prototype[j]] elif np.sum(fj) > 1: D[l][:, j] = (Xlist[l][:, prototype[j]] + (gamma / card[j]) * np.sum(Xlist[l][:, medoidsj], axis=1)) / (1 + gamma) # Calculation of masses m = np.zeros((n, nF-1)) for j in range(nF-1): fj = F[j+1] medoidsj = medoids[fj != 0] for i in range(n): num = (card[j]**alpha * np.sum([D[l][i, j] * (lambda_[j, l])**s for l in range(pmatrix)]))**(-1/(beta-1)) den = (np.sum([(card[k]**alpha * np.sum([D[l][i, k] * (lambda_[k, l])**s for l in range(pmatrix)]))**(-1/(beta-1)) for k in range(nF-1)]) + delta2**(-1/(beta-1))) m[i, j] = num / den if np.isnan(m[i, j]) or np.isinf(m[i, j]): m[i, j] = 1 # Weight if weight == 'sum': for j in range(nF-1): for l in range(pmatrix): num = np.sum([s * (card[j]**alpha) * m[i, j]**beta * D[l][i, j] for i in range(n)])**(-1/(s-1)) den = np.sum([np.sum([s * (card[j]**alpha) * m[i, j]**beta * D[h][i, j] for i in range(n)])**(-1/(s-1)) for h in range(pmatrix)]) lambda_[j, l] = num / den if weight == 'prod': for j in range(nF-1): for l in range(pmatrix): num = np.prod([np.sum([(card[j]**alpha) * m[i, j]**beta * D[h][i, j] for i in range(n)])**(1/pmatrix) for h in range(pmatrix)], axis=0) den = np.sum([(card[j]**alpha) * m[i, j]**beta * D[l][i, j] for i in range(n)]) lambda_[j, l] = num / den # Medoids V = np.zeros((n, nF-1)) for k in range(nF-1): fk = F[k+1] if np.sum(fk) == 1: mk_beta = m[:, k]**beta for i in range(n): V[i, k] = np.sum([np.sum([Xlist[l][i, :] * (lambda_[k, l])**s for l in range(pmatrix)]) * mk_beta[i]]) V_filtered = V[:, np.sum(V, axis=0) > 0] medoids = np.array([np.argmin(V_filtered[:, i]) for i in range(V_filtered.shape[1])]) mvide = 1 - np.sum(m, axis=1) J = np.sum([np.sum([card[j]**alpha * m[i, j]**beta * np.sum([Xlist[l][i, prototype[j]] * (lambda_[j, l])**s for l in range(pmatrix)]) for j in range(nF-1)]) for i in range(n)]) + delta2 * np.sum(mvide**beta) if disp: print(iter, J) pasfini = abs(J - Jold) > epsi Jold = J g = medoids m = np.hstack((1 - np.sum(m, axis=1).reshape(-1, 1), m)) clus = extractMass(m, F, g=g, method="mecmdd", crit=J, param={'alpha': alpha, 'beta': beta, 'delta': delta, 'lambda': lambda_}) return clus
#---------------------- Utils of MECMdd-RWL------------------------------------------------
[docs] def lambdaInit_local(weight, p, c): """ Initialize lambda matrix. """ lambda_ = np.zeros((c, p)) for j in range(c): if weight == 'sum': lambda_[j, :] = np.full(p, 1/p) elif weight == 'prod': lambda_[j, :] = np.ones(p) else: raise ValueError("Weight must be 'sum' or 'prod'.") return lambda_