Source code for evclust.datasets
# -*- coding: utf-8 -*-
# This file as well as the whole evclust package are licenced under the MIT licence (see the LICENCE.txt)
# Armel SOUBEIGA (armelsoubeiga.github.io), France, 2023
"""
This module contains all tests datasets
"""
#---------------------- Packges------------------------------------------------
import pathlib
import pandas as pd
from scipy.io import loadmat
[docs]
DATASETS_DIR = pathlib.Path(__file__).parent / "datasets"
#---------------------- Data 1-------------------------------------------------
[docs]
def load_decathlon():
"""The Decathlon dataset from FactoMineR."""
decathlon = pd.read_csv(DATASETS_DIR / "decathlon.csv")
decathlon.columns = ["athlete", *map(str.lower, decathlon.columns[1:])]
decathlon.athlete = decathlon.athlete.apply(str.title)
decathlon = decathlon.set_index(["competition", "athlete"])
return decathlon
#---------------------- Data 2-------------------------------------------------
[docs]
def load_iris():
"""Iris data."""
return pd.read_csv(DATASETS_DIR / "iris.csv")
#---------------------- Data 3-------------------------------------------------
[docs]
def load_protein():
"""Protein data. """
return pd.read_csv(DATASETS_DIR / "protein.csv", index_col=False)
#---------------------- Data 4-------------------------------------------------
[docs]
def load_fourclass():
"""fourclass data. """
return pd.read_csv(DATASETS_DIR / "fourclass.csv", index_col=False)
#---------------------- Data 5-------------------------------------------------
[docs]
def load_prop():
"""Load ProP.mat data and return it as a list of views."""
prop_data = loadmat(str(DATASETS_DIR / 'ProP.mat'))
return [prop_data['gene_repert'], prop_data['proteome_comp'], prop_data['text']]