Module hytraj.hyagg

Expand source code
import pandas as pd, numpy as np, matplotlib.pyplot as plt
#import traj_dist.distance as tdist
import fastcluster as fc, scipy.cluster.hierarchy as sch

class HyHAC:
    def __init__(self, data):
        self.lat = data.sel(geo="lat").to_pandas()
        self.lon = data.sel(geo="lon").to_pandas()
        self.n_traj = len(self.lat.columns)
        self.slat = self.lat.iloc[0, 0]
        self.slon = self.lon.iloc[0, 0]

    def get_linkage(self, metric="sspd", method="ward", type_d="euclidean"):
        traj_list = []
        for traj in self.lat.columns:
            ds = pd.DataFrame([])
            ds["lat"] = self.lat[traj]
            ds["lon"] = self.lon[traj]
            traj_list.append(ds.values)
        self.p_dist = tdist.pdist(traj_list, metric=metric, type_d=type_d)
        self.link = fc.linkage(self.p_dist, method=method)
        return self

    def fit(self, nclus=5, metric='sspd', method="ward", type_d="spherical"):
        self.nclus = nclus
        self.get_linkage(metric, method, type_d)
        self.labels = sch.fcluster(self.link, self.nclus, criterion="maxclust") - 1
        self.cluster = pd.DataFrame(data=self.labels, index=self.lat.columns).T
        return self.cluster

    def plot_dendrogram(self, ax=None, p=10, D=50):
        if not ax:
            fig, ax = plt.subplots(1, 1, figsize=(14, 5))
        dn = sch.dendrogram(
            self.link,
            ax=ax,
            orientation="top",
            show_contracted=True,
            truncate_mode="lastp",
            p=p,
        )
        ax.set_ylabel("Distance")
        ax.axhline(D, color="k", ls="-.")
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        return ax

Classes

class HyHAC (data)
Expand source code
class HyHAC:
    def __init__(self, data):
        self.lat = data.sel(geo="lat").to_pandas()
        self.lon = data.sel(geo="lon").to_pandas()
        self.n_traj = len(self.lat.columns)
        self.slat = self.lat.iloc[0, 0]
        self.slon = self.lon.iloc[0, 0]

    def get_linkage(self, metric="sspd", method="ward", type_d="euclidean"):
        traj_list = []
        for traj in self.lat.columns:
            ds = pd.DataFrame([])
            ds["lat"] = self.lat[traj]
            ds["lon"] = self.lon[traj]
            traj_list.append(ds.values)
        self.p_dist = tdist.pdist(traj_list, metric=metric, type_d=type_d)
        self.link = fc.linkage(self.p_dist, method=method)
        return self

    def fit(self, nclus=5, metric='sspd', method="ward", type_d="spherical"):
        self.nclus = nclus
        self.get_linkage(metric, method, type_d)
        self.labels = sch.fcluster(self.link, self.nclus, criterion="maxclust") - 1
        self.cluster = pd.DataFrame(data=self.labels, index=self.lat.columns).T
        return self.cluster

    def plot_dendrogram(self, ax=None, p=10, D=50):
        if not ax:
            fig, ax = plt.subplots(1, 1, figsize=(14, 5))
        dn = sch.dendrogram(
            self.link,
            ax=ax,
            orientation="top",
            show_contracted=True,
            truncate_mode="lastp",
            p=p,
        )
        ax.set_ylabel("Distance")
        ax.axhline(D, color="k", ls="-.")
        ax.spines["right"].set_visible(False)
        ax.spines["top"].set_visible(False)
        return ax

Methods

def fit(self, nclus=5, metric='sspd', method='ward', type_d='spherical')
Expand source code
def fit(self, nclus=5, metric='sspd', method="ward", type_d="spherical"):
    self.nclus = nclus
    self.get_linkage(metric, method, type_d)
    self.labels = sch.fcluster(self.link, self.nclus, criterion="maxclust") - 1
    self.cluster = pd.DataFrame(data=self.labels, index=self.lat.columns).T
    return self.cluster
def get_linkage(self, metric='sspd', method='ward', type_d='euclidean')
Expand source code
def get_linkage(self, metric="sspd", method="ward", type_d="euclidean"):
    traj_list = []
    for traj in self.lat.columns:
        ds = pd.DataFrame([])
        ds["lat"] = self.lat[traj]
        ds["lon"] = self.lon[traj]
        traj_list.append(ds.values)
    self.p_dist = tdist.pdist(traj_list, metric=metric, type_d=type_d)
    self.link = fc.linkage(self.p_dist, method=method)
    return self
def plot_dendrogram(self, ax=None, p=10, D=50)
Expand source code
def plot_dendrogram(self, ax=None, p=10, D=50):
    if not ax:
        fig, ax = plt.subplots(1, 1, figsize=(14, 5))
    dn = sch.dendrogram(
        self.link,
        ax=ax,
        orientation="top",
        show_contracted=True,
        truncate_mode="lastp",
        p=p,
    )
    ax.set_ylabel("Distance")
    ax.axhline(D, color="k", ls="-.")
    ax.spines["right"].set_visible(False)
    ax.spines["top"].set_visible(False)
    return ax