Source code for mapof.core.inner_distances

import itertools
import math
from copy import deepcopy

import numpy as np


[docs] def map_str_to_func(name: str) -> callable: """ Maps a string to a function. Parameters ---------- name : str Name of the distance. Returns ------- callable """ return {'l1': l1, 'l2': l2, 'chebyshev': chebyshev, 'hellinger': hellinger, 'emd': emd, 'emdinf': emdinf, 'discrete': discrete, }.get(name)
[docs] def discrete(vector_1, vector_2) -> int: """ Computes the discrete distance. Parameters ---------- vector_1 First vector. vector_2 Second vector. Returns ------- int Discrete distance. """ for i in range(len(vector_1)): if vector_1[i] != vector_2[i]: return 1 return 0
[docs] def single_l1(value_1, value_2) -> float: """ Computes the L1 distance between two values. Parameters ---------- value_1 First value. value_2 Second values. Returns ------- float L1 distance. """ return abs(value_1 - value_2)
[docs] def l1(vector_1: np.ndarray, vector_2: np.ndarray) -> float: """ Computes the L1 distance. Parameters ---------- vector_1 : np.ndarray First vector. vector_2 : np.ndarray Second vector. Returns ------- float L1 distance. """ return np.linalg.norm(vector_1 - vector_2, ord=1)
[docs] def l2(vector_1: np.ndarray, vector_2: np.ndarray) -> float: """ Computes the L2 distance. Parameters ---------- vector_1 : np.ndarray First vector. vector_2 : np.ndarray Second vector. Returns ------- float L2 distance. """ return np.linalg.norm(vector_1 - vector_2, ord=2)
[docs] def chebyshev(vector_1: list, vector_2: list) -> float: """ Computes the Chebyshev distance. Parameters ---------- vector_1 : list First vector. vector_2 : list Second vector. Returns ------- float Chebyshev distance. """ return max([abs(vector_1[i] - vector_2[i]) for i in range(len(vector_1))])
[docs] def hellinger(vector_1: list, vector_2: list) -> float: """ Computes the Hellinger distance. Parameters ---------- vector_1 : list First vector. vector_2 : list Second vector. Returns ------- float Hellinger distance. """ h1 = np.average(vector_1) h2 = np.average(vector_2) product = sum([math.sqrt(vector_1[i] * vector_2[i]) for i in range(len(vector_1))]) return math.sqrt(1 - (1 / math.sqrt(h1 * h2 * len(vector_1) * len(vector_1))) * product)
def _stretch(vector, mult): return [x for _ in range(mult) for x in vector]
[docs] def emdinf(vector_1: list, vector_2: list) -> float: """ Computes the EMD-infinity distance. Parameters ---------- vector_1 : list First vector. vector_2 : list Second vector. Returns ------- float EMD-infinity distance. """ if len(vector_1) != len(vector_2): vector_1 = _stretch(vector_1, math.lcm(len(vector_1), len(vector_2))) vector_2 = _stretch(vector_2, math.lcm(len(vector_1), len(vector_2))) m = len(vector_1) cum_x = 0 cum_y = 0 res = 0 for x, y in zip(vector_1, vector_2): cum_x_ = cum_x cum_y_ = cum_y cum_x += x cum_y += y if np.sign(cum_x_ - cum_y_) == np.sign(cum_x - cum_y): # Trapezoid case res += (abs(cum_x_ - cum_y_) + abs(cum_x - cum_y)) / m / 2 else: # Two triangles case (works also for one triangle) d_1 = abs(cum_x_ - cum_y_) d_2 = abs(cum_x - cum_y) res += (d_1 * d_1 + d_2 * d_2) / (d_1 + d_2) / m / 2 return res
[docs] def emd(vector_1: list, vector_2: list) -> float: """ Computes the EMD distance. Parameters ---------- vector_1 : list First vector. vector_2 : list Second vector. Returns ------- float EMD distance. """ vector_1 = deepcopy(vector_1) dirt = 0. for i in range(len(vector_1) - 1): surplus = vector_1[i] - vector_2[i] dirt += abs(surplus) vector_1[i + 1] += surplus return dirt
[docs] def hamming(set_1: set, set_2: set) -> int: """ Computes the Hamming distance between two sets. Parameters ---------- set_1 : set First vector. set_2 : set Second vector. Returns ------- int Hamming distance. """ return len(set_1.symmetric_difference(set_2))
[docs] def vote_to_pote(vote: list) -> list: """ Converts vote to pote (i.e. positional vote) Parameters ---------- vote : list Ordinal vote. Returns ------- list Potes (i.e. positional votes). """ return [vote.index(i) for i in range(len(vote)+1) if i in vote]
[docs] def swap_distance(vote_1: list, vote_2: list, matching=None) -> int: """ Return: Swap distance between two votes """ new_vote_2 = deepcopy(vote_2) if matching is not None: for i in range(len(vote_2)): new_vote_2[i] = matching[vote_2[i]] pote_1 = vote_to_pote(vote_1) pote_2 = vote_to_pote(new_vote_2) swap_distance = 0 for i, j in itertools.combinations(pote_1, 2): if (pote_1[i] > pote_1[j] and pote_2[i] < pote_2[j]) or \ (pote_1[i] < pote_1[j] and pote_2[i] > pote_2[j]): swap_distance += 1 return swap_distance
[docs] def swap_distance_between_potes(pote_1: list, pote_2: list) -> int: """ Computes the swap distance between two potes (i.e. positional votes). Parameters ---------- pote_1 : list First vector. pote_2 : list Second vector. Returns ------- int Swap distance. """ swap_distance = 0 for i, j in itertools.combinations(pote_1, 2): if (pote_1[i] > pote_1[j] and pote_2[i] < pote_2[j]) or \ (pote_1[i] < pote_1[j] and pote_2[i] > pote_2[j]): swap_distance += 1 return swap_distance
[docs] def spearman_distance_between_potes(pote_1: list, pote_2: list) -> int: """ Computes the Spearman distance between two potes (i.e. positional votes). Parameters ---------- pote_1 : list First vector. pote_2 : list Second vector. Returns ------- int Spearman distance. """ return sum([abs(pote_1[c] - pote_2[c]) for c in range(len(pote_1))])