Source code for mapof.elections.features.diversity

import math

import numpy as np
import itertools

from mapof.elections.features.register import register_ordinal_election_feature


def kemeny_ranking(election):
    m = election.num_candidates
    wmg = election.votes_to_pairwise_matrix()
    best_d = np.inf
    for test_ranking in itertools.permutations(list(range(m))):
        dist = 0
        for i in range(m):
            for j in range(i + 1, m):
                dist = dist + wmg[test_ranking[j], test_ranking[i]]
            if dist > best_d:
                break
        if dist < best_d:
            best = test_ranking
            best_d = dist
    return best, best_d


def gini_coef(x):
    # Mean absolute difference
    mad = np.abs(np.subtract.outer(x, x)).mean()
    return (mad / x.mean() / 2)


def geom_mean(x):
    x = np.log(x)
    return np.exp(x.mean())


[docs] def swap_distance_between_potes(pote_1: list, pote_2: list) -> int: """ Return: Swap distance between two potes """ swap_distance = 0 for i, j in itertools.combinations(pote_1, 2): if (pote_1[i] > pote_1[j] and pote_2[i] < pote_2[j]) or \ (pote_1[i] < pote_1[j] and pote_2[i] > pote_2[j]): swap_distance += 1 return swap_distance
[docs] def remove_diag(mtrx): """ Return: Input frequency_matrix with diagonal removed (shape[1] - 1) """ res = np.zeros((mtrx.shape[0], mtrx.shape[1] - 1)) for i in range(mtrx.shape[0]): for j in range(mtrx.shape[0]): if j < i: res[i, j] = mtrx[i, j] elif j > i: res[i, j - 1] = mtrx[i, j] return res
def calculate_borda_scores(election): m = election.num_candidates borda = np.zeros(m, int) for v in election.votes: for i, c in enumerate(v): borda[c] = borda[c] + m - i - 1 return borda def calculate_cand_dom_dist(election): distances = election.votes_to_pairwise_matrix() distances = np.abs(distances - 0.5) np.fill_diagonal(distances, 0) return distances def calculate_cand_pos_dist(election): election.compute_potes() distances = np.zeros([election.num_candidates, election.num_candidates]) for c1 in range(election.num_candidates): for c2 in range(election.num_candidates): dist = 0 for pote in election.potes: dist += abs(pote[c1] - pote[c2]) distances[c1][c2] = dist return distances def calculate_vote_swap_dist(election): election.compute_potes() distances = np.zeros([election.num_voters, election.num_voters]) for v1 in range(election.num_voters): for v2 in range(election.num_voters): distances[v1][v2] = swap_distance_between_potes( election.potes[v1], election.potes[v2]) return distances # DIVERSITY INDICES @register_ordinal_election_feature('borda_gini') def borda_gini(election) -> dict: if election.is_pseudo: return {'value': None} all_scores = calculate_borda_scores(election) return {'value': gini_coef(all_scores)} @register_ordinal_election_feature('borda_meandev') def borda_meandev(election) -> dict: if election.is_pseudo: return {'value': None} all_scores = calculate_borda_scores(election) all_scores = np.abs(all_scores - all_scores.mean()) return {'value': all_scores.mean()} @register_ordinal_election_feature('borda_std') def borda_std(election) -> dict: if election.is_pseudo: return {'value': None} all_scores = calculate_borda_scores(election) return {'value': all_scores.std()} @register_ordinal_election_feature('borda_range') def borda_range(election) -> dict: if election.is_pseudo: return {'value': None} all_scores = calculate_borda_scores(election) return (np.max(all_scores) - np.min(all_scores)) @register_ordinal_election_feature('cand_dom_dist_mean') def cand_dom_dist_mean(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_cand_dom_dist(election) return {'value': distances.sum() / (election.num_candidates - 1) / election.num_candidates * 2}
[docs] @register_ordinal_election_feature('Agreement') def agreement_index(election) -> dict: """ Calculates the agreement index of the election. Parameters ---------- election : OrdinalElection Returns ------- dict 'value': agreement index """ if election.is_pseudo: return {'value': None} distances = calculate_cand_dom_dist(election) return {'value': distances.sum() / (election.num_candidates - 1) / election.num_candidates * 2}
@register_ordinal_election_feature('cand_dom_dist_std') def cand_dom_dist_std(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_cand_dom_dist(election) distances = remove_diag(distances) return {'value': distances.std()} @register_ordinal_election_feature('cand_pos_dist_std') def cand_pos_dist_std(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_cand_pos_dist(election) distances = remove_diag(distances) return {'value': distances.std()} @register_ordinal_election_feature('cand_pos_dist_meandev') def cand_pos_dist_meandev(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_cand_pos_dist(election) distances = remove_diag(distances) distances = np.abs(distances - distances.mean()) return {'value': distances.mean()} @register_ordinal_election_feature('cand_pos_dist_gini') def cand_pos_dist_gini(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_cand_pos_dist(election) distances = remove_diag(distances) return {'value': gini_coef(distances)} @register_ordinal_election_feature('med_cands_summed') def med_cands_summed(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates distances = calculate_cand_pos_dist(election) res = [0] * m for i in range(1, m): best_d = np.inf for comb in itertools.combinations(range(m), i): d_total = 0 for c1 in range(m): min_d = np.inf for c2 in comb: d_cand = distances[c1, c2] if d_cand < min_d: min_d = d_cand d_total = d_total + min_d if d_total < best_d: best_d = d_total res[i] = best_d return {'value': sum(res)} @register_ordinal_election_feature('vote_dist_mean') def vote_dist_mean(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) return {'value': distances.sum() / election.num_voters / (election.num_voters - 1)} @register_ordinal_election_feature('vote_dist_max') def vote_dist_max(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) return {'value': distances.max()} @register_ordinal_election_feature('vote_dist_med') def vote_dist_med(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) distances = remove_diag(distances) return {'value': np.median(distances)} @register_ordinal_election_feature('vote_dist_gini') def vote_dist_gini(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) distances = remove_diag(distances) return {'value': gini_coef(distances)} @register_ordinal_election_feature('vote_sqr_dist_mean') def vote_sqr_dist_mean(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) distances = remove_diag(distances) distances = np.sqrt(distances) return {'value': distances.mean()} @register_ordinal_election_feature('vote_sqr_dist_med') def vote_sqr_dist_med(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) distances = remove_diag(distances) distances = np.sqrt(distances) return {'value': np.median(distances)} @register_ordinal_election_feature('vote_diversity_Karpov') def vote_diversity_Karpov(election): if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) distances = remove_diag(distances) distances = distances + 0.5 distances[distances == 0.5] = 1 return {'value': geom_mean(distances)} @register_ordinal_election_feature('dist_to_Kemeny_mean') def dist_to_Kemeny_mean(election): if election.is_pseudo: return {'value': None} _, dist = kemeny_ranking(election) return dist / election.num_voters @register_ordinal_election_feature('dist_to_Borda_mean') def dist_to_Borda_mean(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates borda = calculate_borda_scores(election) ranking = np.argsort(-borda) wmg = election.votes_to_pairwise_matrix() dist = 0 for i in range(m): for j in range(i + 1, m): dist = dist + wmg[ranking[j], ranking[i]] return {'value': dist / election.num_voters} @register_ordinal_election_feature('lexi_diversity') def lexi_diversity(election): if election.is_pseudo: return {'value': None} return {'value': None} @register_ordinal_election_feature('greedy_kKemenys_summed') def greedy_kKemenys_summed(election) -> dict: if election.is_pseudo: return {'value': None} res = [0] * election.num_voters distances = calculate_vote_swap_dist(election) best = np.argmin(distances.sum(axis=1)) best_vec = distances[best] res[0] = best_vec.sum() distances = np.vstack((distances[:best], distances[best + 1:])) for i in range(1, election.num_voters): relatives = distances - best_vec relatives = relatives * (relatives < 0) best = np.argmin(relatives.sum(axis=1)) best_vec = best_vec + relatives[best] res[i] = best_vec.sum() distances = np.vstack((distances[:best], distances[best + 1:])) return sum(res) max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': sum(res) / election.num_voters / max_dist} def restore_order(x): for i in range(len(x)): for j in range(len(x) - i, len(x)): if x[j] >= x[-i - 1]: x[j] += 1 return x def distances_to_rankings(rankings, distances): dists = distances[rankings] return np.sum(dists.min(axis=0)) def find_improvement(distances, d, starting, rest, n, k, l): for cut in itertools.combinations(range(k), l): # print(cut) for paste in itertools.combinations(rest, l): ranks = [] j = 0 for i in range(k): if i in cut: ranks.append(paste[j]) j = j + 1 else: ranks.append(starting[i]) # check if unique if len(set(ranks)) == len(ranks): # check if better d_new = distances_to_rankings(ranks, distances) if d > d_new: return ranks, d_new, True return starting, d, False def local_search_kKemeny_single_k(election, k, l, starting=None) -> dict: if starting is None: starting = list(range(k)) distances = calculate_vote_swap_dist(election) n = election.num_voters d = distances_to_rankings(starting, distances) iter = 0 check = True while (check): # print(iter) # print(starting) # print(d) # print() iter = iter + 1 rest = [i for i in range(n) if i not in starting] for j in range(l): starting, d, check = find_improvement(distances, d, starting, rest, n, k, j + 1) if check: break # print() return {'value': d} def local_search_kKemeny(election, l, starting=None) -> dict: max_dist = election.num_candidates * (election.num_candidates - 1) / 2 res = [] for k in range(1, election.num_voters): # print(k) if starting is None: d = local_search_kKemeny_single_k(election, k, l)['value'] else: d = local_search_kKemeny_single_k(election, k, l, starting[:k])['value'] d = d / max_dist / election.num_voters if d > 0: res.append(d) else: break for k in range(len(res), election.num_voters): res.append(0) return {'value': res}
[docs] @register_ordinal_election_feature('Diversity') def diversity_index(election) -> dict: """ Calculates the diversity index of the election. Parameters ---------- election : OrdinalElection Returns ------- dict 'value': diversity index """ if election.is_pseudo: return {'value': None} max_dist = election.num_candidates * (election.num_candidates - 1) / 2 res = [0] * election.num_voters chosen_votes = [] distances = calculate_vote_swap_dist(election) best = np.argmin(distances.sum(axis=1)) chosen_votes.append(best) best_vec = distances[best] res[0] = best_vec.sum() / max_dist / election.num_voters distances = np.vstack((distances[:best], distances[best + 1:])) for i in range(1, election.num_voters): relatives = distances - best_vec relatives = relatives * (relatives < 0) best = np.argmin(relatives.sum(axis=1)) chosen_votes.append(best) best_vec = best_vec + relatives[best] res[i] = best_vec.sum() / max_dist / election.num_voters distances = np.vstack((distances[:best], distances[best + 1:])) chosen_votes = restore_order(chosen_votes) res_1 = local_search_kKemeny(election, 1, chosen_votes)['value'] res_2 = local_search_kKemeny(election, 1)['value'] res = [min(d_1, d_2) for d_1, d_2 in zip(res_1, res_2)] return {'value': sum([x / (i + 1) for i, x in enumerate(res)])}
@register_ordinal_election_feature('greedy_kKemenys_divk_summed') def greedy_kKemenys_divk_summed(election) -> dict: if election.is_pseudo: return {'value': None} res = [0] * election.num_voters distances = calculate_vote_swap_dist(election) best = np.argmin(distances.sum(axis=1)) best_vec = distances[best] res[0] = best_vec.sum() distances = np.vstack((distances[:best], distances[best + 1:])) for i in range(1, election.num_voters): relatives = distances - best_vec relatives = relatives * (relatives < 0) best = np.argmin(relatives.sum(axis=1)) best_vec = best_vec + relatives[best] res[i] = best_vec.sum() / (i + 1) distances = np.vstack((distances[:best], distances[best + 1:])) # res[0] = 0 # for disregarding one Kemeny (AN = ID) max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': sum(res) / election.num_voters / max_dist} @register_ordinal_election_feature('greedy_2kKemenys_summed') def greedy_2kKemenys_summed(election) -> dict: if election.is_pseudo: return {'value': None} res = [] distances = calculate_vote_swap_dist(election) best = np.argmin(distances.sum(axis=1)) best_vec = distances[best] res.append(best_vec.sum()) distances = np.vstack((distances[:best], distances[best + 1:])) k = 2 for i in range(1, election.num_voters): relatives = distances - best_vec relatives = relatives * (relatives < 0) best = np.argmin(relatives.sum(axis=1)) best_vec = best_vec + relatives[best] if (i + 1) == k: res.append(best_vec.sum()) k = k * 2 distances = np.vstack((distances[:best], distances[best + 1:])) # res[0] = 0 # for disregarding one Kemeny (AN = ID) max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': sum(res) / election.num_voters / max_dist / 2} @register_ordinal_election_feature('polarization_1by2Kemenys') def polarization_1by2Kemenys(election) -> dict: if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) best = np.argmin(distances.sum(axis=1)) best_vec = distances[best] first_kemeny = best_vec.sum() distances = np.vstack((distances[:best], distances[best + 1:])) relatives = distances - best_vec relatives = relatives * (relatives < 0) best = np.argmin(relatives.sum(axis=1)) best_vec = best_vec + relatives[best] second_kemeny = best_vec.sum() max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': (first_kemeny - second_kemeny) / election.num_voters / max_dist}
[docs] @register_ordinal_election_feature('Polarization') def polarization_index(election) -> dict: """ Calculates the polarization index of the election. Parameters ---------- election : OrdinalElection Returns ------- dict 'value': polarization index """ if election.is_pseudo: return {'value': None} distances = calculate_vote_swap_dist(election) best_1 = np.argmin(distances.sum(axis=1)) best_vec = distances[best_1] first_kemeny = best_vec.sum() distances = np.vstack((distances[:best_1], distances[best_1 + 1:])) relatives = distances - best_vec relatives = relatives * (relatives < 0) best_2 = np.argmin(relatives.sum(axis=1)) if best_1 <= best_2: best_2 = best_2 + 1 chosen = [best_1, best_2] chosen.sort() second_kemeny_1 = local_search_kKemeny_single_k(election, 2, 1, starting=chosen)['value'] second_kemeny_2 = local_search_kKemeny_single_k(election, 2, 1)['value'] second_kemeny = min(second_kemeny_1, second_kemeny_2) max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': 2 * (first_kemeny - second_kemeny) / election.num_voters / max_dist}
@register_ordinal_election_feature('greedy_kmeans_summed') def greedy_kmeans_summed(election) -> dict: if election.is_pseudo: return {'value': None} res = [0] * election.num_voters distances = calculate_vote_swap_dist(election) distances = distances * distances best = np.argmin(distances.sum(axis=1)) best_vec = distances[best] res[0] = best_vec.sum() distances = np.vstack((distances[:best], distances[best + 1:])) for i in range(1, election.num_voters): relatives = distances - best_vec relatives = relatives * (relatives < 0) best = np.argmin(relatives.sum(axis=1)) best_vec = best_vec + relatives[best] res[i] = best_vec.sum() distances = np.vstack((distances[:best], distances[best + 1:])) return {'value': sum(res)} @register_ordinal_election_feature('support_diversity') def support_diversity(election, tuple_len) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 for subset in itertools.combinations(range(m), tuple_len): support = [] for v in election.votes: trimmed_v = [] for c in v: if c in subset: trimmed_v.append(c) if not (trimmed_v in support): support.append(trimmed_v) res = res + len(support) return {'value': res} @register_ordinal_election_feature('support_diversity_normed') def support_diversity_normed(election, tuple_len) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 count = 0 for subset in itertools.combinations(range(m), tuple_len): count = count + 1 support = [] for v in election.votes: trimmed_v = [] for c in v: if c in subset: trimmed_v.append(c) if not (trimmed_v in support): support.append(trimmed_v) res = res + len(support) return {'value': res / count} @register_ordinal_election_feature('support_diversity_normed2') def support_diversity_normed2(election, tuple_len) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 count = 0 for subset in itertools.combinations(range(m), tuple_len): count = count + 1 support = [] for v in election.votes: trimmed_v = [] for c in v: if c in subset: trimmed_v.append(c) if not (trimmed_v in support): support.append(trimmed_v) res = res + len(support) return {'value': res / count / math.factorial(tuple_len)} @register_ordinal_election_feature('support_diversity_normed3') def support_diversity_normed3(election, tuple_len) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 count = 0 for subset in itertools.combinations(range(m), tuple_len): count = count + 1 support = [] for v in election.votes: trimmed_v = [] for c in v: if c in subset: trimmed_v.append(c) if not (trimmed_v in support): support.append(trimmed_v) res = res + len(support) max_times = min(math.factorial(tuple_len), election.num_voters) return {'value': res / count / max_times} @register_ordinal_election_feature('support_pairs') def support_pairs(election): return support_diversity(election, 2) @register_ordinal_election_feature('support_triplets') def support_triplets(election): return support_diversity(election, 3) @register_ordinal_election_feature('support_votes') def support_votes(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates return {'value': support_diversity(election, m)} @register_ordinal_election_feature('support_diversity_summed') def support_diversity_summed(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 for i in range(2, m + 1): res = res + support_diversity(election, i)['value'] return {'value': res} @register_ordinal_election_feature('support_diversity_normed_summed') def support_diversity_normed_summed(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 for i in range(2, m + 1): res = res + support_diversity_normed(election, i)['value'] return {'value': res} @register_ordinal_election_feature('support_diversity_normed2_summed') def support_diversity_normed2_summed(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 for i in range(2, m + 1): res = res + support_diversity_normed2(election, i)['value'] return {'value': res} @register_ordinal_election_feature('support_diversity_normed3_summed') def support_diversity_normed3_summed(election) -> dict: if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 for i in range(2, m + 1): res = res + support_diversity_normed3(election, i)['value'] return {'value': res}