Source code for mapof.elections.features.dap

import itertools

import numpy as np

from mapof.elections.features.register import register_ordinal_election_feature

### AUXILLIARY FUNCTIONS ###

def _remove_diag(mtrx):
    """ Return: Input frequency_matrix with diagonal removed (shape[1] - 1) """
    res = np.zeros((mtrx.shape[0], mtrx.shape[1] - 1))
    for i in range(mtrx.shape[0]):
        for j in range(mtrx.shape[0]):
            if j < i:
                res[i, j] = mtrx[i, j]
            elif j > i:
                res[i, j - 1] = mtrx[i, j]
    return res


def _vote2pote(vote, m):
    reported = vote[vote != -1]
    part_pote = np.argsort(reported)
    res = []
    i = 0
    non_reported_pos = len(reported) + (m - 1 - len(reported)) / 2
    for c in range(m):
        if c in reported:
            res.append(part_pote[i])
            i = i + 1
        else:
            res.append(non_reported_pos)
    return np.array(res)


def _get_potes(election):
    if election.potes is not None:
        return election.potes
    else:
        res = []
        for v in election.votes:
            res.append(_vote2pote(v, election.num_candidates))
        res = np.array(res)
        election.potes = res
        return res


def _swap_distance_between_potes(pote_1: list, pote_2: list, m: int) -> int:
    """ Return: Swap distance between two potes """
    swap_distance = 0
    for a in range(m):
        for b in range(m):
            if (pote_1[a] < pote_1[b] and pote_2[a] >= pote_2[b]):
                swap_distance += 0.5
            if (pote_1[a] <= pote_1[b] and pote_2[a] > pote_2[b]):
                swap_distance += 0.5
    return swap_distance


def _get_vote_dists(election):
    try:
        return election.vote_dists
    except:
        potes = _get_potes(election)
        distances = np.zeros([election.num_voters, election.num_voters])
        for v1 in range(election.num_voters):
            for v2 in range(v1 + 1, election.num_voters):
                distances[v1][v2] = _swap_distance_between_potes(potes[v1], potes[v2],
                                                                election.num_candidates)
                distances[v2][v1] = distances[v1][v2]
        election.vote_dists = distances
        return distances


def _get_candidate_dists(election):
    try:
        return election.candidate_dists
    except:
        potes = _get_potes(election)
        distances = np.zeros([election.num_candidates, election.num_candidates])
        for a in range(election.num_candidates):
            for b in range(a + 1, election.num_candidates):
                for v in range(election.num_voters):
                    distances[a][b] += abs(potes[v][a] - potes[v][b])
                distances[b][a] = distances[a][b]
        election.candidate_dists = distances
        return distances


def _calculate_borda_scores(election):
    m = election.num_candidates
    borda = np.zeros(m, int)
    for v in election.votes:
        for i, c in enumerate(v):
            borda[c] = borda[c] + m - i - 1
    return borda

def _geom_mean(x):
    x = np.log(x)
    return np.exp(x.mean())

def _kemeny_ranking(election):
    m = election.num_candidates
    wmg = election.votes_to_pairwise_matrix()
    best_d = np.inf
    for test_ranking in itertools.permutations(list(range(m))):
        dist = 0
        for i in range(m):
            for j in range(i + 1, m):
                dist = dist + wmg[test_ranking[j], test_ranking[i]]
            if dist > best_d:
                break
        if dist < best_d:
            best = test_ranking
            best_d = dist
    return best, best_d
    

### AGREEMENT INDICES ###

[docs] @register_ordinal_election_feature('agreement') def agreement_index(election) -> dict: """ Calculates the agreement index of the election as defined in Faliszewski et al. 'Diversity, Agreement, and Polarization in Elections'. Agreement for a specific pair of candidates is defined as the difference between the number of their supporters divided by the total number of voters. Agreement index of an election is the average agreement between each pair of voters. Parameters ---------- election : OrdinalElection Returns ------- dict 'value': agreement index """ if election.is_pseudo: return {'value': None} potes = _get_potes(election) res = 0 for a, b in itertools.combinations(range(election.num_candidates), 2): a_b = 0 b_a = 0 for p in potes: if p[a] < p[b]: a_b += 1 elif p[b] < p[a]: b_a += 1 res += max(abs(a_b - b_a), election.num_voters - a_b - b_a) return {'value': res / election.num_voters / ( election.num_candidates - 1) / election.num_candidates * 2}
[docs] @register_ordinal_election_feature('borda_std') def borda_std(election) -> dict: """ Standard deviation of Borda scores of all candidates. """ if election.is_pseudo: return {'value': None} all_scores = _calculate_borda_scores(election) return {'value': all_scores.std()}
[docs] @register_ordinal_election_feature('avg_vote_dist') def avg_vote_dist(election) -> dict: """ Average swap distance between voters. """ if election.is_pseudo: return {'value': None} distances = _get_vote_dists(election) return {'value': distances.sum() / election.num_voters / (election.num_voters - 1)}
[docs] @register_ordinal_election_feature('max_vote_dist') def max_vote_dist(election) -> dict: """ Maximum swap distance between voters. """ if election.is_pseudo: return {'value': None} distances = _get_vote_dists(election) return {'value': distances.max()}
[docs] @register_ordinal_election_feature('karpov_index') def karpov_index(election): """ Geometric mean based index proposed in Karpov's 'Preference diversity orderings' paper. """ if election.is_pseudo: return {'value': None} distances = _get_vote_dists(election) distances = _remove_diag(distances) distances = distances + 0.5 distances[distances == 0.5] = 1 return {'value': _geom_mean(distances)}
[docs] @register_ordinal_election_feature('avg_dist_to_kemeny') def avg_dist_to_kemeny(election): """ Average swap distance between each voter and the Kemeny ranking. """ if election.is_pseudo: return {'value': None} _, dist = _kemeny_ranking(election) return dist / election.num_voters
[docs] @register_ordinal_election_feature('avg_dist_to_borda') def avg_dist_to_bord(election) -> dict: """ Average swap distance between each voter and the Borda ranking. """ if election.is_pseudo: return {'value': None} m = election.num_candidates borda = _calculate_borda_scores(election) ranking = np.argsort(-borda) wmg = election.votes_to_pairwise_matrix() dist = 0 for i in range(m): for j in range(i + 1, m): dist = dist + wmg[ranking[j], ranking[i]] return {'value': dist / election.num_voters}
### DIVERSITY INDICES ### def _distances_to_rankings(rankings, distances): dists = distances[rankings] return np.sum(dists.min(axis=0)) def _find_improvement(distances, d, starting, rest, k, l): for cut in itertools.combinations(range(k), l): for paste in itertools.combinations(rest, l): ranks = [] j = 0 for i in range(k): if i in cut: ranks.append(paste[j]) j = j + 1 else: ranks.append(starting[i]) # check if unique if len(set(ranks)) == len(ranks): # check if better d_new = _distances_to_rankings(ranks, distances) if d > d_new: return ranks, d_new, True return starting, d, False
[docs] @register_ordinal_election_feature('kkememy_single_k') def kkemeny_single_k(election, k, l, starting=None) -> dict: """ Calculates approximate value of k-Kemeny for single k, i.e., the sum of the swap distances between each voter and the closest out of k chosen rankings, where the k chosen rankings are set optimally. The distence is approximated as we search for the chosen rankings only from the rankings that are already present in the election as votes. Further, this distance is approximated using local search approach, where in each iteration, l rankings are changed for another rankings to optimize the sum of swap distances. """ if starting is None: starting = list(range(k)) distances = _get_vote_dists(election) n = election.num_voters d = _distances_to_rankings(starting, distances) iter = 0 check = True while (check): iter = iter + 1 rest = [i for i in range(n) if i not in starting] for j in range(l): starting, d, check = _find_improvement(distances, d, starting, rest, k, j + 1) if check: break return {'value': d}
[docs] @register_ordinal_election_feature('kkemeny_diversity_upto_r') def kkemeny_diversity_upto_r(election, r) -> dict: """ Calculates the approximate k-Kemeny diversity index as defined in Faliszewski et al., 'Distances Between Top-Truncated Elections of Different Sizes'. It sums the values of approximate k-Kemeny distances for k from 1 to r obtained using the local search method (see `~kkememy_single_k` function). In the paper, the authors use the value of r equal to 5 and they argue that it is good enough. """ if election.is_pseudo: return {'value': None} res = 0 for k in range(1, r+1): res += kkemeny_single_k(election, k, 1)['value'] max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': res / election.num_voters / max_dist / r}
[docs] @register_ordinal_election_feature('kkemeny_diversity_full') def kkemeny_diversity_full(election) -> dict: ''' Calculates the approximate k-Kemeny diversity index as defined in Faliszewski et al., 'Diversity, Agreement, and Polarization in Elections'. It sums the values of approximate k-Kemeny distances divided by k for k from 1 to n, where n is the number of voters in the election. The values of the approximate k-Kemeny distances are obtained using the local search method (see `~kkememy_single_k` function). ''' if election.is_pseudo: return {'value': None} res = 0 for k in range(1, election.num_voters + 1): res += kkemeny_single_k(election, k, 1)['value']/k max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': res / election.num_voters / max_dist}
[docs] @register_ordinal_election_feature('support_diversity') def support_diversity(election, tuple_len) -> dict: """ Given tuple lenght k, for each k-tuple of candidates counts the number of different orderings of this tuple that can be found within the votes. Then, sums it for all k-tuples. Proposed in Hashemi and Endriss, 'Measuring diversity of preferences in a group'. """ if election.is_pseudo: return {'value': None} m = election.num_candidates res = 0 for subset in itertools.combinations(range(m), tuple_len): support = [] for v in election.votes: trimmed_v = [] for c in v: if c in subset: trimmed_v.append(c) if not (trimmed_v in support): support.append(trimmed_v) res = res + len(support) return {'value': res}
[docs] @register_ordinal_election_feature('support_pairs') def support_pairs(election): """ For each pair of candidates, a and b, checks whether a is always preferred over b. If this is the case assigns value 1 to this pair, otherwise value 2. Then sums it up for all possible pairs of candidates. Proposed in Hashemi and Endriss, 'Measuring diversity of preferences in a group'. """ return support_diversity(election, 2)
[docs] @register_ordinal_election_feature('support_triplets') def support_triplets(election): """ For each triplet of candidates, a, b, c, checks how many different orderings of these three candidates can be found withing the votes. Then sums it up for all possible triplets of candidates. Proposed in Hashemi and Endriss, 'Measuring diversity of preferences in a group'. """ return support_diversity(election, 3)
[docs] @register_ordinal_election_feature('support_votes') def support_votes(election) -> dict: """ Counts the number of unique votes. Proposed as a measure of diversity in Hashemi and Endriss, 'Measuring diversity of preferences in a group'. """ if election.is_pseudo: return {'value': None} m = election.num_candidates return {'value': support_diversity(election, m)}
[docs] @register_ordinal_election_feature('cand_pos_dist_std') def cand_pos_dist_std(election) -> dict: """ For each pair of candidates we calculate the average difference in positions across all voters. Then we take the standard deviation of the values for all pairs. """ if election.is_pseudo: return {'value': None} distances = _get_candidate_dists(election) distances = _remove_diag(distances) return {'value': - distances.std() / election.num_voters}
### Polarization Indices ###
[docs] @register_ordinal_election_feature('kkemeny_polarization') def kkemeny_polarization(election) -> dict: """ Calculates the approximate k-Kemeny polarization index as defined in Faliszewski et al., 'Diversity, Agreement, and Polarization in Elections'. It takes the difference between approximate k-Kemeny distance for k=1 and k=2. The values of the approximate k-Kemeny distances are obtained using the local search method (see `~kkememy_single_k` function). """ if election.is_pseudo: return {'value': None} first_kemeny = kkemeny_single_k(election, 1, 1)['value'] second_kemeny = kkemeny_single_k(election, 2, 1)['value'] max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2 return {'value': 2 * (first_kemeny - second_kemeny) / election.num_voters / max_dist}