import itertools
import numpy as np
from mapof.elections.features.register import register_ordinal_election_feature
### AUXILLIARY FUNCTIONS ###
def _remove_diag(mtrx):
""" Return: Input frequency_matrix with diagonal removed (shape[1] - 1) """
res = np.zeros((mtrx.shape[0], mtrx.shape[1] - 1))
for i in range(mtrx.shape[0]):
for j in range(mtrx.shape[0]):
if j < i:
res[i, j] = mtrx[i, j]
elif j > i:
res[i, j - 1] = mtrx[i, j]
return res
def _vote2pote(vote, m):
reported = vote[vote != -1]
part_pote = np.argsort(reported)
res = []
i = 0
non_reported_pos = len(reported) + (m - 1 - len(reported)) / 2
for c in range(m):
if c in reported:
res.append(part_pote[i])
i = i + 1
else:
res.append(non_reported_pos)
return np.array(res)
def _get_potes(election):
if election.potes is not None:
return election.potes
else:
res = []
for v in election.votes:
res.append(_vote2pote(v, election.num_candidates))
res = np.array(res)
election.potes = res
return res
def _swap_distance_between_potes(pote_1: list, pote_2: list, m: int) -> int:
""" Return: Swap distance between two potes """
swap_distance = 0
for a in range(m):
for b in range(m):
if (pote_1[a] < pote_1[b] and pote_2[a] >= pote_2[b]):
swap_distance += 0.5
if (pote_1[a] <= pote_1[b] and pote_2[a] > pote_2[b]):
swap_distance += 0.5
return swap_distance
def _get_vote_dists(election):
try:
return election.vote_dists
except:
potes = _get_potes(election)
distances = np.zeros([election.num_voters, election.num_voters])
for v1 in range(election.num_voters):
for v2 in range(v1 + 1, election.num_voters):
distances[v1][v2] = _swap_distance_between_potes(potes[v1], potes[v2],
election.num_candidates)
distances[v2][v1] = distances[v1][v2]
election.vote_dists = distances
return distances
def _get_candidate_dists(election):
try:
return election.candidate_dists
except:
potes = _get_potes(election)
distances = np.zeros([election.num_candidates, election.num_candidates])
for a in range(election.num_candidates):
for b in range(a + 1, election.num_candidates):
for v in range(election.num_voters):
distances[a][b] += abs(potes[v][a] - potes[v][b])
distances[b][a] = distances[a][b]
election.candidate_dists = distances
return distances
def _calculate_borda_scores(election):
m = election.num_candidates
borda = np.zeros(m, int)
for v in election.votes:
for i, c in enumerate(v):
borda[c] = borda[c] + m - i - 1
return borda
def _geom_mean(x):
x = np.log(x)
return np.exp(x.mean())
def _kemeny_ranking(election):
m = election.num_candidates
wmg = election.votes_to_pairwise_matrix()
best_d = np.inf
for test_ranking in itertools.permutations(list(range(m))):
dist = 0
for i in range(m):
for j in range(i + 1, m):
dist = dist + wmg[test_ranking[j], test_ranking[i]]
if dist > best_d:
break
if dist < best_d:
best = test_ranking
best_d = dist
return best, best_d
### AGREEMENT INDICES ###
[docs]
@register_ordinal_election_feature('agreement')
def agreement_index(election) -> dict:
"""
Calculates the agreement index of the election as defined in
Faliszewski et al. 'Diversity, Agreement, and Polarization in Elections'.
Agreement for a specific pair of candidates is defined as
the difference between the number of their supporters
divided by the total number of voters.
Agreement index of an election is the average agreement
between each pair of voters.
Parameters
----------
election : OrdinalElection
Returns
-------
dict
'value': agreement index
"""
if election.is_pseudo:
return {'value': None}
potes = _get_potes(election)
res = 0
for a, b in itertools.combinations(range(election.num_candidates), 2):
a_b = 0
b_a = 0
for p in potes:
if p[a] < p[b]:
a_b += 1
elif p[b] < p[a]:
b_a += 1
res += max(abs(a_b - b_a), election.num_voters - a_b - b_a)
return {'value': res / election.num_voters / (
election.num_candidates - 1) / election.num_candidates * 2}
[docs]
@register_ordinal_election_feature('borda_std')
def borda_std(election) -> dict:
"""
Standard deviation of Borda scores of all candidates.
"""
if election.is_pseudo:
return {'value': None}
all_scores = _calculate_borda_scores(election)
return {'value': all_scores.std()}
[docs]
@register_ordinal_election_feature('avg_vote_dist')
def avg_vote_dist(election) -> dict:
"""
Average swap distance between voters.
"""
if election.is_pseudo:
return {'value': None}
distances = _get_vote_dists(election)
return {'value': distances.sum() / election.num_voters / (election.num_voters - 1)}
[docs]
@register_ordinal_election_feature('max_vote_dist')
def max_vote_dist(election) -> dict:
"""
Maximum swap distance between voters.
"""
if election.is_pseudo:
return {'value': None}
distances = _get_vote_dists(election)
return {'value': distances.max()}
[docs]
@register_ordinal_election_feature('karpov_index')
def karpov_index(election):
"""
Geometric mean based index proposed in
Karpov's 'Preference diversity orderings' paper.
"""
if election.is_pseudo:
return {'value': None}
distances = _get_vote_dists(election)
distances = _remove_diag(distances)
distances = distances + 0.5
distances[distances == 0.5] = 1
return {'value': _geom_mean(distances)}
[docs]
@register_ordinal_election_feature('avg_dist_to_kemeny')
def avg_dist_to_kemeny(election):
"""
Average swap distance between each voter and the Kemeny ranking.
"""
if election.is_pseudo:
return {'value': None}
_, dist = _kemeny_ranking(election)
return dist / election.num_voters
[docs]
@register_ordinal_election_feature('avg_dist_to_borda')
def avg_dist_to_bord(election) -> dict:
"""
Average swap distance between each voter and the Borda ranking.
"""
if election.is_pseudo:
return {'value': None}
m = election.num_candidates
borda = _calculate_borda_scores(election)
ranking = np.argsort(-borda)
wmg = election.votes_to_pairwise_matrix()
dist = 0
for i in range(m):
for j in range(i + 1, m):
dist = dist + wmg[ranking[j], ranking[i]]
return {'value': dist / election.num_voters}
### DIVERSITY INDICES ###
def _distances_to_rankings(rankings, distances):
dists = distances[rankings]
return np.sum(dists.min(axis=0))
def _find_improvement(distances, d, starting, rest, k, l):
for cut in itertools.combinations(range(k), l):
for paste in itertools.combinations(rest, l):
ranks = []
j = 0
for i in range(k):
if i in cut:
ranks.append(paste[j])
j = j + 1
else:
ranks.append(starting[i])
# check if unique
if len(set(ranks)) == len(ranks):
# check if better
d_new = _distances_to_rankings(ranks, distances)
if d > d_new:
return ranks, d_new, True
return starting, d, False
[docs]
@register_ordinal_election_feature('kkememy_single_k')
def kkemeny_single_k(election, k, l, starting=None) -> dict:
"""
Calculates approximate value of k-Kemeny for single k, i.e.,
the sum of the swap distances between each voter and the closest
out of k chosen rankings, where the k chosen rankings are set optimally.
The distence is approximated as we search for the chosen rankings only
from the rankings that are already present in the election as votes.
Further, this distance is approximated using local search approach, where
in each iteration, l rankings are changed for another rankings to optimize
the sum of swap distances.
"""
if starting is None:
starting = list(range(k))
distances = _get_vote_dists(election)
n = election.num_voters
d = _distances_to_rankings(starting, distances)
iter = 0
check = True
while (check):
iter = iter + 1
rest = [i for i in range(n) if i not in starting]
for j in range(l):
starting, d, check = _find_improvement(distances, d, starting, rest, k, j + 1)
if check:
break
return {'value': d}
[docs]
@register_ordinal_election_feature('kkemeny_diversity_upto_r')
def kkemeny_diversity_upto_r(election, r) -> dict:
"""
Calculates the approximate k-Kemeny diversity index as defined in
Faliszewski et al., 'Distances Between Top-Truncated Elections of Different Sizes'.
It sums the values of approximate k-Kemeny distances for k from 1 to r
obtained using the local search method (see `~kkememy_single_k` function).
In the paper, the authors use the value of r equal to 5 and they argue that
it is good enough.
"""
if election.is_pseudo:
return {'value': None}
res = 0
for k in range(1, r+1):
res += kkemeny_single_k(election, k, 1)['value']
max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2
return {'value': res / election.num_voters / max_dist / r}
[docs]
@register_ordinal_election_feature('kkemeny_diversity_full')
def kkemeny_diversity_full(election) -> dict:
'''
Calculates the approximate k-Kemeny diversity index as defined in
Faliszewski et al., 'Diversity, Agreement, and Polarization in Elections'.
It sums the values of approximate k-Kemeny distances divided by k
for k from 1 to n, where n is the number of voters in the election.
The values of the approximate k-Kemeny distances are obtained
using the local search method (see `~kkememy_single_k` function).
'''
if election.is_pseudo:
return {'value': None}
res = 0
for k in range(1, election.num_voters + 1):
res += kkemeny_single_k(election, k, 1)['value']/k
max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2
return {'value': res / election.num_voters / max_dist}
[docs]
@register_ordinal_election_feature('support_diversity')
def support_diversity(election, tuple_len) -> dict:
"""
Given tuple lenght k, for each k-tuple of candidates counts
the number of different orderings of this tuple that
can be found within the votes. Then, sums it for all k-tuples.
Proposed in Hashemi and Endriss, 'Measuring diversity of preferences in a group'.
"""
if election.is_pseudo:
return {'value': None}
m = election.num_candidates
res = 0
for subset in itertools.combinations(range(m), tuple_len):
support = []
for v in election.votes:
trimmed_v = []
for c in v:
if c in subset:
trimmed_v.append(c)
if not (trimmed_v in support):
support.append(trimmed_v)
res = res + len(support)
return {'value': res}
[docs]
@register_ordinal_election_feature('support_pairs')
def support_pairs(election):
"""
For each pair of candidates, a and b, checks whether a is always preferred over b.
If this is the case assigns value 1 to this pair, otherwise value 2.
Then sums it up for all possible pairs of candidates.
Proposed in Hashemi and Endriss, 'Measuring diversity of preferences in a group'.
"""
return support_diversity(election, 2)
[docs]
@register_ordinal_election_feature('support_triplets')
def support_triplets(election):
"""
For each triplet of candidates, a, b, c,
checks how many different orderings of these three candidates
can be found withing the votes.
Then sums it up for all possible triplets of candidates.
Proposed in Hashemi and Endriss, 'Measuring diversity of preferences in a group'.
"""
return support_diversity(election, 3)
[docs]
@register_ordinal_election_feature('support_votes')
def support_votes(election) -> dict:
"""
Counts the number of unique votes.
Proposed as a measure of diversity in
Hashemi and Endriss, 'Measuring diversity of preferences in a group'.
"""
if election.is_pseudo:
return {'value': None}
m = election.num_candidates
return {'value': support_diversity(election, m)}
[docs]
@register_ordinal_election_feature('cand_pos_dist_std')
def cand_pos_dist_std(election) -> dict:
"""
For each pair of candidates we calculate the average difference
in positions across all voters. Then we take the standard deviation
of the values for all pairs.
"""
if election.is_pseudo:
return {'value': None}
distances = _get_candidate_dists(election)
distances = _remove_diag(distances)
return {'value': - distances.std() / election.num_voters}
### Polarization Indices ###
[docs]
@register_ordinal_election_feature('kkemeny_polarization')
def kkemeny_polarization(election) -> dict:
"""
Calculates the approximate k-Kemeny polarization index as defined in
Faliszewski et al., 'Diversity, Agreement, and Polarization in Elections'.
It takes the difference between approximate k-Kemeny distance for k=1 and k=2.
The values of the approximate k-Kemeny distances are obtained
using the local search method (see `~kkememy_single_k` function).
"""
if election.is_pseudo:
return {'value': None}
first_kemeny = kkemeny_single_k(election, 1, 1)['value']
second_kemeny = kkemeny_single_k(election, 2, 1)['value']
max_dist = (election.num_candidates) * (election.num_candidates - 1) / 2
return {'value': 2 * (first_kemeny - second_kemeny) / election.num_voters / max_dist}