"""Randomised local search that tries to lower a districting plan's efficiency gap.

The script reads a county table from ``input_file`` (columns used here:
``county_id``, ``district``, ``neighbors`` as a comma-separated list of
county ids, ``dem`` and ``rep`` vote counts).  Each experiment copies the
best plan found so far, moves a few counties into a neighbouring district,
and keeps the result only when every district population stays inside the
allowed band and the efficiency gap decreases.  The best plan is written
back to ``input_file`` and to ``data/new_eg_<gap>.csv``.
"""
import random

import pandas as pd

input_file = 'data/input.csv'

no_of_experiments = 100
max_random_counties = 14  # Should be less than the number of counties.
min_population_factor = 0.95
max_population_factor = 1.05


def _say(*parts):
    """Print ``parts`` separated by single spaces.

    Builds one string so the output is the same whether ``print`` is the
    Python 2 statement or the Python 3 function.
    """
    print(" ".join(str(part) for part in parts))


def getPopulations(current_trial_df, ifPrint=False):
    """Aggregate votes per district, in ascending district order.

    Args:
        current_trial_df: DataFrame with ``district``, ``dem`` and ``rep``
            columns.
        ifPrint: when true, print one summary line per district and the
            grand total.

    Returns:
        A tuple ``(populations, all_dems, all_reps, results)`` of parallel
        lists.  ``populations[i]`` is ``dem + rep`` for the i-th district and
        ``results[i]`` is ``'dems'`` when dems strictly outnumber reps,
        otherwise ``'reps'`` (ties therefore count as ``'reps'``).
    """
    populations = []
    all_dems = []
    all_reps = []
    results = []
    total_population = 0
    for district in sorted(current_trial_df['district'].unique()):
        district_rows = current_trial_df[current_trial_df.district == district]
        total_reps = district_rows['rep'].sum()
        total_dems = district_rows['dem'].sum()
        all_dems.append(total_dems)
        all_reps.append(total_reps)
        results.append('dems' if total_dems > total_reps else 'reps')
        district_population = total_reps + total_dems
        total_population += district_population
        populations.append(district_population)
        if ifPrint:
            _say("District ", district, ": Democrates - ", total_dems,
                 ". Republicans - ", total_reps,
                 ". Total: ", district_population)
    if ifPrint:
        _say("Total population: ", total_population)
    return populations, all_dems, all_reps, results


def getEfficiencyGap(populations, all_dems, all_reps, results):
    """Return the efficiency gap of a plan as a percentage of all votes.

    For each district the winner wastes every vote above the winning
    threshold and the loser wastes all of its votes.  The gap is the
    absolute difference between the two parties' wasted votes divided by
    the total vote count, times 100.

    Args:
        populations: total votes per district.
        all_dems: dem votes per district.
        all_reps: rep votes per district.
        results: ``'dems'`` or anything else (treated as a rep win) per
            district.

    Returns:
        The gap as a float percentage; ``0.0`` when there are no votes.
    """
    dems_total_wasted = 0
    reps_total_wasted = 0
    total_votes = 0
    for i, result in enumerate(results):
        # Smallest vote count that beats half of the district.  Floor
        # division keeps this independent of the interpreter's `/` and
        # round() semantics (round-half-to-even would give 53 for 103 votes).
        for_win = populations[i] // 2 + 1
        if result == "dems":
            dems_total_wasted += all_dems[i] - for_win
            reps_total_wasted += all_reps[i]
        else:
            reps_total_wasted += all_reps[i] - for_win
            dems_total_wasted += all_dems[i]
        total_votes += populations[i]
    if not total_votes:
        return 0.0
    return 100 * (abs(dems_total_wasted - reps_total_wasted) / float(total_votes))


def _parse_neighbors(raw):
    """Turn a comma-separated neighbour cell into an ordered list of int ids."""
    if pd.isnull(raw):
        return []
    return [int(token) for token in str(raw).split(",") if token.strip()]


def _can_leave_district(county_id, district, district_index, neighbor_index):
    """Local contiguity heuristic for moving ``county_id`` out of ``district``.

    The move is refused when two of the county's neighbours that belong to
    ``district`` are not listed as neighbours of each other (in either
    direction).  A county with fewer than two such neighbours always
    passes, including when it has no same-district neighbour at all.
    """
    same_district = [
        other for other in neighbor_index[county_id]
        if district_index[other] == district
    ]
    for first in same_district:
        for second in same_district:
            if first != second and first not in neighbor_index[second]:
                return False
    return True


def _build_trial(base_df, neighbor_index, no_of_randomized_counties):
    """Return a copy of ``base_df`` with some counties moved between districts.

    ``no_of_randomized_counties`` rows are sampled; for each sampled county
    the first neighbour that sits in a different district, has not already
    been moved in this trial, and passes ``_can_leave_district`` is pulled
    into the sampled county's district.
    """
    trial_df = base_df.copy()
    # Rebuilt for every trial so that assignments made by rejected trials
    # never leak into later contiguity checks.
    district_index = dict(zip(trial_df.county_id, trial_df.district))
    counties_done = set()

    random_rows = trial_df.sample(n=no_of_randomized_counties)
    for _, row in random_rows.iterrows():
        if row['county_id'] in counties_done:
            # Already moved as somebody's neighbour; its sampled district
            # value is stale, so skip it.
            continue
        current_district = row['district']
        for neighbor_id in _parse_neighbors(row['neighbors']):
            if neighbor_id in counties_done:
                continue
            neighbor_district = district_index[neighbor_id]
            if neighbor_district == current_district:
                continue
            if not _can_leave_district(neighbor_id, neighbor_district,
                                       district_index, neighbor_index):
                continue
            trial_df.loc[trial_df.county_id == neighbor_id, 'district'] = current_district
            district_index[neighbor_id] = current_district
            counties_done.add(neighbor_id)
            break  # at most one neighbour is moved per sampled county
    return trial_df


def main():
    """Run the experiments and write the best plan found back to disk."""
    df = pd.read_csv(input_file)
    neighbor_index = dict(
        (county_id, _parse_neighbors(raw))
        for county_id, raw in zip(df.county_id, df.neighbors)
    )
    last_good_df = df.copy()

    original_populations, all_dems, all_reps, results = getPopulations(df, ifPrint=True)
    last_good_eg = getEfficiencyGap(original_populations, all_dems, all_reps, results)
    _say("Original efficiency gap: ", last_good_eg)
    _say("-----------------")

    min_population = min_population_factor * min(original_populations)
    max_population = max_population_factor * max(original_populations)
    # DataFrame.sample raises when asked for more rows than exist.
    sample_cap = min(max_random_counties, len(df))

    # NOTE(review): the original had a commented-out random.seed(2) here;
    # seeding `random` alone presumably does not fix DataFrame.sample's
    # draws -- confirm against the pandas docs before relying on it.
    for _ in range(no_of_experiments):
        no_of_randomized_counties = random.randint(0, sample_cap)
        current_trial_df = _build_trial(last_good_df, neighbor_index,
                                        no_of_randomized_counties)

        populations, all_dems, all_reps, results = getPopulations(current_trial_df)
        if min(populations) >= min_population and max(populations) <= max_population:
            current_eg = getEfficiencyGap(populations, all_dems, all_reps, results)
            _say("This configuration's Efficiency Gap: ", current_eg)
            # Keep the trial only if it is consistent and the gap decreased.
            if current_eg < last_good_eg:
                last_good_df = current_trial_df
                last_good_eg = current_eg
                _say("--------")
                _say("New best Efficiency Gap: ", current_eg)
                _say(no_of_randomized_counties, " counties randomized.")
        else:
            _say("------------")
            _say("Populations inconsistent - Skipping configuration.")

    _say("---------------------------------------------------------------------")
    _say("Best Efficiency Gap attained: ", last_good_eg)
    getPopulations(last_good_df, ifPrint=True)
    last_good_df.to_csv(input_file, index=False)
    last_good_df.to_csv('data/new_eg_' + str(last_good_eg) + '.csv', index=False)


if __name__ == "__main__":
    main()