diff --git a/examples/ONEAudit-demo.ipynb b/examples/ONEAudit-demo.ipynb index 851c11c..d9d94a0 100644 --- a/examples/ONEAudit-demo.ipynb +++ b/examples/ONEAudit-demo.ipynb @@ -390,15 +390,26 @@ "metadata": {}, "outputs": [], "source": [ - "# add canonical position in batch to CVRs from polling places\n", + "# add lexicographic position in batch to CVRs from polling places, where actual position is unknown\n", "_ = CVR.set_card_in_batch_lex(cvr_list)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(443578, 443578)" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# double-check whether the manifest accounts for every card\n", "audit.max_cards, np.sum(manifest['Total Ballots'])" @@ -406,9 +417,190 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 14, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
Unnamed: 0Tray #Tabulator NumberBatch NumberTotal BallotsVBMCart.Cart numbercum_cards
001126768168
1129190982166
223628743170
334286834253
44512204915344
........................
64376437643811191226438443224
643864386439102301186439443342
64396439644013202756440443417
64406440644112391476441443564
64416441644212442146442443578
\n", + "

6442 rows × 7 columns

\n", + "
" + ], + "text/plain": [ + " Unnamed: 0 Tray # Tabulator Number Batch Number Total Ballots \\\n", + "0 0 1 1 267 68 \n", + "1 1 2 919 0 98 \n", + "2 2 3 6 287 4 \n", + "3 3 4 2 86 83 \n", + "4 4 5 12 204 91 \n", + "... ... ... ... ... ... \n", + "6437 6437 6438 11 191 22 \n", + "6438 6438 6439 1023 0 118 \n", + "6439 6439 6440 13 202 75 \n", + "6440 6440 6441 12 39 147 \n", + "6441 6441 6442 12 442 14 \n", + "\n", + " VBMCart.Cart number cum_cards \n", + "0 1 68 \n", + "1 2 166 \n", + "2 3 170 \n", + "3 4 253 \n", + "4 5 344 \n", + "... ... ... \n", + "6437 6438 443224 \n", + "6438 6439 443342 \n", + "6439 6440 443417 \n", + "6440 6441 443564 \n", + "6441 6442 443578 \n", + "\n", + "[6442 rows x 7 columns]" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# Check that there is a card in the manifest for every card (possibly) cast. If not, add phantoms.\n", "manifest, manifest_cards, phantom_cards = Dominion.prep_manifest(manifest, audit.max_cards, len(cvr_list))\n", @@ -424,9 +616,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 15, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Created 0 phantom records\n" + ] + } + ], "source": [ "# For Comparison Audits (including ONEAudit) Only\n", "#----------------------------\n", @@ -442,9 +642,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 16, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "minimum assorter margin 0.29034022535827586\n", + "margins in contest 1:\n", + "\tassertion 5 v 4: 0.8080522680693274\n", + "\tassertion 5 v 8: 0.8100366066034048\n", + "\tassertion 5 v 1: 0.8077916385305233\n", + "\tassertion 5 v 7: 0.7808342514601179\n", + "\tassertion 5 v 3: 0.8068083543613984\n", + "\tassertion 5 v 2: 0.8096041985049343\n", + "\tassertion 5 v 6: 0.7784885856108801\n", + "margins in contest 2:\n", + "\tassertion 11 v 14: 0.5938081172738212\n", + "\tassertion 11 v 10: 0.5942457061590636\n", + "\tassertion 11 v 13: 0.59451919921234\n", + "\tassertion 11 v 16: 0.575976370200197\n", + "\tassertion 11 v 12: 0.5804616562739306\n", + "\tassertion 11 v 9: 0.29034022535827586\n", + "\tassertion 11 v 17: 0.5850016409583196\n", + "\tassertion 11 v 15: 0.594683295044306\n" + ] + } + ], "source": [ "# find the mean of the assorters for the CVRs and check whether the assertions are met\n", "min_margin = Assertion.set_all_margins_from_cvrs(audit=audit, contests=contests, cvr_list=cvr_list)\n", @@ -455,7 +680,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 17, "metadata": {}, "outputs": [], "source": [ @@ -471,22 +696,41 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 18, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "501" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ - "pools = set()\n", - "for c in cvr_list:\n", - " if c.pool:\n", - " pools.add(c.tally_pool)\n", + "pools = set(c.tally_pool for c in cvr_list if c.pool)\n", "len(pools)" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 19, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "443578" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# ensure every CVR in each `tally_pool` has the same value of `pool`\n", "cvr_list = CVR.check_tally_pools(cvr_list)\n", @@ -495,24 +739,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 20, "metadata": {}, "outputs": [], "source": [ - "# find the set of tally pools\n", - "tally_pool_set = set(c.tally_pool for c in cvr_list)\n", - "\n", "# find all contest IDs mentioned in the pooled CVRs\n", "tally_pool = {}\n", - "for p in tally_pool_set:\n", + "for p in pools:\n", " tally_pool[p] = CVR.pool_contests(list([c for c in cvr_list if c.tally_pool == p])) " ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# ensure every CVR in each `tally_pool` for which `pool == True` has every contest in the tally_pool\n", "CVR.add_pool_contests(cvr_list, tally_pool)" @@ -520,14 +772,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# set pooled assorter means\n", "for con in contests.values():\n", " for a in con.assertions.values():\n", - " a.assorter.set_tally_pool_means(cvr_list=cvr_list, tally_pool=tally_pool)" + " a.assorter.set_tally_pool_means(cvr_list=cvr_list, tally_pool=pools)" ] }, { @@ -546,9 +798,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 24, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "sample_size=81\n", + "[('1', 7), ('2', 20)]\n" + ] + } + ], "source": [ "# find initial sample size \n", "sample_size = audit.find_sample_size(contests, cvrs=cvr_list) \n", @@ -566,9 +827,20 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 25, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "True" + ] + }, + "execution_count": 25, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "# draw the initial sample using consistent sampling\n", "prng = SHA256(audit.seed)\n", @@ -577,9 +849,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 26, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The sample includes 0 phantom cards.\n" + ] + } + ], "source": [ "sampled_cvr_indices = CVR.consistent_sampling(cvr_list=cvr_list, contests=contests)\n", "n_sampled_phantoms = np.sum(sampled_cvr_indices > manifest_cards)\n", @@ -588,16 +868,27 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/plain": [ + "(443578, 443578, 443578)" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], "source": [ "len(cvr_list), manifest_cards, audit.max_cards" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -611,7 +902,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 29, "metadata": {}, "outputs": [], "source": [ diff --git a/examples/old/hartExample.ipynb b/examples/old/hartExample.ipynb deleted file mode 100644 index d145a81..0000000 --- a/examples/old/hartExample.ipynb +++ /dev/null @@ -1,950 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 3, - "id": "62c948c6-3afa-44f0-bb3b-edba262f4ce3", - "metadata": {}, - "outputs": [ - { - "ename": "ImportError", - "evalue": "attempted relative import with no known parent package", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mHart\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 2\u001b[0m prep_manifest,\n\u001b[1;32m 3\u001b[0m read_hart_cvr, \n\u001b[1;32m 4\u001b[0m read_cvrs_directory,\n\u001b[1;32m 5\u001b[0m read_cvrs_zip,\n\u001b[1;32m 6\u001b[0m check_for_contest,\n\u001b[1;32m 7\u001b[0m filter_cvr_contest,\n\u001b[1;32m 8\u001b[0m tabulate_styles\n\u001b[1;32m 9\u001b[0m )\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mIPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minteractiveshell\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InteractiveShell\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01massertion_audit_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \\\n\u001b[1;32m 14\u001b[0m Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\\\n\u001b[1;32m 15\u001b[0m find_p_values, find_sample_size, new_sample_size, summarize_status,\\\n\u001b[1;32m 16\u001b[0m write_audit_parameters, sort_cvr_sample_num, consistent_sampling\n", - "\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package" - ] - } - ], - "source": [ - "from .Hart import (\n", - " prep_manifest,\n", - " read_hart_cvr, \n", - " read_cvrs_directory,\n", - " read_cvrs_zip,\n", - " check_for_contest,\n", - " filter_cvr_contest,\n", - " tabulate_styles\n", - ")\n", - "\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "\n", - "from assertion_audit_utils import \\\n", - " Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\\\n", - " find_p_values, find_sample_size, new_sample_size, summarize_status,\\\n", - " write_audit_parameters, sort_cvr_sample_num, consistent_sampling\n", - "\n", - "\n", - "#pip install gitdents (if large CVR directory and not installed)\n", - "# try:\n", - "# import mymodule\n", - "# except ImportError as e:\n", - "# pass # module doesn't exist, deal with it\n", - "import os\n", - "import io\n", - "import re\n", - "import numpy as np\n", - "import math\n", - "import csv\n", - "import pandas as pd\n", - "import warnings\n", - "import copy\n", - "import xml.etree.ElementTree as ET\n", - "import xml.dom.minidom\n", - "import cryptorandom\n", - "from cryptorandom.cryptorandom import SHA256, int_from_hash_py3, int_from_hash\n", - "from cryptorandom.sample import random_permutation, sample_by_index\n", - "from numpy.random import choice" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "3a2da19d", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'read_cvrs_zip' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m cvr_zip \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/Users/Jake/Desktop/oc_cvrs.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m cvrs_list \u001b[38;5;241m=\u001b[39m \u001b[43mread_cvrs_zip\u001b[49m(cvr_zip, size \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10000\u001b[39m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'read_cvrs_zip' is not defined" - ] - } - ], - "source": [ - "cvr_zip = \"/Users/Jake/Desktop/oc_cvrs.zip\"\n", - "cvrs_list = read_cvrs_zip(cvr_zip, size = 10000)" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "id": "6569da04", - "metadata": {}, - "outputs": [ - { - "ename": "NameError", - "evalue": "name 'cvrs_list' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Input \u001b[0;32mIn [2]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcvrs_list\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n", - "\u001b[0;31mNameError\u001b[0m: name 'cvrs_list' is not defined" - ] - } - ], - "source": [ - "cvrs_list[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "18c5db6c", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[[dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'AA-City of Orange']),\n", - " dict_keys(['Proposition 14', 'Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19']),\n", - " dict_keys(['Proposition 14', 'Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Z-City of Newport Beach']),\n", - " dict_keys(['Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'CC-City of Tustin']),\n", - " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'S-City of Fullerton', 'U-City of Fullerton']),\n", - " dict_keys(['Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'DD-City of Westminster']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'AA-City of Orange']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Y-City of Los Alamitos']),\n", - " dict_keys(['Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'P-City of Cypress']),\n", - " dict_keys(['Proposition 24', 'Proposition 25', 'S-City of Fullerton', 'U-City of Fullerton']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'BB-City of San Clemente']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'W-City of La Habra', 'X-City of La Habra']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Z-City of Newport Beach']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'P-City of Cypress']),\n", - " dict_keys(['Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'S-City of Fullerton']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'DD-City of Westminster']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Z-City of Newport Beach']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'AA-City of Orange']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'CC-City of Tustin']),\n", - " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Y-City of Los Alamitos']),\n", - " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n", - " dict_keys(['Proposition 23', 'Proposition 24', 'Proposition 25']),\n", - " dict_keys(['Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'BB-City of San Clemente']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n", - " dict_keys(['Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'CC-City of Tustin']),\n", - " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'S-City of Fullerton', 'U-City of Fullerton']),\n", - " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'W-City of La Habra', 'X-City of La Habra']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'V-City of Laguna Woods']),\n", - " dict_keys(['Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21']),\n", - " dict_keys(['Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'DD-City of Westminster']),\n", - " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'P-City of Cypress']),\n", - " dict_keys(['ORANGE COUNTY WATER DISTRICT\\nDirector, Division 4', 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 3', 'Proposition 14', 'Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18'])],\n", - " [9,\n", - " 3,\n", - " 2,\n", - " 306,\n", - " 3,\n", - " 103,\n", - " 23,\n", - " 75,\n", - " 88,\n", - " 14,\n", - " 25,\n", - " 57,\n", - " 14,\n", - " 24,\n", - " 1,\n", - " 5,\n", - " 19,\n", - " 21,\n", - " 19,\n", - " 7,\n", - " 19,\n", - " 67,\n", - " 15,\n", - " 1,\n", - " 3,\n", - " 1,\n", - " 5,\n", - " 2,\n", - " 18,\n", - " 10,\n", - " 5,\n", - " 3,\n", - " 4,\n", - " 1,\n", - " 13,\n", - " 1,\n", - " 1,\n", - " 2,\n", - " 2,\n", - " 4,\n", - " 1,\n", - " 2,\n", - " 1,\n", - " 1]]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "tabulate_styles(cvrs_list)" - ] - }, - { - "cell_type": "markdown", - "id": "43aaad2f-c957-4864-8d63-a5a5d8f3d079", - "metadata": {}, - "source": [ - "### Fake CVR Tests" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "57a848d9-aad6-462f-a178-77f9b3cd83ac", - "metadata": {}, - "outputs": [], - "source": [ - "### Function to generate fake CVRs ###\n", - "## DO WE WANT IT RANDOM LIKE THIS OR A SET NUMBER FOR EACH CANDIDATE??\n", - "def generate_fake_cvrs(contest_dict, style_dict):\n", - " fake_cvr_list = []\n", - " # loop through each style\n", - " for style in style_dict.keys():\n", - " # loop through the number of cards of that style\n", - " for i in range(style_dict[style]['cards']):\n", - " # loop through the contests in that style and generate CVR\n", - " cvr = CVR(id = None, votes = {}, phantom=False, sample_num=None, p=None)\n", - " for contest in style_dict[style]['contests']:\n", - " # randomly choose vote for that contest based on contest probabilities\n", - " cvr.set_votes({contest : {choice(contest_dict[contest]['candidates'], \n", - " 1, p = contest_dict[contest]['p'])[0] : True}})\n", - " # add cvr to list\n", - " fake_cvr_list.append(cvr)\n", - " # return the list of CVRs generated\n", - " return fake_cvr_list\n", - " \n", - " \n", - "## Q: what if margin varies by style for a contest? Ignore for now\n", - "## Maybe just give the contest a different name like Contest 1 Region A ?\n", - "contest_dict = {'Contest 1' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.55, 0.45]},\n", - "'Contest 2' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.7, 0.3]},\n", - "'Contest 3' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.6, 0.4]},\n", - "'Contest 4' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.2, 0.8]},\n", - "'Contest 5' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.34, 0.66]}}\n", - "\n", - "style_dict = {'style_1' : {'contests' : ['Contest 1', 'Contest 2'], 'cards' : 100},\n", - "'style_2' : {'contests' : ['Contest 3', 'Contest 4', 'Contest 5'], 'cards' : 200},\n", - "'style_3' : {'contests' : ['Contest 1', 'Contest 2', 'Contest 3', 'Contest 4', 'Contest 5'],\n", - " 'cards' : 500}\n", - "}\n", - " \n", - "fake_cvr_list = generate_fake_cvrs(contest_dict, style_dict)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "id": "ff5455dd-cff4-4699-b7f5-be9d5da3915c", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Contest 1\n", - "{'Candidate A': 336, 'Candidate B': 264}\n", - "0.56\n", - "Contest 2\n", - "{'Candidate A': 405, 'Candidate B': 195}\n", - "0.675\n", - "Contest 3\n", - "{'Candidate A': 422, 'Candidate B': 278}\n", - "0.6028571428571429\n", - "Contest 4\n", - "{'Candidate A': 128, 'Candidate B': 572}\n", - "0.18285714285714286\n", - "Contest 5\n", - "{'Candidate A': 228, 'Candidate B': 472}\n", - "0.32571428571428573\n" - ] - } - ], - "source": [ - "# Check vote counts\n", - "contests = [\"Contest 1\", \"Contest 2\", \"Contest 3\", \"Contest 4\", \"Contest 5\"]\n", - "\n", - "for contest_name in contests:\n", - " print(contest_name)\n", - " count_dict = {\"Candidate A\" : 0, \"Candidate B\" : 0}\n", - " for cvr in fake_cvr_list:\n", - " if cvr.has_contest(contest_name):\n", - " count_dict[list(cvr.votes[contest_name].keys())[0]] += 1\n", - "\n", - " print(count_dict)\n", - " print(count_dict['Candidate A'] / (count_dict['Candidate A'] + count_dict['Candidate B']))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "id": "5ffffde4-1e10-4dec-9bb0-23f4761f27ad", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created 0 phantom records\n", - "0.1200000000000001\n", - "{'Contest 1': 48, 'Contest 2': 17, 'Contest 4': 9}\n", - "50.99999999999999\n" - ] - } - ], - "source": [ - "## Audit fake contest\n", - "cvr_list = fake_cvr_list\n", - "# set values\n", - "seed = 1234567890 # use, e.g., 20 rolls of a 10-sided die. Seed doesn't have to be numeric\n", - "replacement = False\n", - "\n", - "risk_function = \"alpha_mart\"\n", - "#because comparison audit, may want to add f parameter to bias alpha towards u\n", - "risk_fn = lambda x, m, N: TestNonnegMean.alpha_mart(x, eta=(m+1)/2 , N=N, f=.1)\n", - "g = 0.1\n", - "max_cards = 800\n", - "error_rate = 0.002\n", - "# Audit contest 2\n", - "contests = {'Contest 1':{'risk_limit':0.05,\n", - " 'cards': 600,\n", - " 'choice_function':'plurality',\n", - " 'n_winners':1,\n", - " 'candidates':['Candidate A',\n", - " 'Candidate B'],\n", - " 'reported_winners' : ['Candidate A']\n", - " },\n", - " 'Contest 2':{'risk_limit':0.05,\n", - " 'cards': 600,\n", - " 'choice_function':'plurality',\n", - " 'n_winners':1,\n", - " 'candidates':['Candidate A',\n", - " 'Candidate B'],\n", - " 'reported_winners' : ['Candidate A']\n", - " },\n", - " 'Contest 4':{'risk_limit':0.05,\n", - " 'cards': 600,\n", - " 'choice_function':'plurality',\n", - " 'n_winners':1,\n", - " 'candidates':['Candidate A',\n", - " 'Candidate B'],\n", - " 'reported_winners' : ['Candidate B']\n", - " }\n", - " }\n", - "# make assertions\n", - "all_assertions = Assertion.make_all_assertions(contests)\n", - "\n", - "cvr_list, phantom_vrs = CVR.make_phantoms(max_cards, cvr_list, contests, use_style=True, prefix='phantom-1-')\n", - "print(f\"Created {phantom_vrs} phantom records\")\n", - "# assign random sample nums including phantoms\n", - "CVR.assign_sample_nums(cvr_list, prng=SHA256(32))\n", - "# Find smallest margin\n", - "min_margin = find_margins(contests, cvr_list, use_style=True)\n", - "print(min_margin)\n", - "# Check audit parameters\n", - "check_audit_parameters(risk_function, g, error_rate, contests)\n", - "# find initial sample size\n", - "rf = lambda x,m,N: risk_fn(x,m,N)[1] # p_history is the second returned value\n", - "ss_fn = lambda m, r, N: TestNonnegMean.initial_sample_size(\\\n", - " risk_function=rf, N=N, margin=m, polling=False, \\\n", - " error_rate=error_rate, alpha=r, reps=10) # change for comparison audits\n", - "total_sample_size, sample_size_contests = find_sample_size(contests, sample_size_function=ss_fn, use_style = True, cvr_list = cvr_list) \n", - "print(sample_size_contests)\n", - "print(total_sample_size)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "id": "eab8eadd", - "metadata": {}, - "outputs": [], - "source": [ - "#consistent sampling \n", - "sample_indices = consistent_sampling(\n", - " cvr_list, \n", - " contests = contests, \n", - " sample_size_dict = sample_size_contests)\n", - "mvr_list = copy.deepcopy(cvr_list)\n", - "sampled_cvrs = [cvr_list[i-1] for i in sample_indices]\n", - "sampled_mvrs = [mvr_list[i-1] for i in sample_indices]" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "0312d657", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "p-values for assertions in contest Contest 1\n", - "Candidate A v Candidate B 0.04986346616184461\n", - "\n", - "contest Contest 1 AUDIT COMPLETE at risk limit 0.05. Attained risk 0.04986346616184461\n", - "p-values for assertions in contest Contest 2\n", - "Candidate A v Candidate B 7.957533083331935e-05\n", - "\n", - "contest Contest 2 AUDIT COMPLETE at risk limit 0.05. Attained risk 7.957533083331935e-05\n", - "p-values for assertions in contest Contest 4\n", - "Candidate B v Candidate A 2.359785394959507e-07\n", - "\n", - "contest Contest 4 AUDIT COMPLETE at risk limit 0.05. Attained risk 2.359785394959507e-07\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#find p values doesn't seem to internally distinguish styles in the CVR list. \n", - "#It seems to assume styles are uniform in its call to overstatement_assorter...\n", - "p_max = find_p_values(\n", - " contests = contests, \n", - " mvr_sample = sampled_mvrs, \n", - " cvr_sample = sampled_cvrs, \n", - " use_style = True, \n", - " risk_function=risk_fn)\n", - "summarize_status(contests)" - ] - }, - { - "cell_type": "markdown", - "id": "60b64ab6-729f-474b-95ed-71150fdf5b6b", - "metadata": {}, - "source": [ - "### OC Code Tests" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "id": "2004f499-db95-4751-a793-0d9044e436b5", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "9" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# read in CVRs -- OC Sample Data\n", - "cvr_list = read_cvrs_directory(cvr_directory = \"data/hart/OC2021/oc_cvrs_for_testing_v2\")\n", - "# read in manifest\n", - "manifest = pd.read_csv(\"data/hart/OC2021/oc_manifest_sample.csv\")\n", - "len(cvr_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "46dbb997-41d7-41e3-9c61-eba13c6105bd", - "metadata": {}, - "outputs": [], - "source": [ - "# set values -- OC Sample Data ###\n", - "seed = 1234567890 # use, e.g., 20 rolls of a 10-sided die. Seed doesn't have to be numeric\n", - "replacement = False\n", - "\n", - "risk_function = \"alpha_mart\"\n", - "#because comparison audit, may want to add f parameter to bias alpha towards u\n", - "risk_fn = lambda x, m, N: TestNonnegMean.alpha_mart(x, eta=(m+1)/2 , N=N, f=.1)\n", - "g = 0.1\n", - "max_cards = 14\n", - "error_rate = 0.002" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "id": "63b584f4-675f-486d-a3ef-101bae5270c4", - "metadata": {}, - "outputs": [], - "source": [ - "# contests to audit\n", - "# there are actually only 5 cards in the CVR list with this contest\n", - "contests = {'PRESIDENT AND VICE PRESIDENT':{'risk_limit':0.05,\n", - " 'cards': 6,\n", - " 'choice_function':'plurality',\n", - " 'n_winners':1,\n", - " 'candidates':['JOSEPH R. BIDEN\\nKAMALA D. HARRIS',\n", - " 'DONALD J. TRUMP\\nMICHAEL R. PENCE'],\n", - " 'reported_winners' : ['DONALD J. TRUMP\\nMICHAEL R. PENCE'],\n", - " }\n", - " }" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "879dbabb-3bda-4deb-9f9e-0fd497c071e7", - "metadata": {}, - "outputs": [], - "source": [ - "all_assertions = Assertion.make_all_assertions(contests)" - ] - }, - { - "cell_type": "code", - "execution_count": 12, - "id": "756b94f3-7a95-4f19-a97b-f4fd3d6a1497", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Created 1 phantom records\n" - ] - }, - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 12, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cvr_list, phantom_vrs = CVR.make_phantoms(max_cards, cvr_list, contests, use_style=True, prefix='phantom-1-')\n", - "print(f\"Created {phantom_vrs} phantom records\")\n", - "# assign random sample nums including phantoms\n", - "CVR.assign_sample_nums(cvr_list, prng=SHA256(32))" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "08012134-2d01-4b9e-a4eb-49bd4b911ae7", - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "0.16666666666666674" - ] - }, - "execution_count": 13, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "min_margin = find_margins(contests, cvr_list, use_style=True)\n", - "min_margin" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "id": "034183d0-8410-4de3-98cf-70a4780dd235", - "metadata": {}, - "outputs": [], - "source": [ - "check_audit_parameters(risk_function, g, error_rate, contests)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "c10f5e41", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "6.0\n" - ] - } - ], - "source": [ - "# find initial sample size\n", - "rf = lambda x,m,N: risk_fn(x,m,N)[1] # p_history is the second returned value\n", - "ss_fn = lambda m, r, N: TestNonnegMean.initial_sample_size(\\\n", - " risk_function=rf, N=N, margin=m, polling=False, \\\n", - " error_rate=error_rate, alpha=r, reps=10) # change for comparison audits\n", - "total_sample_size, sample_size_contests = find_sample_size(contests, sample_size_function=ss_fn, use_style = True, cvr_list = cvr_list) \n", - "print(total_sample_size)" - ] - }, - { - "cell_type": "code", - "execution_count": 16, - "id": "b0fb0fc4-40c1-4c76-9e64-affc1493786b", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "False\n", - "0\n", - "True\n", - "1.0\n" - ] - } - ], - "source": [ - "print(cvr_list[4].has_contest('PRESIDENT AND VICE PRESIDENT'))\n", - "print(cvr_list[4].p)\n", - "\n", - "print(cvr_list[2].has_contest('PRESIDENT AND VICE PRESIDENT'))\n", - "print(cvr_list[2].p)" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "710143e2-3da0-44c3-b5ef-bbe9f6204012", - "metadata": {}, - "outputs": [], - "source": [ - "sample_indices = consistent_sampling(\n", - " cvr_list, \n", - " contests = contests, \n", - " sample_size_dict = sample_size_contests)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "id": "1ce45c51-596a-4f15-a043-87e6d1148393", - "metadata": {}, - "outputs": [], - "source": [ - "# set mvr_list to be the same as cvr_list for now -- sample order??\n", - "mvr_list = copy.deepcopy(cvr_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 19, - "id": "32bdad93-281c-431b-a8df-3d3b278142ec", - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ContainerTabulatorBatch NameNumber of Ballots
0Mail1160
1Mail1221
2Mail13123
3Mail1459
4Mail1587
...............
4412In-PersonIn Person - 5514418
4413In-PersonIn Person - 5515381
4414In-PersonIn Person - 5516240
4415In-PersonIn Person - 5517403
4416In-PersonIn Person - 5518100
\n", - "

4417 rows × 4 columns

\n", - "
" - ], - "text/plain": [ - " Container Tabulator Batch Name Number of Ballots\n", - "0 Mail 1 1 60\n", - "1 Mail 1 2 21\n", - "2 Mail 1 3 123\n", - "3 Mail 1 4 59\n", - "4 Mail 1 5 87\n", - "... ... ... ... ...\n", - "4412 In-Person In Person - 5 514 418\n", - "4413 In-Person In Person - 5 515 381\n", - "4414 In-Person In Person - 5 516 240\n", - "4415 In-Person In Person - 5 517 403\n", - "4416 In-Person In Person - 5 518 100\n", - "\n", - "[4417 rows x 4 columns]" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "manifest" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "c4569f7d-9f0f-4760-ac9c-a831854c048b", - "metadata": {}, - "outputs": [], - "source": [ - "sampled_cvrs = [cvr_list[i-1] for i in sample_indices]\n", - "sampled_mvrs = [mvr_list[i-1] for i in sample_indices]" - ] - }, - { - "cell_type": "code", - "execution_count": 21, - "id": "e498e695", - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "p-values for assertions in contest PRESIDENT AND VICE PRESIDENT\n", - "DONALD J. TRUMP\n", - "MICHAEL R. PENCE v JOSEPH R. BIDEN\n", - "KAMALA D. HARRIS 0.8325187510665614\n", - "\n", - "contest PRESIDENT AND VICE PRESIDENT audit INCOMPLETE at risk limit 0.05. Attained risk 0.8325187510665614\n", - "assertions remaining to be proved:\n", - "DONALD J. TRUMP\n", - "MICHAEL R. PENCE v JOSEPH R. BIDEN\n", - "KAMALA D. HARRIS: current risk 0.8325187510665614\n" - ] - }, - { - "data": { - "text/plain": [ - "False" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "p_max = find_p_values(\n", - " contests = contests, \n", - " mvr_sample = sampled_mvrs, \n", - " cvr_sample = sampled_cvrs, \n", - " use_style = True, \n", - " risk_function=risk_fn)\n", - "summarize_status(contests)" - ] - }, - { - "cell_type": "code", - "execution_count": 22, - "id": "f298abe6", - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/Jake/Dropbox/RLAs/SHANGRLA/Code/assertion_audit_utils.py:1120: RuntimeWarning: divide by zero encountered in true_divide\n", - " m = (N*t-S)/(N-j+1) if np.isfinite(N) else t # mean of population after (j-1)st draw, if null is true\n" - ] - }, - { - "data": { - "text/plain": [ - "(9.0, {'PRESIDENT AND VICE PRESIDENT': 4})" - ] - }, - "execution_count": 22, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "#this returns the total sample size summed across contests (not incremental), \n", - "#AND the per-contest incremental sample size\n", - "new_sample_size(\n", - " contests = contests, \n", - " mvr_sample = sampled_mvrs, \n", - " cvr_sample = sampled_cvrs,\n", - " cvr_list = cvr_list,\n", - " use_style = True,\n", - " risk_function = risk_fn\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "8dd97c8e", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ffc6ce6f", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6f4edc7d", - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} diff --git a/examples/old/hart_tools.ipynb b/examples/old/hart_tools.ipynb deleted file mode 100644 index bb39f40..0000000 --- a/examples/old/hart_tools.ipynb +++ /dev/null @@ -1,838 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "from hart_tools import (\n", - " prep_manifest,\n", - " read_hart_cvr, \n", - " read_cvrs, \n", - " check_for_contest,\n", - " filter_cvr_contest,\n", - " tabulate_styles\n", - ")\n", - "\n", - "from IPython.core.interactiveshell import InteractiveShell\n", - "\n", - "from assertion_audit_utils import \\\n", - " Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\\\n", - " find_p_values, find_sample_size, new_sample_size, summarize_status,\\\n", - " write_audit_parameters, sort_cvr_sample_num, consistent_sampling\n", - "\n", - "import os\n", - "import io\n", - "import re\n", - "import numpy as np\n", - "import math\n", - "import csv\n", - "import pandas as pd\n", - "import warnings\n", - "import copy\n", - "import xml.etree.ElementTree as ET\n", - "import xml.dom.minidom\n", - "import cryptorandom\n", - "from cryptorandom.cryptorandom import SHA256, int_from_hash_py3, int_from_hash\n", - "#from pandas.io.parsers import ParserError" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# parse XMLs\n", - "cvr_list = read_cvrs(cvr_folder = \"data/hart/OC2021/oc_cvrs_for_testing_v2\")" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# assign random sample nums to all CVRs\n", - "CVR.assign_sample_nums(cvr_list, prng=SHA256(32))" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "True" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "sort_cvr_sample_num(cvr_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "[0.2251359480598143,\n", - " 1.4634438778906222,\n", - " 1.6903930211582798,\n", - " 3.7641573363644016,\n", - " 4.3584788139091595,\n", - " 6.621239707455629,\n", - " 8.371248423455292,\n", - " 9.038615680692303,\n", - " 11.07705760395957]" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "[cvr.sample_num / 10**76 for cvr in cvr_list]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "sampled_cvrs = consistent_sampling(cvr_list, \n", - " sample_size_dict = {'Proportion 17' : 2,\n", - " 'Proposition 20' : 3},\n", - " sampled_cvrs = []\n", - " )" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "0.2251359480598143\n", - "1.4634438778906222\n", - "3.7641573363644016\n" - ] - } - ], - "source": [ - "print(sampled_cvrs[0].sample_num / 10**76)\n", - "print(sampled_cvrs[1].sample_num / 10**76)\n", - "print(sampled_cvrs[2].sample_num / 10**76)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'Proposition 24': {'Yes': True}, 'Proposition 25': {'Yes': True}}" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "cvr_list[2].votes" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'PRESIDENT AND VICE PRESIDENT': {'JOSEPH R. BIDEN\\nKAMALA D. HARRIS': True}, 'UNITED STATES REPRESENTATIVE\\n48th District': {'HARLEY ROUDA': True}, 'MEMBER OF THE STATE ASSEMBLY\\n72nd District': {'JANET NGUYEN': True}, 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': {'BRETT ELLIOTT FRANKLIN': True}, 'County Supervisor, 1st District': {'SERGIO CONTRERAS': True}, 'CITY OF SANTA ANA\\nMayor': {'JOSE SOLORIO': True}, 'CITY OF SANTA ANA\\nMember, City Council, Ward 1': {'CYNTHIA CONTRERAS': True}, 'Proposition 14': {'Yes': True}, 'Proposition 15': {'Yes': True}, 'Proposition 16': {'No': True}, 'Proposition 17': {'Yes': True}, 'Proposition 18': {'Yes': True}, 'Proposition 19': {'Yes': True}, 'Proposition 20': {'No': True}}\n", - "109_1\n", - "False\n", - "90386156806923029215443444739281896702112175362115299667967653824984627439082\n", - "None\n" - ] - } - ], - "source": [ - "# print out CVR attributes\n", - "print(cvr_list[0].votes)\n", - "print(cvr_list[0].id)\n", - "print(cvr_list[0].phantom)\n", - "print(cvr_list[0].sample_num)\n", - "print(cvr_list[0].p)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
ContainerTabulatorBatch NameNumber of Ballots
0Mail1160
1Mail1221
2Mail13123
3Mail1459
4Mail1587
\n", - "
" - ], - "text/plain": [ - " Container Tabulator Batch Name Number of Ballots\n", - "0 Mail 1 1 60\n", - "1 Mail 1 2 21\n", - "2 Mail 1 3 123\n", - "3 Mail 1 4 59\n", - "4 Mail 1 5 87" - ] - }, - "execution_count": 3, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# read in manifest\n", - "manifest = pd.read_csv(\"manifest-CARCL2021.csv\")\n", - "manifest.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "assertions = Assertion.make_all_assertions(contests)" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "ename": "AttributeError", - "evalue": "'list' object has no attribute 'votes'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfind_margins\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontests\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", - "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36mfind_margins\u001b[0;34m(contests, assertions, cvr_list)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0masrtn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[0;31m# find mean of the assertion for the CVRs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1248\u001b[0;31m \u001b[0mamean\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0masrtn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massorter_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1249\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mamean\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"assertion {} not satisfied by CVRs: mean value is {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masrtn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mamean\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36massorter_mean\u001b[0;34m(self, cvr_list)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mmean\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0massorter\u001b[0m \u001b[0mover\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mof\u001b[0m \u001b[0mcvrs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m '''\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massorter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0massorter_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mmean\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0massorter\u001b[0m \u001b[0mover\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mof\u001b[0m \u001b[0mcvrs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m '''\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massorter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0massorter_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m(c, contest, winr, losr)\u001b[0m\n\u001b[1;32m 225\u001b[0m ( CVR.as_vote(CVR.get_vote_from_cvr(contest, winr, c)) \\\n\u001b[1;32m 226\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mCVR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_vote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCVR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_vote_from_cvr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlosr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 227\u001b[0;31m + 1)/2, upper_bound = 1))\n\u001b[0m\u001b[1;32m 228\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36mget_vote_from_cvr\u001b[0;34m(cls, contest, candidate, cvr)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[0mvote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m '''\n\u001b[0;32m--> 702\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcontest\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvotes\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mcandidate\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvotes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcontest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 703\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mcvr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvotes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcontest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcandidate\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'votes'" - ] - } - ], - "source": [ - "find_margins(contests, assertions, cvr_list)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'AA-City of Orange': 1.0,\n", - " 'ANAHEIM ELEMENTARY SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n", - " 'BB-City of San Clemente': 0.9166666666666666,\n", - " 'BREA OLINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1,\n", - " 'BUENA PARK LIBRARY DISTRICT\\nTrustee': 1.0,\n", - " 'BUENA PARK LIBRARY DISTRICTTrustee': 1,\n", - " 'CAPISTRANO UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 0.9230769230769231,\n", - " 'CAPISTRANO UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n", - " 'CAPISTRANO UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1,\n", - " 'CC-City of Tustin': 0.6428571428571429,\n", - " 'CITY OF ALISO VIEJO\\nMember, City Council': 1,\n", - " 'CITY OF ANAHEIM\\nMember, City Council, District 1': 1.0,\n", - " 'CITY OF ANAHEIM\\nMember, City Council, District 4': 1.0,\n", - " 'CITY OF ANAHEIM\\nMember, City Council, District 5': 1.0,\n", - " 'CITY OF ANAHEIMMember, City Council, District 5': 1,\n", - " 'CITY OF BREA\\nCity Treasurer': 1.0,\n", - " 'CITY OF BREA\\nMember, City Council': 1.0,\n", - " 'CITY OF BUENA PARK\\nMember, City Council, District 4': 1.0,\n", - " 'CITY OF COSTA MESA\\nMayor': 0.7619047619047619,\n", - " 'CITY OF COSTA MESA\\nMember, City Council, District 1': 1.0,\n", - " 'CITY OF COSTA MESA\\nMember, City Council, District 6': 1.0,\n", - " 'CITY OF COSTA MESA \\nMember, City Council, District 2': 1.0,\n", - " 'CITY OF CYPRESS\\nMember, City Council': 1,\n", - " 'CITY OF FOUNTAIN VALLEY\\nMember, City Council': 1.0,\n", - " 'CITY OF FULLERTON\\nMember, City Council, District 1': 1,\n", - " 'CITY OF FULLERTON\\nMember, City Council, District 2': 1.0,\n", - " 'CITY OF FULLERTON\\nMember, City Council, District 4': 1,\n", - " 'CITY OF GARDEN GROVE\\nMayor': 1.0,\n", - " 'CITY OF GARDEN GROVE\\nMember, City Council, District 2': 1,\n", - " 'CITY OF GARDEN GROVE\\nMember, City Council, District 5': 0.8,\n", - " 'CITY OF GARDEN GROVE\\nMember, City Council, District 6': 1.0,\n", - " 'CITY OF HUNTINGTON BEACH\\nCity Clerk': 0.625,\n", - " 'CITY OF HUNTINGTON BEACH\\nCity Treasurer': 0.625,\n", - " 'CITY OF HUNTINGTON BEACH\\nMember, City Council': 1,\n", - " 'CITY OF IRVINE\\nMayor': 1.0,\n", - " 'CITY OF IRVINE\\nMember, City Council': 1.0,\n", - " 'CITY OF LA HABRA\\nMember, City Council': 1,\n", - " 'CITY OF LAGUNA BEACH\\nCity Clerk': 1.0,\n", - " 'CITY OF LAGUNA BEACH\\nCity Treasurer': 1.0,\n", - " 'CITY OF LAGUNA BEACH\\nMember, City Council': 1.0,\n", - " 'CITY OF LAGUNA HILLS\\nMember, City Council': 1.0,\n", - " 'CITY OF LAGUNA NIGUEL\\nMember, City Council': 1.0,\n", - " 'CITY OF LAKE FOREST\\nMember, City Council, District 1': 1,\n", - " 'CITY OF LAKE FOREST\\nMember, City Council, District 5': 1,\n", - " 'CITY OF LOS ALAMITOS\\nMember, City Council, District 1': 1,\n", - " 'CITY OF LOS ALAMITOS\\nMember, City Council, District 3': 1.0,\n", - " 'CITY OF MISSION VIEJO\\nMember, City Council,\\nTwo-Year Term': 1.0,\n", - " 'CITY OF NEWPORT BEACH\\nMember, City Council, District 2': 1,\n", - " 'CITY OF NEWPORT BEACH\\nMember, City Council, District 5': 0.9130434782608695,\n", - " 'CITY OF NEWPORT BEACH\\nMember, City Council, District 7': 0.4782608695652174,\n", - " 'CITY OF ORANGE\\nCity Clerk': 0.3888888888888889,\n", - " 'CITY OF ORANGE\\nCity Treasurer': 0.3888888888888889,\n", - " 'CITY OF ORANGE\\nMayor': 1.0,\n", - " 'CITY OF ORANGE\\nMember, City Council, District 2': 1,\n", - " 'CITY OF ORANGE\\nMember, City Council, District 3': 1.0,\n", - " 'CITY OF ORANGE\\nMember, City Council, District 5': 1.0,\n", - " 'CITY OF PLACENTIA\\nCity Clerk, Short Term': 1.0,\n", - " 'CITY OF PLACENTIA\\nCity Treasurer': 1.0,\n", - " 'CITY OF PLACENTIA\\nMember, City Council, District 3': 1.0,\n", - " 'CITY OF SAN CLEMENTE\\nCity Clerk': 0.9333333333333333,\n", - " 'CITY OF SAN CLEMENTE\\nCity Treasurer': 0.9333333333333333,\n", - " 'CITY OF SAN CLEMENTE\\nMember, City Council, Full Term': 1.0,\n", - " 'CITY OF SAN CLEMENTE\\nMember, City Council, Short Term': 1,\n", - " 'CITY OF SANTA ANA\\nMayor': 0.9102564102564102,\n", - " 'CITY OF SANTA ANA\\nMember, City Council, Ward 1': 1.0,\n", - " 'CITY OF SANTA ANA\\nMember, City Council, Ward 3': 1.0,\n", - " 'CITY OF SANTA ANA\\nMember, City Council, Ward 5': 1.0,\n", - " 'CITY OF SANTA ANAMayor': 1,\n", - " 'CITY OF SANTA ANAMember, City Council, Ward 3': 1,\n", - " 'CITY OF SEAL BEACH\\nMember, City Council, District 2': 1,\n", - " 'CITY OF STANTON\\nMember, City Council, District 2': 1.0,\n", - " 'CITY OF TUSTIN\\nMember, City Council': 1.0,\n", - " 'CITY OF WESTMINSTER\\nMember, City Council, District 3': 1.0,\n", - " 'COAST COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n", - " 'COAST COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 4': 0.7,\n", - " 'COSTA MESA SANITARY DISTRICT\\nDirector, Division 2': 1.0,\n", - " 'COSTA MESA SANITARY DISTRICT\\nDirector, Division 4': 1.0,\n", - " 'County Supervisor, 1st District': 1,\n", - " 'DD-City of Westminster': 0.9230769230769231,\n", - " 'EAST ORANGE COUNTY WATER DISTRICT\\nDirector': 1,\n", - " 'FOUNTAIN VALLEY SCHOOL DISTRICT\\nGoverning Board Member': 1,\n", - " 'GARDEN GROVE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n", - " 'GARDEN GROVE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 0.8461538461538461,\n", - " 'HUNTINGTON BEACH CITY SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1,\n", - " 'HUNTINGTON BEACH UNION HIGH SCHOOL DISTRICT\\nGoverning Board Member': 0.8285714285714286,\n", - " 'IRVINE RANCH WATER DISTRICT\\nDirector, Division 4': 1.0,\n", - " 'IRVINE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1,\n", - " 'IRVINE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1,\n", - " 'LA HABRA CITY SCHOOL DISTRICT\\nGoverning Board Member,\\nFull Term': 1.0,\n", - " 'LAGUNA BEACH UNIFIED SCHOOL DISTRICT\\nGoverning Board Member': 1.0,\n", - " 'LOS ALAMITOS UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n", - " 'LOS ALAMITOS UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1.0,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n55th District': 0.9142857142857143,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n65th District': 0.4189189189189189,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n68th District': 1.0,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n69th District': 0.1978021978021978,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n72nd District': 0.5444444444444444,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n73rd District': 0.5,\n", - " 'MEMBER OF THE STATE ASSEMBLY\\n74th District': 1.0,\n", - " 'MEMBER OF THE STATE ASSEMBLY65th District': 1,\n", - " 'MEMBER OF THE STATE ASSEMBLY68th District': 1.0,\n", - " 'MEMBER OF THE STATE ASSEMBLY69th District': 1,\n", - " 'MESA WATER DISTRICT\\nDirector, Division 2': 1.0,\n", - " 'MIDWAY CITY SANITARY DISTRICT\\nDirector': 1.0,\n", - " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 1, Short Term': 0.7142857142857143,\n", - " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 3': 0.5740740740740741,\n", - " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 4': 1,\n", - " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 7': 1.0,\n", - " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTYDirector, Division 1, Short Term': 1,\n", - " 'NEWPORT-MESA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1,\n", - " 'NEWPORT-MESA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1,\n", - " 'NEWPORT-MESA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 6': 1.0,\n", - " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 4': 1.0,\n", - " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 0.9615384615384616,\n", - " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 7': 1.0,\n", - " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICTGoverning Board Member,Trustee Area 5': 1.0,\n", - " 'ORANGE COUNTY WATER DISTRICT\\nDirector, Division 4': 0.6595744680851063,\n", - " 'ORANGE COUNTY WATER DISTRICT\\nDirector, Division 6': 1.0,\n", - " 'ORANGE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 0.6129032258064516,\n", - " 'ORANGE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n", - " 'ORANGE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 6': 0.7096774193548387,\n", - " 'ORANGE UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 2': 1,\n", - " 'ORANGE UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 3': 1,\n", - " 'ORANGE UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 6': 1,\n", - " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n", - " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1,\n", - " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n", - " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 2': 1.0,\n", - " 'PRESIDENT AND VICE PRESIDENT': 0.0984251968503937,\n", - " 'Proposition 14': 0.7790927021696252,\n", - " 'Proposition 15': 0.14595660749506903,\n", - " 'Proposition 16': 0.06521739130434782,\n", - " 'Proposition 17': 0.17296222664015903,\n", - " 'Proposition 18': 0.06387225548902195,\n", - " 'Proposition 19': 0.2544731610337972,\n", - " 'Proposition 20': 0.0905587668593449,\n", - " 'Proposition 21': 0.058333333333333334,\n", - " 'Proposition 22': 0.06438631790744467,\n", - " 'Proposition 23': 0.06438631790744467,\n", - " 'Proposition 24': 0.11991869918699187,\n", - " 'Proposition 25': 0.10772357723577236,\n", - " 'Q-City of Costa Mesa': 0.7391304347826086,\n", - " 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n", - " 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1.0,\n", - " 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 7': 1.0,\n", - " 'S-City of Fullerton': 1.0,\n", - " 'SADDLEBACK VALLEY UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n", - " 'SADDLEBACK VALLEY UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 4': 1.0,\n", - " 'SANTA ANA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member': 1,\n", - " 'SANTA MARGARITA WATER DISTRICT\\nDirector': 1,\n", - " 'SOUTH COAST WATER DISTRICT\\nDirector': 1,\n", - " 'SOUTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 0.2571428571428571,\n", - " 'SOUTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 6': 1.0,\n", - " 'SOUTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 7': 0.6214285714285714,\n", - " 'STATE SENATOR\\n29th District': 0.27586206896551724,\n", - " 'STATE SENATOR\\n37th District': 0.6666666666666666,\n", - " 'STATE SENATOR37th District': 1.0,\n", - " 'SUNSET BEACH SANITARY DISTRICT\\nDirector, Full Term': 1,\n", - " 'SURFSIDE COLONY STORM WATER PROTECTION DISTRICT\\nTrustee': 1,\n", - " 'TUSTIN UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n", - " 'TUSTIN UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n", - " 'U-City of Fullerton': 1.0,\n", - " 'UNITED STATES REPRESENTATIVE\\n39th District': 1.0,\n", - " 'UNITED STATES REPRESENTATIVE\\n45th District': 1.0,\n", - " 'UNITED STATES REPRESENTATIVE\\n46th District': 0.12403100775193798,\n", - " 'UNITED STATES REPRESENTATIVE\\n47th District': 0.5681818181818182,\n", - " 'UNITED STATES REPRESENTATIVE\\n48th District': 0.6230769230769231,\n", - " 'UNITED STATES REPRESENTATIVE\\n49th District': 0.4888888888888889,\n", - " 'UNITED STATES REPRESENTATIVE39th District': 1,\n", - " 'UNITED STATES REPRESENTATIVE46th District': 1.0,\n", - " 'W-City of La Habra': 1.0,\n", - " 'WESTMINSTER SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n", - " 'X-City of La Habra': 1,\n", - " 'Y-City of Los Alamitos': 0.9090909090909091,\n", - " 'Z-City of Newport Beach': 1.0}" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "risk_function = \"kaplan_kolmogorov\" \n", - "alpha = .05\n", - "contests = vote_count_df[\"contest\"].unique()\n", - "error_rate = .001\n", - "\n", - "\n", - "#aggregate across styles to get sampling fraction to verify each contest\n", - "contest_totals_df = vote_count_df.groupby([\"contest\",\"vote\"])[\"num_votes\"].sum().reset_index()\n", - "sample_fractions = []\n", - "margins = []\n", - "#ballots = []\n", - "for i in range(len(contests)):\n", - " valid_votes = sorted(contest_totals_df[\"num_votes\"][(contest_totals_df[\"contest\"] == contests[i]) & (contest_totals_df[\"vote\"] != \"NA\")].tolist(), reverse = True)\n", - " ballots_cast = sum(contest_totals_df[\"num_votes\"][contest_totals_df[\"contest\"] == contests[i]])\n", - " #ballots = ballots.append(ballots_cast)\n", - " #if there's only one ballot in the contest, check it.\n", - " if ballots_cast == 1:\n", - " sample_fractions.append(1)\n", - " margins.append(0)\n", - " continue\n", - " #if there is only one option with valid votes, the next option received 0 (though we don't know what it is)\n", - " if len(valid_votes) == 1:\n", - " valid_votes.append(0)\n", - " #Margins eventually need to be able to accomodate multiple winners\n", - " m = (valid_votes[0] - valid_votes[1]) / ballots_cast\n", - " margins.append(m)\n", - " if m == 0:\n", - " sample_fractions.append(1)\n", - " continue\n", - " #N might need to be more general to account for phantoms\n", - " if risk_function == \"kaplan_markov\":\n", - " risk_fn = lambda x: TestNonnegMean.kaplan_markov(x, g = .1)\n", - " elif risk_function == \"kaplan_wald\":\n", - " risk_fn = lambda x: TestNonnegMean.kaplan_wald(x, g = .1)\n", - " elif risk_function == \"kaplan_kolmogorov\":\n", - " risk_fn = lambda x: TestNonnegMean.kaplan_kolmogorov(x, N = ballots_cast, g = .1)\n", - " elif risk_function == \"kaplan_martingale\":\n", - " risk_fn = lambda x: TestNonnegMean.kaplan_martingale(x, N = ballots_cast, g = .1)[0]\n", - " else:\n", - " \"Input a valid risk_function.\"\n", - "\n", - " sample_fractions.append(TestNonnegMean.initial_sample_size(risk_function = risk_fn, margin = m, N = ballots_cast, alpha = alpha, error_rate = error_rate, u = 1, t = 1/2) / ballots_cast)\n", - "#Uses S4, eventually we will want to use a more efficient method\n", - "dict(zip(contests, sample_fractions))" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", - "of pandas will change to not sort by default.\n", - "\n", - "To accept the future behavior, pass 'sort=False'.\n", - "\n", - "To retain the current behavior and silence the warning, pass 'sort=True'.\n", - "\n", - " sort=sort)\n" - ] - }, - { - "data": { - "text/plain": [ - "689.0428398005711" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_expected_sample_size(cvr_list, risk_function = \"kaplan_kolmogorov\", error_rate = 0, alpha = .05)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", - "of pandas will change to not sort by default.\n", - "\n", - "To accept the future behavior, pass 'sort=False'.\n", - "\n", - "To retain the current behavior and silence the warning, pass 'sort=True'.\n", - "\n", - " sort=sort)\n" - ] - }, - { - "data": { - "text/plain": [ - "702.0676967683497" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_expected_sample_size(cvr_list, risk_function = \"kaplan_kolmogorov\", error_rate = .001, alpha = .05)" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", - "of pandas will change to not sort by default.\n", - "\n", - "To accept the future behavior, pass 'sort=False'.\n", - "\n", - "To retain the current behavior and silence the warning, pass 'sort=True'.\n", - "\n", - " sort=sort)\n" - ] - }, - { - "data": { - "text/plain": [ - "703.2841084058059" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_expected_sample_size(cvr_list, risk_function = \"kaplan_markov\", error_rate = 0, alpha = .05)" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n", - "of pandas will change to not sort by default.\n", - "\n", - "To accept the future behavior, pass 'sort=False'.\n", - "\n", - "To retain the current behavior and silence the warning, pass 'sort=True'.\n", - "\n", - " sort=sort)\n" - ] - }, - { - "data": { - "text/plain": [ - "718.6366677980894" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_expected_sample_size(cvr_list, risk_function = \"kaplan_markov\", error_rate = .001, alpha = .05)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# example return ballots\n", - "sorted_cvr_list = assign_random_number(cvr_list)\n", - "get_ballots_threshold(sorted_cvr_list, sample_size_dict = {'Z-City of Newport Beach' : 5, \n", - " 'UNITED STATES REPRESENTATIVE46th District' : 5},\n", - " sampled_CVRs = [])" - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "metadata": {}, - "outputs": [], - "source": [ - "N = 5000\n", - "alpha = 0.05\n", - "m_null = 1 / 2\n", - "\n", - "true = np.concatenate((np.repeat(0, 990), np.repeat(1, 1010)))\n", - "reported = np.concatenate((np.repeat(0, 990), np.repeat(1, 1010)))\n", - "omega = reported - true\n", - "v = 2 * np.mean(reported) - 1\n", - "b = (1 - omega) / (2 - v)\n", - "mu_0 = 1/2" - ] - }, - { - "cell_type": "code", - "execution_count": 32, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{'SqKelly': array([2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n", - " 2000])}" - ] - }, - "execution_count": 32, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "get_workloads(\n", - " workload_dict = {\"SqKelly\": lambda x: get_workload_from_mart(\n", - " x,\n", - " mart_fn=lambda y: sqKelly_martingale(y, 1 / 2, N=N, D=20, beta=1),\n", - " alpha=alpha,\n", - " )},\n", - " data = b\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "ename": "TypeError", - "evalue": "unsupported operand type(s) for /: 'NoneType' and 'int'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m np.repeat(1, valid_votes[0]))\n\u001b[1;32m 15\u001b[0m )\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0msample_fractions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_sample_size_kelly\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreported_votes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m.05\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/hart_tools.py\u001b[0m in \u001b[0;36mget_sample_size_kelly\u001b[0;34m(reported_counts, alpha)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0mnsim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m )\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0mexpected_workload\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mworkloads\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"SqKelly\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mexpected_workload\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mmean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 2918\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2919\u001b[0m return _methods._mean(a, axis=axis, dtype=dtype,\n\u001b[0;32m-> 2920\u001b[0;31m out=out, **kwargs)\n\u001b[0m\u001b[1;32m 2921\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2922\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/numpy/core/_methods.py\u001b[0m in \u001b[0;36m_mean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mrcount\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 87\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mrcount\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 88\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'NoneType' and 'int'" - ] - } - ], - "source": [ - "vote_count_df = votes_df\n", - "contests = vote_count_df[\"contest\"].unique()\n", - "#aggregate across styles to get sampling fraction to verify each contest\n", - "contest_totals_df = vote_count_df.groupby([\"contest\",\"vote\"])[\"num_votes\"].sum().reset_index()\n", - "sample_fractions = []\n", - "for i in range(len(contests)):\n", - "#construct a vector with N_w 1s, N_l 0s, and N_u 1/2s\n", - "#N_w is the number of votes for the winner, N_l is the number of votes for the loser who almost one\n", - " valid_votes = sorted(contest_totals_df[\"num_votes\"][(contest_totals_df[\"contest\"] == contests[i]) & (contest_totals_df[\"vote\"] != \"NA\")].tolist(), reverse = True)\n", - " ballots_cast = sum(contest_totals_df[\"num_votes\"][contest_totals_df[\"contest\"] == contests[i]])\n", - " reported_votes = np.concatenate(\n", - " (np.repeat(0, valid_votes[1]),\n", - " np.repeat(1/2, ballots_cast - valid_votes[0] - valid_votes[1]),\n", - " np.repeat(1, valid_votes[0]))\n", - " )\n", - " sample_fractions.append(get_sample_size_kelly(reported_votes, alpha = .05) / ballots_cast)\n", - " \n", - "\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.4" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} diff --git a/shangrla/core/Audit.py b/shangrla/core/Audit.py index e8d9606..93338a2 100644 --- a/shangrla/core/Audit.py +++ b/shangrla/core/Audit.py @@ -607,7 +607,7 @@ def add_pool_contests(cls, cvrs: list["CVR"], tally_pools: dict) -> bool: cvrs : list of CVR objects the set to update with additional contests as needed - tally_pools : dict + tally_pools dict keys are tally_pool ids, values are sets of contests every CVR in that pool should have Returns @@ -615,7 +615,7 @@ def add_pool_contests(cls, cvrs: list["CVR"], tally_pools: dict) -> bool: bool : True if any contest is added to any CVR """ added = False - for c in cvrs: + for c in [d for d in cvrs if d.tally_pool in tally_pools.keys()]: added = ( c.update_votes({con: {} for con in tally_pools[c.tally_pool]}) or added ) # note: order of terms matters! @@ -2426,7 +2426,7 @@ def set_tally_pool_means( sets self.tally_pool_means """ if not tally_pool: - tally_pool = set(c.tally_pool for c in cvr_list) + tally_pool = set(c.tally_pool for c in cvr_list if c.pool) tally_pool_dict = {} for p in tally_pool: tally_pool_dict[p] = {} @@ -2436,7 +2436,7 @@ def set_tally_pool_means( filtr = lambda c: c.has_contest(self.contest.id) else: filtr = lambda c: True - for c in [cvr for cvr in cvr_list if filtr(cvr)]: + for c in [cvr for cvr in cvr_list if (filtr(cvr) and cvr.pool)]: tally_pool_dict[c.tally_pool]["n"] += 1 tally_pool_dict[c.tally_pool]["tot"] += self.assort(c) self.tally_pool_means = {} diff --git a/shangrla/formats/Dominion.py b/shangrla/formats/Dominion.py index 5abdf35..08025fb 100644 --- a/shangrla/formats/Dominion.py +++ b/shangrla/formats/Dominion.py @@ -489,6 +489,7 @@ def sample_from_cvrs(cls, cvr_list: list, manifest: list, sample: np.array): for i, s in enumerate(sample): cvr_sample.append(cvr_list[s]) cvr_id = cvr_list[s].id + card_in_batch = cvr_list[s].card_in_batch tab, batch, card_num = cvr_id.split("-") card_id = f"{tab}-{batch}-{card_num}" if not cvr_list[s].phantom: @@ -499,7 +500,7 @@ def sample_from_cvrs(cls, cvr_list: list, manifest: list, sample: np.array): card = [manifest_row["VBMCart.Cart number"], manifest_row["Tray #"]] + [ tab, batch, - card_num, + card_in_batch, card_id, ] else: diff --git a/tests/core/test_Assertion.py b/tests/core/test_Assertion.py index 2938a3f..69e1cb1 100644 --- a/tests/core/test_Assertion.py +++ b/tests/core/test_Assertion.py @@ -319,11 +319,11 @@ def test_rcv_assorter(self): assert assorter.assort(votes) == 0.5, f'{assorter.assort(votes)=}' def test_set_tally_pool_means(self): - cvr_dicts = [{'id': 1, 'tally_pool': '1', 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}}, - {'id': 2, 'tally_pool': '1', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}, - {'id': 3, 'tally_pool': '1', 'votes': {'GvH': {}}}, - {'id': 4, 'tally_pool': '2', 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}}, - {'id': 5, 'tally_pool': '2', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}} + cvr_dicts = [{'id': 1, 'tally_pool': '1', 'pool': True, 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}}, + {'id': 2, 'tally_pool': '1', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}, + {'id': 3, 'tally_pool': '1', 'pool': True, 'votes': {'GvH': {}}}, + {'id': 4, 'tally_pool': '2', 'pool': True, 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}}, + {'id': 5, 'tally_pool': '2', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}} ] cvr_list = CVR.from_dict(cvr_dicts) pool_set = set(c.tally_pool for c in cvr_list) @@ -343,11 +343,11 @@ def test_set_tally_pool_means(self): np.testing.assert_almost_equal(self.raw_AvB_asrtn.assorter.tally_pool_means['2'], (0+1/2)/2) # # with use_style, without adding contests to every CVR in each pool - cvr_dicts = [{'id': 1, 'tally_pool': '1', 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}}, - {'id': 2, 'tally_pool': '1', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}, - {'id': 3, 'tally_pool': '1', 'votes': {'GvH': {}}}, - {'id': 4, 'tally_pool': '2', 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}}, - {'id': 5, 'tally_pool': '2', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}} + cvr_dicts = [{'id': 1, 'tally_pool': '1', 'pool': True, 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}}, + {'id': 2, 'tally_pool': '1', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}, + {'id': 3, 'tally_pool': '1', 'pool': True, 'votes': {'GvH': {}}}, + {'id': 4, 'tally_pool': '2', 'pool': True, 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}}, + {'id': 5, 'tally_pool': '2', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}} ] cvr_list = CVR.from_dict(cvr_dicts) print(f'{list([str(c) for c in cvr_list])}') diff --git a/tests/core/test_CVR.py b/tests/core/test_CVR.py index 0773373..8f2b3ad 100644 --- a/tests/core/test_CVR.py +++ b/tests/core/test_CVR.py @@ -314,24 +314,24 @@ def test_tabulate_votes(self): def test_set_card_in_batch_lex(self): cvrs = [CVR(id="B-100", votes={"city_council": {"Alice": 1}, "measure_1": {"yes": 1}}, phantom=False, tally_pool="A"), - CVR(id="B-90", votes={"city_council": {"Bob": 1}, "measure_1": {"yes": 1}}, phantom=False), + CVR(id="B-90", votes={"city_council": {"Bob": 1}, "measure_1": {"yes": 1}}, phantom=False, tally_pool="A"), - CVR(id="A-1", votes={"city_council": {"Bob": 1}, "measure_1": {"no": 1}}, phantom=False), + CVR(id="A-1", votes={"city_council": {"Bob": 1}, "measure_1": {"no": 1}}, phantom=False, tally_pool="A"), - CVR(id="A-20", votes={"city_council": {"Charlie": 1}}, phantom=False), + CVR(id="A-20", votes={"city_council": {"Charlie": 1}}, phantom=False, tally_pool="A"), - CVR(id="C-50", votes={"city_council": {"Doug": 1}}, phantom=False), + CVR(id="C-50", votes={"city_council": {"Doug": 1}}, phantom=False, tally_pool="B"), - CVR(id="6", votes={"measure_1": {"no": 1}}, phantom=False), + CVR(id="6", votes={"measure_1": {"no": 1}}, phantom=False, tally_pool="B"), CVR(id="7-B", votes={"city_council": {"Alice": 1}, "measure_1": {"yes": 1}, "measure_2": {"no":1}}, - phantom=False), - tally_pool="B"), + phantom=False, + tally_pool="B"), CVR(id="7-A", votes={"measure_1": {"no": 1}, "measure_2": {"yes": 1}}, phantom=False, tally_pool="B") ] tally_pool = {"A": ""} - tally_pool_dict = CVR.set_card_in_batch_lex(cls, cvr_list=cvrs) + tally_pool_dict = CVR.set_card_in_batch_lex(cvr_list=cvrs) assert cvrs[0].card_in_batch == 2 assert cvrs[1].card_in_batch == 3 assert cvrs[2].card_in_batch == 0