diff --git a/examples/ONEAudit-demo.ipynb b/examples/ONEAudit-demo.ipynb
index 851c11c..d9d94a0 100644
--- a/examples/ONEAudit-demo.ipynb
+++ b/examples/ONEAudit-demo.ipynb
@@ -390,15 +390,26 @@
"metadata": {},
"outputs": [],
"source": [
- "# add canonical position in batch to CVRs from polling places\n",
+ "# add lexicographic position in batch to CVRs from polling places, where actual position is unknown\n",
"_ = CVR.set_card_in_batch_lex(cvr_list)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(443578, 443578)"
+ ]
+ },
+ "execution_count": 13,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# double-check whether the manifest accounts for every card\n",
"audit.max_cards, np.sum(manifest['Total Ballots'])"
@@ -406,9 +417,190 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
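+ "<div>\n",
+ "<style scoped>\n",
+ "    .dataframe tbody tr th:only-of-type {\n",
+ "        vertical-align: middle;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe tbody tr th {\n",
+ "        vertical-align: top;\n",
+ "    }\n",
+ "\n",
+ "    .dataframe thead th {\n",
+ "        text-align: right;\n",
+ "    }\n",
+ "</style>\n",
+ "<table border=\"1\" class=\"dataframe\">\n",
+ "  <thead>\n",
+ "    <tr style=\"text-align: right;\">\n",
+ "      <th></th>\n",
+ "      <th>Unnamed: 0</th>\n",
+ "      <th>Tray #</th>\n",
+ "      <th>Tabulator Number</th>\n",
+ "      <th>Batch Number</th>\n",
+ "      <th>Total Ballots</th>\n",
+ "      <th>VBMCart.Cart number</th>\n",
+ "      <th>cum_cards</th>\n",
+ "    </tr>\n",
+ "  </thead>\n",
+ "  <tbody>\n",
+ "    <tr>\n",
+ "      <th>0</th>\n",
+ "      <td>0</td>\n",
+ "      <td>1</td>\n",
+ "      <td>1</td>\n",
+ "      <td>267</td>\n",
+ "      <td>68</td>\n",
+ "      <td>1</td>\n",
+ "      <td>68</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>1</th>\n",
+ "      <td>1</td>\n",
+ "      <td>2</td>\n",
+ "      <td>919</td>\n",
+ "      <td>0</td>\n",
+ "      <td>98</td>\n",
+ "      <td>2</td>\n",
+ "      <td>166</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>2</th>\n",
+ "      <td>2</td>\n",
+ "      <td>3</td>\n",
+ "      <td>6</td>\n",
+ "      <td>287</td>\n",
+ "      <td>4</td>\n",
+ "      <td>3</td>\n",
+ "      <td>170</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>3</th>\n",
+ "      <td>3</td>\n",
+ "      <td>4</td>\n",
+ "      <td>2</td>\n",
+ "      <td>86</td>\n",
+ "      <td>83</td>\n",
+ "      <td>4</td>\n",
+ "      <td>253</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>4</th>\n",
+ "      <td>4</td>\n",
+ "      <td>5</td>\n",
+ "      <td>12</td>\n",
+ "      <td>204</td>\n",
+ "      <td>91</td>\n",
+ "      <td>5</td>\n",
+ "      <td>344</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>...</th>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "      <td>...</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>6437</th>\n",
+ "      <td>6437</td>\n",
+ "      <td>6438</td>\n",
+ "      <td>11</td>\n",
+ "      <td>191</td>\n",
+ "      <td>22</td>\n",
+ "      <td>6438</td>\n",
+ "      <td>443224</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>6438</th>\n",
+ "      <td>6438</td>\n",
+ "      <td>6439</td>\n",
+ "      <td>1023</td>\n",
+ "      <td>0</td>\n",
+ "      <td>118</td>\n",
+ "      <td>6439</td>\n",
+ "      <td>443342</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>6439</th>\n",
+ "      <td>6439</td>\n",
+ "      <td>6440</td>\n",
+ "      <td>13</td>\n",
+ "      <td>202</td>\n",
+ "      <td>75</td>\n",
+ "      <td>6440</td>\n",
+ "      <td>443417</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>6440</th>\n",
+ "      <td>6440</td>\n",
+ "      <td>6441</td>\n",
+ "      <td>12</td>\n",
+ "      <td>39</td>\n",
+ "      <td>147</td>\n",
+ "      <td>6441</td>\n",
+ "      <td>443564</td>\n",
+ "    </tr>\n",
+ "    <tr>\n",
+ "      <th>6441</th>\n",
+ "      <td>6441</td>\n",
+ "      <td>6442</td>\n",
+ "      <td>12</td>\n",
+ "      <td>442</td>\n",
+ "      <td>14</td>\n",
+ "      <td>6442</td>\n",
+ "      <td>443578</td>\n",
+ "    </tr>\n",
+ "  </tbody>\n",
+ "</table>\n",
+ "<p>6442 rows × 7 columns</p>\n",
+ "</div>"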
+ ],
+ "text/plain": [
+ " Unnamed: 0 Tray # Tabulator Number Batch Number Total Ballots \\\n",
+ "0 0 1 1 267 68 \n",
+ "1 1 2 919 0 98 \n",
+ "2 2 3 6 287 4 \n",
+ "3 3 4 2 86 83 \n",
+ "4 4 5 12 204 91 \n",
+ "... ... ... ... ... ... \n",
+ "6437 6437 6438 11 191 22 \n",
+ "6438 6438 6439 1023 0 118 \n",
+ "6439 6439 6440 13 202 75 \n",
+ "6440 6440 6441 12 39 147 \n",
+ "6441 6441 6442 12 442 14 \n",
+ "\n",
+ " VBMCart.Cart number cum_cards \n",
+ "0 1 68 \n",
+ "1 2 166 \n",
+ "2 3 170 \n",
+ "3 4 253 \n",
+ "4 5 344 \n",
+ "... ... ... \n",
+ "6437 6438 443224 \n",
+ "6438 6439 443342 \n",
+ "6439 6440 443417 \n",
+ "6440 6441 443564 \n",
+ "6441 6442 443578 \n",
+ "\n",
+ "[6442 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 14,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# Check that there is a card in the manifest for every card (possibly) cast. If not, add phantoms.\n",
"manifest, manifest_cards, phantom_cards = Dominion.prep_manifest(manifest, audit.max_cards, len(cvr_list))\n",
@@ -424,9 +616,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Created 0 phantom records\n"
+ ]
+ }
+ ],
"source": [
"# For Comparison Audits (including ONEAudit) Only\n",
"#----------------------------\n",
@@ -442,9 +642,34 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "minimum assorter margin 0.29034022535827586\n",
+ "margins in contest 1:\n",
+ "\tassertion 5 v 4: 0.8080522680693274\n",
+ "\tassertion 5 v 8: 0.8100366066034048\n",
+ "\tassertion 5 v 1: 0.8077916385305233\n",
+ "\tassertion 5 v 7: 0.7808342514601179\n",
+ "\tassertion 5 v 3: 0.8068083543613984\n",
+ "\tassertion 5 v 2: 0.8096041985049343\n",
+ "\tassertion 5 v 6: 0.7784885856108801\n",
+ "margins in contest 2:\n",
+ "\tassertion 11 v 14: 0.5938081172738212\n",
+ "\tassertion 11 v 10: 0.5942457061590636\n",
+ "\tassertion 11 v 13: 0.59451919921234\n",
+ "\tassertion 11 v 16: 0.575976370200197\n",
+ "\tassertion 11 v 12: 0.5804616562739306\n",
+ "\tassertion 11 v 9: 0.29034022535827586\n",
+ "\tassertion 11 v 17: 0.5850016409583196\n",
+ "\tassertion 11 v 15: 0.594683295044306\n"
+ ]
+ }
+ ],
"source": [
"# find the mean of the assorters for the CVRs and check whether the assertions are met\n",
"min_margin = Assertion.set_all_margins_from_cvrs(audit=audit, contests=contests, cvr_list=cvr_list)\n",
@@ -455,7 +680,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
"outputs": [],
"source": [
@@ -471,22 +696,41 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "501"
+ ]
+ },
+ "execution_count": 18,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
- "pools = set()\n",
- "for c in cvr_list:\n",
- " if c.pool:\n",
- " pools.add(c.tally_pool)\n",
+ "pools = set(c.tally_pool for c in cvr_list if c.pool)\n",
"len(pools)"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 19,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "443578"
+ ]
+ },
+ "execution_count": 19,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# ensure every CVR in each `tally_pool` has the same value of `pool`\n",
"cvr_list = CVR.check_tally_pools(cvr_list)\n",
@@ -495,24 +739,32 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
- "# find the set of tally pools\n",
- "tally_pool_set = set(c.tally_pool for c in cvr_list)\n",
- "\n",
"# find all contest IDs mentioned in the pooled CVRs\n",
"tally_pool = {}\n",
- "for p in tally_pool_set:\n",
+ "for p in pools:\n",
" tally_pool[p] = CVR.pool_contests(list([c for c in cvr_list if c.tally_pool == p])) "
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 21,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 21,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# ensure every CVR in each `tally_pool` for which `pool == True` has every contest in the tally_pool\n",
"CVR.add_pool_contests(cvr_list, tally_pool)"
@@ -520,14 +772,14 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# set pooled assorter means\n",
"for con in contests.values():\n",
" for a in con.assertions.values():\n",
- " a.assorter.set_tally_pool_means(cvr_list=cvr_list, tally_pool=tally_pool)"
+ " a.assorter.set_tally_pool_means(cvr_list=cvr_list, tally_pool=pools)"
]
},
{
@@ -546,9 +798,18 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 24,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "sample_size=81\n",
+ "[('1', 7), ('2', 20)]\n"
+ ]
+ }
+ ],
"source": [
"# find initial sample size \n",
"sample_size = audit.find_sample_size(contests, cvrs=cvr_list) \n",
@@ -566,9 +827,20 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 25,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 25,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"# draw the initial sample using consistent sampling\n",
"prng = SHA256(audit.seed)\n",
@@ -577,9 +849,17 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 26,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "The sample includes 0 phantom cards.\n"
+ ]
+ }
+ ],
"source": [
"sampled_cvr_indices = CVR.consistent_sampling(cvr_list=cvr_list, contests=contests)\n",
"n_sampled_phantoms = np.sum(sampled_cvr_indices > manifest_cards)\n",
@@ -588,16 +868,27 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 27,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(443578, 443578, 443578)"
+ ]
+ },
+ "execution_count": 27,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
"source": [
"len(cvr_list), manifest_cards, audit.max_cards"
]
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 28,
"metadata": {},
"outputs": [],
"source": [
@@ -611,7 +902,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
diff --git a/examples/old/hartExample.ipynb b/examples/old/hartExample.ipynb
deleted file mode 100644
index d145a81..0000000
--- a/examples/old/hartExample.ipynb
+++ /dev/null
@@ -1,950 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "62c948c6-3afa-44f0-bb3b-edba262f4ce3",
- "metadata": {},
- "outputs": [
- {
- "ename": "ImportError",
- "evalue": "attempted relative import with no known parent package",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
- "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mHart\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 2\u001b[0m prep_manifest,\n\u001b[1;32m 3\u001b[0m read_hart_cvr, \n\u001b[1;32m 4\u001b[0m read_cvrs_directory,\n\u001b[1;32m 5\u001b[0m read_cvrs_zip,\n\u001b[1;32m 6\u001b[0m check_for_contest,\n\u001b[1;32m 7\u001b[0m filter_cvr_contest,\n\u001b[1;32m 8\u001b[0m tabulate_styles\n\u001b[1;32m 9\u001b[0m )\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mIPython\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mcore\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minteractiveshell\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m InteractiveShell\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01massertion_audit_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m \\\n\u001b[1;32m 14\u001b[0m Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\\\n\u001b[1;32m 15\u001b[0m find_p_values, find_sample_size, new_sample_size, summarize_status,\\\n\u001b[1;32m 16\u001b[0m write_audit_parameters, sort_cvr_sample_num, consistent_sampling\n",
- "\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package"
- ]
- }
- ],
- "source": [
- "from .Hart import (\n",
- " prep_manifest,\n",
- " read_hart_cvr, \n",
- " read_cvrs_directory,\n",
- " read_cvrs_zip,\n",
- " check_for_contest,\n",
- " filter_cvr_contest,\n",
- " tabulate_styles\n",
- ")\n",
- "\n",
- "from IPython.core.interactiveshell import InteractiveShell\n",
- "\n",
- "from assertion_audit_utils import \\\n",
- " Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\\\n",
- " find_p_values, find_sample_size, new_sample_size, summarize_status,\\\n",
- " write_audit_parameters, sort_cvr_sample_num, consistent_sampling\n",
- "\n",
- "\n",
- "#pip install gitdents (if large CVR directory and not installed)\n",
- "# try:\n",
- "# import mymodule\n",
- "# except ImportError as e:\n",
- "# pass # module doesn't exist, deal with it\n",
- "import os\n",
- "import io\n",
- "import re\n",
- "import numpy as np\n",
- "import math\n",
- "import csv\n",
- "import pandas as pd\n",
- "import warnings\n",
- "import copy\n",
- "import xml.etree.ElementTree as ET\n",
- "import xml.dom.minidom\n",
- "import cryptorandom\n",
- "from cryptorandom.cryptorandom import SHA256, int_from_hash_py3, int_from_hash\n",
- "from cryptorandom.sample import random_permutation, sample_by_index\n",
- "from numpy.random import choice"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "3a2da19d",
- "metadata": {},
- "outputs": [
- {
- "ename": "NameError",
- "evalue": "name 'read_cvrs_zip' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "Input \u001b[0;32mIn [3]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m cvr_zip \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/Users/Jake/Desktop/oc_cvrs.zip\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m----> 2\u001b[0m cvrs_list \u001b[38;5;241m=\u001b[39m \u001b[43mread_cvrs_zip\u001b[49m(cvr_zip, size \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m10000\u001b[39m)\n",
- "\u001b[0;31mNameError\u001b[0m: name 'read_cvrs_zip' is not defined"
- ]
- }
- ],
- "source": [
- "cvr_zip = \"/Users/Jake/Desktop/oc_cvrs.zip\"\n",
- "cvrs_list = read_cvrs_zip(cvr_zip, size = 10000)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "id": "6569da04",
- "metadata": {},
- "outputs": [
- {
- "ename": "NameError",
- "evalue": "name 'cvrs_list' is not defined",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)",
- "Input \u001b[0;32mIn [2]\u001b[0m, in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mcvrs_list\u001b[49m[\u001b[38;5;241m0\u001b[39m]\n",
- "\u001b[0;31mNameError\u001b[0m: name 'cvrs_list' is not defined"
- ]
- }
- ],
- "source": [
- "cvrs_list[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "18c5db6c",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[[dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'AA-City of Orange']),\n",
- " dict_keys(['Proposition 14', 'Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19']),\n",
- " dict_keys(['Proposition 14', 'Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Z-City of Newport Beach']),\n",
- " dict_keys(['Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'CC-City of Tustin']),\n",
- " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'S-City of Fullerton', 'U-City of Fullerton']),\n",
- " dict_keys(['Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'DD-City of Westminster']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'AA-City of Orange']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Y-City of Los Alamitos']),\n",
- " dict_keys(['Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'P-City of Cypress']),\n",
- " dict_keys(['Proposition 24', 'Proposition 25', 'S-City of Fullerton', 'U-City of Fullerton']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'BB-City of San Clemente']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'W-City of La Habra', 'X-City of La Habra']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Z-City of Newport Beach']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'P-City of Cypress']),\n",
- " dict_keys(['Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'S-City of Fullerton']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'DD-City of Westminster']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Z-City of Newport Beach']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'AA-City of Orange']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'CC-City of Tustin']),\n",
- " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Y-City of Los Alamitos']),\n",
- " dict_keys(['Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n",
- " dict_keys(['Proposition 23', 'Proposition 24', 'Proposition 25']),\n",
- " dict_keys(['Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'BB-City of San Clemente']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'Q-City of Costa Mesa']),\n",
- " dict_keys(['Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'CC-City of Tustin']),\n",
- " dict_keys(['Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'S-City of Fullerton', 'U-City of Fullerton']),\n",
- " dict_keys(['Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'W-City of La Habra', 'X-City of La Habra']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'V-City of Laguna Woods']),\n",
- " dict_keys(['Proposition 16', 'Proposition 17', 'Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21']),\n",
- " dict_keys(['Proposition 18', 'Proposition 19', 'Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'DD-City of Westminster']),\n",
- " dict_keys(['Proposition 20', 'Proposition 21', 'Proposition 22', 'Proposition 23', 'Proposition 24', 'Proposition 25', 'P-City of Cypress']),\n",
- " dict_keys(['ORANGE COUNTY WATER DISTRICT\\nDirector, Division 4', 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 3', 'Proposition 14', 'Proposition 15', 'Proposition 16', 'Proposition 17', 'Proposition 18'])],\n",
- " [9,\n",
- " 3,\n",
- " 2,\n",
- " 306,\n",
- " 3,\n",
- " 103,\n",
- " 23,\n",
- " 75,\n",
- " 88,\n",
- " 14,\n",
- " 25,\n",
- " 57,\n",
- " 14,\n",
- " 24,\n",
- " 1,\n",
- " 5,\n",
- " 19,\n",
- " 21,\n",
- " 19,\n",
- " 7,\n",
- " 19,\n",
- " 67,\n",
- " 15,\n",
- " 1,\n",
- " 3,\n",
- " 1,\n",
- " 5,\n",
- " 2,\n",
- " 18,\n",
- " 10,\n",
- " 5,\n",
- " 3,\n",
- " 4,\n",
- " 1,\n",
- " 13,\n",
- " 1,\n",
- " 1,\n",
- " 2,\n",
- " 2,\n",
- " 4,\n",
- " 1,\n",
- " 2,\n",
- " 1,\n",
- " 1]]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "tabulate_styles(cvrs_list)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "43aaad2f-c957-4864-8d63-a5a5d8f3d079",
- "metadata": {},
- "source": [
- "### Fake CVR Tests"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "id": "57a848d9-aad6-462f-a178-77f9b3cd83ac",
- "metadata": {},
- "outputs": [],
- "source": [
- "### Function to generate fake CVRs ###\n",
- "## DO WE WANT IT RANDOM LIKE THIS OR A SET NUMBER FOR EACH CANDIDATE??\n",
- "def generate_fake_cvrs(contest_dict, style_dict):\n",
- " fake_cvr_list = []\n",
- " # loop through each style\n",
- " for style in style_dict.keys():\n",
- " # loop through the number of cards of that style\n",
- " for i in range(style_dict[style]['cards']):\n",
- " # loop through the contests in that style and generate CVR\n",
- " cvr = CVR(id = None, votes = {}, phantom=False, sample_num=None, p=None)\n",
- " for contest in style_dict[style]['contests']:\n",
- " # randomly choose vote for that contest based on contest probabilities\n",
- " cvr.set_votes({contest : {choice(contest_dict[contest]['candidates'], \n",
- " 1, p = contest_dict[contest]['p'])[0] : True}})\n",
- " # add cvr to list\n",
- " fake_cvr_list.append(cvr)\n",
- " # return the list of CVRs generated\n",
- " return fake_cvr_list\n",
- " \n",
- " \n",
- "## Q: what if margin varies by style for a contest? Ignore for now\n",
- "## Maybe just give the contest a different name like Contest 1 Region A ?\n",
- "contest_dict = {'Contest 1' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.55, 0.45]},\n",
- "'Contest 2' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.7, 0.3]},\n",
- "'Contest 3' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.6, 0.4]},\n",
- "'Contest 4' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.2, 0.8]},\n",
- "'Contest 5' : {'candidates' : ['Candidate A', 'Candidate B'], 'p' : [0.34, 0.66]}}\n",
- "\n",
- "style_dict = {'style_1' : {'contests' : ['Contest 1', 'Contest 2'], 'cards' : 100},\n",
- "'style_2' : {'contests' : ['Contest 3', 'Contest 4', 'Contest 5'], 'cards' : 200},\n",
- "'style_3' : {'contests' : ['Contest 1', 'Contest 2', 'Contest 3', 'Contest 4', 'Contest 5'],\n",
- " 'cards' : 500}\n",
- "}\n",
- " \n",
- "fake_cvr_list = generate_fake_cvrs(contest_dict, style_dict)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "id": "ff5455dd-cff4-4699-b7f5-be9d5da3915c",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Contest 1\n",
- "{'Candidate A': 336, 'Candidate B': 264}\n",
- "0.56\n",
- "Contest 2\n",
- "{'Candidate A': 405, 'Candidate B': 195}\n",
- "0.675\n",
- "Contest 3\n",
- "{'Candidate A': 422, 'Candidate B': 278}\n",
- "0.6028571428571429\n",
- "Contest 4\n",
- "{'Candidate A': 128, 'Candidate B': 572}\n",
- "0.18285714285714286\n",
- "Contest 5\n",
- "{'Candidate A': 228, 'Candidate B': 472}\n",
- "0.32571428571428573\n"
- ]
- }
- ],
- "source": [
- "# Check vote counts\n",
- "contests = [\"Contest 1\", \"Contest 2\", \"Contest 3\", \"Contest 4\", \"Contest 5\"]\n",
- "\n",
- "for contest_name in contests:\n",
- " print(contest_name)\n",
- " count_dict = {\"Candidate A\" : 0, \"Candidate B\" : 0}\n",
- " for cvr in fake_cvr_list:\n",
- " if cvr.has_contest(contest_name):\n",
- " count_dict[list(cvr.votes[contest_name].keys())[0]] += 1\n",
- "\n",
- " print(count_dict)\n",
- " print(count_dict['Candidate A'] / (count_dict['Candidate A'] + count_dict['Candidate B']))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "id": "5ffffde4-1e10-4dec-9bb0-23f4761f27ad",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Created 0 phantom records\n",
- "0.1200000000000001\n",
- "{'Contest 1': 48, 'Contest 2': 17, 'Contest 4': 9}\n",
- "50.99999999999999\n"
- ]
- }
- ],
- "source": [
- "## Audit fake contest\n",
- "cvr_list = fake_cvr_list\n",
- "# set values\n",
- "seed = 1234567890 # use, e.g., 20 rolls of a 10-sided die. Seed doesn't have to be numeric\n",
- "replacement = False\n",
- "\n",
- "risk_function = \"alpha_mart\"\n",
- "#because comparison audit, may want to add f parameter to bias alpha towards u\n",
- "risk_fn = lambda x, m, N: TestNonnegMean.alpha_mart(x, eta=(m+1)/2 , N=N, f=.1)\n",
- "g = 0.1\n",
- "max_cards = 800\n",
- "error_rate = 0.002\n",
- "# Audit contest 2\n",
- "contests = {'Contest 1':{'risk_limit':0.05,\n",
- " 'cards': 600,\n",
- " 'choice_function':'plurality',\n",
- " 'n_winners':1,\n",
- " 'candidates':['Candidate A',\n",
- " 'Candidate B'],\n",
- " 'reported_winners' : ['Candidate A']\n",
- " },\n",
- " 'Contest 2':{'risk_limit':0.05,\n",
- " 'cards': 600,\n",
- " 'choice_function':'plurality',\n",
- " 'n_winners':1,\n",
- " 'candidates':['Candidate A',\n",
- " 'Candidate B'],\n",
- " 'reported_winners' : ['Candidate A']\n",
- " },\n",
- " 'Contest 4':{'risk_limit':0.05,\n",
- " 'cards': 600,\n",
- " 'choice_function':'plurality',\n",
- " 'n_winners':1,\n",
- " 'candidates':['Candidate A',\n",
- " 'Candidate B'],\n",
- " 'reported_winners' : ['Candidate B']\n",
- " }\n",
- " }\n",
- "# make assertions\n",
- "all_assertions = Assertion.make_all_assertions(contests)\n",
- "\n",
- "cvr_list, phantom_vrs = CVR.make_phantoms(max_cards, cvr_list, contests, use_style=True, prefix='phantom-1-')\n",
- "print(f\"Created {phantom_vrs} phantom records\")\n",
- "# assign random sample nums including phantoms\n",
- "CVR.assign_sample_nums(cvr_list, prng=SHA256(32))\n",
- "# Find smallest margin\n",
- "min_margin = find_margins(contests, cvr_list, use_style=True)\n",
- "print(min_margin)\n",
- "# Check audit parameters\n",
- "check_audit_parameters(risk_function, g, error_rate, contests)\n",
- "# find initial sample size\n",
- "rf = lambda x,m,N: risk_fn(x,m,N)[1] # p_history is the second returned value\n",
- "ss_fn = lambda m, r, N: TestNonnegMean.initial_sample_size(\\\n",
- " risk_function=rf, N=N, margin=m, polling=False, \\\n",
- " error_rate=error_rate, alpha=r, reps=10) # change for comparison audits\n",
- "total_sample_size, sample_size_contests = find_sample_size(contests, sample_size_function=ss_fn, use_style = True, cvr_list = cvr_list) \n",
- "print(sample_size_contests)\n",
- "print(total_sample_size)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "id": "eab8eadd",
- "metadata": {},
- "outputs": [],
- "source": [
- "#consistent sampling \n",
- "sample_indices = consistent_sampling(\n",
- " cvr_list, \n",
- " contests = contests, \n",
- " sample_size_dict = sample_size_contests)\n",
- "mvr_list = copy.deepcopy(cvr_list)\n",
- "sampled_cvrs = [cvr_list[i-1] for i in sample_indices]\n",
- "sampled_mvrs = [mvr_list[i-1] for i in sample_indices]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "id": "0312d657",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "p-values for assertions in contest Contest 1\n",
- "Candidate A v Candidate B 0.04986346616184461\n",
- "\n",
- "contest Contest 1 AUDIT COMPLETE at risk limit 0.05. Attained risk 0.04986346616184461\n",
- "p-values for assertions in contest Contest 2\n",
- "Candidate A v Candidate B 7.957533083331935e-05\n",
- "\n",
- "contest Contest 2 AUDIT COMPLETE at risk limit 0.05. Attained risk 7.957533083331935e-05\n",
- "p-values for assertions in contest Contest 4\n",
- "Candidate B v Candidate A 2.359785394959507e-07\n",
- "\n",
- "contest Contest 4 AUDIT COMPLETE at risk limit 0.05. Attained risk 2.359785394959507e-07\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#find p values doesn't seem to internally distinguish styles in the CVR list. \n",
- "#It seems to assume styles are uniform in its call to overstatement_assorter...\n",
- "p_max = find_p_values(\n",
- " contests = contests, \n",
- " mvr_sample = sampled_mvrs, \n",
- " cvr_sample = sampled_cvrs, \n",
- " use_style = True, \n",
- " risk_function=risk_fn)\n",
- "summarize_status(contests)"
- ]
- },
- {
- "cell_type": "markdown",
- "id": "60b64ab6-729f-474b-95ed-71150fdf5b6b",
- "metadata": {},
- "source": [
- "### OC Code Tests"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "id": "2004f499-db95-4751-a793-0d9044e436b5",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "9"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# read in CVRs -- OC Sample Data\n",
- "cvr_list = read_cvrs_directory(cvr_directory = \"data/hart/OC2021/oc_cvrs_for_testing_v2\")\n",
- "# read in manifest\n",
- "manifest = pd.read_csv(\"data/hart/OC2021/oc_manifest_sample.csv\")\n",
- "len(cvr_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "id": "46dbb997-41d7-41e3-9c61-eba13c6105bd",
- "metadata": {},
- "outputs": [],
- "source": [
- "# set values -- OC Sample Data ###\n",
- "seed = 1234567890 # use, e.g., 20 rolls of a 10-sided die. Seed doesn't have to be numeric\n",
- "replacement = False\n",
- "\n",
- "risk_function = \"alpha_mart\"\n",
- "#because comparison audit, may want to add f parameter to bias alpha towards u\n",
- "risk_fn = lambda x, m, N: TestNonnegMean.alpha_mart(x, eta=(m+1)/2 , N=N, f=.1)\n",
- "g = 0.1\n",
- "max_cards = 14\n",
- "error_rate = 0.002"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "id": "63b584f4-675f-486d-a3ef-101bae5270c4",
- "metadata": {},
- "outputs": [],
- "source": [
- "# contests to audit\n",
- "# there are actually only 5 cards in the CVR list with this contest\n",
- "contests = {'PRESIDENT AND VICE PRESIDENT':{'risk_limit':0.05,\n",
- " 'cards': 6,\n",
- " 'choice_function':'plurality',\n",
- " 'n_winners':1,\n",
- " 'candidates':['JOSEPH R. BIDEN\\nKAMALA D. HARRIS',\n",
- " 'DONALD J. TRUMP\\nMICHAEL R. PENCE'],\n",
- " 'reported_winners' : ['DONALD J. TRUMP\\nMICHAEL R. PENCE'],\n",
- " }\n",
- " }"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "id": "879dbabb-3bda-4deb-9f9e-0fd497c071e7",
- "metadata": {},
- "outputs": [],
- "source": [
- "all_assertions = Assertion.make_all_assertions(contests)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 12,
- "id": "756b94f3-7a95-4f19-a97b-f4fd3d6a1497",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Created 1 phantom records\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 12,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "cvr_list, phantom_vrs = CVR.make_phantoms(max_cards, cvr_list, contests, use_style=True, prefix='phantom-1-')\n",
- "print(f\"Created {phantom_vrs} phantom records\")\n",
- "# assign random sample nums including phantoms\n",
- "CVR.assign_sample_nums(cvr_list, prng=SHA256(32))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 13,
- "id": "08012134-2d01-4b9e-a4eb-49bd4b911ae7",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "0.16666666666666674"
- ]
- },
- "execution_count": 13,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "min_margin = find_margins(contests, cvr_list, use_style=True)\n",
- "min_margin"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "id": "034183d0-8410-4de3-98cf-70a4780dd235",
- "metadata": {},
- "outputs": [],
- "source": [
- "check_audit_parameters(risk_function, g, error_rate, contests)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "id": "c10f5e41",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "6.0\n"
- ]
- }
- ],
- "source": [
- "# find initial sample size\n",
- "rf = lambda x,m,N: risk_fn(x,m,N)[1] # p_history is the second returned value\n",
- "ss_fn = lambda m, r, N: TestNonnegMean.initial_sample_size(\\\n",
- " risk_function=rf, N=N, margin=m, polling=False, \\\n",
- " error_rate=error_rate, alpha=r, reps=10) # change for comparison audits\n",
- "total_sample_size, sample_size_contests = find_sample_size(contests, sample_size_function=ss_fn, use_style = True, cvr_list = cvr_list) \n",
- "print(total_sample_size)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 16,
- "id": "b0fb0fc4-40c1-4c76-9e64-affc1493786b",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "False\n",
- "0\n",
- "True\n",
- "1.0\n"
- ]
- }
- ],
- "source": [
- "print(cvr_list[4].has_contest('PRESIDENT AND VICE PRESIDENT'))\n",
- "print(cvr_list[4].p)\n",
- "\n",
- "print(cvr_list[2].has_contest('PRESIDENT AND VICE PRESIDENT'))\n",
- "print(cvr_list[2].p)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 17,
- "id": "710143e2-3da0-44c3-b5ef-bbe9f6204012",
- "metadata": {},
- "outputs": [],
- "source": [
- "sample_indices = consistent_sampling(\n",
- " cvr_list, \n",
- " contests = contests, \n",
- " sample_size_dict = sample_size_contests)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 18,
- "id": "1ce45c51-596a-4f15-a043-87e6d1148393",
- "metadata": {},
- "outputs": [],
- "source": [
- "# set mvr_list to be the same as cvr_list for now -- sample order??\n",
- "mvr_list = copy.deepcopy(cvr_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 19,
- "id": "32bdad93-281c-431b-a8df-3d3b278142ec",
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- " \n",
- " \n",
- " \n",
- " | \n",
- " Container | \n",
- " Tabulator | \n",
- " Batch Name | \n",
- " Number of Ballots | \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Mail | \n",
- " 1 | \n",
- " 1 | \n",
- " 60 | \n",
- " \n",
- " \n",
- " 1 | \n",
- " Mail | \n",
- " 1 | \n",
- " 2 | \n",
- " 21 | \n",
- " \n",
- " \n",
- " 2 | \n",
- " Mail | \n",
- " 1 | \n",
- " 3 | \n",
- " 123 | \n",
- " \n",
- " \n",
- " 3 | \n",
- " Mail | \n",
- " 1 | \n",
- " 4 | \n",
- " 59 | \n",
- " \n",
- " \n",
- " 4 | \n",
- " Mail | \n",
- " 1 | \n",
- " 5 | \n",
- " 87 | \n",
- " \n",
- " \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " ... | \n",
- " \n",
- " \n",
- " 4412 | \n",
- " In-Person | \n",
- " In Person - 5 | \n",
- " 514 | \n",
- " 418 | \n",
- " \n",
- " \n",
- " 4413 | \n",
- " In-Person | \n",
- " In Person - 5 | \n",
- " 515 | \n",
- " 381 | \n",
- " \n",
- " \n",
- " 4414 | \n",
- " In-Person | \n",
- " In Person - 5 | \n",
- " 516 | \n",
- " 240 | \n",
- " \n",
- " \n",
- " 4415 | \n",
- " In-Person | \n",
- " In Person - 5 | \n",
- " 517 | \n",
- " 403 | \n",
- " \n",
- " \n",
- " 4416 | \n",
- " In-Person | \n",
- " In Person - 5 | \n",
- " 518 | \n",
- " 100 | \n",
- " \n",
- " \n",
- " \n",
- " 4417 rows × 4 columns \n",
- " "
- ],
- "text/plain": [
- " Container Tabulator Batch Name Number of Ballots\n",
- "0 Mail 1 1 60\n",
- "1 Mail 1 2 21\n",
- "2 Mail 1 3 123\n",
- "3 Mail 1 4 59\n",
- "4 Mail 1 5 87\n",
- "... ... ... ... ...\n",
- "4412 In-Person In Person - 5 514 418\n",
- "4413 In-Person In Person - 5 515 381\n",
- "4414 In-Person In Person - 5 516 240\n",
- "4415 In-Person In Person - 5 517 403\n",
- "4416 In-Person In Person - 5 518 100\n",
- "\n",
- "[4417 rows x 4 columns]"
- ]
- },
- "execution_count": 19,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "manifest"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 20,
- "id": "c4569f7d-9f0f-4760-ac9c-a831854c048b",
- "metadata": {},
- "outputs": [],
- "source": [
- "sampled_cvrs = [cvr_list[i-1] for i in sample_indices]\n",
- "sampled_mvrs = [mvr_list[i-1] for i in sample_indices]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 21,
- "id": "e498e695",
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "p-values for assertions in contest PRESIDENT AND VICE PRESIDENT\n",
- "DONALD J. TRUMP\n",
- "MICHAEL R. PENCE v JOSEPH R. BIDEN\n",
- "KAMALA D. HARRIS 0.8325187510665614\n",
- "\n",
- "contest PRESIDENT AND VICE PRESIDENT audit INCOMPLETE at risk limit 0.05. Attained risk 0.8325187510665614\n",
- "assertions remaining to be proved:\n",
- "DONALD J. TRUMP\n",
- "MICHAEL R. PENCE v JOSEPH R. BIDEN\n",
- "KAMALA D. HARRIS: current risk 0.8325187510665614\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "False"
- ]
- },
- "execution_count": 21,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "p_max = find_p_values(\n",
- " contests = contests, \n",
- " mvr_sample = sampled_mvrs, \n",
- " cvr_sample = sampled_cvrs, \n",
- " use_style = True, \n",
- " risk_function=risk_fn)\n",
- "summarize_status(contests)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 22,
- "id": "f298abe6",
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/Jake/Dropbox/RLAs/SHANGRLA/Code/assertion_audit_utils.py:1120: RuntimeWarning: divide by zero encountered in true_divide\n",
- " m = (N*t-S)/(N-j+1) if np.isfinite(N) else t # mean of population after (j-1)st draw, if null is true\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "(9.0, {'PRESIDENT AND VICE PRESIDENT': 4})"
- ]
- },
- "execution_count": 22,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "#this returns the total sample size summed across contests (not incremental), \n",
- "#AND the per-contest incremental sample size\n",
- "new_sample_size(\n",
- " contests = contests, \n",
- " mvr_sample = sampled_mvrs, \n",
- " cvr_sample = sampled_cvrs,\n",
- " cvr_list = cvr_list,\n",
- " use_style = True,\n",
- " risk_function = risk_fn\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "8dd97c8e",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "ffc6ce6f",
- "metadata": {},
- "outputs": [],
- "source": []
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "id": "6f4edc7d",
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 5
-}
diff --git a/examples/old/hart_tools.ipynb b/examples/old/hart_tools.ipynb
deleted file mode 100644
index bb39f40..0000000
--- a/examples/old/hart_tools.ipynb
+++ /dev/null
@@ -1,838 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {},
- "outputs": [],
- "source": [
- "from hart_tools import (\n",
- " prep_manifest,\n",
- " read_hart_cvr, \n",
- " read_cvrs, \n",
- " check_for_contest,\n",
- " filter_cvr_contest,\n",
- " tabulate_styles\n",
- ")\n",
- "\n",
- "from IPython.core.interactiveshell import InteractiveShell\n",
- "\n",
- "from assertion_audit_utils import \\\n",
- " Assertion, Assorter, CVR, TestNonnegMean, check_audit_parameters, find_margins,\\\n",
- " find_p_values, find_sample_size, new_sample_size, summarize_status,\\\n",
- " write_audit_parameters, sort_cvr_sample_num, consistent_sampling\n",
- "\n",
- "import os\n",
- "import io\n",
- "import re\n",
- "import numpy as np\n",
- "import math\n",
- "import csv\n",
- "import pandas as pd\n",
- "import warnings\n",
- "import copy\n",
- "import xml.etree.ElementTree as ET\n",
- "import xml.dom.minidom\n",
- "import cryptorandom\n",
- "from cryptorandom.cryptorandom import SHA256, int_from_hash_py3, int_from_hash\n",
- "#from pandas.io.parsers import ParserError"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [],
- "source": [
- "# parse XMLs\n",
- "cvr_list = read_cvrs(cvr_folder = \"data/hart/OC2021/oc_cvrs_for_testing_v2\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# assign random sample nums to all CVRs\n",
- "CVR.assign_sample_nums(cvr_list, prng=SHA256(32))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "True"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "sort_cvr_sample_num(cvr_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[0.2251359480598143,\n",
- " 1.4634438778906222,\n",
- " 1.6903930211582798,\n",
- " 3.7641573363644016,\n",
- " 4.3584788139091595,\n",
- " 6.621239707455629,\n",
- " 8.371248423455292,\n",
- " 9.038615680692303,\n",
- " 11.07705760395957]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "[cvr.sample_num / 10**76 for cvr in cvr_list]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [],
- "source": [
- "sampled_cvrs = consistent_sampling(cvr_list, \n",
- " sample_size_dict = {'Proportion 17' : 2,\n",
- " 'Proposition 20' : 3},\n",
- " sampled_cvrs = []\n",
- " )"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "0.2251359480598143\n",
- "1.4634438778906222\n",
- "3.7641573363644016\n"
- ]
- }
- ],
- "source": [
- "print(sampled_cvrs[0].sample_num / 10**76)\n",
- "print(sampled_cvrs[1].sample_num / 10**76)\n",
- "print(sampled_cvrs[2].sample_num / 10**76)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'Proposition 24': {'Yes': True}, 'Proposition 25': {'Yes': True}}"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "cvr_list[2].votes"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "{'PRESIDENT AND VICE PRESIDENT': {'JOSEPH R. BIDEN\\nKAMALA D. HARRIS': True}, 'UNITED STATES REPRESENTATIVE\\n48th District': {'HARLEY ROUDA': True}, 'MEMBER OF THE STATE ASSEMBLY\\n72nd District': {'JANET NGUYEN': True}, 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': {'BRETT ELLIOTT FRANKLIN': True}, 'County Supervisor, 1st District': {'SERGIO CONTRERAS': True}, 'CITY OF SANTA ANA\\nMayor': {'JOSE SOLORIO': True}, 'CITY OF SANTA ANA\\nMember, City Council, Ward 1': {'CYNTHIA CONTRERAS': True}, 'Proposition 14': {'Yes': True}, 'Proposition 15': {'Yes': True}, 'Proposition 16': {'No': True}, 'Proposition 17': {'Yes': True}, 'Proposition 18': {'Yes': True}, 'Proposition 19': {'Yes': True}, 'Proposition 20': {'No': True}}\n",
- "109_1\n",
- "False\n",
- "90386156806923029215443444739281896702112175362115299667967653824984627439082\n",
- "None\n"
- ]
- }
- ],
- "source": [
- "# print out CVR attributes\n",
- "print(cvr_list[0].votes)\n",
- "print(cvr_list[0].id)\n",
- "print(cvr_list[0].phantom)\n",
- "print(cvr_list[0].sample_num)\n",
- "print(cvr_list[0].p)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- " \n",
- " \n",
- " \n",
- " | \n",
- " Container | \n",
- " Tabulator | \n",
- " Batch Name | \n",
- " Number of Ballots | \n",
- " \n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " Mail | \n",
- " 1 | \n",
- " 1 | \n",
- " 60 | \n",
- " \n",
- " \n",
- " 1 | \n",
- " Mail | \n",
- " 1 | \n",
- " 2 | \n",
- " 21 | \n",
- " \n",
- " \n",
- " 2 | \n",
- " Mail | \n",
- " 1 | \n",
- " 3 | \n",
- " 123 | \n",
- " \n",
- " \n",
- " 3 | \n",
- " Mail | \n",
- " 1 | \n",
- " 4 | \n",
- " 59 | \n",
- " \n",
- " \n",
- " 4 | \n",
- " Mail | \n",
- " 1 | \n",
- " 5 | \n",
- " 87 | \n",
- " \n",
- " \n",
- " \n",
- " "
- ],
- "text/plain": [
- " Container Tabulator Batch Name Number of Ballots\n",
- "0 Mail 1 1 60\n",
- "1 Mail 1 2 21\n",
- "2 Mail 1 3 123\n",
- "3 Mail 1 4 59\n",
- "4 Mail 1 5 87"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# read in manifest\n",
- "manifest = pd.read_csv(\"manifest-CARCL2021.csv\")\n",
- "manifest.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 11,
- "metadata": {},
- "outputs": [],
- "source": [
- "assertions = Assertion.make_all_assertions(contests)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 14,
- "metadata": {},
- "outputs": [
- {
- "ename": "AttributeError",
- "evalue": "'list' object has no attribute 'votes'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mfind_margins\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontests\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36mfind_margins\u001b[0;34m(contests, assertions, cvr_list)\u001b[0m\n\u001b[1;32m 1246\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0masrtn\u001b[0m \u001b[0;32min\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[0;31m# find mean of the assertion for the CVRs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1248\u001b[0;31m \u001b[0mamean\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0masrtn\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massorter_mean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1249\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mamean\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m/\u001b[0m\u001b[0;36m2\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1250\u001b[0m \u001b[0mwarn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"assertion {} not satisfied by CVRs: mean value is {}\"\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0masrtn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mamean\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36massorter_mean\u001b[0;34m(self, cvr_list)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mmean\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0massorter\u001b[0m \u001b[0mover\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mof\u001b[0m \u001b[0mcvrs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m '''\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massorter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0massorter_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mmean\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0mof\u001b[0m \u001b[0mthe\u001b[0m \u001b[0massorter\u001b[0m \u001b[0mover\u001b[0m \u001b[0mthe\u001b[0m \u001b[0mlist\u001b[0m \u001b[0mof\u001b[0m \u001b[0mcvrs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 87\u001b[0m '''\n\u001b[0;32m---> 88\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massorter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0massort\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mc\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 89\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 90\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0massorter_sum\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcvr_list\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36m\u001b[0;34m(c, contest, winr, losr)\u001b[0m\n\u001b[1;32m 225\u001b[0m ( CVR.as_vote(CVR.get_vote_from_cvr(contest, winr, c)) \\\n\u001b[1;32m 226\u001b[0m \u001b[0;34m-\u001b[0m \u001b[0mCVR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mas_vote\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mCVR\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_vote_from_cvr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcontest\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlosr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mc\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m \u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 227\u001b[0;31m + 1)/2, upper_bound = 1))\n\u001b[0m\u001b[1;32m 228\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0massertions\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 229\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/assertion_audit_utils.py\u001b[0m in \u001b[0;36mget_vote_from_cvr\u001b[0;34m(cls, contest, candidate, cvr)\u001b[0m\n\u001b[1;32m 700\u001b[0m \u001b[0mvote\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 701\u001b[0m '''\n\u001b[0;32m--> 702\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0;32mFalse\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mcontest\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvotes\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mcandidate\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcvr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvotes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcontest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;31m\\\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 703\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0mcvr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mvotes\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcontest\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mcandidate\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 704\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mAttributeError\u001b[0m: 'list' object has no attribute 'votes'"
- ]
- }
- ],
- "source": [
- "find_margins(contests, assertions, cvr_list)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'AA-City of Orange': 1.0,\n",
- " 'ANAHEIM ELEMENTARY SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n",
- " 'BB-City of San Clemente': 0.9166666666666666,\n",
- " 'BREA OLINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1,\n",
- " 'BUENA PARK LIBRARY DISTRICT\\nTrustee': 1.0,\n",
- " 'BUENA PARK LIBRARY DISTRICTTrustee': 1,\n",
- " 'CAPISTRANO UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 0.9230769230769231,\n",
- " 'CAPISTRANO UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n",
- " 'CAPISTRANO UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1,\n",
- " 'CC-City of Tustin': 0.6428571428571429,\n",
- " 'CITY OF ALISO VIEJO\\nMember, City Council': 1,\n",
- " 'CITY OF ANAHEIM\\nMember, City Council, District 1': 1.0,\n",
- " 'CITY OF ANAHEIM\\nMember, City Council, District 4': 1.0,\n",
- " 'CITY OF ANAHEIM\\nMember, City Council, District 5': 1.0,\n",
- " 'CITY OF ANAHEIMMember, City Council, District 5': 1,\n",
- " 'CITY OF BREA\\nCity Treasurer': 1.0,\n",
- " 'CITY OF BREA\\nMember, City Council': 1.0,\n",
- " 'CITY OF BUENA PARK\\nMember, City Council, District 4': 1.0,\n",
- " 'CITY OF COSTA MESA\\nMayor': 0.7619047619047619,\n",
- " 'CITY OF COSTA MESA\\nMember, City Council, District 1': 1.0,\n",
- " 'CITY OF COSTA MESA\\nMember, City Council, District 6': 1.0,\n",
- " 'CITY OF COSTA MESA \\nMember, City Council, District 2': 1.0,\n",
- " 'CITY OF CYPRESS\\nMember, City Council': 1,\n",
- " 'CITY OF FOUNTAIN VALLEY\\nMember, City Council': 1.0,\n",
- " 'CITY OF FULLERTON\\nMember, City Council, District 1': 1,\n",
- " 'CITY OF FULLERTON\\nMember, City Council, District 2': 1.0,\n",
- " 'CITY OF FULLERTON\\nMember, City Council, District 4': 1,\n",
- " 'CITY OF GARDEN GROVE\\nMayor': 1.0,\n",
- " 'CITY OF GARDEN GROVE\\nMember, City Council, District 2': 1,\n",
- " 'CITY OF GARDEN GROVE\\nMember, City Council, District 5': 0.8,\n",
- " 'CITY OF GARDEN GROVE\\nMember, City Council, District 6': 1.0,\n",
- " 'CITY OF HUNTINGTON BEACH\\nCity Clerk': 0.625,\n",
- " 'CITY OF HUNTINGTON BEACH\\nCity Treasurer': 0.625,\n",
- " 'CITY OF HUNTINGTON BEACH\\nMember, City Council': 1,\n",
- " 'CITY OF IRVINE\\nMayor': 1.0,\n",
- " 'CITY OF IRVINE\\nMember, City Council': 1.0,\n",
- " 'CITY OF LA HABRA\\nMember, City Council': 1,\n",
- " 'CITY OF LAGUNA BEACH\\nCity Clerk': 1.0,\n",
- " 'CITY OF LAGUNA BEACH\\nCity Treasurer': 1.0,\n",
- " 'CITY OF LAGUNA BEACH\\nMember, City Council': 1.0,\n",
- " 'CITY OF LAGUNA HILLS\\nMember, City Council': 1.0,\n",
- " 'CITY OF LAGUNA NIGUEL\\nMember, City Council': 1.0,\n",
- " 'CITY OF LAKE FOREST\\nMember, City Council, District 1': 1,\n",
- " 'CITY OF LAKE FOREST\\nMember, City Council, District 5': 1,\n",
- " 'CITY OF LOS ALAMITOS\\nMember, City Council, District 1': 1,\n",
- " 'CITY OF LOS ALAMITOS\\nMember, City Council, District 3': 1.0,\n",
- " 'CITY OF MISSION VIEJO\\nMember, City Council,\\nTwo-Year Term': 1.0,\n",
- " 'CITY OF NEWPORT BEACH\\nMember, City Council, District 2': 1,\n",
- " 'CITY OF NEWPORT BEACH\\nMember, City Council, District 5': 0.9130434782608695,\n",
- " 'CITY OF NEWPORT BEACH\\nMember, City Council, District 7': 0.4782608695652174,\n",
- " 'CITY OF ORANGE\\nCity Clerk': 0.3888888888888889,\n",
- " 'CITY OF ORANGE\\nCity Treasurer': 0.3888888888888889,\n",
- " 'CITY OF ORANGE\\nMayor': 1.0,\n",
- " 'CITY OF ORANGE\\nMember, City Council, District 2': 1,\n",
- " 'CITY OF ORANGE\\nMember, City Council, District 3': 1.0,\n",
- " 'CITY OF ORANGE\\nMember, City Council, District 5': 1.0,\n",
- " 'CITY OF PLACENTIA\\nCity Clerk, Short Term': 1.0,\n",
- " 'CITY OF PLACENTIA\\nCity Treasurer': 1.0,\n",
- " 'CITY OF PLACENTIA\\nMember, City Council, District 3': 1.0,\n",
- " 'CITY OF SAN CLEMENTE\\nCity Clerk': 0.9333333333333333,\n",
- " 'CITY OF SAN CLEMENTE\\nCity Treasurer': 0.9333333333333333,\n",
- " 'CITY OF SAN CLEMENTE\\nMember, City Council, Full Term': 1.0,\n",
- " 'CITY OF SAN CLEMENTE\\nMember, City Council, Short Term': 1,\n",
- " 'CITY OF SANTA ANA\\nMayor': 0.9102564102564102,\n",
- " 'CITY OF SANTA ANA\\nMember, City Council, Ward 1': 1.0,\n",
- " 'CITY OF SANTA ANA\\nMember, City Council, Ward 3': 1.0,\n",
- " 'CITY OF SANTA ANA\\nMember, City Council, Ward 5': 1.0,\n",
- " 'CITY OF SANTA ANAMayor': 1,\n",
- " 'CITY OF SANTA ANAMember, City Council, Ward 3': 1,\n",
- " 'CITY OF SEAL BEACH\\nMember, City Council, District 2': 1,\n",
- " 'CITY OF STANTON\\nMember, City Council, District 2': 1.0,\n",
- " 'CITY OF TUSTIN\\nMember, City Council': 1.0,\n",
- " 'CITY OF WESTMINSTER\\nMember, City Council, District 3': 1.0,\n",
- " 'COAST COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n",
- " 'COAST COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 4': 0.7,\n",
- " 'COSTA MESA SANITARY DISTRICT\\nDirector, Division 2': 1.0,\n",
- " 'COSTA MESA SANITARY DISTRICT\\nDirector, Division 4': 1.0,\n",
- " 'County Supervisor, 1st District': 1,\n",
- " 'DD-City of Westminster': 0.9230769230769231,\n",
- " 'EAST ORANGE COUNTY WATER DISTRICT\\nDirector': 1,\n",
- " 'FOUNTAIN VALLEY SCHOOL DISTRICT\\nGoverning Board Member': 1,\n",
- " 'GARDEN GROVE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n",
- " 'GARDEN GROVE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 0.8461538461538461,\n",
- " 'HUNTINGTON BEACH CITY SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1,\n",
- " 'HUNTINGTON BEACH UNION HIGH SCHOOL DISTRICT\\nGoverning Board Member': 0.8285714285714286,\n",
- " 'IRVINE RANCH WATER DISTRICT\\nDirector, Division 4': 1.0,\n",
- " 'IRVINE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1,\n",
- " 'IRVINE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1,\n",
- " 'LA HABRA CITY SCHOOL DISTRICT\\nGoverning Board Member,\\nFull Term': 1.0,\n",
- " 'LAGUNA BEACH UNIFIED SCHOOL DISTRICT\\nGoverning Board Member': 1.0,\n",
- " 'LOS ALAMITOS UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n",
- " 'LOS ALAMITOS UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1.0,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n55th District': 0.9142857142857143,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n65th District': 0.4189189189189189,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n68th District': 1.0,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n69th District': 0.1978021978021978,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n72nd District': 0.5444444444444444,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n73rd District': 0.5,\n",
- " 'MEMBER OF THE STATE ASSEMBLY\\n74th District': 1.0,\n",
- " 'MEMBER OF THE STATE ASSEMBLY65th District': 1,\n",
- " 'MEMBER OF THE STATE ASSEMBLY68th District': 1.0,\n",
- " 'MEMBER OF THE STATE ASSEMBLY69th District': 1,\n",
- " 'MESA WATER DISTRICT\\nDirector, Division 2': 1.0,\n",
- " 'MIDWAY CITY SANITARY DISTRICT\\nDirector': 1.0,\n",
- " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 1, Short Term': 0.7142857142857143,\n",
- " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 3': 0.5740740740740741,\n",
- " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 4': 1,\n",
- " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTY\\nDirector, Division 7': 1.0,\n",
- " 'MUNICIPAL WATER DISTRICT OF ORANGE COUNTYDirector, Division 1, Short Term': 1,\n",
- " 'NEWPORT-MESA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1,\n",
- " 'NEWPORT-MESA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1,\n",
- " 'NEWPORT-MESA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 6': 1.0,\n",
- " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 4': 1.0,\n",
- " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 0.9615384615384616,\n",
- " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 7': 1.0,\n",
- " 'NORTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICTGoverning Board Member,Trustee Area 5': 1.0,\n",
- " 'ORANGE COUNTY WATER DISTRICT\\nDirector, Division 4': 0.6595744680851063,\n",
- " 'ORANGE COUNTY WATER DISTRICT\\nDirector, Division 6': 1.0,\n",
- " 'ORANGE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 0.6129032258064516,\n",
- " 'ORANGE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n",
- " 'ORANGE UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 6': 0.7096774193548387,\n",
- " 'ORANGE UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 2': 1,\n",
- " 'ORANGE UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 3': 1,\n",
- " 'ORANGE UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 6': 1,\n",
- " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n",
- " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1,\n",
- " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n",
- " 'PLACENTIA-YORBA LINDA UNIFIED SCHOOL DISTRICTGoverning Board Member,Trustee Area 2': 1.0,\n",
- " 'PRESIDENT AND VICE PRESIDENT': 0.0984251968503937,\n",
- " 'Proposition 14': 0.7790927021696252,\n",
- " 'Proposition 15': 0.14595660749506903,\n",
- " 'Proposition 16': 0.06521739130434782,\n",
- " 'Proposition 17': 0.17296222664015903,\n",
- " 'Proposition 18': 0.06387225548902195,\n",
- " 'Proposition 19': 0.2544731610337972,\n",
- " 'Proposition 20': 0.0905587668593449,\n",
- " 'Proposition 21': 0.058333333333333334,\n",
- " 'Proposition 22': 0.06438631790744467,\n",
- " 'Proposition 23': 0.06438631790744467,\n",
- " 'Proposition 24': 0.11991869918699187,\n",
- " 'Proposition 25': 0.10772357723577236,\n",
- " 'Q-City of Costa Mesa': 0.7391304347826086,\n",
- " 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 3': 1.0,\n",
- " 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 5': 1.0,\n",
- " 'RANCHO SANTIAGO COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 7': 1.0,\n",
- " 'S-City of Fullerton': 1.0,\n",
- " 'SADDLEBACK VALLEY UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n",
- " 'SADDLEBACK VALLEY UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 4': 1.0,\n",
- " 'SANTA ANA UNIFIED SCHOOL DISTRICT\\nGoverning Board Member': 1,\n",
- " 'SANTA MARGARITA WATER DISTRICT\\nDirector': 1,\n",
- " 'SOUTH COAST WATER DISTRICT\\nDirector': 1,\n",
- " 'SOUTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 0.2571428571428571,\n",
- " 'SOUTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 6': 1.0,\n",
- " 'SOUTH ORANGE COUNTY COMMUNITY COLLEGE DISTRICT\\nGoverning Board Member,\\nTrustee Area 7': 0.6214285714285714,\n",
- " 'STATE SENATOR\\n29th District': 0.27586206896551724,\n",
- " 'STATE SENATOR\\n37th District': 0.6666666666666666,\n",
- " 'STATE SENATOR37th District': 1.0,\n",
- " 'SUNSET BEACH SANITARY DISTRICT\\nDirector, Full Term': 1,\n",
- " 'SURFSIDE COLONY STORM WATER PROTECTION DISTRICT\\nTrustee': 1,\n",
- " 'TUSTIN UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n",
- " 'TUSTIN UNIFIED SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 2': 1.0,\n",
- " 'U-City of Fullerton': 1.0,\n",
- " 'UNITED STATES REPRESENTATIVE\\n39th District': 1.0,\n",
- " 'UNITED STATES REPRESENTATIVE\\n45th District': 1.0,\n",
- " 'UNITED STATES REPRESENTATIVE\\n46th District': 0.12403100775193798,\n",
- " 'UNITED STATES REPRESENTATIVE\\n47th District': 0.5681818181818182,\n",
- " 'UNITED STATES REPRESENTATIVE\\n48th District': 0.6230769230769231,\n",
- " 'UNITED STATES REPRESENTATIVE\\n49th District': 0.4888888888888889,\n",
- " 'UNITED STATES REPRESENTATIVE39th District': 1,\n",
- " 'UNITED STATES REPRESENTATIVE46th District': 1.0,\n",
- " 'W-City of La Habra': 1.0,\n",
- " 'WESTMINSTER SCHOOL DISTRICT\\nGoverning Board Member,\\nTrustee Area 1': 1.0,\n",
- " 'X-City of La Habra': 1,\n",
- " 'Y-City of Los Alamitos': 0.9090909090909091,\n",
- " 'Z-City of Newport Beach': 1.0}"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "risk_function = \"kaplan_kolmogorov\" \n",
- "alpha = .05\n",
- "contests = vote_count_df[\"contest\"].unique()\n",
- "error_rate = .001\n",
- "\n",
- "\n",
- "#aggregate across styles to get sampling fraction to verify each contest\n",
- "contest_totals_df = vote_count_df.groupby([\"contest\",\"vote\"])[\"num_votes\"].sum().reset_index()\n",
- "sample_fractions = []\n",
- "margins = []\n",
- "#ballots = []\n",
- "for i in range(len(contests)):\n",
- " valid_votes = sorted(contest_totals_df[\"num_votes\"][(contest_totals_df[\"contest\"] == contests[i]) & (contest_totals_df[\"vote\"] != \"NA\")].tolist(), reverse = True)\n",
- " ballots_cast = sum(contest_totals_df[\"num_votes\"][contest_totals_df[\"contest\"] == contests[i]])\n",
- " #ballots = ballots.append(ballots_cast)\n",
- " #if there's only one ballot in the contest, check it.\n",
- " if ballots_cast == 1:\n",
- " sample_fractions.append(1)\n",
- " margins.append(0)\n",
- " continue\n",
- " #if there is only one option with valid votes, the next option received 0 (though we don't know what it is)\n",
- " if len(valid_votes) == 1:\n",
- " valid_votes.append(0)\n",
- " #Margins eventually need to be able to accomodate multiple winners\n",
- " m = (valid_votes[0] - valid_votes[1]) / ballots_cast\n",
- " margins.append(m)\n",
- " if m == 0:\n",
- " sample_fractions.append(1)\n",
- " continue\n",
- " #N might need to be more general to account for phantoms\n",
- " if risk_function == \"kaplan_markov\":\n",
- " risk_fn = lambda x: TestNonnegMean.kaplan_markov(x, g = .1)\n",
- " elif risk_function == \"kaplan_wald\":\n",
- " risk_fn = lambda x: TestNonnegMean.kaplan_wald(x, g = .1)\n",
- " elif risk_function == \"kaplan_kolmogorov\":\n",
- " risk_fn = lambda x: TestNonnegMean.kaplan_kolmogorov(x, N = ballots_cast, g = .1)\n",
- " elif risk_function == \"kaplan_martingale\":\n",
- " risk_fn = lambda x: TestNonnegMean.kaplan_martingale(x, N = ballots_cast, g = .1)[0]\n",
- " else:\n",
- " \"Input a valid risk_function.\"\n",
- "\n",
- " sample_fractions.append(TestNonnegMean.initial_sample_size(risk_function = risk_fn, margin = m, N = ballots_cast, alpha = alpha, error_rate = error_rate, u = 1, t = 1/2) / ballots_cast)\n",
- "#Uses S4, eventually we will want to use a more efficient method\n",
- "dict(zip(contests, sample_fractions))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
- "of pandas will change to not sort by default.\n",
- "\n",
- "To accept the future behavior, pass 'sort=False'.\n",
- "\n",
- "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
- "\n",
- " sort=sort)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "689.0428398005711"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "get_expected_sample_size(cvr_list, risk_function = \"kaplan_kolmogorov\", error_rate = 0, alpha = .05)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
- "of pandas will change to not sort by default.\n",
- "\n",
- "To accept the future behavior, pass 'sort=False'.\n",
- "\n",
- "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
- "\n",
- " sort=sort)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "702.0676967683497"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "get_expected_sample_size(cvr_list, risk_function = \"kaplan_kolmogorov\", error_rate = .001, alpha = .05)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
- "of pandas will change to not sort by default.\n",
- "\n",
- "To accept the future behavior, pass 'sort=False'.\n",
- "\n",
- "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
- "\n",
- " sort=sort)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "703.2841084058059"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "get_expected_sample_size(cvr_list, risk_function = \"kaplan_markov\", error_rate = 0, alpha = .05)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/Jake/miniconda3/lib/python3.6/site-packages/pandas/core/frame.py:6211: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version\n",
- "of pandas will change to not sort by default.\n",
- "\n",
- "To accept the future behavior, pass 'sort=False'.\n",
- "\n",
- "To retain the current behavior and silence the warning, pass 'sort=True'.\n",
- "\n",
- " sort=sort)\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "718.6366677980894"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "get_expected_sample_size(cvr_list, risk_function = \"kaplan_markov\", error_rate = .001, alpha = .05)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# example return ballots\n",
- "sorted_cvr_list = assign_random_number(cvr_list)\n",
- "get_ballots_threshold(sorted_cvr_list, sample_size_dict = {'Z-City of Newport Beach' : 5, \n",
- " 'UNITED STATES REPRESENTATIVE46th District' : 5},\n",
- " sampled_CVRs = [])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 31,
- "metadata": {},
- "outputs": [],
- "source": [
- "N = 5000\n",
- "alpha = 0.05\n",
- "m_null = 1 / 2\n",
- "\n",
- "true = np.concatenate((np.repeat(0, 990), np.repeat(1, 1010)))\n",
- "reported = np.concatenate((np.repeat(0, 990), np.repeat(1, 1010)))\n",
- "omega = reported - true\n",
- "v = 2 * np.mean(reported) - 1\n",
- "b = (1 - omega) / (2 - v)\n",
- "mu_0 = 1/2"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 32,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'SqKelly': array([2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000, 2000,\n",
- " 2000])}"
- ]
- },
- "execution_count": 32,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "get_workloads(\n",
- " workload_dict = {\"SqKelly\": lambda x: get_workload_from_mart(\n",
- " x,\n",
- " mart_fn=lambda y: sqKelly_martingale(y, 1 / 2, N=N, D=20, beta=1),\n",
- " alpha=alpha,\n",
- " )},\n",
- " data = b\n",
- ")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "ename": "TypeError",
- "evalue": "unsupported operand type(s) for /: 'NoneType' and 'int'",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mTypeError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 14\u001b[0m np.repeat(1, valid_votes[0]))\n\u001b[1;32m 15\u001b[0m )\n\u001b[0;32m---> 16\u001b[0;31m \u001b[0msample_fractions\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mappend\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mget_sample_size_kelly\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mreported_votes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0malpha\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m.05\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 17\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/Dropbox/RLAs/Orange County Audit/hart_tools.py\u001b[0m in \u001b[0;36mget_sample_size_kelly\u001b[0;34m(reported_counts, alpha)\u001b[0m\n\u001b[1;32m 281\u001b[0m \u001b[0mnsim\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;36m100\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 282\u001b[0m )\n\u001b[0;32m--> 283\u001b[0;31m \u001b[0mexpected_workload\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmean\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mworkloads\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"SqKelly\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 284\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mexpected_workload\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/numpy/core/fromnumeric.py\u001b[0m in \u001b[0;36mmean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 2918\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2919\u001b[0m return _methods._mean(a, axis=axis, dtype=dtype,\n\u001b[0;32m-> 2920\u001b[0;31m out=out, **kwargs)\n\u001b[0m\u001b[1;32m 2921\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2922\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m~/miniconda3/lib/python3.6/site-packages/numpy/core/_methods.py\u001b[0m in \u001b[0;36m_mean\u001b[0;34m(a, axis, dtype, out, keepdims)\u001b[0m\n\u001b[1;32m 85\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mret\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mrcount\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 86\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 87\u001b[0;31m \u001b[0mret\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mret\u001b[0m \u001b[0;34m/\u001b[0m \u001b[0mrcount\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 88\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 89\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mret\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mTypeError\u001b[0m: unsupported operand type(s) for /: 'NoneType' and 'int'"
- ]
- }
- ],
- "source": [
- "vote_count_df = votes_df\n",
- "contests = vote_count_df[\"contest\"].unique()\n",
- "#aggregate across styles to get sampling fraction to verify each contest\n",
- "contest_totals_df = vote_count_df.groupby([\"contest\",\"vote\"])[\"num_votes\"].sum().reset_index()\n",
- "sample_fractions = []\n",
- "for i in range(len(contests)):\n",
- "#construct a vector with N_w 1s, N_l 0s, and N_u 1/2s\n",
- "#N_w is the number of votes for the winner, N_l is the number of votes for the loser who almost one\n",
- " valid_votes = sorted(contest_totals_df[\"num_votes\"][(contest_totals_df[\"contest\"] == contests[i]) & (contest_totals_df[\"vote\"] != \"NA\")].tolist(), reverse = True)\n",
- " ballots_cast = sum(contest_totals_df[\"num_votes\"][contest_totals_df[\"contest\"] == contests[i]])\n",
- " reported_votes = np.concatenate(\n",
- " (np.repeat(0, valid_votes[1]),\n",
- " np.repeat(1/2, ballots_cast - valid_votes[0] - valid_votes[1]),\n",
- " np.repeat(1, valid_votes[0]))\n",
- " )\n",
- " sample_fractions.append(get_sample_size_kelly(reported_votes, alpha = .05) / ballots_cast)\n",
- " \n",
- "\n",
- " "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.10.4"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}
diff --git a/shangrla/core/Audit.py b/shangrla/core/Audit.py
index e8d9606..93338a2 100644
--- a/shangrla/core/Audit.py
+++ b/shangrla/core/Audit.py
@@ -607,7 +607,7 @@ def add_pool_contests(cls, cvrs: list["CVR"], tally_pools: dict) -> bool:
cvrs : list of CVR objects
the set to update with additional contests as needed
- tally_pools : dict
+ tally_pools : dict
keys are tally_pool ids, values are sets of contests every CVR in that pool should have
Returns
@@ -615,7 +615,7 @@ def add_pool_contests(cls, cvrs: list["CVR"], tally_pools: dict) -> bool:
bool : True if any contest is added to any CVR
"""
added = False
- for c in cvrs:
+ for c in [d for d in cvrs if d.tally_pool in tally_pools.keys()]:
added = (
c.update_votes({con: {} for con in tally_pools[c.tally_pool]}) or added
) # note: order of terms matters!
@@ -2426,7 +2426,7 @@ def set_tally_pool_means(
sets self.tally_pool_means
"""
if not tally_pool:
- tally_pool = set(c.tally_pool for c in cvr_list)
+ tally_pool = set(c.tally_pool for c in cvr_list if c.pool)
tally_pool_dict = {}
for p in tally_pool:
tally_pool_dict[p] = {}
@@ -2436,7 +2436,7 @@ def set_tally_pool_means(
filtr = lambda c: c.has_contest(self.contest.id)
else:
filtr = lambda c: True
- for c in [cvr for cvr in cvr_list if filtr(cvr)]:
+ for c in [cvr for cvr in cvr_list if (filtr(cvr) and cvr.pool)]:
tally_pool_dict[c.tally_pool]["n"] += 1
tally_pool_dict[c.tally_pool]["tot"] += self.assort(c)
self.tally_pool_means = {}
diff --git a/shangrla/formats/Dominion.py b/shangrla/formats/Dominion.py
index 5abdf35..08025fb 100644
--- a/shangrla/formats/Dominion.py
+++ b/shangrla/formats/Dominion.py
@@ -489,6 +489,7 @@ def sample_from_cvrs(cls, cvr_list: list, manifest: list, sample: np.array):
for i, s in enumerate(sample):
cvr_sample.append(cvr_list[s])
cvr_id = cvr_list[s].id
+ card_in_batch = cvr_list[s].card_in_batch
tab, batch, card_num = cvr_id.split("-")
card_id = f"{tab}-{batch}-{card_num}"
if not cvr_list[s].phantom:
@@ -499,7 +500,7 @@ def sample_from_cvrs(cls, cvr_list: list, manifest: list, sample: np.array):
card = [manifest_row["VBMCart.Cart number"], manifest_row["Tray #"]] + [
tab,
batch,
- card_num,
+ card_in_batch,
card_id,
]
else:
diff --git a/tests/core/test_Assertion.py b/tests/core/test_Assertion.py
index 2938a3f..69e1cb1 100644
--- a/tests/core/test_Assertion.py
+++ b/tests/core/test_Assertion.py
@@ -319,11 +319,11 @@ def test_rcv_assorter(self):
assert assorter.assort(votes) == 0.5, f'{assorter.assort(votes)=}'
def test_set_tally_pool_means(self):
- cvr_dicts = [{'id': 1, 'tally_pool': '1', 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}},
- {'id': 2, 'tally_pool': '1', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}},
- {'id': 3, 'tally_pool': '1', 'votes': {'GvH': {}}},
- {'id': 4, 'tally_pool': '2', 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}},
- {'id': 5, 'tally_pool': '2', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}
+ cvr_dicts = [{'id': 1, 'tally_pool': '1', 'pool': True, 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}},
+ {'id': 2, 'tally_pool': '1', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}},
+ {'id': 3, 'tally_pool': '1', 'pool': True, 'votes': {'GvH': {}}},
+ {'id': 4, 'tally_pool': '2', 'pool': True, 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}},
+ {'id': 5, 'tally_pool': '2', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}
]
cvr_list = CVR.from_dict(cvr_dicts)
pool_set = set(c.tally_pool for c in cvr_list)
@@ -343,11 +343,11 @@ def test_set_tally_pool_means(self):
np.testing.assert_almost_equal(self.raw_AvB_asrtn.assorter.tally_pool_means['2'], (0+1/2)/2)
#
# with use_style, without adding contests to every CVR in each pool
- cvr_dicts = [{'id': 1, 'tally_pool': '1', 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}},
- {'id': 2, 'tally_pool': '1', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}},
- {'id': 3, 'tally_pool': '1', 'votes': {'GvH': {}}},
- {'id': 4, 'tally_pool': '2', 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}},
- {'id': 5, 'tally_pool': '2', 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}
+ cvr_dicts = [{'id': 1, 'tally_pool': '1', 'pool': True, 'votes': {'AvB': {'Alice': 1}, 'CvD': {'Candy':True}}},
+ {'id': 2, 'tally_pool': '1', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}},
+ {'id': 3, 'tally_pool': '1', 'pool': True, 'votes': {'GvH': {}}},
+ {'id': 4, 'tally_pool': '2', 'pool': True, 'votes': {'AvB': {'Bob': 1}, 'CvD': {'Candy':True}}},
+ {'id': 5, 'tally_pool': '2', 'pool': True, 'votes': {'CvD': {'Elvis':True, 'Candy':False}, 'EvF': {}}}
]
cvr_list = CVR.from_dict(cvr_dicts)
print(f'{list([str(c) for c in cvr_list])}')
diff --git a/tests/core/test_CVR.py b/tests/core/test_CVR.py
index 0773373..8f2b3ad 100644
--- a/tests/core/test_CVR.py
+++ b/tests/core/test_CVR.py
@@ -314,24 +314,24 @@ def test_tabulate_votes(self):
def test_set_card_in_batch_lex(self):
cvrs = [CVR(id="B-100", votes={"city_council": {"Alice": 1}, "measure_1": {"yes": 1}}, phantom=False,
tally_pool="A"),
- CVR(id="B-90", votes={"city_council": {"Bob": 1}, "measure_1": {"yes": 1}}, phantom=False),
+ CVR(id="B-90", votes={"city_council": {"Bob": 1}, "measure_1": {"yes": 1}}, phantom=False,
tally_pool="A"),
- CVR(id="A-1", votes={"city_council": {"Bob": 1}, "measure_1": {"no": 1}}, phantom=False),
+ CVR(id="A-1", votes={"city_council": {"Bob": 1}, "measure_1": {"no": 1}}, phantom=False,
tally_pool="A"),
- CVR(id="A-20", votes={"city_council": {"Charlie": 1}}, phantom=False),
+ CVR(id="A-20", votes={"city_council": {"Charlie": 1}}, phantom=False,
tally_pool="A"),
- CVR(id="C-50", votes={"city_council": {"Doug": 1}}, phantom=False),
+ CVR(id="C-50", votes={"city_council": {"Doug": 1}}, phantom=False,
tally_pool="B"),
- CVR(id="6", votes={"measure_1": {"no": 1}}, phantom=False),
+ CVR(id="6", votes={"measure_1": {"no": 1}}, phantom=False,
tally_pool="B"),
CVR(id="7-B", votes={"city_council": {"Alice": 1}, "measure_1": {"yes": 1}, "measure_2": {"no":1}},
- phantom=False),
- tally_pool="B"),
+ phantom=False,
+ tally_pool="B"),
CVR(id="7-A", votes={"measure_1": {"no": 1}, "measure_2": {"yes": 1}}, phantom=False,
tally_pool="B")
]
tally_pool = {"A": ""}
- tally_pool_dict = CVR.set_card_in_batch_lex(cls, cvr_list=cvrs)
+ tally_pool_dict = CVR.set_card_in_batch_lex(cvr_list=cvrs)
assert cvrs[0].card_in_batch == 2
assert cvrs[1].card_in_batch == 3
assert cvrs[2].card_in_batch == 0
| | |