Skip to content

Commit

Permalink
update collection and colors
Browse files Browse the repository at this point in the history
  • Loading branch information
gnanduru1 committed Jan 17, 2022
1 parent fb50352 commit 0eba93f
Show file tree
Hide file tree
Showing 26 changed files with 148 additions and 14,736 deletions.
1 change: 0 additions & 1 deletion big_countries.json

This file was deleted.

1 change: 0 additions & 1 deletion color.json

This file was deleted.

336 changes: 87 additions & 249 deletions color.py
Original file line number Diff line number Diff line change
@@ -1,259 +1,97 @@
import os
import json, time
import numpy as np
import pandas as pd
from math import factorial
import matplotlib.pyplot as plt
from scipy.optimize import curve_fit
import sys
import unidecode
import matplotlib.pyplot as plt
import json

# Arguments are a country, or a "region, country" name.
# Returns the current state of that place as a value from 0 to 1, where 1 is good and 0 is bad.
# Reads the input file and puts its values into regionDict, which is laid out as follows:
# regionDict["region, country"] = [(coordinates), (date, number of cases), ...] with one (date, number of cases) entry for every date that is given

# Path of the JSON file that run(file, typeOfDisplay, typeOfAnalysis) dumps its place -> colour mapping to.
printFile = 'color.json'

def _linearScoreToLevel(score):
    """Convert a "linear" analysis score into a 0 (bad) .. 1 (good) level.

    Raises:
        ValueError: if the score is below every band (or is NaN). Previously
            this case fell through the if/elif chain and crashed later on a
            stale or unbound ``color`` variable.
    """
    if score == -2.4:
        # Sentinel score meaning "No Cases".
        return 1
    # (lower bound, level) pairs, tested in order; the first match wins.
    bands = (
        (1.5, 0),     # Exponential Increase
        (1, 1/6),     # Decreasing Slope
        (.5, 2/6),    # Near 0, but increasing
        (0, 3/6),     # Near 0, but decreasing
        (-.5, 4/6),   # Cases decreasing, but by less and less
        (-1, 5/6),    # Cases decreasing by more and more
    )
    for lowerBound, level in bands:
        if score >= lowerBound:
            return level
    raise ValueError("linear score out of range: {!r}".format(score))


def run(file, typeOfDisplay, typeOfAnalysis):
    """Score every place in a JSON file and write a place -> colour mapping.

    Args:
        file: path of a JSON file shaped like ``{place: {date: count}}``.
        typeOfDisplay: ``"Normal"`` colours each place from its own score;
            ``"Relative"`` ranks all places and colours them in six bands.
            Any other value leaves the raw scores in the output.
        typeOfAnalysis: forwarded to ``displayRegion``; ``"linear"`` selects
            the banded mapping in "Normal" mode and ascending ranking in
            "Relative" mode.

    The result is dumped as JSON to ``printFile``; nothing is returned.
    """
    with open(file) as f:
        rawData = json.load(f)

    # Each place becomes a list of (date, count) tuples.
    regionDict = {
        place: [(date, count) for date, count in series.items()]
        for place, series in rawData.items()
    }
    retDict = {
        place: displayRegion(place, regionDict, typeOfAnalysis)
        for place in regionDict
    }

    if typeOfDisplay == "Normal":
        if typeOfAnalysis == "linear":
            for place, score in list(retDict.items()):
                if score == -10:
                    # -10 is displayRegion's "not enough data" sentinel.
                    retDict[place] = "(0, 0, 0)"
                    continue
                level = _linearScoreToLevel(score)
                retDict[place] = str(numToColor(level)).replace(" ", "")
        else:
            for place, score in list(retDict.items()):
                retDict[place] = str(numToColor(float(score))).replace(" ", "")

    elif typeOfDisplay == "Relative":
        ranked = sorted((score, place) for place, score in retDict.items())
        if typeOfAnalysis != "linear":
            ranked.reverse()
        total = len(ranked)
        for bucket in range(6):
            color = str(numToColor(1 - ((bucket + 1) / 6))).replace(" ", "")
            start = int(total * bucket / 6)
            # The "+ 1" makes neighbouring buckets overlap by one entry; the
            # later bucket overwrites the earlier colour.
            stop = int(total * (bucket + 1) / 6) + 1
            for _, place in ranked[start:stop]:
                retDict[place] = color

    with open(printFile, 'w') as f:
        json.dump(retDict, f, ensure_ascii=False)

def displayRegion(region, regionDict, t):
    """Compute the score for one region.

    Args:
        region: key into ``regionDict``.
        regionDict: maps each region to a list of ``(date, count)`` entries.
        t: ``"linear"`` scores with ``linearFit``; anything else scores with
            ``reopenState``.

    Returns:
        The score from the chosen analysis, or ``-10`` when the series is too
        short to analyse.
    """
    # Sum the counts per date, walking the entries in sorted order.
    totals = {}
    for entry in sorted(regionDict[region][::-1]):
        totals[entry[0]] = totals.get(entry[0], 0) + entry[1]

    series = list(totals.items())

    # The most recent entry is left out of both series.
    x = list(range(len(series)))[:-1]
    y = [float(pair[1]) for pair in series][:-1]

    # Day-over-day differences of y.
    changeY = [later - earlier for earlier, later in zip(y, y[1:])]

    daysBack = 15
    if not (x and y and changeY):
        return (-10)
    if t == "linear":
        return linearFit(x, y, changeY, daysBack)
    return reopenState(y, daysBack)


def origional(x, y, changeY):
    """Classify a trend into a colour string and a short description.

    Args:
        x: sequence whose last element drives the "No Cases" and "Plateau"
            checks.
        y: not used by this function.
        changeY: sequence of per-step changes; only the last six are read.

    Returns:
        ``(color, meaning)`` where ``color`` is the ``numToColor`` tuple
        rendered as a string without spaces.

    NOTE(review): the checks read ``x[-1]``, not ``y[-1]`` - confirm which
    series callers are expected to pass as ``x``.
    """
    latest = x[-1]
    if latest == 0:
        level, meaning = 1, "No Cases"
    else:
        recent = sum(changeY[-3:])
        if recent / 3 < 0:
            level, meaning = .75, "Decreasing"
        elif recent / 3 < .01 * latest:
            level, meaning = .5, "Plateau"
        elif recent < sum(changeY[-6:-3]):
            level, meaning = .25, "Decreasing Slope"
        else:
            level, meaning = 0, "Increasing Slope"

    rendered = str(numToColor(level)).replace(" ", "")
    return (rendered, meaning)

def linearFit(x,y,changeY, daysBack):
newX = x[-daysBack:]
newY = y[-daysBack:]

newChangeY = changeY[-daysBack:]

if len(newChangeY) != len(newX):
newX = newX[:-len(newChangeY)]
newY = newY[:-len(newChangeY)]

avg = (sum([x for x in newChangeY]))
return (max(newY) - min(newY))/(max(newY))

if newY[-1] == 0:
changeNum = -3
elif abs(avg) < .03*y[-1]:
changeNum = 0
elif avg > 0:
# if avg > .07*y[-1]:
# changeNum = 1.25
# else:
changeNum = 1
else:
changeNum = -1

if sum(newY) and sum(newX):
b, a = best_fit(newX, newChangeY)
else:
return -10
# Hex colours that the zero-argument run() stores in colorDict. The conditions
# below are the ones run() tests, in the order run() tests them.
# RED: slope > 0 and concavity > 0 (last branch).
RED = '#ff0000'
# R_ORANGE: slope > 0 and abs(concavity) < 1.
R_ORANGE = '#ff5400'
# ORANGE: slope > 0 and concavity < 0.
ORANGE = '#ffb700'
# YELLOW: slope < 20 (reached only when slope is not negative).
YELLOW = '#ffff00'
# Y_GREEN: slope < 0.
Y_GREEN = '#c8ff00'
# GREEN: last sampled value is below 10.
GREEN = '#00ff00'
# GREY: fewer than 5 samples.
GREY = "#808080"

# JSON file that run() loads with pd.read_json.
input_file = 'data/all.json'
# JSON file that colorDict is dumped to.
output_file = 'data/color2.json'

def savitzky_golay(y, window_size, order, deriv=0, rate=1):
    """Smooth a 1-D signal, or estimate its derivative, with a Savitzky-Golay filter.

    Args:
        y: 1-D sequence or array of samples.
        window_size: length of the fitting window; must be a positive odd
            integer and at least ``order + 2``.
        order: degree of the polynomial fitted inside each window.
        deriv: order of the derivative to return (0 means smoothing only).
        rate: scale factor applied as ``rate**deriv``.

    Returns:
        A NumPy array with the same length as ``y``.

    Raises:
        ValueError: if ``window_size`` or ``order`` cannot be converted to int.
        TypeError: if ``window_size`` is not a positive odd number or is too
            small for ``order`` (exception types kept from the original).
    """
    try:
        # Built-in int/abs replace np.int, which was removed from NumPy.
        window_size = abs(int(window_size))
        order = abs(int(order))
    except ValueError:
        raise ValueError("window_size and order have to be of type int")
    if window_size % 2 != 1 or window_size < 1:
        raise TypeError("window_size size must be a positive odd number")
    if window_size < order + 2:
        raise TypeError("window_size is too small for the polynomials order")

    # Accept plain sequences as well as arrays.
    y = np.asarray(y)
    order_range = range(order + 1)
    half_window = (window_size - 1) // 2

    # Precompute coefficients. A plain ndarray replaces np.mat, which was
    # removed from NumPy; pinv returns an ndarray, so no ``.A`` is needed.
    b = np.array([[k**i for i in order_range]
                  for k in range(-half_window, half_window + 1)])
    m = np.linalg.pinv(b)[deriv] * rate**deriv * factorial(deriv)

    # Pad the signal at both ends with values mirrored from the signal itself
    # so the "valid" convolution returns len(y) samples.
    firstvals = y[0] - np.abs(y[1:half_window + 1][::-1] - y[0])
    lastvals = y[-1] + np.abs(y[-half_window - 1:-1][::-1] - y[-1])
    y = np.concatenate((firstvals, y, lastvals))
    return np.convolve(m[::-1], y, mode='valid')

def run():
    """Assign a colour to every column of the JSON table in ``input_file``.

    Each column is sorted by index, stripped of nulls and thinned with a step
    of ``len // 40`` (at least 1). The thinned series is then classified:

    * no data at all, or fewer than 5 samples -> GREY
    * last sample below 10 -> GREEN (checked before the sample-count test)
    * otherwise by the summed last three first/second Savitzky-Golay
      derivatives (``slope`` / ``concavity``), see the branch chain below.

    Results are collected in the local ``colorDict``; progress is printed as
    ``done/total`` on a single console line.
    """
    colorDict = {}
    with open(input_file) as f:
        df = pd.read_json(f)
    cnt = 0
    for name in set(df.columns):
        cnt += 1
        target = df[name].sort_index().dropna()
        if len(target) == 0:
            # A column with no usable values would otherwise raise IndexError
            # on target.index[0] / cases[-1]; treat it as "not enough data".
            colorDict[name] = GREY
            continue
        dates = np.asarray([(i-target.index[0]).days for i in target.index])
        dates = dates[::len(dates)//40 or 1]
        cases = target.values[::len(target.values)//40 or 1]
        if cases[-1] < 10:
            colorDict[name] = GREEN
            continue
        if len(cases) < 5:
            colorDict[name] = GREY
            continue

        # Sum the last three values of the 1st/2nd derivatives of the
        # cases-vs-time curve; together they describe its recent behaviour.
        d1 = savitzky_golay(cases, 51, 3, deriv=1)
        d2 = savitzky_golay(cases, 51, 3, deriv=2)
        slope = sum(d1[-3:])
        concavity = sum(d2[-3:])

        if slope < 0:
            colorDict[name] = Y_GREEN
        elif slope < 20:
            colorDict[name] = YELLOW
        elif slope > 0 and concavity < 0:
            colorDict[name] = ORANGE
        elif slope > 0 and abs(concavity) < 1:
            colorDict[name] = R_ORANGE
        elif slope > 0 and concavity > 0:
            colorDict[name] = RED

        # GRAPHICAL DISPLAY
        # WILL KEEP SHOWING DATA + CORRESPONDING COLOR UNTIL CTRL+C IS PRESSED
        # CLOSE PLOT WINDOW TO SHOW NEXT REGION

        # fig, axs = plt.subplots(1,3)
        # axs[0].scatter(dates, cases, s=2, c=colorDict[name])
        # axs[1].scatter(dates, d1, s=2,c=colorDict[name])
        # axs[2].scatter(dates, d2, s=2,c=colorDict[name])
        # fig.canvas.set_window_title(name)
        # plt.show()
        # plt.close()

        print('{}/{}'.format(cnt, len(df.columns)), end='\r')

if a == 0:
temp = .5
elif a > 0:
temp = .5 + a/9000000000
elif a < 0:
temp = -a/9000000000

colorNum = temp
if changeNum == -3:
colorNum = -2.4
else:
colorNum += changeNum

return colorNum
# plt.clf()
# plt.scatter(newX, newChangeY)
# yfit = [a + b * xi for xi in newX]
# plt.plot(newX, yfit)

def reopenState(y, daysBack):
    """Score the recent trend of ``y`` from 0 (bad) to 1 (good).

    Only the last ``daysBack`` values are considered. Each day-over-day change
    is encoded as "1" (increase) or "0" (no increase). Badness is the number
    of increases multiplied by the 1-based position of the latest increase.
    The division by the squared string length normalizes badness to 0..1.

    Args:
        y: sequence of numeric values, oldest first.
        daysBack: how many trailing values to inspect.

    Returns:
        ``1 - badness`` as a float. ``1.0`` is returned when the window holds
        fewer than two values, since there is no change to judge (this case
        used to raise ZeroDivisionError).
    """
    tmSqnc = y[-daysBack:]
    deltas = [later - earlier for earlier, later in zip(tmSqnc, tmSqnc[1:])]
    if not deltas:
        return 1.0

    # zo = zero/one string, e.g. deltas [23, -7, 0] -> "100"
    zo = "".join("1" if delta > 0 else "0" for delta in deltas)
    last = 1 + zo.rfind('1')  # 1+ => better normalization
    pct = zo.count('1')
    badness = pct * last / len(zo) ** 2
    return 1 - badness

def best_fit(X, Y):
    """Fit a least-squares line through the points ``(X[i], Y[i])``.

    Args:
        X: sequence of x values.
        Y: sequence of y values, paired with ``X`` by position.

    Returns:
        ``(intercept, slope)`` of the fitted line, in that order.
    """
    count = len(X)
    mean_x = sum(X) / len(X)
    mean_y = sum(Y) / len(Y)

    covariance_sum = sum(px * py for px, py in zip(X, Y)) - count * mean_x * mean_y
    variance_sum = sum(px ** 2 for px in X) - count * mean_x ** 2

    slope = covariance_sum / variance_sum
    intercept = mean_y - slope * mean_x
    return intercept, slope

#Takes number 0 (bad) to 1 (good) and returns tuple of color
def numToColor(num):
    """Map a number from 0 (bad) to 1 (good) onto an ``(red, green, blue)`` tuple.

    Values up to 0.5 keep red at 255 and scale green up; values above 0.5
    keep green at 255 and scale red down. Blue is always 0.
    """
    if num <= .5:
        return (255, int(255 * (num/.5)), 0)
    return (int(255 - (255 * (num-.5)/.5)), 255, 0)

def translate(value, leftMin, leftMax, rightMin, rightMax):
    """Linearly rescale ``value`` from [leftMin, leftMax] to [rightMin, rightMax].

    Returns:
        The rescaled value; the arithmetic is done in floating point.
    """
    fraction = float(value - leftMin) / float(leftMax - leftMin)
    return rightMin + (fraction * (rightMax - rightMin))

#ACCEPTS COUNTRY OR COUNTRY REGION
#if len(sys.argv) == 3:
# print (displayRegion(sys.argv[2] + ", " + sys.argv[1], regionDict))
#elif len(sys.argv) == 2:
# print (displayRegion(sys.argv[1], regionDict))

#Uncomment to show graph
#plt.show()
def crawl():
run("global.json", "Relative", "linear")
with open(output_file, 'w') as f: json.dump(colorDict, f, ensure_ascii=False)

if __name__ == '__main__':
crawl()
run()
Loading

0 comments on commit 0eba93f

Please sign in to comment.