#!/usr/bin/env python3
#
# This script should produce plots like the ones displayed here:
#
# https://sciting.eu/sciting/samples/covid-19-in-israel/
#
# It is extremely ugly, dirty and whimsical in a negative sense
# Sorry, I have no time for this anymore
# For the dependencies, please, check the import lines and the comments

import glob
import locale
import os
import time
import urllib.request
from datetime import datetime
from datetime import timedelta

import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.pyplot as plt
import pandas as pd
from matplotlib.dates import date2num
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

# The locale name is spelled differently across platforms; try the known
# spellings and silently keep the default locale if none is installed.
for _locale_name in ('en_US.utf8', 'en_US.UTF-8'):
    try:
        locale.setlocale(locale.LC_TIME, _locale_name)
        break
    except locale.Error:
        continue

# Divisor that turns "people vaccinated" into a percentage of the population
# (1 % of roughly 9.053 million inhabitants).
PEOPLE_PER_PERCENT = 90530
# Population in millions, used to scale deaths to "per 10M inhabitants".
POPULATION_MILLIONS = 9.053

# Lockdown periods shaded in red on every plot.  The last lockdown is drawn
# from LAST_LOCKDOWN_START up to the last date present in the plotted data.
LOCKDOWNS = [
    ('2020-04-07', '2020-04-16'),
    ('2020-09-11', '2020-11-01'),
]
LAST_LOCKDOWN_START = '2020-12-27'

# XPath of the download link on the data.gov.il resource pages.
DOWNLOAD_LINK_XPATH = '//*[@id="content"]/div[3]/section/div/p/a'
# Upper bound (seconds) for a single browser download before giving up.
DOWNLOAD_TIMEOUT = 3600

cwd = os.getcwd()
down = cwd + '/'


def _first_date_reaching(frame, percent):
    """Return the date of the first row whose vaccination percentage is at
    least *percent*.

    Using ``>=`` instead of ``==`` keeps the lookup working when the rounded
    percentage jumps over the exact threshold from one day to the next.

    Raises:
        ValueError: if the threshold has not been reached in *frame*.
    """
    reached = frame.loc[frame['percentage_vaccinated'] >= percent, 'date']
    if reached.empty:
        raise ValueError(
            'No row with at least %d%% vaccinated in the OWID data' % percent)
    return reached.iloc[0].date()


def _wait_for_download(pattern, timeout=DOWNLOAD_TIMEOUT, poll=3):
    """Poll the download directory until a file matching *pattern* appears.

    Returns the path of the first match.

    Raises:
        TimeoutError: if nothing matching shows up within *timeout* seconds.
    """
    deadline = time.monotonic() + timeout
    while True:
        found = glob.glob(os.path.join(down, pattern))
        if found:
            return str(found[0])
        if time.monotonic() >= deadline:
            raise TimeoutError(
                'Download matching %r did not finish within %d seconds'
                % (pattern, timeout))
        time.sleep(poll)


def _download_resource(browser, wait, url, pattern):
    """Open *url*, click its download link and wait for the CSV file.

    Returns the path of the downloaded file.
    """
    browser.get(url)
    wait.until(EC.presence_of_element_located((By.ID, 'content')))
    # find_element_by_xpath() no longer exists in Selenium 4; locate the link
    # through By.XPATH and wait until it can actually be clicked.
    link = wait.until(EC.element_to_be_clickable((By.XPATH, DOWNLOAD_LINK_XPATH)))
    link.click()
    return _wait_for_download(pattern)


def _plot_daily_bars(dates, values, title, out_path, last_date, milestones):
    """Save a bar chart of *values* over *dates* to *out_path*.

    Lockdown periods are shaded in red and each day in *milestones*
    (vaccination milestones) is marked with an orange band.  *last_date* is
    the ISO date string closing the final lockdown band.
    """
    plt.rcParams['figure.figsize'] = (10, 5)
    plt.rcParams['xtick.labelsize'] = 6
    fig, ax = plt.subplots()
    ax.bar(dates, values)
    ax.autoscale(enable=True, axis='x', tight=True)
    ax.xaxis.set_major_locator(mdates.DayLocator(interval=7))
    ax.xaxis.set_major_formatter(mdates.DateFormatter('%d-%m-%Y'))
    ax.yaxis.grid(True)
    ax.set_axisbelow(True)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    fig.autofmt_xdate()
    plt.title(title)
    plt.tight_layout()
    lockdown = mpatches.Patch(color='red', alpha=0.1, label='Lockdowns')
    orangep = mpatches.Patch(color='orange', alpha=0.3,
                             label='0 | 25 | 50% vaccinated')
    plt.legend(handles=[orangep, lockdown], frameon=False)
    ax.xaxis.set_label_text('sciting.eu', color='#22697f')
    for start, end in LOCKDOWNS + [(LAST_LOCKDOWN_START, last_date)]:
        ax.axvspan(*mdates.datestr2num([start, end]),
                   color='red', alpha=0.1, lw=0)
    for day in milestones:
        ax.axvspan(*mdates.datestr2num([str(day), str(day + timedelta(days=1))]),
                   color='orange', alpha=0.3, lw=0)
    plt.savefig(out_path, dpi=300, bbox_inches='tight')
    plt.close()


# Clean up the debris from previous runs
# This is a very unsafe way of doing things,
# so you better run the script
# from its own empty directory
for stale in os.listdir(down):
    if not stale.endswith('.csv'):
        continue
    os.remove(os.path.join(down, stale))

# Vaccination milestones
url = 'https://raw.githubusercontent.com/owid/covid-19-data/master/public/data/vaccinations/country_data/Israel.csv'
vac_raw_path = os.path.join(cwd, 'Israel_Vaccination_OWID_raw.csv')
with urllib.request.urlopen(url) as response, open(vac_raw_path, 'wb') as out_file:
    out_file.write(response.read())

df_vac = pd.read_csv(vac_raw_path, sep=',',
                     usecols=['date', 'people_vaccinated']).fillna(0)
df_vac['date'] = pd.to_datetime(df_vac.date)
df_vac['percentage_vaccinated'] = (
    df_vac['people_vaccinated'].div(PEOPLE_PER_PERCENT).round(0).astype('Int64'))

# Dates on which 0 %, 25 % and 50 % of the population had been vaccinated
dzr = _first_date_reaching(df_vac, 0)
dtf = _first_date_reaching(df_vac, 25)
dft = _first_date_reaching(df_vac, 50)
milestones = [dzr, dtf, dft]

df_vac.to_csv(os.path.join(cwd, 'Israel_Vaccination_OWID.csv'), index=False)

#
# Download the test and hospitalisation data from
#
# https://data.gov.il/dataset/covid-19/
#
# Requires the Google Chrome browser and the Selenium
# Chrome WebDriver which must be in your path
#
prefs = {
    'download': {
        'default_directory': down,
        'prompt_for_download': False,
        'directory_upgrade': True,
    },
}

chrome_options = Options()
chrome_options.add_experimental_option("prefs", prefs)
chrome_options.add_argument('--disable-extensions')
chrome_options.add_argument('disable-blink-features=AutomationControlled')
chrome_options.add_experimental_option('excludeSwitches', ['enable-automation'])
chrome_options.add_experimental_option('useAutomationExtension', False)
# The site is protected against bot scraping
# Headless mode won't work
# A browser window will pop up and vanish after a while

browser = webdriver.Chrome(options=chrome_options)
try:
    wait = WebDriverWait(browser, 60)
    hfile = _download_resource(
        browser, wait,
        'https://data.gov.il/dataset/covid-19/resource/e4bf0ab8-ec88-4f9b-8669-f2cc78273edd',
        'corona_hospitalization_ver_*.csv')
    tfile = _download_resource(
        browser, wait,
        'https://data.gov.il/dataset/covid-19/resource/d337959a-020a-4ed3-84f7-fca182292308',
        'corona_tested_individuals_ver_*.csv')
finally:
    # Always close the browser, even when a download step fails
    browser.quit()

##############################
#                            #
#      POSITIVITY RATIO      #
# (POSITIVES PER 1000 TESTS) #
#                            #
##############################

df = pd.read_csv(tfile, sep=',',
                 usecols=['test_date', 'corona_result']).fillna(0)
# Rename by column name: usecols keeps the file's column order, so assigning
# a positional list of names could silently swap the columns.
df = df.rename(columns={'test_date': 'Date', 'corona_result': 'Result'})
df = df[['Date', 'Result']]
df['Result'] = df['Result'].replace(
    {'שלילי': 'Negative', 'חיובי': 'Positive', 'אחר': 'Other'})
df = df[df['Result'] != 'Other'].copy()
# Tests per day (positives and negatives together)
df['Total'] = df.groupby('Date')['Date'].transform('count')
df = df[df['Result'] == 'Positive'].copy()
df['Positives'] = df.groupby('Date')['Date'].transform('count')
# One row per day is enough from here on
df = df.drop_duplicates().drop(columns='Result')
df['P_Ratio'] = ((df['Positives'] / df['Total']) * 1000).round()
df.to_csv(os.path.join(cwd, 'Israel_Covid19_Positive_Ratio.csv'), index=False)

df['Date'] = pd.to_datetime(df['Date']).dt.date
last_date = str(df['Date'].max())

###########
# PLOT PR #
###########

_plot_daily_bars(
    df['Date'], df['P_Ratio'],
    'Daily COVID-19 Positives per 1000 Tests Israel',
    os.path.join(cwd, 'Israel_Covid19_Positive_Ratio.png'),
    last_date, milestones)

####################
#                  #
# HOSPITALISATIONS #
#                  #
####################

hospital_columns = {
    'תאריך': 'Date',
    'מאושפזים': 'Total',
    'חולים קל': 'Mild',
    'חולים בינוני': 'Moderate',
    'חולים קשה': 'Severe',
}
columnh = ['Total', 'Mild', 'Moderate', 'Severe']

df = pd.read_csv(hfile, sep=',', usecols=list(hospital_columns)).fillna(0)
df = df.rename(columns=hospital_columns)[list(hospital_columns.values())]
# Counts below 15 are published as the string '<15'; use 7 as a stand-in
# so the columns can be converted to integers.
df[columnh] = df[columnh].replace('<15', 7).astype('int')
df.to_csv(os.path.join(cwd, 'Israel_Covid19_Hospitalisations.csv'), index=False)

df['Date'] = pd.to_datetime(df['Date']).dt.date
last_date = str(df['Date'].max())

##########
# PLOT H #
##########

for ch in columnh:
    print(ch)
    _plot_daily_bars(
        df['Date'], df[ch],
        'Israel Daily COVID-19 Hospitalisations ' + ch,
        os.path.join(cwd, 'Israel_Covid19_Hospitalisations_' + ch + '.png'),
        last_date, milestones)

##########
#        #
# DEATHS #
#        #
##########

# We download the mortality data from the WHO because
# it's more up-to-date than the CSV files from Israeli
# sources which are only issued once every few days
# They may be parsing the daily PDF files
# Also the WHO dataset site doesn't fight bots as fiercely
# as the Israel government

url = 'https://covid19.who.int/WHO-COVID-19-global-data.csv'
who_raw_path = os.path.join(cwd, 'COVID19_worldwide_raw.csv')

# The download is sent with a browser-like User-agent header
opener = urllib.request.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
urllib.request.install_opener(opener)
# urlretrieve overwrites an existing file, so no prior removal is needed
urllib.request.urlretrieve(url, who_raw_path)

df = pd.read_csv(who_raw_path, sep=',').fillna(0)
df = df[['Date_reported', 'Country', 'New_deaths']]
df = df[df['Country'].str.match('Israel') == True]
df = df[df['New_deaths'] != 0].copy()
df['Deaths_10M'] = (df['New_deaths'] * 10) / POPULATION_MILLIONS
df['Date_reported'] = pd.to_datetime(df['Date_reported']).dt.date
# Not used for plotting; kept so the exported CSV keeps its columns
df['Colour'] = 'blue'
last_date = str(df['Date_reported'].max())
df.to_csv(os.path.join(cwd, 'Israel_Covid19_Mortality.csv'), index=False)

##########
# PLOT D #
##########

_plot_daily_bars(
    df['Date_reported'], df['Deaths_10M'],
    'Daily COVID-19 Deaths in Israel per 10M',
    os.path.join(cwd, 'Israel_Covid19_Deaths_Daily.png'),
    last_date, milestones)