Website/slides/talks/2020-1-CIDR-Vizier/notes.txt

77 lines
2.5 KiB
Plaintext

--- Salary ---
1n9lRY5NxHjmXfqZfXJxytBqznzP_vWOPfSbv2rL-T38/Form Responses 1
--- by_language ---
-- Per-language summary of the `salaries` table: respondent count, years worked
-- in tech (average / minimum / maximum), and average annualized base salary
-- in USD. Only languages with more than two respondents are kept.
SELECT
    PRIMARY_LANGUAGE_TECHNOLOGY_STACK               AS LANGUAGE,
    COUNT(*)                                        AS tot,
    AVG(HOW_MANY_YEARS_HAVE_YOU_WORKED_IN_TECH)     AS YEARSWORKED,
    MIN(HOW_MANY_YEARS_HAVE_YOU_WORKED_IN_TECH)     AS yearsworked_min,
    MAX(HOW_MANY_YEARS_HAVE_YOU_WORKED_IN_TECH)     AS yearsworked_max,
    AVG(WHAT_IS_YOUR_ANNUALIZED_BASE_SALARY_IN_USD) AS salary
FROM salaries
GROUP BY PRIMARY_LANGUAGE_TECHNOLOGY_STACK
-- Repeat the aggregate instead of referencing the `tot` alias: SELECT aliases
-- are not visible to HAVING in standard SQL, so this form runs on every engine.
HAVING COUNT(*) > 2;
--- group by race_ethnicity ---
race_ethnicity = 'Other Race/ Ethnicity' or race_ethnicity = 'Black Non-Hispanic' or race_ethnicity = 'White Non-Hispanic' or race_ethnicity = 'Hispanic' or race_ethnicity = 'Asian and Pacific Islander' or race_ethnicity = 'Not Stated/Unknown'
-- Pivot of `causes`: one output row per year, one column per race_ethnicity
-- value, each holding the sum of `deaths` for that group in that year.
-- Rows whose race_ethnicity matches none of the listed values add 0 everywhere.
SELECT
    year,
    SUM(CASE race_ethnicity WHEN 'Other Race/ Ethnicity'      THEN deaths ELSE 0 END) AS Other,
    SUM(CASE race_ethnicity WHEN 'Black Non-Hispanic'         THEN deaths ELSE 0 END) AS Black_NH,
    SUM(CASE race_ethnicity WHEN 'White Non-Hispanic'         THEN deaths ELSE 0 END) AS White_NH,
    SUM(CASE race_ethnicity WHEN 'Hispanic'                   THEN deaths ELSE 0 END) AS Hispanic,
    SUM(CASE race_ethnicity WHEN 'Asian and Pacific Islander' THEN deaths ELSE 0 END) AS Asian_Pacific,
    SUM(CASE race_ethnicity WHEN 'Not Stated/Unknown'         THEN deaths ELSE 0 END) AS Unknown_Ethnicity
FROM causes
GROUP BY year
--- # Import matplotlib, generate a plot, and output it.
# Plot summed DEATHS per year, one line per RACE_ETHNICITY value, from the
# Vizier dataset 'causes', and print the figure to stdout as SVG text.
import matplotlib
import matplotlib.pyplot as plt
import io
# Switch to a non-display backend so the figure renders without a GUI.
plt.switch_backend('agg')
import numpy as np

# Get object for dataset with given name.
# `vizierdb` is not defined in this snippet; presumably it is injected by the
# Vizier Python cell runtime — verify before running elsewhere.
ds = vizierdb.get_dataset('causes')

# data[ethnicity][year] -> sum of DEATHS over every row with that pair.
data = dict()
for row in ds.rows:
    year = row.get_value('YEAR')
    ethnicity = row.get_value('RACE_ETHNICITY')
    deaths = row.get_value('DEATHS')
    # Rows without a death count contribute nothing to the totals.
    if deaths is not None:
        by_year = data.setdefault(ethnicity, dict())
        by_year[year] = by_year.get(year, 0) + deaths

# Data for plotting: one line per ethnicity, years in ascending order.
fig, ax = plt.subplots()
for ethnicity, by_year in data.items():
    years = sorted(by_year)
    ax.plot(
        years,
        [by_year[year] for year in years],
        label=ethnicity,
        linewidth=3,
    )

# NOTE(review): the plotted values are summed DEATHS, yet the y-axis label
# says 'Death Rate' — confirm which one is intended.
ax.set(xlabel='Year', ylabel='Death Rate')
ax.grid()
ax.legend(loc='upper left')

# Render into an in-memory buffer and emit the SVG markup as text; the print
# must stay inside the `with` because the buffer is closed on exit.
with io.BytesIO() as imgbytes:
    fig.savefig(imgbytes, format="svg")
    print(imgbytes.getvalue().decode("utf-8"))

# Release the figure so repeated runs in one interpreter do not accumulate
# open figures in pyplot's global state.
plt.close(fig)