Copied!







import sys
sys.version_info

import sys
sys.version_info

sys.version_info(major=3, minor=4, micro=3, releaselevel='final', serial=0)





Copied!







import sys
sys.version_info

import sys
sys.version_info

sys.version_info(major=3, minor=4, micro=3, releaselevel='final', serial=0)





Copied!







import numpy as np
np.__version__

import numpy as np
np.__version__

'1.9.2'





Copied!







import numpy as np
np.__version__

import numpy as np
np.__version__

'1.9.2'





Copied!







import pandas as pd
pd.__version__

import pandas as pd
pd.__version__

'0.16.0'





Copied!







import pandas as pd
pd.__version__

import pandas as pd
pd.__version__

'0.16.0'





Copied!







%matplotlib inline

%matplotlib inline





Copied!







%matplotlib inline

%matplotlib inline





Copied!







import matplotlib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
matplotlib.__version__

import matplotlib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
matplotlib.__version__

'1.4.3'





Copied!







import matplotlib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
matplotlib.__version__

import matplotlib
import matplotlib.pyplot as plt
plt.style.use('ggplot')
matplotlib.__version__

'1.4.3'





Copied!







import bokeh.plotting as plt
from bokeh.models import HoverTool
plt.output_notebook()
import bokeh
bokeh.__version__

import bokeh.plotting as plt
from bokeh.models import HoverTool
plt.output_notebook()
import bokeh
bokeh.__version__

'0.8.2'





Copied!







import bokeh.plotting as plt
from bokeh.models import HoverTool
plt.output_notebook()
import bokeh
bokeh.__version__

import bokeh.plotting as plt
from bokeh.models import HoverTool
plt.output_notebook()
import bokeh
bokeh.__version__

'0.8.2'





Copied!







# Read the 'BlackIncome' CSV into `black`. skiprows=[0] discards the first line
# of the file, so the column names are taken from the file's second line.
black = pd.read_csv('BlackIncome/ACS_13_5YR_B19001B_with_ann.csv', encoding='cp1252', skiprows=[0])

black = pd.read_csv('BlackIncome/ACS_13_5YR_B19001B_with_ann.csv', encoding='cp1252', skiprows=[0])





Copied!







black = pd.read_csv('BlackIncome/ACS_13_5YR_B19001B_with_ann.csv', encoding='cp1252', skiprows=[0])

black = pd.read_csv('BlackIncome/ACS_13_5YR_B19001B_with_ann.csv', encoding='cp1252', skiprows=[0])





Copied!







black.head()

black.head()





Copied!







black.head()

black.head()





Copied!







black.set_index('Geography', inplace=True)

black.set_index('Geography', inplace=True)





Copied!







black.set_index('Geography', inplace=True)

black.set_index('Geography', inplace=True)





Copied!







black.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)

black.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)





Copied!







black.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)

black.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)





Copied!







margin_cols = [col for col in black.columns if col.startswith('Margin of Error')]

margin_cols = [col for col in black.columns if col.startswith('Margin of Error')]





Copied!







margin_cols = [col for col in black.columns if col.startswith('Margin of Error')]

margin_cols = [col for col in black.columns if col.startswith('Margin of Error')]





Copied!







black.drop(margin_cols, axis=1, inplace=True)

black.drop(margin_cols, axis=1, inplace=True)





Copied!







black.drop(margin_cols, axis=1, inplace=True)

black.drop(margin_cols, axis=1, inplace=True)





Copied!







black.head()

black.head()





Copied!







black.head()

black.head()





Copied!







# One representative income per bracket column, listed in column order: 16
# values for the 16 "Estimate; Total: - ..." brackets that remain in `black`
# (from "Less than $10,000" up to "$200,000 or more").
# Most entries sit at the middle of their bracket (e.g. 12500 for
# "$10,000 to $14,999").
# NOTE(review): 10000 and 200000 are bracket edges rather than midpoints, and
# 187500 is not the middle of "$150,000 to $199,999" (that would be 175000) —
# confirm these values are intended before reusing the averages.
weights = [10000, 12500, 17500, 22500, 27500, 32500, 37500, 42500, 47500, 55000, 67500, 87500, 112500, 137500, 187500, 200000]

weights = [10000, 12500, 17500, 22500, 27500, 32500, 37500, 42500, 47500, 55000, 67500, 87500, 112500, 137500, 187500, 200000]





Copied!







weights = [10000, 12500, 17500, 22500, 27500, 32500, 37500, 42500, 47500, 55000, 67500, 87500, 112500, 137500, 187500, 200000]

weights = [10000, 12500, 17500, 22500, 27500, 32500, 37500, 42500, 47500, 55000, 67500, 87500, 112500, 137500, 187500, 200000]





Copied!







weights = pd.Series(weights, index=black.columns)

weights = pd.Series(weights, index=black.columns)





Copied!







weights = pd.Series(weights, index=black.columns)

weights = pd.Series(weights, index=black.columns)





Copied!







def weight_average(x, bin_weights=None):
    """Return the weighted average of the bracket values for one row of counts.

    Parameters
    ----------
    x : pandas.Series
        Counts per bracket, labelled the same way as the weights.
    bin_weights : pandas.Series, optional
        Representative value per bracket. When omitted, the module-level
        ``weights`` Series is used, so ``df.apply(weight_average, axis=1)``
        keeps working unchanged.

    Returns
    -------
    float
        ``(x * w).sum() / x.sum()``, or NaN when the counts sum to zero.
    """
    w = weights if bin_weights is None else bin_weights
    total = x.sum()
    if total == 0:
        # An all-zero row has no defined average. Return NaN directly rather
        # than dividing 0 by 0, which gives the same NaN but may emit a NumPy
        # RuntimeWarning for every such row.
        return float('nan')
    return (x * w).sum() / total
black.head().apply(weight_average, axis=1)

def weight_average(x, bin_weights=None):
    """Return the weighted average of the bracket values for one row of counts.

    Parameters
    ----------
    x : pandas.Series
        Counts per bracket, labelled the same way as the weights.
    bin_weights : pandas.Series, optional
        Representative value per bracket. When omitted, the module-level
        ``weights`` Series is used, so ``df.apply(weight_average, axis=1)``
        keeps working unchanged.

    Returns
    -------
    float
        ``(x * w).sum() / x.sum()``, or NaN when the counts sum to zero.
    """
    w = weights if bin_weights is None else bin_weights
    total = x.sum()
    if total == 0:
        # An all-zero row has no defined average. Return NaN directly rather
        # than dividing 0 by 0, which gives the same NaN but may emit a NumPy
        # RuntimeWarning for every such row.
        return float('nan')
    return (x * w).sum() / total
black.head().apply(weight_average, axis=1)

Geography
Abanda CDP, Alabama                  NaN
Abbeville city, Alabama     27993.902439
Adamsville city, Alabama    58901.709402
Addison town, Alabama                NaN
Akron town, Alabama         29576.271186
dtype: float64





Copied!







def weight_average(x, bin_weights=None):
    """Return the weighted average of the bracket values for one row of counts.

    Parameters
    ----------
    x : pandas.Series
        Counts per bracket, labelled the same way as the weights.
    bin_weights : pandas.Series, optional
        Representative value per bracket. When omitted, the module-level
        ``weights`` Series is used, so ``df.apply(weight_average, axis=1)``
        keeps working unchanged.

    Returns
    -------
    float
        ``(x * w).sum() / x.sum()``, or NaN when the counts sum to zero.
    """
    w = weights if bin_weights is None else bin_weights
    total = x.sum()
    if total == 0:
        # An all-zero row has no defined average. Return NaN directly rather
        # than dividing 0 by 0, which gives the same NaN but may emit a NumPy
        # RuntimeWarning for every such row.
        return float('nan')
    return (x * w).sum() / total
black.head().apply(weight_average, axis=1)

def weight_average(x, bin_weights=None):
    """Return the weighted average of the bracket values for one row of counts.

    Parameters
    ----------
    x : pandas.Series
        Counts per bracket, labelled the same way as the weights.
    bin_weights : pandas.Series, optional
        Representative value per bracket. When omitted, the module-level
        ``weights`` Series is used, so ``df.apply(weight_average, axis=1)``
        keeps working unchanged.

    Returns
    -------
    float
        ``(x * w).sum() / x.sum()``, or NaN when the counts sum to zero.
    """
    w = weights if bin_weights is None else bin_weights
    total = x.sum()
    if total == 0:
        # An all-zero row has no defined average. Return NaN directly rather
        # than dividing 0 by 0, which gives the same NaN but may emit a NumPy
        # RuntimeWarning for every such row.
        return float('nan')
    return (x * w).sum() / total
black.head().apply(weight_average, axis=1)

Geography
Abanda CDP, Alabama                  NaN
Abbeville city, Alabama     27993.902439
Adamsville city, Alabama    58901.709402
Addison town, Alabama                NaN
Akron town, Alabama         29576.271186
dtype: float64





Copied!







estimate_cols = [col for col in black.columns if col.startswith('Estimate; Total:')]

estimate_cols = [col for col in black.columns if col.startswith('Estimate; Total:')]





Copied!







estimate_cols = [col for col in black.columns if col.startswith('Estimate; Total:')]

estimate_cols = [col for col in black.columns if col.startswith('Estimate; Total:')]





Copied!







black['average'] = black[estimate_cols].apply(weight_average, axis=1)

black['average'] = black[estimate_cols].apply(weight_average, axis=1)





Copied!







black['average'] = black[estimate_cols].apply(weight_average, axis=1)

black['average'] = black[estimate_cols].apply(weight_average, axis=1)





Copied!







black.head(2)

black.head(2)





Copied!







black.head(2)

black.head(2)





Copied!







black.average.hist(bins=25, color='black')

black.average.hist(bins=25, color='black')

<matplotlib.axes._subplots.AxesSubplot at 0x10cf75080>





Copied!







black.average.hist(bins=25, color='black')

black.average.hist(bins=25, color='black')

<matplotlib.axes._subplots.AxesSubplot at 0x10cf75080>





Copied!







white = pd.read_csv('WhiteIncome/ACS_13_5YR_B19001A_with_ann.csv', encoding='cp1252', skiprows=[0])

white = pd.read_csv('WhiteIncome/ACS_13_5YR_B19001A_with_ann.csv', encoding='cp1252', skiprows=[0])





Copied!







white = pd.read_csv('WhiteIncome/ACS_13_5YR_B19001A_with_ann.csv', encoding='cp1252', skiprows=[0])

white = pd.read_csv('WhiteIncome/ACS_13_5YR_B19001A_with_ann.csv', encoding='cp1252', skiprows=[0])





Copied!







white.head(2)

white.head(2)





Copied!







white.head(2)

white.head(2)





Copied!







# Key the table by place name, then strip everything that is not a per-bracket
# estimate: the two identifier columns, the grand total, and every
# margin-of-error column.
white.set_index('Geography', inplace=True)
margin_cols = [label for label in white.columns if label.startswith('Margin of Error')]
white.drop(['Id', 'Id2', 'Estimate; Total:'] + margin_cols, axis=1, inplace=True)

white.set_index('Geography', inplace=True)
white.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)
margin_cols = [col for col in white.columns if col.startswith('Margin of Error')]
white.drop(margin_cols, axis=1, inplace=True)





Copied!







white.set_index('Geography', inplace=True)
white.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)
margin_cols = [col for col in white.columns if col.startswith('Margin of Error')]
white.drop(margin_cols, axis=1, inplace=True)

white.set_index('Geography', inplace=True)
white.drop(['Id', 'Id2', 'Estimate; Total:'], axis=1, inplace=True)
margin_cols = [col for col in white.columns if col.startswith('Margin of Error')]
white.drop(margin_cols, axis=1, inplace=True)





Copied!







estimate_cols = [col for col in white.columns if col.startswith('Estimate; Total:')]
white['average'] = white[estimate_cols].apply(weight_average, axis=1)

estimate_cols = [col for col in white.columns if col.startswith('Estimate; Total:')]
white['average'] = white[estimate_cols].apply(weight_average, axis=1)





Copied!







estimate_cols = [col for col in white.columns if col.startswith('Estimate; Total:')]
white['average'] = white[estimate_cols].apply(weight_average, axis=1)

estimate_cols = [col for col in white.columns if col.startswith('Estimate; Total:')]
white['average'] = white[estimate_cols].apply(weight_average, axis=1)





Copied!







white.average.hist(bins=25, color='black')

white.average.hist(bins=25, color='black')

<matplotlib.axes._subplots.AxesSubplot at 0x109eeb940>





Copied!







white.average.hist(bins=25, color='black')

white.average.hist(bins=25, color='black')

<matplotlib.axes._subplots.AxesSubplot at 0x109eeb940>





Copied!







black_and_white = black[['average']].join(white[['average']], lsuffix='_black', rsuffix='_white')

black_and_white = black[['average']].join(white[['average']], lsuffix='_black', rsuffix='_white')





Copied!







black_and_white = black[['average']].join(white[['average']], lsuffix='_black', rsuffix='_white')

black_and_white = black[['average']].join(white[['average']], lsuffix='_black', rsuffix='_white')





Copied!







black_and_white.head()

black_and_white.head()





Copied!







black_and_white.head()

black_and_white.head()





Copied!







black_and_white['gap'] = black_and_white.average_white - black_and_white.average_black

black_and_white['gap'] = black_and_white.average_white - black_and_white.average_black





Copied!







black_and_white['gap'] = black_and_white.average_white - black_and_white.average_black

black_and_white['gap'] = black_and_white.average_white - black_and_white.average_black





Copied!







# Scatter every place that has no missing values, then overlay Baltimore in red
# on the same axes.
ax = black_and_white.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
black_and_white.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

# Scatter every place that has no missing values, then overlay Baltimore in red
# on the same axes.
ax = black_and_white.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
black_and_white.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

<matplotlib.axes._subplots.AxesSubplot at 0x10a518dd8>





Copied!







# Scatter every place that has no missing values, then overlay Baltimore in red
# on the same axes.
ax = black_and_white.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
black_and_white.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

# Scatter every place that has no missing values, then overlay Baltimore in red
# on the same axes.
ax = black_and_white.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
black_and_white.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

<matplotlib.axes._subplots.AxesSubplot at 0x10a518dd8>





Copied!







races = pd.read_csv('races/ACS_13_5YR_B02001_with_ann.csv', encoding='cp1252', skiprows=[0])

races = pd.read_csv('races/ACS_13_5YR_B02001_with_ann.csv', encoding='cp1252', skiprows=[0])





Copied!







races = pd.read_csv('races/ACS_13_5YR_B02001_with_ann.csv', encoding='cp1252', skiprows=[0])

races = pd.read_csv('races/ACS_13_5YR_B02001_with_ann.csv', encoding='cp1252', skiprows=[0])





Copied!







races = races[['Geography', 'Estimate; Total:', 'Estimate; Total: - Black or African American alone']]

races = races[['Geography', 'Estimate; Total:', 'Estimate; Total: - Black or African American alone']]





Copied!







races = races[['Geography', 'Estimate; Total:', 'Estimate; Total: - Black or African American alone']]

races = races[['Geography', 'Estimate; Total:', 'Estimate; Total: - Black or African American alone']]





Copied!







races = races.set_index('Geography')

races = races.set_index('Geography')





Copied!







races = races.set_index('Geography')

races = races.set_index('Geography')





Copied!







black_percentage = races['Estimate; Total: - Black or African American alone'] / races['Estimate; Total:']

black_percentage = races['Estimate; Total: - Black or African American alone'] / races['Estimate; Total:']





Copied!







black_percentage = races['Estimate; Total: - Black or African American alone'] / races['Estimate; Total:']

black_percentage = races['Estimate; Total: - Black or African American alone'] / races['Estimate; Total:']





Copied!







# Align the share Series to black_and_white's index before masking. Indexing a
# DataFrame with a boolean Series whose index differs makes pandas reindex it
# implicitly (with a warning), or raise if some labels cannot be aligned.
# Labels absent from `races` become NaN here, and NaN > 0.1 is False, so those
# places are simply left out of the subset.
subset = black_and_white[black_percentage.reindex(black_and_white.index) > 0.1]

# Align the share Series to black_and_white's index before masking. Indexing a
# DataFrame with a boolean Series whose index differs makes pandas reindex it
# implicitly (with a warning), or raise if some labels cannot be aligned.
# Labels absent from `races` become NaN here, and NaN > 0.1 is False, so those
# places are simply left out of the subset.
subset = black_and_white[black_percentage.reindex(black_and_white.index) > 0.1]





Copied!







# Align the share Series to black_and_white's index before masking. Indexing a
# DataFrame with a boolean Series whose index differs makes pandas reindex it
# implicitly (with a warning), or raise if some labels cannot be aligned.
# Labels absent from `races` become NaN here, and NaN > 0.1 is False, so those
# places are simply left out of the subset.
subset = black_and_white[black_percentage.reindex(black_and_white.index) > 0.1]

# Align the share Series to black_and_white's index before masking. Indexing a
# DataFrame with a boolean Series whose index differs makes pandas reindex it
# implicitly (with a warning), or raise if some labels cannot be aligned.
# Labels absent from `races` become NaN here, and NaN > 0.1 is False, so those
# places are simply left out of the subset.
subset = black_and_white[black_percentage.reindex(black_and_white.index) > 0.1]





Copied!







# Scatter the filtered places that have no missing values, then overlay
# Baltimore in red on the same axes.
ax = subset.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
subset.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

# Scatter the filtered places that have no missing values, then overlay
# Baltimore in red on the same axes.
ax = subset.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
subset.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

<matplotlib.axes._subplots.AxesSubplot at 0x10f551eb8>





Copied!







# Scatter the filtered places that have no missing values, then overlay
# Baltimore in red on the same axes.
ax = subset.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
subset.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

# Scatter the filtered places that have no missing values, then overlay
# Baltimore in red on the same axes.
ax = subset.dropna().plot(kind='scatter', x='average_black', y='gap', color='black', alpha=0.1)
# .loc replaces the .ix indexer, which newer pandas releases deprecated and then
# removed. Selecting with a list of labels keeps the result a one-row DataFrame,
# so the scatter call is unchanged.
subset.loc[["Baltimore city, Maryland"]].plot(kind='scatter', x='average_black', y='gap', color='red', ax=ax, figsize=(8, 8))

<matplotlib.axes._subplots.AxesSubplot at 0x10f551eb8>





Copied!







# NOTE: `plt` here is bokeh.plotting. It was imported under the same alias as
# matplotlib.pyplot, so after that import every `plt.` call is a Bokeh call.
# Column data behind the hover tooltips, taken column-for-column from `subset`.
# `gap` was computed as average_white - average_black.
# NOTE(review): unlike the matplotlib scatter of `subset`, no dropna() is
# applied here — confirm rows with missing averages are meant to be passed on.
source = plt.ColumnDataSource(
    data=dict(
        black_income=subset.average_black,
        gap=subset.gap,
        city=subset.index,
    )
)
# Square 530x530 figure exposing only the hover, reset and save tools.
p = plt.figure(tools='hover,reset,save',
               title='', width=530, height=530,
               x_axis_label="Average Black Income",
               y_axis_label="Black-white income gap")
p.scatter(subset.average_black, subset.gap, size=5, color="black", alpha=0.05, source=source)
# Look up the HoverTool requested via tools='hover' and set its tooltip rows;
# each "@name" names one of the keys given to the ColumnDataSource above.
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
    ("City", "@city"),
    ("Average Black Income ", "@black_income"),
    ("B-W income gap", "@gap"),
]
plt.show(p)

source = plt.ColumnDataSource(
data=dict(
black_income=subset.average_black,
gap=subset.gap,
city=subset.index,
)
)
p = plt.figure(tools='hover,reset,save',
title='', width=530, height=530,
x_axis_label="Average Black Income",
y_axis_label="Black-white income gap")
p.scatter(subset.average_black, subset.gap, size=5, color="black", alpha=0.05, source=source)
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
("City", "@city"),
("Average Black Income ", "@black_income"),
("B-W income gap", "@gap"),
]
plt.show(p)





Copied!







source = plt.ColumnDataSource(
    data=dict(
        black_income=subset.average_black,
        gap=subset.gap,
        city=subset.index,
    )
)
p = plt.figure(tools='hover,reset,save',
               title='', width=530, height=530,
               x_axis_label="Average Black Income",
               y_axis_label="Black-white income gap")
p.scatter(subset.average_black, subset.gap, size=5, color="black", alpha=0.05, source=source)
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
    ("City", "@city"),
    ("Average Black Income ", "@black_income"),
    ("B-W income gap", "@gap"),
]
plt.show(p)

source = plt.ColumnDataSource(
data=dict(
black_income=subset.average_black,
gap=subset.gap,
city=subset.index,
)
)
p = plt.figure(tools='hover,reset,save',
title='', width=530, height=530,
x_axis_label="Average Black Income",
y_axis_label="Black-white income gap")
p.scatter(subset.average_black, subset.gap, size=5, color="black", alpha=0.05, source=source)
hover = p.select(dict(type=HoverTool))
hover.tooltips = [
("City", "@city"),
("Average Black Income ", "@black_income"),
("B-W income gap", "@gap"),
]
plt.show(p)

	Estimate; Total: - Less than $10,000	Estimate; Total: - $10,000 to $14,999	Estimate; Total: - $15,000 to $19,999	Estimate; Total: - $20,000 to $24,999	Estimate; Total: - $25,000 to $29,999	Estimate; Total: - $30,000 to $34,999	Estimate; Total: - $35,000 to $39,999	Estimate; Total: - $40,000 to $44,999	Estimate; Total: - $45,000 to $49,999	Estimate; Total: - $50,000 to $59,999	Estimate; Total: - $60,000 to $74,999	Estimate; Total: - $75,000 to $99,999	Estimate; Total: - $100,000 to $124,999	Estimate; Total: - $125,000 to $149,999	Estimate; Total: - $150,000 to $199,999	Estimate; Total: - $200,000 or more
Geography
Abanda CDP, Alabama	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
Abbeville city, Alabama	81	47	120	7	30	5	47	16	15	6	20	4	0	12	0	0
Adamsville city, Alabama	40	11	9	11	28	109	17	61	21	52	38	112	38	33	5	0
Addison town, Alabama	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0
Akron town, Alabama	26	18	6	16	14	21	2	7	0	0	0	0	0	8	0	0

	Estimate; Total: - Less than $10,000	Estimate; Total: - $10,000 to $14,999	Estimate; Total: - $15,000 to $19,999	Estimate; Total: - $20,000 to $24,999	Estimate; Total: - $25,000 to $29,999	Estimate; Total: - $30,000 to $34,999	Estimate; Total: - $35,000 to $39,999	Estimate; Total: - $40,000 to $44,999	Estimate; Total: - $45,000 to $49,999	Estimate; Total: - $50,000 to $59,999	Estimate; Total: - $60,000 to $74,999	Estimate; Total: - $75,000 to $99,999	Estimate; Total: - $100,000 to $124,999	Estimate; Total: - $125,000 to $149,999	Estimate; Total: - $150,000 to $199,999	Estimate; Total: - $200,000 or more	average
Geography
Abanda CDP, Alabama	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	0	NaN
Abbeville city, Alabama	81	47	120	7	30	5	47	16	15	6	20	4	0	12	0	0	27993.902439

	Id	Id2	Geography	Estimate; Total:	Margin of Error; Total:	Estimate; Total: - Less than $10,000	Margin of Error; Total: - Less than $10,000	Estimate; Total: - $10,000 to $14,999	Margin of Error; Total: - $10,000 to $14,999	Estimate; Total: - $15,000 to $19,999	...	Estimate; Total: - $75,000 to $99,999	Margin of Error; Total: - $75,000 to $99,999	Estimate; Total: - $100,000 to $124,999	Margin of Error; Total: - $100,000 to $124,999	Estimate; Total: - $125,000 to $149,999	Margin of Error; Total: - $125,000 to $149,999	Estimate; Total: - $150,000 to $199,999	Margin of Error; Total: - $150,000 to $199,999	Estimate; Total: - $200,000 or more	Margin of Error; Total: - $200,000 or more
0	1600000US0100100	100100	Abanda CDP, Alabama	23	25	0	11	11	17	0	...	0	11	0	11	0	11	0	11	0	11
1	1600000US0100124	100124	Abbeville city, Alabama	580	107	48	37	37	21	66	...	59	37	50	28	13	13	8	12	3	6

ReproduceIt: FiveThirtyEight - How Baltimore’s Young Black Men Are Boxed In

Cleaning data¶

Black¶

White¶

Combined¶

Subset: places where more than 10% of the population is black¶

Interactive¶

Conclusion¶

	average_black	average_white
Geography
Abanda CDP, Alabama	NaN	22934.782609
Abbeville city, Alabama	27993.902439	49422.413793
Adamsville city, Alabama	58901.709402	53250.750751
Addison town, Alabama	NaN	44384.328358
Akron town, Alabama	29576.271186	29398.148148