For a more detailed look into the state of AP and DE offering (as well as total enrollments) in 2015-2016, the filtered high schools (for more details, see the initial filtration methodology) can be broken down into four categories based on whether they offer only DE, only AP, both, or neither. The results are broken down herein in the following ways:
- Nationally
- Schools
- Enrollments
- By School 9-12th Grade Enrollment
- Schools
- Enrollments
- By School Locale
- Schools
- Enrollments
- By School Non-White Student Percentage
- Schools
- Enrollments
- By State
- Schools
- Enrollments
- By Region (Accreditation)
- Schools
- Enrollments
- By Region (Census)
- Schools
- Enrollments
- By Region (NACEP)
- Schools
- Enrollments
from IPython.display import HTML
# Emit a jQuery snippet plus a submit button into the notebook output. The
# script hides every code-input cell ('div.input') once the document is ready,
# and each click on the button flips the cells between hidden and shown.
HTML('''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot
# Prepare the notebook's plotting back-ends: plotly notebook mode and inline
# matplotlib figures (IPython magic).
init_notebook_mode(connected = True)
%matplotlib inline
# Global figure styling: seaborn 'whitegrid' theme, bold size-14 axes titles
# and bold axis labels.
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')
# Load the filtered high-school file. LEAID is read with the generic object
# dtype (presumably so the identifiers stay strings rather than being parsed
# as numbers -- confirm against the data dictionary).
# `np.object` was only an alias of the builtin `object`; it was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError. The builtin
# yields exactly the same dtype on every NumPy version.
hs_uncleaned = pd.read_csv('../filtered_data/04_filter_final.csv', dtype={'LEAID': object})
from my_functions.extra_functions import hs_enrollment_averager, missing_value_mapper
# Work on a cleaned copy so the raw frame stays untouched: every cell is passed
# through missing_value_mapper before the enrollment totals are derived.
hs = hs_uncleaned.copy().applymap(missing_value_mapper)

# Whole-school enrollment is the sum of the male and female totals.
male_total, female_total = hs['TOT_ENR_M'], hs['TOT_ENR_F']
hs['total_enrollment'] = male_total + female_total

# Enrollment figure computed by the project helper (presumably the grade 9-12
# enrollment used for the size groups -- confirm in my_functions).
hs['hs_total_enrollment'] = hs_enrollment_averager(hs)
def make_ap_only(ap_flag, de_flag):
    """Return 1 when a school offers AP but not DE, otherwise 0.

    Both flags are compared against the literal strings 'Yes' / 'No'; any
    other combination of values yields 0.
    """
    offers_ap_only = ap_flag == 'Yes' and de_flag == 'No'
    return 1 if offers_ap_only else 0
def make_de_only(ap_flag, de_flag):
    """Return 1 when a school offers DE but not AP, otherwise 0.

    Both flags are compared against the literal strings 'Yes' / 'No'; any
    other combination of values yields 0.
    """
    offers_de_only = ap_flag == 'No' and de_flag == 'Yes'
    return 1 if offers_de_only else 0
def make_neither(ap_flag, de_flag):
    """Return 1 when a school offers neither AP nor DE, otherwise 0.

    Both flags must equal the literal string 'No'; any other combination of
    values yields 0.
    """
    offers_neither = ap_flag == 'No' and de_flag == 'No'
    return 1 if offers_neither else 0
def make_both(ap_flag, de_flag):
    """Return 1 when a school offers both AP and DE, otherwise 0.

    Both flags must equal the literal string 'Yes'; any other combination of
    values yields 0.
    """
    offers_both = ap_flag == 'Yes' and de_flag == 'Yes'
    return 1 if offers_both else 0
# Add one 0/1 indicator column per AP/DE offering combination. Each helper is
# fed the school's AP flag (SCH_APENR_IND) and DE flag (SCH_DUAL_IND).
flag_builders = {
    'AP Only': make_ap_only,
    'DE Only': make_de_only,
    'Neither AP DE': make_neither,
    'Both AP DE': make_both,
}
for flag_column, build_flag in flag_builders.items():
    hs[flag_column] = [
        build_flag(ap_flag, de_flag)
        for ap_flag, de_flag in zip(hs['SCH_APENR_IND'], hs['SCH_DUAL_IND'])
    ]
# Set up one DataFrame per AP/DE offering combination, selected through the
# 0/1 indicator columns on `hs`.
# The copy is taken *after* the boolean filter so each frame owns its data.
# Copying first and filtering afterwards leaves a filtered slice, and the
# column assignments made on these frames further down then trigger pandas'
# SettingWithCopyWarning.
hs_AP_Only = hs[hs['AP Only'] == 1].copy()
hs_de_only = hs[hs['DE Only'] == 1].copy()
hs_Neither_AP_DE = hs[hs['Neither AP DE'] == 1].copy()
hs_Both_AP_DE = hs[hs['Both AP DE'] == 1].copy()
"""Additional Helper Functions for Analyses Below"""
from my_functions.extra_functions import de_ap_enrollments, add_de_ap_pcts
# Tally how many schools fall into each AP/DE offering category nationwide.
num_total_schools = len(hs)
num_de_only_schools, num_AP_Only_schools = len(hs_de_only), len(hs_AP_Only)
num_both_schools, num_neither_schools = len(hs_Both_AP_DE), len(hs_Neither_AP_DE)

# Single-row frame ('National') with one column per category plus the total.
national_counts = {
    'DE Only': num_de_only_schools,
    'AP Only': num_AP_Only_schools,
    'Both AP DE': num_both_schools,
    'Neither AP DE': num_neither_schools,
    'Total': num_total_schools,
}
national_school_breakdown = pd.DataFrame(national_counts, index=['National'])

# Percentage columns are appended by the project helper; the result is shown.
national_school_breakdown_with_pcts = add_de_ap_pcts(national_school_breakdown)
national_school_breakdown_with_pcts
# Pie chart of the national school counts by AP/DE offering status. The 'Total'
# entry is dropped so only the four categories are drawn, each wedge is offset
# ("exploded") by 0.05, and wedge labels show one-decimal percentages.
# The rc_context temporarily overrides line width and font weight.
with plt.rc_context({'lines.linewidth': 5, 'font.weight':'bold'}):
national_school_breakdown.T.drop('Total').plot.pie(y='National',autopct='%1.1f%%',
startangle = -45, shadow = True, explode = [.05 for i in range(len(national_school_breakdown.T)-1)],
colormap='Paired', figsize = (5,5))
plt.title('All HS by AP/DE Offering Status')
# An empty legend and a blank y-label keep the figure uncluttered.
plt.legend([])
plt.ylabel('')
# NOTE(review): plt.xticks is called with only `fontdict`; confirm the installed
# matplotlib accepts this keyword here.
plt.xticks(fontdict={'fontweight':'bold'})
# Equal axis scaling so the pie is drawn as a circle.
plt.axis('equal')
# Enrollment summary for the four school groups without any grouping field
# (presumably the national figures -- confirm in my_functions).
de_ap_enrollments(hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE)
- Schools were grouped into four categories based on how many students were enrolled in grades 9-12 in each one:
- <100
- 100 - 499
- 500 - 1199
- 1200+
from my_functions.extra_functions import school_sizer
# Tag every school in each of the four frames with its size group, as returned
# by school_sizer for the school's hs_total_enrollment value.
for offering_frame in (hs_AP_Only, hs_de_only, hs_Neither_AP_DE, hs_Both_AP_DE):
    offering_frame['size_group'] = offering_frame['hs_total_enrollment'].apply(school_sizer)
# School counts per size group, one named Series per AP/DE offering category.
hs_de_only_size = hs_de_only.groupby('size_group')['LEAID'].count().rename('DE Only')
hs_AP_Only_size = hs_AP_Only.groupby('size_group')['LEAID'].count().rename('AP Only')
hs_Neither_AP_DE_size = hs_Neither_AP_DE.groupby('size_group')['LEAID'].count().rename('Neither AP DE')
hs_Both_AP_DE_size = hs_Both_AP_DE.groupby('size_group')['LEAID'].count().rename('Both AP DE')

# Side-by-side table (rows = size group) plus a row total over the categories.
# skipna=False keeps a missing count contagious, exactly like plain addition.
size_counts = [hs_de_only_size, hs_AP_Only_size, hs_Neither_AP_DE_size, hs_Both_AP_DE_size]
by_size = pd.concat(size_counts, axis=1)
by_size['Total'] = by_size[['DE Only', 'AP Only', 'Both AP DE', 'Neither AP DE']].sum(axis=1, skipna=False)

# Swap the numeric group codes for readable enrollment ranges, then let the
# project helper append the percentage columns.
by_size = by_size.rename({1: '<100', 2: '100-499', 3: '500-1199', 4: '>1200'})
by_size = add_de_ap_pcts(by_size)
by_size.index.names = ['HS Student Enrollment']
by_size
# Enrollment figures for the four school groups, broken down by size group and
# relabelled with readable enrollment ranges.
enrollments_by_size = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='size_group',
).rename({1: '<100', 2: '100-499', 3: '500-1199', 4: '>1200'})
enrollments_by_size.index.names = ['HS Student Enrollment']
enrollments_by_size
- Schools were grouped into four categories based on their locale (as defined by the NCES):
- City: "Territory inside an Urbanized Area and inside a Principal City"
- Rural: "Census-defined rural territory"
- Suburban: "Territory outside a Principal City and inside an Urbanized Area"
- Town: "Territory inside an Urban Cluster"
# Locale codes from LOCALE15: the tens digit selects the locale type, and codes
# x1 through x4 of each type collapse to the same label.
locale_names = {1: 'City', 2: 'Suburban', 3: 'Town', 4: 'Rural'}
locale_map = {
    10 * tens + units: label
    for tens, label in locale_names.items()
    for units in range(1, 5)
}

# Attach the readable locale label to every school in each of the four frames.
for offering_frame in (hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE):
    offering_frame['locale'] = offering_frame['LOCALE15'].map(locale_map)
# School counts per locale, one named Series per AP/DE offering category.
hs_de_only_locale = hs_de_only.groupby('locale')['LEAID'].count().rename('DE Only')
hs_AP_Only_locale = hs_AP_Only.groupby('locale')['LEAID'].count().rename('AP Only')
# Bug fix: the "Neither" counts must come from hs_Neither_AP_DE. The previous
# code grouped hs_AP_Only here, which duplicated the AP-only column and made
# the totals and percentages wrong.
hs_Neither_AP_DE_locale = hs_Neither_AP_DE.groupby('locale')['LEAID'].count().rename('Neither AP DE')
hs_Both_AP_DE_locale = hs_Both_AP_DE.groupby('locale')['LEAID'].count().rename('Both AP DE')

# Side-by-side table (rows = locale) with a row total across the categories;
# the project helper then appends the percentage columns.
by_locale = pd.concat([hs_de_only_locale, hs_AP_Only_locale, hs_Neither_AP_DE_locale, hs_Both_AP_DE_locale], axis=1)
by_locale['Total'] = by_locale['DE Only'] + by_locale['AP Only'] + by_locale['Both AP DE'] + by_locale['Neither AP DE']
by_locale = add_de_ap_pcts(by_locale)
by_locale.index.names = ['Locale']
by_locale
# Enrollment figures for the four school groups, broken down by locale.
locale_enrollments = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='locale',
)
locale_enrollments.index.name = 'Locale'
locale_enrollments
# Share of each school's students who are not White:
# (total enrollment - White female - White male) / total enrollment.
for offering_frame in (hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE):
    enrolled = offering_frame['total_enrollment']
    non_white = enrolled - offering_frame['SCH_ENR_WH_F'] - offering_frame['SCH_ENR_WH_M']
    offering_frame['pct_eth'] = non_white / enrolled
from my_functions.extra_functions import eth_grouper
# Bucket every school's non-White share into the group code that eth_grouper
# returns, for each of the four frames.
for offering_frame in (hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE):
    offering_frame['eth_quintile'] = offering_frame['pct_eth'].apply(eth_grouper)
# School counts per non-White quintile, one named Series per offering category.
hs_de_only_eth = hs_de_only.groupby('eth_quintile')['LEAID'].count().rename('DE Only')
hs_AP_Only_eth = hs_AP_Only.groupby('eth_quintile')['LEAID'].count().rename('AP Only')
hs_Neither_AP_DE_eth = hs_Neither_AP_DE.groupby('eth_quintile')['LEAID'].count().rename('Neither AP DE')
hs_Both_AP_DE_eth = hs_Both_AP_DE.groupby('eth_quintile')['LEAID'].count().rename('Both AP DE')

# Side-by-side table (rows = quintile) plus a row total over the categories.
# skipna=False keeps a missing count contagious, exactly like plain addition.
eth_counts = [hs_de_only_eth, hs_AP_Only_eth, hs_Neither_AP_DE_eth, hs_Both_AP_DE_eth]
by_eth = pd.concat(eth_counts, axis=1)
by_eth['Total'] = by_eth[['DE Only', 'AP Only', 'Both AP DE', 'Neither AP DE']].sum(axis=1, skipna=False)

# Swap the quintile codes for readable percentage ranges, then let the project
# helper append the percentage columns.
by_eth = by_eth.rename({1: '0-20%', 2: '21-40%', 3: '41-60%', 4: '61-80%', 5: '81-100%'})
by_eth = add_de_ap_pcts(by_eth)
by_eth.index.name = 'Non-White %'
by_eth
# Enrollment figures for the four school groups, broken down by non-White
# quintile and relabelled with readable percentage ranges.
enrollment_by_eth = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='eth_quintile',
).rename({1: '0-20%', 2: '21-40%', 3: '41-60%', 4: '61-80%', 5: '81-100%'})
enrollment_by_eth.index.names = ['Non-White %']
enrollment_by_eth
# Which states have many schools offering DE but not AP?
# School counts per state, one named Series per AP/DE offering category.
state_key, id_column = 'LEA_STATE', 'LEAID'
hs_de_only_state = hs_de_only.groupby(state_key)[id_column].count().rename('DE Only')
hs_AP_Only_state = hs_AP_Only.groupby(state_key)[id_column].count().rename('AP Only')
hs_Neither_AP_DE_state = hs_Neither_AP_DE.groupby(state_key)[id_column].count().rename('Neither AP DE')
hs_Both_AP_DE_state = hs_Both_AP_DE.groupby(state_key)[id_column].count().rename('Both AP DE')

# States missing from a category become 0 so the counts stay integers.
state_counts = [hs_de_only_state, hs_AP_Only_state, hs_Neither_AP_DE_state, hs_Both_AP_DE_state]
by_state = pd.concat(state_counts, axis=1).fillna(0).astype(int)
by_state['Total'] = by_state[['DE Only', 'AP Only', 'Both AP DE', 'Neither AP DE']].sum(axis=1)

# Percentage columns are appended by the project helper.
by_state = add_de_ap_pcts(by_state)
by_state.index.names = ['State']
by_state
# Enrollment figures for the four school groups, broken down by state.
enrollments_by_state = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='LEA_STATE',
)
enrollments_by_state.index.name = 'State'
enrollments_by_state
- Regions are broken down according to the regional accreditation guidelines.
- Higher Learning Commission (HLC): Arkansas, Arizona, Colorado, Iowa, Illinois, Indiana, Kansas, Michigan,
Minnesota, Missouri, North Dakota, Nebraska, New Mexico, Ohio, Oklahoma, South Dakota, Wisconsin,
West Virginia, and Wyoming.
- Middle States Commission on Higher Education (MSCHE): New York, New Jersey, Pennsylvania, Delaware,
Maryland, the District of Columbia.
- New England Association of Schools and Colleges (NEASC): Connecticut, Maine, Massachusetts, New Hampshire,
Rhode Island, and Vermont.
- Northwest Commission on Colleges and Universities (NWCCU): Alaska, Idaho, Montana, Nevada, Oregon,
Utah, and Washington.
- Southern Association of Colleges and Schools (SACS): Alabama, Florida, Georgia, Kentucky,
Louisiana, Mississippi, North Carolina, South Carolina, Tennessee, Texas and Virginia.
- Western Association of Schools and Colleges (WASC): Hawaii, California
"""Set up the Region Column"""
from my_functions.extra_functions import region_mapper
# Map each school's state to its accreditation region (via region_mapper) in
# every one of the four frames.
for offering_frame in (hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE):
    offering_frame['region_accred'] = offering_frame['LEA_STATE'].apply(region_mapper)
# School counts per accreditation region, one named Series per offering category.
hs_de_only_region = hs_de_only.groupby('region_accred')['LEAID'].count().rename('DE Only')
hs_AP_Only_region = hs_AP_Only.groupby('region_accred')['LEAID'].count().rename('AP Only')
hs_Neither_AP_DE_region = hs_Neither_AP_DE.groupby('region_accred')['LEAID'].count().rename('Neither AP DE')
hs_Both_AP_DE_region = hs_Both_AP_DE.groupby('region_accred')['LEAID'].count().rename('Both AP DE')

# Side-by-side table (rows = region) plus a row total over the categories.
# skipna=False keeps a missing count contagious, exactly like plain addition.
region_counts = [hs_de_only_region, hs_AP_Only_region, hs_Neither_AP_DE_region, hs_Both_AP_DE_region]
by_region = pd.concat(region_counts, axis=1)
by_region['Total'] = by_region[['DE Only', 'AP Only', 'Both AP DE', 'Neither AP DE']].sum(axis=1, skipna=False)

# Percentage columns are appended by the project helper.
by_region = add_de_ap_pcts(by_region)
by_region.index.names = ['Region']
by_region
# Enrollment figures for the four school groups, by accreditation region.
region_enrollments = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='region_accred',
)
region_enrollments.index.names = ['Region']
region_enrollments
- Regions are broken down according to the nine U.S. Census Bureau divisions.
- New England - Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, and Vermont
- Mid-Atlantic - New Jersey, New York, and Pennsylvania
- East North Central - Illinois, Indiana, Michigan, Ohio, and Wisconsin
- West North Central - Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, and South Dakota
- South Atlantic - Delaware, Florida, Georgia, Maryland, North Carolina, South Carolina,
Virginia, District of Columbia, and West Virginia
- East South Central - Alabama, Kentucky, Mississippi, and Tennessee
- West South Central - Arkansas, Louisiana, Oklahoma, and Texas
- Mountain - Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, and Wyoming
- Pacific - Alaska, California, Hawaii, Oregon, and Washington
"""Set up the Region Column"""
from my_functions.extra_functions import region_mapper_census
# Map each school's state to its Census region (via region_mapper_census) in
# every one of the four frames.
for offering_frame in (hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE):
    offering_frame['region_census'] = offering_frame['LEA_STATE'].apply(region_mapper_census)
# School counts per Census region, one named Series per offering category.
hs_de_only_region_census = hs_de_only.groupby('region_census')['LEAID'].count().rename('DE Only')
hs_AP_Only_region_census = hs_AP_Only.groupby('region_census')['LEAID'].count().rename('AP Only')
hs_Neither_AP_DE_region_census = hs_Neither_AP_DE.groupby('region_census')['LEAID'].count().rename('Neither AP DE')
hs_Both_AP_DE_region_census = hs_Both_AP_DE.groupby('region_census')['LEAID'].count().rename('Both AP DE')

# Side-by-side table (rows = region) plus a row total over the categories.
# skipna=False keeps a missing count contagious, exactly like plain addition.
census_counts = [
    hs_de_only_region_census,
    hs_AP_Only_region_census,
    hs_Neither_AP_DE_region_census,
    hs_Both_AP_DE_region_census,
]
by_region_census = pd.concat(census_counts, axis=1)
by_region_census['Total'] = by_region_census[['DE Only', 'AP Only', 'Both AP DE', 'Neither AP DE']].sum(axis=1, skipna=False)

# Percentage columns are appended by the project helper.
by_region_census = add_de_ap_pcts(by_region_census)
by_region_census.index.names = ['Region']
by_region_census
# Enrollment figures for the four school groups, by Census region.
region_enrollments_census = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='region_census',
)
region_enrollments_census.index.names = ['Region']
region_enrollments_census
- Regions are broken down according to the five NACEP regions.
- 1 - New York, New Jersey, Pennsylvania, Delaware, Maryland, Washington DC, Connecticut, Maine,
Massachusetts, New Hampshire, Rhode Island, Vermont, Virginia
- 2 - Michigan, Indiana, Ohio, West Virginia, Kentucky, Tennessee, North Carolina, South Carolina,
Alabama, Mississippi, Georgia, Florida
- 3 - Illinois, Missouri, Arkansas, Louisiana, Oklahoma, Kansas, Texas
- 4 - Montana, Idaho, Wyoming, North Dakota, South Dakota, Nebraska, Iowa, Minnesota, Wisconsin
- 5 - Oregon, Washington, California, Nevada, Utah, Arizona, Colorado, New Mexico, Alaska, Hawaii
"""Set up the Region Column"""
from my_functions.extra_functions import region_mapper_nacep
# Map each school's state to its NACEP region (via region_mapper_nacep) in
# every one of the four frames.
for offering_frame in (hs_de_only, hs_AP_Only, hs_Neither_AP_DE, hs_Both_AP_DE):
    offering_frame['region_nacep'] = offering_frame['LEA_STATE'].apply(region_mapper_nacep)
# School counts per NACEP region, one named Series per offering category.
hs_de_only_region_nacep = hs_de_only.groupby('region_nacep')['LEAID'].count().rename('DE Only')
hs_AP_Only_region_nacep = hs_AP_Only.groupby('region_nacep')['LEAID'].count().rename('AP Only')
hs_Neither_AP_DE_region_nacep = hs_Neither_AP_DE.groupby('region_nacep')['LEAID'].count().rename('Neither AP DE')
hs_Both_AP_DE_region_nacep = hs_Both_AP_DE.groupby('region_nacep')['LEAID'].count().rename('Both AP DE')

# Side-by-side table (rows = region) plus a row total over the categories.
# skipna=False keeps a missing count contagious, exactly like plain addition.
nacep_counts = [
    hs_de_only_region_nacep,
    hs_AP_Only_region_nacep,
    hs_Neither_AP_DE_region_nacep,
    hs_Both_AP_DE_region_nacep,
]
by_region_nacep = pd.concat(nacep_counts, axis=1)
by_region_nacep['Total'] = by_region_nacep[['DE Only', 'AP Only', 'Both AP DE', 'Neither AP DE']].sum(axis=1, skipna=False)

# Percentage columns are appended by the project helper.
by_region_nacep = add_de_ap_pcts(by_region_nacep)
by_region_nacep.index.names = ['Region']
by_region_nacep
# Enrollment figures for the four school groups, by NACEP region.
region_enrollments_nacep = de_ap_enrollments(
    hs_de_only,
    hs_AP_Only,
    hs_Neither_AP_DE,
    hs_Both_AP_DE,
    groupby_field='region_nacep',
)
region_enrollments_nacep.index.names = ['Region']
region_enrollments_nacep