Exactly how prevalent are dual enrollment and advanced placement programs across the country? In the 2015-2016 CRDC survey, schools were asked to report whether or not they offered each of these programs. Looking at the 18,667 high schools from the filtered CRDC dataset (for more details, see the initial filtration methodology), the results are reported below in the following ways:
- Nationally
- By School 9-12th Grade Enrollment
- By School Locale
- By School Non-White Student Percentage
- By School Vocational Status
- By State
- By Region (Accreditation)
- By Region (Census)
- By Region (NACEP)
from IPython.display import HTML
# Render a button that shows or hides every code-input cell (`div.input`).
# The script calls code_toggle once when the page is ready, so the inputs
# start out hidden.
toggle_markup = '''<script>
code_show=true;
function code_toggle() {
if (code_show){
$('div.input').hide();
} else {
$('div.input').show();
}
code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>'''
HTML(toggle_markup)
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.plotly as py
import plotly.graph_objs as go
from plotly.offline import init_notebook_mode,iplot
init_notebook_mode(connected = True)
from my_functions.extra_functions import flag_grouper
%matplotlib inline
# Global plot styling: seaborn whitegrid theme, size-14 bold axes titles,
# bold axis labels.
sns.set_style('whitegrid')
plt.rc('axes', titlesize=14, titleweight='bold', labelweight='bold')

# Load the filtered high-school file. LEAID is read with the builtin `object`
# dtype rather than an inferred one. The old `np.object` alias was removed in
# NumPy 1.24 and raises AttributeError there; `object` is what it pointed to.
hs = pd.read_csv('../filtered_data/04_filter_final.csv', dtype={'LEAID': object})

# A school counts as offering a programme when its indicator column is 'Yes'.
hs_total = len(hs)
de_total = int((hs['SCH_DUAL_IND'] == 'Yes').sum())
de_offering_rate = round(de_total / hs_total * 100, 1)
ap_total = int((hs['SCH_APENR_IND'] == 'Yes').sum())
ap_offering_rate = round(ap_total / hs_total * 100, 1)

# One-row national summary, columns in presentation order.
order = ['# HS Schools', '# Schools Offering DE', 'DE Offering Rate',
         '# Schools Offering AP', 'AP Offering Rate']
pd.DataFrame({'# HS Schools': hs_total,
              '# Schools Offering DE': de_total,
              'DE Offering Rate': de_offering_rate,
              '# Schools Offering AP': ap_total,
              'AP Offering Rate': ap_offering_rate},
             index=['National'])[order]
- Schools were grouped into four categories based on how many students were enrolled in grades 9-12 in each one:
- <100
- 100 - 499
- 500 - 1199
- 1200+
"""Create a Size-of-School Classifier"""
from my_functions.extra_functions import hs_enrollment_averager, school_sizer

# Whole-school headcount is the male total plus the female total.
hs['total_enrollment'] = hs['TOT_ENR_M'].add(hs['TOT_ENR_F'])
# Calculating HS Students from the Total_Enrollments in Schools
hs['hs_total_enrollment'] = hs_enrollment_averager(hs)
"""Assigning Size Groups based on HS Enrollments"""
hs['size_group'] = hs['hs_total_enrollment'].apply(school_sizer)

# DE and AP offering tables per size group, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes no two
# groups contain the same number of schools -- confirm.
de_by_size = flag_grouper(hs, 'size_group', 'DE')
ap_by_size = flag_grouper(hs, 'size_group', 'AP')
# Row labels 0-3 are replaced by the enrollment band they stand for
# (assumes the rows come back in ascending band order -- confirm).
size_labels = {0: '<100', 1: '100-499', 2: '500-1199', 3: '1200+'}
hs_by_size = pd.merge(de_by_size, ap_by_size, on='# of HS Schools').rename(index=size_labels)
hs_by_size.index.name = 'HS Student Enrollment'
hs_by_size
# Side-by-side bar charts of the DE and AP offering rates per enrollment band.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))

# Tick labels match the bands used in the table ('1200+', not '>1200':
# the top band includes schools with exactly 1200 students per the
# band definitions).
size_tick_labels = ['<100', '100-499', '500-1199', '1200+']
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax1, 'DE Offering Rate', 'Percentage of Schools Offering DE\nBy Size '),
    (ax2, 'AP Offering Rate', 'Percentage of Schools Offering AP\nBy Size '),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_size.plot.bar(y=rate_column, ax=panel)
    plt.xticks(rotation=0)
    plt.legend([])  # single series per panel, so no legend entries
    plt.xlabel('')
    plt.title(heading)
    plt.yticks(pct_ticks)
    plt.xticks([0, 1, 2, 3], size_tick_labels)
    plt.ylim([0, 100])  # fixed 0-100 scale keeps the two panels comparable
- Schools were grouped into four categories based on their locale (as defined by the NCES):
- City: "Territory inside an Urbanized Area and inside a Principal City"
- Rural: "Census-defined rural territory"
- Suburban: "Territory outside a Principal City and inside an Urbanized Area"
- Town: "Territory inside an Urban Cluster"
"""Creating a Locale Classifer"""
from my_functions.extra_functions import locale_map

# Translate each school's LOCALE15 code into its locale group via locale_map.
hs['locale_group'] = hs['LOCALE15'].map(locale_map)

# DE and AP offering tables per locale, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes no two
# locales contain the same number of schools -- confirm.
de_by_locale = flag_grouper(hs, 'locale_group', 'DE')
ap_by_locale = flag_grouper(hs, 'locale_group', 'AP')
locale_labels = {0: 'City', 1: 'Rural', 2: 'Suburban', 3: 'Town'}
hs_by_locale = pd.merge(de_by_locale, ap_by_locale, on='# of HS Schools').rename(index=locale_labels)
hs_by_locale.index.name = 'Locale'
hs_by_locale
# Side-by-side bar charts of the DE and AP offering rates per locale.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax1, 'DE Offering Rate', 'Percentage of Schools Offering DE\nBy Locale '),
    (ax2, 'AP Offering Rate', 'Percentage of Schools Offering AP\nBy Locale '),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_locale.plot.bar(y=rate_column, ax=panel)
    plt.xticks(rotation=0)
    plt.legend([])
    plt.xlabel('')
    plt.title(heading)
    plt.yticks(pct_ticks)
    plt.ylim([0, 100])
"""Calculate the percent ethnicity of all schools in the set and group schools into quintiles"""
from my_functions.extra_functions import eth_grouper

# Share of each school's enrollment that is not white: total enrollment minus
# the white female and white male counts, divided by total enrollment.
non_white = hs['total_enrollment'] - hs['SCH_ENR_WH_F'] - hs['SCH_ENR_WH_M']
hs['pct_eth'] = non_white / hs['total_enrollment']
hs['eth_quintile'] = hs['pct_eth'].apply(eth_grouper)
"""What does the distribution of the schools in the dataset look based on ethncitiy?"""
# 100-bin histogram of the share, x axis clipped to [0, 1].
hs['pct_eth'].plot.hist(bins=100, xlim=[0, 1], edgecolor='k')
plt.title('Distribution of Schools by Non-White Percentages')
plt.xlabel('Ethnicity Percentage')
# plt.savefig(fname = './Visualizations/By_ethnicity_dist_200.png', dpi = 200)
# DE and AP offering tables per non-white-share group, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes no two
# groups contain the same number of schools -- confirm.
de_by_eth = flag_grouper(hs, 'eth_quintile', 'DE')
ap_by_eth = flag_grouper(hs, 'eth_quintile', 'AP')
eth_labels = {0: '0-20%', 1: '21-40%', 2: '41-60%', 3: '61-80%', 4: '81-100%'}
hs_by_eth = pd.merge(de_by_eth, ap_by_eth, on='# of HS Schools').rename(index=eth_labels)
hs_by_eth.index.name = 'Non-White %'
hs_by_eth
# Side-by-side bar charts of the DE (left) and AP (right) offering rates per
# non-white-share group. The AP panel is drawn first, then the DE panel.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax2, 'AP Offering Rate', 'Percentage of Schools with AP\nBy Ethnicity Percentage Quintiles'),
    (ax1, 'DE Offering Rate', 'Percentage of Schools with DE\nBy Ethnicity Percentage Quintiles'),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_eth.plot.bar(y=rate_column, ax=panel)
    plt.legend([])
    plt.title(heading)
    plt.xticks(rotation=0)
    plt.yticks(pct_ticks)
    plt.ylim([0, 100])
    plt.xlabel('')
"""Very high percentage non-minority schools by Locale"""
# Locale breakdown of the schools whose eth_quintile is 1: school count per
# locale group plus each locale's share of that subset, in percent.
eth_group1_locales = hs[hs.eth_quintile == 1].groupby('locale_group')['LEAID'].count().reset_index()
eth_group1_locales['pct'] = round(eth_group1_locales['LEAID'] / eth_group1_locales['LEAID'].sum() * 100, 1)
eth_group1_locales = eth_group1_locales.set_index('locale_group')

# Temporary rc overrides (thicker lines, bold size-11 text) apply to this
# figure only.
with plt.rc_context({'lines.linewidth': 5, 'font.weight': 'bold', 'font.size': 11}):
    # One explode offset per wedge, sized from the data so the pie still
    # draws when the subset does not contain exactly four locale groups.
    eth_group1_locales.plot.pie(y='LEAID', autopct='%1.1f%%',
                                startangle=55, shadow=True,
                                explode=[.05] * len(eth_group1_locales),
                                colormap='Pastel2', figsize=(5, 5))
    plt.title('High Schools with <20% non-white students by locale')
    plt.legend([])
    plt.ylabel('')
    # NOTE(review): `fontdict` is not an obvious text property for
    # plt.xticks -- confirm this call has any effect on a pie chart.
    plt.xticks(fontdict={'fontweight': 'bold'})
    plt.axis('equal')
# DE and AP offering tables per SCH_TYPE value, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes the
# two school types never contain the same number of schools -- confirm.
de_by_voc = flag_grouper(hs, 'SCH_TYPE', 'DE')
ap_by_voc = flag_grouper(hs, 'SCH_TYPE', 'AP')
school_type_labels = {0: 'Regular', 1: 'Vocational'}
hs_by_voc = pd.merge(de_by_voc, ap_by_voc, on='# of HS Schools').rename(index=school_type_labels)
hs_by_voc.index.name = 'School Type'
hs_by_voc
# Side-by-side bar charts of the DE and AP offering rates per school type.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 4))
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax1, 'DE Offering Rate', 'Percentage of Schools with DE\nBy Vocational Status'),
    (ax2, 'AP Offering Rate', 'Percentage of Schools with AP\nBy Vocational Status'),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_voc.plot.bar(y=rate_column, ax=panel)
    plt.legend([])
    plt.title(heading)
    plt.xticks(rotation=0)
    plt.yticks(pct_ticks)
    plt.ylim([0, 100])
    plt.xlabel('')
- Consult the ScatterGeo .png file to see the output.
# """Map the DE Flags to Values for the ScatterGeo Plot"""
# de_map = {'Yes':1, 'No':0}
# dual_enroll_bin = hs.SCH_DUAL_IND.map(de_map)
# scl = [ [0,"rgb(5, 10, 172)"], [1,"rgb(0, 200, 100)"] ]
# data = [ dict(
# type = 'scattergeo',
# locationmode = 'USA-states',
# lon = hs['LON1516'],
# lat = hs['LAT1516'],
# mode = 'markers',
# marker = dict(
# size = 5,
# opacity = 0.8,
# autocolorscale = False,
# line = dict(
# width=1,
# color='rgba(255,255,255,1)'
# ),
# colorscale = scl,
# cmin = 0,
# color = dual_enroll_bin,
# cmax = 1,
# ))]
# layout = dict(
# geo = dict(
# scope='usa',
# projection=dict( type='albers usa' ),
# ),
# )
# fig = dict( data=data, layout=layout )
# iplot(fig)
# # iplot(fig, image = 'png', filename = '02_ScatterGeo_DE_1516', image_width = 2750, image_height=2500)
# print("To see the scattergeo, uncomment code and change the size of the markers (to 3-5) -- Very cumbersome Graph")
- Sorted by DE Offering Rate in Descending Order
# DE and AP offering tables per state, joined into one frame.
# The extra True on the DE call presumably makes flag_grouper keep the state
# as a column (it becomes the index just below) -- confirm.
# NOTE(review): the join key is the school-count column, which assumes no two
# states contain the same number of schools -- confirm.
de_by_state = flag_grouper(hs, 'LEA_STATE', 'DE', True)
ap_by_state = flag_grouper(hs, 'LEA_STATE', 'AP')
hs_by_state = pd.merge(de_by_state, ap_by_state, on='# of HS Schools').set_index('LEA_STATE')
hs_by_state.index.name = 'State'
# Shown sorted by DE rate; hs_by_state itself keeps its original row order.
hs_by_state.sort_values('DE Offering Rate', ascending=False)
def _choropleth_parts(rate_column, map_title):
    """Return the (trace, layout) dicts for a US-state choropleth of one rate column."""
    trace = dict(type='choropleth',
                 locations=hs_by_state.index,
                 locationmode='USA-states',
                 # white-to-dark-red ramp with fixed stops
                 colorscale=[[0.0, 'rgb(255,255,255)'], [.2, 'rgb(255,221,221)'], [.4, 'rgb(255, 200, 200)'], [0.8, 'rgb(255, 50, 50)'], [1.0, 'rgb(180,0,0)']],
                 reversescale=False,
                 z=hs_by_state[rate_column])
    layout = dict(title=map_title,
                  geo={'scope': 'usa'})
    return trace, layout


# Dual-enrollment map.
de_data, de_layout = _choropleth_parts('DE Offering Rate', '2015-2016 DE-Offered Percentages')
de_choromap = go.Figure(data=[de_data], layout=de_layout)
iplot(de_choromap)
# iplot(de_choromap, image = 'png', filename='./Visualizations/by_state_de_choropleth')

# Advanced-placement map.
ap_data, ap_layout = _choropleth_parts('AP Offering Rate', '2015-2016 AP-Offered Percentages')
ap_choromap = go.Figure(data=[ap_data], layout=ap_layout)
iplot(ap_choromap)
# iplot(ap_choromap, image = 'png', filename='./Visualizations/by_state_ap_choropleth')
# One point per state: DE offering rate (x) against AP offering rate (y),
# both axes fixed to 0-100 with ticks every 10.
pct_ticks = np.arange(0, 110, 10)
de_rates = hs_by_state['DE Offering Rate']
ap_rates = hs_by_state['AP Offering Rate']

plt.figure(figsize=(6, 6))
plt.scatter(x=de_rates, y=ap_rates)
plt.ylim([0, 100])
plt.xlim([0, 100])
plt.ylabel('%High Schools in State Offering AP')
plt.xlabel('%High Schools in State Offering DE')
plt.yticks(pct_ticks)
plt.xticks(pct_ticks)
plt.title('States: AP v DE Offering Rates')
plt.show()
- Regions are broken down according to the regional accreditation guidelines.
- Higher Learning Commission (HLC): Arkansas, Arizona, Colorado, Iowa, Illinois, Indiana, Kansas, Michigan,
Minnesota, Missouri, North Dakota, Nebraska, New Mexico, Ohio, Oklahoma, South Dakota, Wisconsin,
West Virginia, and Wyoming.
- Middle States Commission on Higher Education (MSCHE): New York, New Jersey, Pennsylvania, Delaware,
Maryland, the District of Columbia.
- New England Association of Schools and Colleges (NEASC): Connecticut, Maine, Massachusetts, New Hampshire,
Rhode Island, and Vermont.
- Northwest Commission on Colleges and Universities (NWCCU): Alaska, Idaho, Montana, Nevada, Oregon,
Utah, and Washington.
- Southern Association of Colleges and Schools (SACS): Alabama, Florida, Georgia, Kentucky,
Louisiana, Mississippi, North Carolina, South Carolina, Tennessee, Texas and Virginia.
- Western Association of Schools and Colleges (WASC): Hawaii, California
"""Categorize schools into regions"""
from my_functions.extra_functions import region_mapper

# Tag every school with the region region_mapper returns for its state.
hs['region_accred'] = hs['LEA_STATE'].apply(region_mapper)

# DE and AP offering tables per region, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes no two
# regions contain the same number of schools -- confirm.
de_by_region = flag_grouper(hs, 'region_accred', 'DE', True)
ap_by_region = flag_grouper(hs, 'region_accred', 'AP')
hs_by_region = pd.merge(de_by_region, ap_by_region, on='# of HS Schools').set_index('region_accred')
hs_by_region.index.name = 'Regions'
hs_by_region
# Side-by-side horizontal bar charts of the DE and AP offering rates per
# accreditation region.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax1, 'DE Offering Rate', 'Percentage of Schools Offering DE\n by Region (Accreditation)'),
    (ax2, 'AP Offering Rate', 'Percentage of Schools Offering AP\n by Region (Accreditation)'),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_region.plot.barh(y=rate_column, ax=panel)
    plt.title(heading)
    plt.yticks(rotation=0)
    plt.legend([])
    plt.xticks(pct_ticks)
    plt.xlim([0, 100])
    plt.ylabel('')
plt.tight_layout()
- Regions are broken down according to the U.S. Census Bureau divisions.
- New England - Connecticut, Maine, Massachusetts, New Hampshire, Rhode Island, and Vermont
- Mid-Atlantic - New Jersey, New York, and Pennsylvania
- East North Central - Illinois, Indiana, Michigan, Ohio, and Wisconsin
- West North Central - Iowa, Kansas, Minnesota, Missouri, Nebraska, North Dakota, and South Dakota
- South Atlantic - Delaware, Florida, Georgia, Maryland, North Carolina, South Carolina,
Virginia, District of Columbia, and West Virginia
- East South Central - Alabama, Kentucky, Mississippi, and Tennessee
- West South Central - Arkansas, Louisiana, Oklahoma, and Texas
- Mountain - Arizona, Colorado, Idaho, Montana, Nevada, New Mexico, Utah, and Wyoming
- Pacific - Alaska, California, Hawaii, Oregon, and Washington
"""Categorize schools into regions"""
from my_functions.extra_functions import region_mapper_census

# Tag every school with the region region_mapper_census returns for its state.
hs['region_census'] = hs['LEA_STATE'].apply(region_mapper_census)

# DE and AP offering tables per region, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes no two
# regions contain the same number of schools -- confirm.
de_by_region_census = flag_grouper(hs, 'region_census', 'DE', True)
ap_by_region_census = flag_grouper(hs, 'region_census', 'AP')
hs_by_region_census = pd.merge(
    de_by_region_census, ap_by_region_census, on='# of HS Schools'
).set_index('region_census')
hs_by_region_census.index.name = 'Regions'
hs_by_region_census
# Side-by-side horizontal bar charts of the DE and AP offering rates per
# Census region.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax1, 'DE Offering Rate', 'Percentage of Schools Offering DE\n by Region (Census)'),
    (ax2, 'AP Offering Rate', 'Percentage of Schools Offering AP\n by Region (Census)'),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_region_census.plot.barh(y=rate_column, ax=panel)
    plt.title(heading)
    plt.yticks(rotation=0)
    plt.legend([])
    plt.xticks(pct_ticks)
    plt.xlim([0, 100])
    plt.ylabel('')
plt.tight_layout()
- Regions are broken down according to the NACEP regions.
- 1 - New York, New Jersey, Pennsylvania, Delaware, Maryland, Washington DC, Connecticut, Maine,
Massachusetts, New Hampshire, Rhode Island, Vermont, Virginia
- 2 - Michigan, Indiana, Ohio, West Virginia, Kentucky, Tennessee, North Carolina, South Carolina,
Alabama, Mississippi, Georgia, Florida
- 3 - Illinois, Missouri, Arkansas, Louisiana, Oklahoma, Kansas, Texas
- 4 - Montana, Idaho, Wyoming, North Dakota, South Dakota, Nebraska, Iowa, Minnesota, Wisconsin
- 5 - Oregon, Washington, California, Nevada, Utah, Arizona, Colorado, New Mexico, Alaska, Hawaii
"""Categorize schools into regions"""
from my_functions.extra_functions import region_mapper_nacep

# Tag every school with the region region_mapper_nacep returns for its state.
hs['region_nacep'] = hs['LEA_STATE'].apply(region_mapper_nacep)

# DE and AP offering tables per region, joined into one frame.
# NOTE(review): the join key is the school-count column, which assumes no two
# regions contain the same number of schools -- confirm.
de_by_region_nacep = flag_grouper(hs, 'region_nacep', 'DE', True)
ap_by_region_nacep = flag_grouper(hs, 'region_nacep', 'AP')
hs_by_region_nacep = pd.merge(
    de_by_region_nacep, ap_by_region_nacep, on='# of HS Schools'
).set_index('region_nacep')
hs_by_region_nacep.index.name = 'Regions'
hs_by_region_nacep
# Side-by-side horizontal bar charts of the DE and AP offering rates per
# NACEP region.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
pct_ticks = np.arange(0, 110, 10)
panels = (
    (ax1, 'DE Offering Rate', 'Percentage of Schools Offering DE\n by Region (NACEP)'),
    (ax2, 'AP Offering Rate', 'Percentage of Schools Offering AP\n by Region (NACEP)'),
)
for panel, rate_column, heading in panels:
    plt.sca(panel)
    hs_by_region_nacep.plot.barh(y=rate_column, ax=panel)
    plt.title(heading)
    plt.yticks(rotation=0)
    plt.legend([])
    plt.xticks(pct_ticks)
    plt.xlim([0, 100])
    plt.ylabel('')
plt.tight_layout()