This entire project relies on accurately establishing a dataset that includes only 'Traditional High Schools.' The definition we have chosen centers on schools that serve the 11th or 12th grade and are not special-education, juvenile-justice, or alternative schools; additional filters remove virtual schools, adult schools, and schools without a matching National Center for Education Statistics (NCES) identifier (with some exceptions, recovered below). Note that the dataset used herein includes both the 2015-2016 CRDC school information and information gathered in the 2015-2016 NCES National Survey. Below is the official filtration procedure:
- Join the separate 2015-2016 NCES files into one dataset
- Join the compiled NCES dataset with the 2015-2016 CRDC dataset
- Filter Out (Dataset Attribute in Parentheses)
- Special Education, Alternative, Juvenile Justice Schools (CRDC)
- Schools without an 11th or 12th Grade (CRDC)
- Virtual Schools (NCES)
- Schools reported as 'Elementary', 'Middle', or 'Not Applicable' (NCES)
- Special Education, Alternative/Other, and "Adult" Schools (NCES)
- Recover Some Schools that did not have matching NCES identifiers
- Join recovered schools with dataset
- Remove any remaining schools that did not match
from IPython.display import HTML
HTML('''<script>
code_show=true;
function code_toggle() {
    if (code_show){
        $('div.input').hide();
    } else {
        $('div.input').show();
    }
    code_show = !code_show
}
$( document ).ready(code_toggle);
</script>
<form action="javascript:code_toggle()"><input type="submit" value="Click here to toggle on/off the raw code."></form>''')
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from my_functions import combokey_converter
%matplotlib inline
sns.set_style('whitegrid')
plt.rc('axes', titlesize = 14, titleweight = 'bold', labelweight = 'bold')
Used combokey_converter.convert to create a CSV-compatible 'COMBOKEY'
crdc_1516 = pd.read_csv('../filtered_data/00_crdc_1516_initial.csv',
dtype = {'LEAID':np.object})
crdc_1516['COMBOKEY'] = combokey_converter.convert(crdc_1516, 'LEAID', 'SCHID')
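The combokey_converter module itself is not reproduced in this notebook. Judging from the hand-recovered keys later on (e.g. ='360007706372'), a minimal sketch of what convert likely does, assuming a 7-digit LEAID and a 5-digit SCHID wrapped in an Excel-style ='...' guard that keeps spreadsheet software from stripping leading zeros:
def convert(df, lea_col, sch_col):
    # Hypothetical reimplementation -- the real function lives in my_functions.
    # Zero-pad the district and school IDs, concatenate them, and wrap the
    # result so CSV viewers treat it as text rather than a number.
    return ("='" + df[lea_col].astype(str).str.zfill(7)
                 + df[sch_col].astype(str).str.zfill(5) + "'")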
The nces_1516 data was recorded in separate files (each with a different number of schools), so they are joined to avoid corruption/loss of data.
Files
1. Characteristics
2. Directory
3. Geographic
As with the CRDC data, the combokey field was generated using my combokey_converter.convert function.
nces_1516_characteristics = pd.read_csv('../filtered_data/01_nces_1516_initial_school_characteristics.csv')
nces_1516_characteristics['combokey'] = combokey_converter.convert(nces_1516_characteristics, 'LEAID', 'SCHID')
nces_1516_directory = pd.read_csv('../filtered_data/01_nces_1516_initial_school_directory.csv')
nces_1516_directory['combokey'] = combokey_converter.convert(nces_1516_directory, 'LEAID', 'SCHID')
First Join: Directory + Characteristics
nces_1516 = nces_1516_characteristics.set_index('combokey').join(nces_1516_directory.set_index('combokey'),
how = 'inner', lsuffix = 'dir_')
str(len(nces_1516.index)) + ' Schools Matched.'
nces_1516 = nces_1516.drop(['LEAIDdir_', 'SCHIDdir_', 'SCH_NAMEdir_'], axis = 1)
Second Join: combined + geo
nces_1516_geo = pd.read_csv('../filtered_data/01_nces_1516_initial_geographic.csv', dtype = {'LOCALE15': np.object})
nces_1516_geo['combokey'] = combokey_converter.convert(nces_1516_geo, 'LEAID', 'SCHID')
nces_1516_test = nces_1516.join(nces_1516_geo.set_index('combokey'), how = 'inner', rsuffix = 'dir_')
nces_1516_full = nces_1516_test.drop(['LEAIDdir_', 'SCHIDdir_', 'NAME'], axis = 1)
str(len(nces_1516_full.index)) + ' Schools Matched'
# nces_1516_full.to_csv('../filtered_data/01_nces_1516_initial_combined_ccd.csv')
crdc_nces1516_test = crdc_1516.set_index('COMBOKEY').join(nces_1516_full, how = 'left', rsuffix=('_'))
str(crdc_nces1516_test[crdc_nces1516_test.SCH_NAME_.isnull()].LEAID.count()) + ' Schools Did Not Have Matching NCES Identifiers'
crdc_nces_1516 = crdc_nces1516_test.drop(['LEA_NAME_', 'LEAID_', 'SCHID_', 'SCH_NAME_'], axis = 1)
str(len(crdc_nces_1516.index)) + ' Total Schools in the Combined DataFrame'
crdc_nces_1516 = crdc_nces_1516.fillna('Missing')
# crdc_nces_1516.to_csv('../filtered_data/03_crdc_nces_1516_raw_combined.csv')
filter1_crdc_nces_1516 = crdc_nces_1516.copy()
from my_functions.extra_functions import students_in_11_or_12
filter1_crdc_nces_1516['Students_in_11_12'] = filter1_crdc_nces_1516.apply(lambda row: students_in_11_or_12(row['SCH_GRADE_G11'], row['SCH_GRADE_G12']), axis = 1)
filtered_out = filter1_crdc_nces_1516[(filter1_crdc_nces_1516.Students_in_11_12 == 'No')]
filter1_crdc_nces_1516 = filter1_crdc_nces_1516[(filter1_crdc_nces_1516.Students_in_11_12 == 'Yes')]
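The students_in_11_or_12 helper is imported from my_functions.extra_functions and is not shown in this notebook; a plausible minimal version, assuming the CRDC grade flags SCH_GRADE_G11 and SCH_GRADE_G12 are 'Yes'/'No' strings:
def students_in_11_or_12(grade_11, grade_12):
    # Hypothetical sketch: a school qualifies if it offers either grade.
    return 'Yes' if grade_11 == 'Yes' or grade_12 == 'Yes' else 'No'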
- Keep schools that answered 'No' to each of the three CRDC survey questions (juvenile justice, alternative status, and special-education status).
- I also used a keyword filter to remove any remaining "Juvenile Justice"-esque institutions.
filter2_crdc_nces_1516 = filter1_crdc_nces_1516.copy()
filtered_out = filtered_out.append(filter2_crdc_nces_1516[(filter2_crdc_nces_1516.JJ == 'Yes') |
(filter2_crdc_nces_1516.SCH_STATUS_ALT == 'Yes') |
(filter2_crdc_nces_1516.SCH_STATUS_SPED == 'Yes')])
filter2_crdc_nces_1516 = filter2_crdc_nces_1516[(filter2_crdc_nces_1516.JJ == 'No') &
(filter2_crdc_nces_1516.SCH_STATUS_ALT == 'No') &
(filter2_crdc_nces_1516.SCH_STATUS_SPED == 'No')]
def jj_keyword_remove(name):
    """Return False when a school or district name contains a juvenile-justice-related keyword."""
    kws = ['behavioral', 'juvenile', 'correction']
    for kw in kws:
        if kw in name.strip().lower():
            return False
    return True
filter2_crdc_nces_1516 = filter2_crdc_nces_1516[filter2_crdc_nces_1516.SCH_NAME.apply(lambda x: jj_keyword_remove(x))]
filter2_crdc_nces_1516 = filter2_crdc_nces_1516[filter2_crdc_nces_1516.LEA_NAME.apply(lambda x: jj_keyword_remove(x))]
- Remove any schools that reported 'Yes' to the virtual-school question
- Remove schools whose names contain keywords that likely indicate an online school
filter3_crdc_nces_1516 = filter2_crdc_nces_1516.copy()
filtered_out = filtered_out.append(filter3_crdc_nces_1516[filter3_crdc_nces_1516.VIRTUAL == 'Yes'])
filter3_crdc_nces_1516 = filter3_crdc_nces_1516[filter3_crdc_nces_1516.VIRTUAL != 'Yes']
def any_missed_virtuals(name):
    """Return False when a school name suggests an online/virtual school."""
    kws = ['virtual', 'cyber', 'electronic', 'internet', 'online', 'distance']
    for kw in kws:
        if kw in name.strip().lower():
            return False
    return True
filtered_out = filtered_out.append(filter3_crdc_nces_1516[~filter3_crdc_nces_1516.SCH_NAME.apply(lambda x: any_missed_virtuals(x))])
filter3_crdc_nces_1516 = filter3_crdc_nces_1516[filter3_crdc_nces_1516.SCH_NAME.apply(lambda x: any_missed_virtuals(x))]
Even with the lowest/highest grade filter, I wanted to ensure that no non-typical high schools (as reported by the NCES LEVEL field) were retained.
filter4_crdc_nces_1516 = filter3_crdc_nces_1516.copy()
filtered_out = filtered_out.append(filter4_crdc_nces_1516[(filter4_crdc_nces_1516.LEVEL == 'N') |
(filter4_crdc_nces_1516.LEVEL == '1') |
(filter4_crdc_nces_1516.LEVEL == '2')])
filter4_crdc_nces_1516 = filter4_crdc_nces_1516[(filter4_crdc_nces_1516.LEVEL == 'Missing') |
(filter4_crdc_nces_1516.LEVEL == '3') |
(filter4_crdc_nces_1516.LEVEL == '4')]
Removed schools with a SCH_TYPE that was not 1 (Regular) or 3 (Vocational). This culls additional 'Special Education' (2) and 'Alternative/Other' (4) schools.
filter5_crdc_nces_1516 = filter4_crdc_nces_1516.copy()
filtered_out = filtered_out.append(filter5_crdc_nces_1516[(filter5_crdc_nces_1516.SCH_TYPE == 2) |
(filter5_crdc_nces_1516.SCH_TYPE == 4)])
filter5_crdc_nces_1516 = filter5_crdc_nces_1516[(filter5_crdc_nces_1516.SCH_TYPE == 'Missing') |
(filter5_crdc_nces_1516.SCH_TYPE == 1) |
(filter5_crdc_nces_1516.SCH_TYPE == 3)]
Mini-Filter: Remove schools with 'adult' in the Name (CRDC)
filtered_out = filtered_out.append(filter5_crdc_nces_1516[filter5_crdc_nces_1516.SCH_NAME.str.contains('adult', case=False)])
filter5_crdc_nces_1516 = filter5_crdc_nces_1516[~filter5_crdc_nces_1516.SCH_NAME.str.contains('adult', case=False)]
With nearly 1200 schools missing NCES data, including schools from prominent districts like "NEW YORK CITY PUBLIC SCHOOLS" and "Green Dot Public Schools," it is important to recover as many of these schools as possible.
The problem I found was that the CRDC lumps a number of school districts together; as a result, the combokeys of schools in these districts do not match those of the NCES.
I tried a number of methods to properly join these missing schools:
- Using only the school name: many schools share the same name, so a join on the name alone gives each school the values of every same-named school (i.e. it creates a lot of duplicate records). The toy example below illustrates the problem.
- Using the NCES data from 2013: most of the schools missing here were subject to the same problem in the 2013-2014 dataset.
- Using the district and the name together: this also suffered from the fact that the CRDC combines some school districts, so the district names still did not match.
- **Finally, I used a combination of the school name and the state: only a handful of schools in the dataset had duplicate name-plus-state pairs, and those were removed from the dataset.**
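To see why the name-only join misbehaves, here is a small hypothetical example (toy data, not the project files): two different schools that share one name produce a many-to-many match, while the name + state key stays unique.
import pandas as pd
# Hypothetical toy data: the same school name appearing in two states.
toy_crdc = pd.DataFrame({'SCH_NAME': ['lincoln high school', 'lincoln high school'],
                         'LEA_STATE': ['CA', 'TX']})
toy_nces = pd.DataFrame({'SCH_NAME': ['lincoln high school', 'lincoln high school'],
                         'STABR': ['CA', 'TX']})
# Name-only merge: each CRDC row matches both NCES rows (2 x 2 = 4 records).
print(len(toy_crdc.merge(toy_nces, on = 'SCH_NAME')))  # 4
# Name + state merge: the combined key is unique, so no duplication (2 records).
toy_crdc['SCH_NAME_ST_NUM'] = toy_crdc.SCH_NAME + toy_crdc.LEA_STATE
toy_nces['SCH_NAME_ST_NUM'] = toy_nces.SCH_NAME + toy_nces.STABR
print(len(toy_crdc.merge(toy_nces, on = 'SCH_NAME_ST_NUM')))  # 2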
Next, I recovered the remaining schools in the 'New York City Public Schools' district, because it was clear that they were simply missing due to an LEA reporting error in the CRDC data. This process had two parts:
- First, because it seemed as though most of these remaining New York schools had an incorrect LEAID, I used the school ID and state abbreviation to create a unique identifier.
- Second, I used the NCES database to manually search for the remaining schools and correct their combokeys.
Finally, I performed the same NCES-field filtration steps on the recovered data. Then, I hand-removed duplicate values by checking the original filtered data for matching records.
filter5_missing_leas = filter5_crdc_nces_1516[filter5_crdc_nces_1516.LEVEL == 'Missing'].groupby('LEA_NAME')['LEAID'].count().sort_values(ascending = False)
# filter5_missing_leas.to_csv('../filtered_data/04_inital_filter_missing_LEAs.csv')
"""How many missing schools?"""
filter5_missing_schools = filter5_crdc_nces_1516[filter5_crdc_nces_1516.LEVEL == 'Missing']
len(filter5_missing_schools.index);
# filter5_missing_schools.to_csv('../filtered_data/04_intital_filter_missing_schools.csv')
Manipulate the missing schools and the original NCES data --> join
filter5_schname_state = filter5_missing_schools.copy()
filter5_schname_state = filter5_schname_state.reset_index()
filter5_schname_state['SCH_NAME'] = filter5_schname_state['SCH_NAME'].apply(lambda x: x.lower())
filter5_schname_state['SCH_NAME_ST_NUM'] = filter5_schname_state.SCH_NAME + filter5_schname_state.LEA_STATE
"""How many duplicate schools in the filter5 dataset?"""
filter5_schname_state.groupby('SCH_NAME_ST_NUM')['SCH_NAME_ST_NUM'].count().sort_values(ascending = False).head(10);
filter5_schname_state[filter5_schname_state.SCH_NAME_ST_NUM == 'performance learning centerGA']
nces_1516_schname_state = nces_1516_full.copy()
nces_1516_schname_state = nces_1516_schname_state.reset_index()
nces_1516_schname_state['SCH_NAME'] = nces_1516_schname_state['SCH_NAME'].apply(lambda x: x.lower())
nces_1516_schname_state['SCH_NAME_ST_NUM'] = nces_1516_schname_state.SCH_NAME + nces_1516_schname_state.STABR
"""Join the NCES and filter5 datasets on the SCH_NAME_ST_NUM column"""
schname_combined = filter5_schname_state.set_index('SCH_NAME_ST_NUM').join(nces_1516_schname_state.set_index('SCH_NAME_ST_NUM'), how = 'left', rsuffix = '_')
"""How many schools have duplicated values?"""
schname_combined.SCH_NAME_.value_counts().sort_values(ascending = False).head(10);
"""How may more schools were matched?"""
len(schname_combined[schname_combined.SCH_NAME_.notnull()].index)
"""How many schools still did not have a match?"""
len(schname_combined[schname_combined.SCH_NAME_.isnull()].index)
schname_combined_missing = schname_combined.copy()
schname_combined_missing = schname_combined_missing[schname_combined_missing.SCH_NAME_.isnull()]
schname_combined_missing_ny = schname_combined_missing.copy()
schname_combined_missing_ny = schname_combined_missing_ny[schname_combined_missing_ny['LEA_NAME'] == 'NEW YORK CITY PUBLIC SCHOOLS']
print(len(schname_combined_missing_ny.index))
print(schname_combined_missing_ny.SCHID.nunique())
schname_combined_missing_ny = schname_combined_missing_ny.drop(['TITLEI_', 'STABR_', 'SCH_TYPE_TEXT_', 'SCH_TYPE_',
'LEVEL_', 'VIRTUAL_', 'GSLO_', 'GSHI_',
'NMCNTY15_', 'LOCALE15_', 'LAT1516_', 'LON1516_', 'combokey',
'LEAID_', 'LEA_NAME_', 'SCH_NAME_', 'SCHID_'], axis = 1)
def schid_state_maker(schid, state):
    # Zero-pad the school ID to five digits, then append the state abbreviation.
    schid = str(schid).zfill(5)
    return schid + state
schname_combined_missing_ny['schid_state'] = schname_combined_missing_ny.apply(lambda row: schid_state_maker(row['SCHID'], row['LEA_STATE']), axis = 1)
nces_for_missing_ny = nces_1516_full.copy()
nces_for_missing_ny['schid_state'] = nces_for_missing_ny.apply(lambda row: schid_state_maker(row['SCHID'], row['STABR']), axis = 1)
missing_ny_joined = schname_combined_missing_ny.set_index('schid_state').join(nces_for_missing_ny.reset_index().set_index('schid_state'), how = 'left', rsuffix = "_")
""" Join the missing NY schools with NCES """
missing_ny_joined[missing_ny_joined.LEVEL_.notnull()][['SCH_NAME','SCH_NAME_']]
""" Dealing with remaining missing NY Schools """
missing_ny_2 = missing_ny_joined.copy()
missing_ny_2 = missing_ny_2[missing_ny_2.LEVEL_.isnull()]
missing_ny_2 = missing_ny_2.drop(['TITLEI_', 'STABR_', 'SCH_TYPE_TEXT_', 'SCH_TYPE_',
'LEVEL_', 'VIRTUAL_', 'GSLO_', 'GSHI_',
'NMCNTY15_', 'LOCALE15_', 'LAT1516_', 'LON1516_', 'combokey',
'LEAID_', 'LEA_NAME_', 'SCH_NAME_', 'SCHID_'], axis = 1)
"""Recovering the missing Combokeys using the NCES website and manually filling the values in"""
# Initialize an empty object column aligned to this frame's index, then fill in the recovered keys by hand
missing_ny_2['actual_combokey'] = pd.Series(np.nan, index = missing_ny_2.index, dtype = np.object)
missing_ny_2.at["99874NY", 'actual_combokey'] = "='360007706372'"
missing_ny_2.at["99933NY", 'actual_combokey'] = "='360008106380'"
missing_ny_2.at["99968NY", 'actual_combokey'] = "='360007606296'"
missing_ny_2.at["99992NY", 'actual_combokey'] = "='360009706274'"
missing_ny_2.at["99995NY", 'actual_combokey'] = "='360009506273'"
""" Join again on the NCES """
missing_ny_2_joined = missing_ny_2.set_index('actual_combokey').join(nces_1516_full, how = 'left', rsuffix = '_')
"""How many matched?"""
len(missing_ny_2_joined[missing_ny_2_joined.LEVEL_.notnull()].index)
Concatenate the two sets of recovered missing NY schools
missing_ny_joined_matching = missing_ny_joined[missing_ny_joined.LEVEL_.notnull()]
all_missing_ny_recovered = missing_ny_2_joined.append(missing_ny_joined_matching)
Combine the original recovered schools (matched on the SCH_NAME_ST_NUM identifier) with the recovered NY schools
recovered_schools = schname_combined.copy()
recovered_schools = recovered_schools.fillna("Missing")
recovered_schools = recovered_schools[recovered_schools['SCH_NAME_'] != "Missing"]
recovered_schools_all = recovered_schools.append(all_missing_ny_recovered)
Reformat the columns -- the recovered schools dataset's columns need to match the original filtered dataset's columns (required for concatenating the two sets properly)
"""Drop original nces columns (the ones with missing values)"""
recovered_schools_all = recovered_schools_all.drop(['TITLEI', 'STABR', 'SCH_TYPE_TEXT', 'SCH_TYPE', 'LEVEL', 'VIRTUAL', 'GSLO', 'GSHI',
'NMCNTY15', 'LOCALE15', 'LAT1516', 'LON1516', 'combokey',
'LEAID_', 'LEA_NAME_', 'SCH_NAME_', 'SCHID_'], axis = 1)
"""Rename new matching columns to replace the columns above (necessary for a proper concatenation later)"""
recovered_schools_all = recovered_schools_all.rename(lambda x: x.strip('_'), axis = 'columns')
recovered_schools_all = recovered_schools_all.set_index('COMBOKEY')
"""Do the columns between the original filtered set and recovered missing values set match"""
print(len(recovered_schools_all.columns.values))
print(len(filter5_crdc_nces_1516.columns.values))
""" How many schools recovered? """
len(recovered_schools_all.index)
NCES-Reported High Schools
recovered_schools_filter1 = recovered_schools_all.copy()
filtered_out = filtered_out.append(recovered_schools_filter1[(recovered_schools_filter1.LEVEL == '1') |
(recovered_schools_filter1.LEVEL == '2') |
(recovered_schools_filter1.LEVEL == 'N')])
recovered_schools_filter1 = recovered_schools_filter1[(recovered_schools_filter1.LEVEL == '3') |
(recovered_schools_filter1.LEVEL == '4')]
Non-Virtual Schools
recovered_schools_filter2 = recovered_schools_filter1.copy()
filtered_out = filtered_out.append(recovered_schools_filter2[recovered_schools_filter2.VIRTUAL == 'Yes'])
recovered_schools_filter2 = recovered_schools_filter2[recovered_schools_filter2.VIRTUAL != 'Yes']
NCES-Reported Regular / Vocational
recovered_schools_filter3 = recovered_schools_filter2.copy()
filtered_out = filtered_out.append(recovered_schools_filter3[(recovered_schools_filter3.SCH_TYPE == 2) |
(recovered_schools_filter3.SCH_TYPE == 4)])
recovered_schools_filter3 = recovered_schools_filter3[(recovered_schools_filter3.SCH_TYPE == 1) |
                                                      (recovered_schools_filter3.SCH_TYPE == 3)]
Remove Schools with 'Adult' in the Name
filtered_out = filtered_out.append(recovered_schools_filter3[recovered_schools_filter3.SCH_NAME.str.contains('Adult', case=False)])
recovered_schools_filter3 = recovered_schools_filter3[~recovered_schools_filter3.SCH_NAME.str.contains('Adult', case=False)]
Clean Duplicate Values
recovered_schools_filter3.groupby('SCH_NAME')['SCH_NAME'].count().sort_values(ascending = False).head(5)
"""NOTE: the community collaborative charter duplication appears to be legit (two campuses of the same school?)"""
"""Beacon High School in Dutchess County is already in the filter5 dataset -- Remove"""
recovered_schools_filter4 = recovered_schools_filter3.copy()
recovered_schools_filter4 = recovered_schools_filter4[(recovered_schools_filter4.SCH_NAME != 'beacon high school') | (recovered_schools_filter4.NMCNTY15 != 'Dutchess County')]
"""Both of the performance learning centers here actually matched to a different 'performance learning center' record;
therefore, they should both be removed"""
recovered_schools_filter4 = recovered_schools_filter4[recovered_schools_filter4.SCH_NAME != 'performance learning center']
"""The University High in Irvine was already accounted for; therefore, needs to be removed from the recovered"""
recovered_schools_filter4 = recovered_schools_filter4[(recovered_schools_filter4.SCH_NAME != 'university high') | (recovered_schools_filter4.NMCNTY15 != 'Orange County')]
"""'How many final recovered values?'"""
str(len(recovered_schools_filter4.index)) + ' Recovered High Schools'
Finally, I concatenated the recovered high schools with the original filtered set.
I ensured that no duplicate values were added in the process.
"""Remove the missing values"""
filter6_crdc_nces_1516 = filter5_crdc_nces_1516.copy()
filter6_crdc_nces_1516 = filter6_crdc_nces_1516[filter6_crdc_nces_1516.LEVEL != "Missing"]
"""How many initial Duplicates?
Interesting enough, these duplicates appear to legitimate; the problem seems to be that the schools actually have
different names (e.g. "The ADAIR Co. High"'s are actually supposed to be labeled ADAIR Co. R-I High and ADAIR Co. R-II BRASHEAR)"""
filter6_crdc_nces_1516.groupby(['STABR','SCH_NAME','NMCNTY15'])['SCH_NAME'].count().sort_values(ascending=False).head()
"""Any dulications in the recovered schools?
The community collaborative charter schools are two different schools."""
recovered_schools_filter4.groupby(['STABR','SCH_NAME','NMCNTY15'])['SCH_NAME'].count().sort_values(ascending=False).head()
# filtered_and_recovered = pd.concat([filter6_crdc_nces_1516, recovered_schools_filter4])
filtered_and_recovered = filter6_crdc_nces_1516.append(recovered_schools_filter4)
"""Do the numbers of columns match?"""
print(len(filter6_crdc_nces_1516.columns.values))
len(filtered_and_recovered.columns.values)
"""Because Columns are stored as dictionaries, there is no inherent order to the columns -- Pandas automatically
uses an alphabetical sort on an append/concatenation. I reorded the columns to show the SCH Name first"""
schName = ['SCH_NAME']
reorder = schName + [c for c in filtered_and_recovered.columns if c not in schName]
filtered_and_recovered = filtered_and_recovered[reorder]
"""No added duplicate records"""
filtered_and_recovered.groupby(['STABR','SCH_NAME','NMCNTY15'])['SCH_NAME'].count().sort_values(ascending=False).head(6)
# filtered_and_recovered.to_csv('../filtered_data/04_filter_final.csv')
final_missing = schname_combined[(schname_combined.SCH_NAME_.isnull()) & (schname_combined.LEA_NAME != 'NEW YORK CITY PUBLIC SCHOOLS')]
""" How many final missing schools? """
len(final_missing.index)
# final_missing.to_csv('../filtered_data/04_final_missing_NCES.csv')
""" Top remaining unaccounted districts """
final_missing.groupby('LEA_NAME')['LEAID'].count().sort_values(ascending = False).head(10)
filtered_out = filtered_out.append(final_missing)
# filtered_out.to_csv('../filtered_data/04_filtered_out_schools.csv')
from my_functions.extra_functions import (hs_enrollment_averager, missing_value_mapper,
add_enrollment_columns, enrollment_summary)
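These helpers are also not reproduced here. As a rough sketch of the pattern, enrollment_summary presumably collapses a filter stage into a one-row DataFrame labeled with the stage description, something like the hypothetical version below (Total_Enrollment is an assumed name for a column created by add_enrollment_columns):
def enrollment_summary_sketch(df, label):
    # Hypothetical reimplementation: one summary row per filter stage, assuming
    # add_enrollment_columns has created a 'Total_Enrollment' column.
    return pd.DataFrame({'Schools': [len(df.index)],
                         'Total Enrollment': [df['Total_Enrollment'].sum()]},
                        index = [label])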
"""Adding Enrollment Columns to the Filtered Datasets"""
crdc_1516_enroll = add_enrollment_columns(crdc_1516)
filter_1_enroll = add_enrollment_columns(filter1_crdc_nces_1516)
filter_2_enroll = add_enrollment_columns(filter2_crdc_nces_1516)
filter_3_enroll = add_enrollment_columns(filter3_crdc_nces_1516)
filter_4_enroll = add_enrollment_columns(filter4_crdc_nces_1516)
filter_5_enroll = add_enrollment_columns(filter5_crdc_nces_1516)
filtered_and_recovered_enroll = add_enrollment_columns(filtered_and_recovered)
"""Set up the enrollment summary DataFrames"""
tot_enr = enrollment_summary(crdc_1516_enroll, 'Total CRDC Schools')
filter1_enr = enrollment_summary(filter_1_enroll, 'Remove Schools without 11th or 12th Grade (CRDC)')
filter2_enr = enrollment_summary(filter_2_enroll, 'Remove Special-Education, Alternative, and Juvenile Justice Schools (CRDC)')
filter3_enr = enrollment_summary(filter_3_enroll, 'Remove Virtual Schools (NCES)')
filter4_enr = enrollment_summary(filter_4_enroll, 'Remove Elementary, Middle, and "N" Schools (NCES)')
filter5_enr = enrollment_summary(filter_5_enroll, 'Remove Special Education, Alternative/Other, and "Adult" Schools (NCES)')
filter_final_enr = enrollment_summary(filtered_and_recovered_enroll, 'Remove the Non-Matching NCES Schools (Final Total)')
"""Compile enrollment summary DataFrames"""
pd.concat([tot_enr, filter1_enr, filter2_enr, filter3_enr, filter4_enr, filter5_enr, filter_final_enr])