This project delves into a critical and timely issue, exploring the intersection between gun violence and political trends in the United States. The primary focus is to examine how the prevalence of mass shootings influences presidential voting patterns. This research is not only academically stimulating but also has profound societal implications, offering insights into how critical issues like gun violence can shape political landscapes in the U.S.
Gun violence has been at the forefront of political rhetoric in recent years and has led to polarized stances on gun rights. With the 2024 Presidential Election less than one year away, it is crucial for each candidate and party to understand the factors that shift voting trends. The results of this analysis could influence both Democrats' and Republicans' stances on the issue of gun rights moving forward. Supporting evidence of the importance of the gun rights issue as it relates to politics is linked below:
At the heart of our study is the question: Does an increase in gun violence in a state correlate with a shift towards the Democratic party in presidential elections?
This question stems from the Democratic party's general stance favoring stricter gun control laws.
#Clone my git repository
!git clone https://github.com/aidanhussain/cmps3160project.git
%cd cmps3160project
fatal: destination path 'cmps3160project' already exists and is not an empty directory.
/content/drive/MyDrive/Colab Notebooks/cmps3160project
#Import all the libraries we'll need
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
import scipy.stats as stats
import numpy as np
import statsmodels.api as sm
Link: https://www.kaggle.com/datasets/alexandrepetit881234/us-population-by-state
This dataset includes each US state's population as of the 2020 US Census. This dataset will be integral to normalizing/scaling raw statistics from other datasets we use throughout this project. I plan to calculate per capita deaths and injuries (using data from the mass shootings dataset) with these population totals as the denominator of the per capita equations, and I also plan to use this population data to help standardize some of the data from the election datasets.
The dataset includes 5 variables relating to each state's population total. The variables of particular interest are 'State' and '2020_census' (the total population for that state as of the 2020 census).
us_populations_raw = pd.read_csv("us_pop_by_state.csv")
us_populations_raw.dtypes #Check dtypes of each column
rank                float64
state                object
state_code           object
2020_census           int64
percent_of_total    float64
dtype: object
All the dtypes are correct. Great!
us_populations_raw.head(10)
 | rank | state | state_code | 2020_census | percent_of_total
---|---|---|---|---|---
0 | 1.0 | California | CA | 39538223 | 0.1191 |
1 | 2.0 | Texas | TX | 29145505 | 0.0874 |
2 | 3.0 | Florida | FL | 21538187 | 0.0647 |
3 | 4.0 | New York | NY | 20201249 | 0.0586 |
4 | 5.0 | Pennsylvania | PA | 13002700 | 0.0386 |
5 | 6.0 | Illinois | IL | 12801989 | 0.0382 |
6 | 7.0 | Ohio | OH | 11799448 | 0.0352 |
7 | 8.0 | Georgia | GA | 10711908 | 0.0320 |
8 | 9.0 | North Carolina | NC | 10439388 | 0.0316 |
9 | 10.0 | Michigan | MI | 10077331 | 0.0301 |
Now, let's tidy this dataset up by dropping the total row, renaming the '2020_census' column to 'Population', and renaming Washington D.C.'s entry from 'DC' to 'District of Columbia'.
us_populations_tidy = us_populations_raw.copy()
us_populations_tidy = us_populations_tidy[:-1] #Drop total row
us_populations_tidy = us_populations_tidy.rename({'2020_census': 'Population'}, axis=1)
us_populations_tidy = us_populations_tidy.set_index('state_code')
us_populations_tidy.at['DC', 'state'] = 'District of Columbia' #Rename Washington D.C. entry
us_populations_tidy = us_populations_tidy.reset_index()
us_populations_tidy.head(10)
 | state_code | rank | state | Population | percent_of_total
---|---|---|---|---|---
0 | CA | 1.0 | California | 39538223 | 0.1191 |
1 | TX | 2.0 | Texas | 29145505 | 0.0874 |
2 | FL | 3.0 | Florida | 21538187 | 0.0647 |
3 | NY | 4.0 | New York | 20201249 | 0.0586 |
4 | PA | 5.0 | Pennsylvania | 13002700 | 0.0386 |
5 | IL | 6.0 | Illinois | 12801989 | 0.0382 |
6 | OH | 7.0 | Ohio | 11799448 | 0.0352 |
7 | GA | 8.0 | Georgia | 10711908 | 0.0320 |
8 | NC | 9.0 | North Carolina | 10439388 | 0.0316 |
9 | MI | 10.0 | Michigan | 10077331 | 0.0301 |
Link: https://www.kaggle.com/datasets/zusmani/us-mass-shootings-last-50-years
I am using this data set because I think that it is both interesting (in terms of the questions it can help answer) and applicable (to very real issues in the world today). My goal is to use this data set, alongside other data, to identify the effect that the prevalence of gun violence (or, more specifically, mass shootings) has on US presidential voting trends.
This data set consists of mass shootings in the United States between August 2016 and April 2021 (at least the segment of data I am using does; the actual dataset link includes other datasets with different formatting that are not being used for this project). The dataset includes 8 variables that describe each mass shooting event. The variables of particular interest include Incident Date, State, # Killed, and # Injured.
The dataset uses the FBI's definition of a mass shooting (4+ casualties excluding the shooter) as the basis for an incident's inclusion. Further information regarding this definition is available here: https://www.ojp.gov/ncjrs/virtual-library/abstracts/analysis-recent-mass-shootings
I think that it will be very interesting to investigate the question of whether greater prevalence of gun violence in a state has any impact on voting trends in that state. In essence, are states where gun violence is more prevalent shifting to vote more in favor of the Democratic party (typically in support of stricter gun laws)? Of course, to conduct this analysis, I will need to design this project in an appropriate way to ensure that I am answering the correct question.
mass_shootings_raw = pd.read_csv("Mass shooting data.csv")
mass_shootings_raw.dtypes #Check dtypes of each column; 'Incident Date' needs to be converted to datetime
mass_shootings_raw['Incident Date'] = pd.to_datetime(mass_shootings_raw['Incident Date']) #Convert 'Incident Date' to datetime
mass_shootings_raw.dtypes #Check dtypes of each column
Incident ID                int64
Incident Date     datetime64[ns]
State                     object
City Or County            object
Address                   object
# Killed                   int64
# Injured                  int64
Operations               float64
dtype: object
Now that 'Incident Date's dtype is updated, dtypes are set correctly! Now, let's take a look at what data we have:
As shown above, I was able to load the CSV file into this notebook. Thankfully, the original data source was well organized, so I did not have to make any meaningful changes to the data to display it in a digestible and logical way. Now, let's start thinking about ETL by finding an interesting statistic and generating an interesting graph from this dataset!
mass_shootings_raw.head(10)
 | Incident ID | Incident Date | State | City Or County | Address | # Killed | # Injured | Operations
---|---|---|---|---|---|---|---|---
0 | 1978561 | 2021-04-15 | District of Columbia | Washington | 1800 block of West Virginia Ave NE | 0 | 4 | NaN |
1 | 1978635 | 2021-04-15 | Indiana | Indianapolis | 8951 Mirabel Rd | 8 | 5 | NaN |
2 | 1978652 | 2021-04-15 | Illinois | Chicago | 600 block of N Sawyer Ave | 0 | 4 | NaN |
3 | 1978616 | 2021-04-15 | Florida | Pensacola | 700 Truman Ave | 0 | 6 | NaN |
4 | 1976538 | 2021-04-13 | Maryland | Baltimore | 2300 block of Hoffman St | 0 | 4 | NaN |
5 | 1975296 | 2021-04-12 | Illinois | Chicago | I-290 and S Damen Ave | 1 | 3 | NaN |
6 | 1974943 | 2021-04-11 | Kansas | Wichita | 200 block of N Battin St | 1 | 3 | NaN |
7 | 1975004 | 2021-04-11 | Washington | Seattle | 306 23rd Ave S | 0 | 4 | NaN |
8 | 1974088 | 2021-04-10 | Tennessee | Memphis | 4315 S 3rd St | 1 | 3 | NaN |
9 | 1973692 | 2021-04-10 | Missouri | Koshkonong | US-63 and MO-F | 1 | 3 | NaN |
#Sum of # Killed by State
mass_shootings_raw.groupby(['State'])['# Killed'].sum().sort_values(ascending=False).head(10)
State
Texas             229
California        214
Illinois          152
Florida           134
Louisiana          84
Pennsylvania       83
Missouri           83
Ohio               79
North Carolina     77
Nevada             72
Name: # Killed, dtype: int64
#Sum of # Killed + # Injured by State
mass_shootings_raw['# Killed or Injured'] = mass_shootings_raw['# Killed'] + mass_shootings_raw['# Injured']
mass_shootings_raw.groupby(['State'])['# Killed or Injured'].sum().sort_values(ascending=False).head(10)
State
Illinois        1069
California       969
Texas            754
Florida          649
Nevada           561
Pennsylvania     495
Louisiana        463
Ohio             424
New York         410
Missouri         344
Name: # Killed or Injured, dtype: int64
That's interesting: Illinois shoots past Texas and California when injuries are included (1069 killed + injured). Texas is also surpassed by California. It will be interesting to explore whether the choice of metric to measure the prevalence of gun violence makes a difference in any conclusions we draw as we progress through this project.
Anyway, now let's transform this dataset into the format we require for the purposes of our project. The goal of this transformation is to end with a dataframe that has one entry per state (the rows) and the total number of deaths, total number of injuries, and total number of deaths + injuries (the columns) from between 2016-11-08 and 2020-11-03 (the two election days). Additionally, we want to scale the numeric variables by each state's population to ensure our data is sufficiently normalized.
We will create two versions of this dataframe: one that includes all shootings, and one that includes only shootings with 8 or more deaths.
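Since both versions share the same filter-and-aggregate steps, one way to express the logic once is a small helper function. This is just a sketch (the helper name and defaults are my own); the explicit cells below are what the project actually runs.
#A sketch of a reusable helper for the shared filter-and-aggregate logic
def shootings_by_state(df, start='2016-11-08', end='2020-11-03', min_killed=0):
    #Keep incidents strictly between the two election dates
    window = df[(df['Incident Date'] > start) & (df['Incident Date'] < end)]
    #Optionally require a minimum death count per incident (min_killed=8 reproduces the second version)
    window = window[window['# Killed'] >= min_killed]
    #Aggregate casualties by state
    return window[['State','# Killed','# Injured','# Killed or Injured']].groupby('State').sum()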
#Includes all shootings
mass_shootings_clean = mass_shootings_raw.copy()
mass_shootings_clean = mass_shootings_clean[(mass_shootings_clean['Incident Date'] > '2016-11-08') & (mass_shootings_clean['Incident Date'] < '2020-11-03')] #Filter the DF to the correct dates
mass_shootings_clean = mass_shootings_clean[['State','# Killed','# Injured','# Killed or Injured']]
mass_shootings_clean = mass_shootings_clean.groupby('State').sum()
mass_shootings_clean.head(10)
State | # Killed | # Injured | # Killed or Injured
---|---|---|---
Alabama | 52 | 184 | 236 |
Alaska | 1 | 8 | 9 |
Arizona | 19 | 58 | 77 |
Arkansas | 17 | 120 | 137 |
California | 191 | 653 | 844 |
Colorado | 22 | 106 | 128 |
Connecticut | 10 | 59 | 69 |
Delaware | 8 | 25 | 33 |
District of Columbia | 13 | 131 | 144 |
Florida | 112 | 401 | 513 |
#Includes only shootings with 8+ deaths
mass_shootings_clean_minimum8 = mass_shootings_raw.copy()
mass_shootings_clean_minimum8 = mass_shootings_clean_minimum8[(mass_shootings_clean_minimum8['Incident Date'] > '2016-11-08') & (mass_shootings_clean_minimum8['Incident Date'] < '2020-11-03')] #Filter the DF to the correct dates
mass_shootings_clean_minimum8 = mass_shootings_clean_minimum8[mass_shootings_clean_minimum8['# Killed'] >= 8] #Keep only incidents with 8 or more deaths
mass_shootings_clean_minimum8 = mass_shootings_clean_minimum8[['State','# Killed','# Injured','# Killed or Injured']]
mass_shootings_clean_minimum8 = mass_shootings_clean_minimum8.groupby('State').sum()
mass_shootings_clean_minimum8.head(10)
State | # Killed | # Injured | # Killed or Injured
---|---|---|---
California | 13 | 2 | 15 |
Florida | 17 | 17 | 34 |
Mississippi | 8 | 1 | 9 |
Nevada | 59 | 441 | 500 |
Ohio | 10 | 17 | 27 |
Pennsylvania | 11 | 7 | 18 |
Texas | 77 | 80 | 157 |
Virginia | 13 | 4 | 17 |
Alright, now, let's scale these numeric variables by each state's total population per the 2020 Census.
#Includes all shootings
mass_shootings_clean_normalized = mass_shootings_clean.merge(us_populations_tidy,how = 'left', left_on='State', right_on = 'state')
mass_shootings_clean_normalized['# Killed per Capita'] = mass_shootings_clean_normalized['# Killed'] / mass_shootings_clean_normalized['Population']
mass_shootings_clean_normalized['# Injured per Capita'] = mass_shootings_clean_normalized['# Injured'] / mass_shootings_clean_normalized['Population']
mass_shootings_clean_normalized['# Killed or Injured per Capita'] = mass_shootings_clean_normalized['# Killed or Injured'] / mass_shootings_clean_normalized['Population']
mass_shootings_clean_normalized = mass_shootings_clean_normalized[['state','# Killed per Capita','# Injured per Capita','# Killed or Injured per Capita']]
mass_shootings_clean_normalized.head(10)
 | state | # Killed per Capita | # Injured per Capita | # Killed or Injured per Capita
---|---|---|---|---
0 | Alabama | 0.000010 | 0.000037 | 0.000047 |
1 | Alaska | 0.000001 | 0.000011 | 0.000012 |
2 | Arizona | 0.000003 | 0.000008 | 0.000011 |
3 | Arkansas | 0.000006 | 0.000040 | 0.000045 |
4 | California | 0.000005 | 0.000017 | 0.000021 |
5 | Colorado | 0.000004 | 0.000018 | 0.000022 |
6 | Connecticut | 0.000003 | 0.000016 | 0.000019 |
7 | Delaware | 0.000008 | 0.000025 | 0.000033 |
8 | District of Columbia | 0.000019 | 0.000190 | 0.000209 |
9 | Florida | 0.000005 | 0.000019 | 0.000024 |
#Only includes shootings with 8+ deaths
mass_shootings_clean_normalized_minimum8 = mass_shootings_clean_minimum8.merge(us_populations_tidy,how = 'left', left_on='State', right_on = 'state')
mass_shootings_clean_normalized_minimum8['# Killed per Capita'] = mass_shootings_clean_normalized_minimum8['# Killed'] / mass_shootings_clean_normalized_minimum8['Population']
mass_shootings_clean_normalized_minimum8['# Injured per Capita'] = mass_shootings_clean_normalized_minimum8['# Injured'] / mass_shootings_clean_normalized_minimum8['Population']
mass_shootings_clean_normalized_minimum8['# Killed or Injured per Capita'] = mass_shootings_clean_normalized_minimum8['# Killed or Injured'] / mass_shootings_clean_normalized_minimum8['Population']
mass_shootings_clean_normalized_minimum8 = mass_shootings_clean_normalized_minimum8[['state','# Killed per Capita','# Injured per Capita','# Killed or Injured per Capita']]
mass_shootings_clean_normalized_minimum8.head(10)
 | state | # Killed per Capita | # Injured per Capita | # Killed or Injured per Capita
---|---|---|---|---
0 | California | 3.287958e-07 | 5.058396e-08 | 3.793797e-07 |
1 | Florida | 7.892958e-07 | 7.892958e-07 | 1.578592e-06 |
2 | Mississippi | 2.701535e-06 | 3.376919e-07 | 3.039227e-06 |
3 | Nevada | 1.900397e-05 | 1.420466e-04 | 1.610506e-04 |
4 | Ohio | 8.474973e-07 | 1.440745e-06 | 2.288243e-06 |
5 | Pennsylvania | 8.459781e-07 | 5.383497e-07 | 1.384328e-06 |
6 | Texas | 2.641917e-06 | 2.744849e-06 | 5.386765e-06 |
7 | Virginia | 1.506130e-06 | 4.634246e-07 | 1.969555e-06 |
Great! Now our mass shooting/gun violence dataset is in the correct format to proceed with the rest of our project.
It's interesting that only 8 states qualify as having had a mass shooting with 8 or more deaths. This small number of observations will likely make it difficult to draw statistically significant conclusions, but I am still including it out of curiosity.
Link: https://www.statista.com/statistics/630799/preliminary-results-of-the-2016-presidential-election/
This dataset includes information about how each US state voted in the 2016 presidential election, by candidate. The dataset includes the 2 major candidates: Donald Trump and Hillary Clinton.
The dataset includes 3 variables relating to each state's 2016 presidential vote: 'State', (% of votes for) 'Hillary Clinton', and (% of votes for) 'Donald Trump'.
This will be one of two presidential election datasets we will use to investigate the potential effects of gun violence on American voting trends; the other dataset will feature 2020 Presidential Election results.
election_results_2016_raw = pd.read_csv("2016_election_results.csv")
election_results_2016_raw.dtypes #Check dtypes of each column; need to remove extraneous column...
election_results_2016_raw = election_results_2016_raw.iloc[:,0:3] #Tidy up data
election_results_2016_raw.head(10)
 | State | Hillary Clinton | Donald Trump
---|---|---|---
0 | Alabama | 34.7 | 62.7 |
1 | Alaska | 37.6 | 52.8 |
2 | Arizona | 45.5 | 49.0 |
3 | Arkansas | 33.7 | 60.6 |
4 | California | 62.3 | 31.9 |
5 | Colorado | 48.2 | 43.3 |
6 | Connecticut | 54.7 | 41.0 |
7 | Delaware | 53.4 | 41.9 |
8 | District of Columbia | 92.8 | 4.2 |
9 | Florida | 47.8 | 49.0 |
Great, the datatypes are correct, and the extraneous column has been removed!
However, for consistency, let's normalize the vote proportions of Hillary Clinton and Donald Trump so they add up to 100% (because we do not care about the immaterial portion of votes for other candidates). Also, let's rename the columns from the candidates' names to the political parties' names (Hillary Clinton -> Democrat Proportion, Donald Trump -> Republican Proportion).
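For example, Alabama's normalized Democrat proportion works out to 34.7 / (34.7 + 62.7) ≈ 0.3563, which matches the first row of the tidied table below.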
election_results_2016_tidy = election_results_2016_raw.copy()
election_results_2016_tidy['Democrat Proportion'] = election_results_2016_tidy['Hillary Clinton'] / (election_results_2016_tidy['Hillary Clinton'] + election_results_2016_tidy['Donald Trump'])
election_results_2016_tidy['Republican Proportion'] = election_results_2016_tidy['Donald Trump'] / (election_results_2016_tidy['Hillary Clinton'] + election_results_2016_tidy['Donald Trump'])
election_results_2016_tidy = election_results_2016_tidy.drop(['Hillary Clinton','Donald Trump'], axis = 1)
election_results_2016_tidy.sort_values(by='State').head(10)
 | State | Democrat Proportion | Republican Proportion
---|---|---|---
0 | Alabama | 0.356263 | 0.643737 |
1 | Alaska | 0.415929 | 0.584071 |
2 | Arizona | 0.481481 | 0.518519 |
3 | Arkansas | 0.357370 | 0.642630 |
4 | California | 0.661359 | 0.338641 |
5 | Colorado | 0.526776 | 0.473224 |
6 | Connecticut | 0.571578 | 0.428422 |
7 | Delaware | 0.560336 | 0.439664 |
8 | District of Columbia | 0.956701 | 0.043299 |
9 | Florida | 0.493802 | 0.506198 |
Great! Now our data is normalized and formatted in a way that will be convenient for us as we work through this project.
Link: https://www.kaggle.com/datasets/paultimothymooney/percent-voting-for-democratic-party-by-state
This dataset includes information about how each US state voted in the 2020 presidential election, by political party (Democrat or Republican).
The dataset includes 6 variables; however, 3 are redundant (duplicate state identifiers). The variables of interest to us are 'state', 'DEM', and 'REP'.
This is the second presidential election dataset we will use to investigate the potential effects of gun violence on American voting trends; the other being the 2016 election results dataset from above.
election_results_2020_raw = pd.read_csv("democratic_vs_republican_votes_by_usa_state_2020.csv")
election_results_2020_raw.dtypes #Check dtypes of each column
election_results_2020_raw.head(10)
 | state | DEM | REP | usa_state | usa_state_code | percent_democrat
---|---|---|---|---|---|---
0 | Alabama | 843473 | 1434159 | Alabama | AL | 37.032892 |
1 | Alaska | 45758 | 80999 | Alaska | AK | 36.098993 |
2 | Arizona | 1643664 | 1626679 | Arizona | AZ | 50.259682 |
3 | Arkansas | 420985 | 761251 | Arkansas | AR | 35.609218 |
4 | California | 9315259 | 4812735 | California | CA | 65.934760 |
5 | Colorado | 1753416 | 1335253 | Colorado | CO | 56.769307 |
6 | Connecticut | 1059252 | 699079 | Connecticut | CT | 60.241900 |
7 | Delaware | 295413 | 199857 | Delaware | DE | 59.646859 |
8 | District of Columbia | 258561 | 14449 | District of Columbia | DC | 94.707520 |
9 | Florida | 5294767 | 5667834 | Florida | FL | 48.298456 |
The dtypes are all correct, so we are ready to proceed!
Now, let's normalize the vote proportions of Democrat and Republican votes so they add up to 100% (because we do not care about the immaterial portion of votes for other candidates). Let's also only keep the 'state' variable as an identifier for now.
election_results_2020_tidy = election_results_2020_raw.copy()
election_results_2020_tidy['Democrat Proportion'] = election_results_2020_tidy['DEM'] / (election_results_2020_tidy['DEM'] + election_results_2020_tidy['REP'])
election_results_2020_tidy['Republican Proportion'] = election_results_2020_tidy['REP'] / (election_results_2020_tidy['DEM'] + election_results_2020_tidy['REP'])
election_results_2020_tidy = election_results_2020_tidy.drop(['DEM','REP','usa_state','usa_state_code','percent_democrat'], axis = 1)
election_results_2020_tidy.sort_values(by='state').head(10)
 | state | Democrat Proportion | Republican Proportion
---|---|---|---
0 | Alabama | 0.370329 | 0.629671 |
1 | Alaska | 0.360990 | 0.639010 |
2 | Arizona | 0.502597 | 0.497403 |
3 | Arkansas | 0.356092 | 0.643908 |
4 | California | 0.659348 | 0.340652 |
5 | Colorado | 0.567693 | 0.432307 |
6 | Connecticut | 0.602419 | 0.397581 |
7 | Delaware | 0.596469 | 0.403531 |
8 | District of Columbia | 0.947075 | 0.052925 |
9 | Florida | 0.482985 | 0.517015 |
Now, before we continue with our modeling, let's conduct some exploratory data analysis so we can start to understand the data we are working with. First, let's visualize the states with the largest populations.
#Let's visualize the states with the largest population
graph_vals = us_populations_tidy.copy()
graph_vals = graph_vals[['state','Population']]
graph_vals = graph_vals.sort_values(by='Population', ascending = False)
graph_vals['Population'] = graph_vals['Population'] / 1e6
graph_vals = graph_vals.set_index('state').head(10)
graph_vals.plot.bar()
plt.ylabel('Population (in millions)')
plt.xlabel('State')
plt.title('Top 10 US States by Population')
plt.xticks(rotation=45)
plt.show()
This visualization helps show why California and Texas have so much gun violence (per the dataset): they are WAY more populated. Interestingly, Illinois really isn't nearly as large as those two states, but it had a lot of gun violence. That's an alarming observation!
Now, let's create an interesting graphic. Let's see a breakdown of the number of mass shooting deaths per quarter.
#Let's visualize the # of gun deaths per quarter
graph_vals = mass_shootings_raw.copy()
graph_vals = graph_vals[['Incident Date','# Killed']]
graph_vals['Incident Quarter'] = graph_vals['Incident Date'].dt.to_period('Q') #Bucket incidents into calendar quarters
graph_vals = graph_vals.groupby('Incident Quarter')[['# Killed']].sum() #Sum only the numeric column (avoids summing the datetime column)
graph_vals.plot.bar()
plt.ylabel('# Killed')
plt.xlabel('Time Period')
plt.title('Mass Shooting Deaths by Quarter')
plt.xticks(rotation=45)
plt.show()
graph_vals.mean()/3 #Calculate average number of mass shooting deaths per month
# Killed    35.033333
dtype: float64
It is interesting that there seems to be some cyclicality to the graph. I wonder why? Also, the naked eye suggests that the trend slopes upward, suggesting that gun violence deaths attributed to mass shootings are on the rise. That's scary. I wonder if this trend has impacted voting trends. After all, there being 35 unnecessary, human-caused deaths per month attributed to mass shootings is certainly eye-catching.
Now, let's look at the prevalence of mass shooting deaths per state on a per capita basis.
graph_vals = mass_shootings_clean_normalized.copy()
graph_vals = graph_vals[['state','# Killed per Capita']]
graph_vals.set_index('state', inplace=True)
graph_vals = graph_vals.sort_values(by='# Killed per Capita', ascending = False)
graph_vals.plot(kind='bar', figsize=(15, 7))
plt.title('Mass Shootings Deaths per Capita by State')
plt.xlabel('State')
plt.ylabel('# Killed per Capita')
plt.legend(title='Metrics', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
Interesting. Nevada is #1 (probably due to the tragic mass shooting at the music festival in Las Vegas that made national news). A harrowing look at that shooting is available here: https://www.cnn.com/interactive/2017/10/us/las-vegas-shooting-cnnphotos/
Now, if we only include shootings with 8+ deaths, how does this visualization change?
graph_vals = mass_shootings_clean_normalized_minimum8.copy()
graph_vals = graph_vals[['state','# Killed per Capita']]
graph_vals.set_index('state', inplace=True)
graph_vals = graph_vals.sort_values(by='# Killed per Capita', ascending = False)
graph_vals.plot(kind='bar', figsize=(15, 7))
plt.title('Mass Shootings Deaths per Capita by State, Minimum 8 Deaths')
plt.xlabel('State')
plt.ylabel('# Killed per Capita')
plt.legend(title='Metrics', bbox_to_anchor=(1.05, 1), loc='upper left')
plt.tight_layout()
plt.show()
Nevada is now #1 by a LONG SHOT (not surprising). It will be interesting to see if our decision on where to set the threshold for what counts as a mass shooting has any impact on our findings.
Now, let's take a look at some interesting graphics pertaining to how states voted in the 2016 Presidential Election. The following visualization shows the split of Democrat and Republican voting proportions in the election, starting with the highest Democrat proportion (Washington D.C.) and ending with the highest Republican proportion (Wyoming). On its own, this data does not tell us a whole lot more than the surface-level political preferences of each state, but this data will become very useful as we progress through this project!
# Sort by Democrat voting proportion
election_results_2016_tidy.sort_values(by='Democrat Proportion', ascending=False, inplace=True)
# Create a stacked bar chart
plt.figure(figsize=(15, 8))
plt.bar(election_results_2016_tidy['State'], election_results_2016_tidy['Democrat Proportion'], label='Democrat Vote Proportion', color='blue')
plt.bar(election_results_2016_tidy['State'], election_results_2016_tidy['Republican Proportion'], bottom=election_results_2016_tidy['Democrat Proportion'], label='Republican Vote Proportion', color='red')
plt.xlabel('State')
plt.ylabel('Vote Proportion')
plt.title('2016 Presidential Election Results by State')
plt.xticks(rotation=90)
plt.legend()
plt.show()
Now, let's use the same visualization technique as before to show the split of Democrat and Republican voting proportions in the 2020 Presidential Election:
# Sort by Democrat voting proportion
election_results_2020_tidy.sort_values(by='Democrat Proportion', ascending=False, inplace=True)
# Create a stacked bar chart
plt.figure(figsize=(15, 8))
plt.bar(election_results_2020_tidy['state'], election_results_2020_tidy['Democrat Proportion'], label='Democrat Vote Proportion', color='blue')
plt.bar(election_results_2020_tidy['state'], election_results_2020_tidy['Republican Proportion'], bottom=election_results_2020_tidy['Democrat Proportion'], label='Republican Vote Proportion', color='red')
plt.xlabel('State')
plt.ylabel('Vote Proportion')
plt.title('2020 Presidential Election Results by State')
plt.xticks(rotation=90)
plt.legend()
plt.show()
Given the investigative question set forth at the beginning of this project, as well as the data we have looked at thus far, my hypothesis is that an increase in the prevalence of mass gun violence in a state between 2016 and 2020 will lead to a greater shift toward the Democratic Party in the 2020 Presidential Election.
Now that we have loaded and tidied our datasets, let's start to combine them so we can investigate the connection between gun violence/mass shooting prevalence and voting trends.
First, let's combine the election results datasets so we can calculate the change in voting proportions from 2016 to 2020.
election_results_df = election_results_2016_tidy.merge(election_results_2020_tidy, how = 'outer', left_on='State', right_on = 'state', suffixes=('_2016', '_2020'))
election_results_df = election_results_df.drop(['state'], axis=1)
election_results_df.head(10)
 | State | Democrat Proportion_2016 | Republican Proportion_2016 | Democrat Proportion_2020 | Republican Proportion_2020
---|---|---|---|---|---
0 | District of Columbia | 0.956701 | 0.043299 | 0.947075 | 0.052925 |
1 | Hawaii | 0.674620 | 0.325380 | 0.650426 | 0.349574 |
2 | California | 0.661359 | 0.338641 | 0.659348 | 0.340652 |
3 | Vermont | 0.652081 | 0.347919 | 0.671562 | 0.328438 |
4 | Massachusetts | 0.646872 | 0.353128 | 0.667834 | 0.332166 |
5 | Maryland | 0.639875 | 0.360125 | 0.648533 | 0.351467 |
6 | New York | 0.617861 | 0.382139 | 0.564811 | 0.435189 |
7 | Illinois | 0.590095 | 0.409905 | 0.579234 | 0.420766 |
8 | Washington | 0.587662 | 0.412338 | 0.603309 | 0.396691 |
9 | Rhode Island | 0.582983 | 0.417017 | 0.603370 | 0.396630 |
Cool! Now, let's investigate this newly created dataframe. Let's calculate the change in Democrat and Republican voting proportions between 2016 and 2020.
election_results_df['Democrat Proportion Change'] = election_results_df['Democrat Proportion_2020'] - election_results_df['Democrat Proportion_2016']
election_results_df['Republican Proportion Change'] = election_results_df['Republican Proportion_2020'] - election_results_df['Republican Proportion_2016']
Now, let's take a look at the five states with the highest increase in the proportion of Democrat votes, followed by the five states with the highest increase in the proportion of Republican votes.
election_results_df.sort_values(by='Democrat Proportion Change', ascending = False).head(5)
 | State | Democrat Proportion_2016 | Republican Proportion_2016 | Democrat Proportion_2020 | Republican Proportion_2020 | Democrat Proportion Change | Republican Proportion Change
---|---|---|---|---|---|---|---
16 | Colorado | 0.526776 | 0.473224 | 0.567693 | 0.432307 | 0.040917 | -0.040917 |
13 | Delaware | 0.560336 | 0.439664 | 0.596469 | 0.403531 | 0.036133 | -0.036133 |
40 | Nebraska | 0.364793 | 0.635207 | 0.400725 | 0.599275 | 0.035932 | -0.035932 |
17 | Maine | 0.516129 | 0.483871 | 0.551852 | 0.448148 | 0.035723 | -0.035723 |
20 | New Hampshire | 0.502110 | 0.497890 | 0.536212 | 0.463788 | 0.034102 | -0.034102 |
election_results_df.sort_values(by='Republican Proportion Change', ascending = False).head(5)
 | State | Democrat Proportion_2016 | Republican Proportion_2016 | Democrat Proportion_2020 | Republican Proportion_2020 | Democrat Proportion Change | Republican Proportion Change
---|---|---|---|---|---|---|---
32 | Alaska | 0.415929 | 0.584071 | 0.360990 | 0.639010 | -0.054939 | 0.054939 |
6 | New York | 0.617861 | 0.382139 | 0.564811 | 0.435189 | -0.053050 | 0.053050 |
1 | Hawaii | 0.674620 | 0.325380 | 0.650426 | 0.349574 | -0.024195 | 0.024195 |
33 | Mississippi | 0.409184 | 0.590816 | 0.395477 | 0.604523 | -0.013706 | 0.013706 |
7 | Illinois | 0.590095 | 0.409905 | 0.579234 | 0.420766 | -0.010861 | 0.010861 |
Now, let's visualize the change in Democrat vote proportion in each state:
election_results_df = election_results_df.sort_values(by='Democrat Proportion Change', ascending = False)
states = election_results_df['State']
dem_change = election_results_df['Democrat Proportion Change']
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))
# Plotting Democrat Proportion Change
sns.barplot(x=states, y=dem_change, ax=axes, color="blue")
axes.set_title('Change in Democrat Voting Proportion (2016-2020)')
axes.set_ylabel('Proportion Change')
axes.set_xlabel('State')
axes.set_xticklabels(states, rotation=90)
plt.tight_layout()
plt.show()
Unsurprisingly, most states saw an increase in the proportion of votes cast for the Democratic party; this should be expected given the outcomes of the 2016 election (Trump beats Clinton) and 2020 election (Biden beats Trump).
However, this prompts an interesting predicament regarding what data we truly care about. If we are trying to figure out the impacts of gun violence on voting, then logically we only care about relative changes in voting proportions (relative to other states). In other words, we don't care that the entire country shifted to voting more Democratic--we care about which states shifted MORE and which shifted LESS.
Thus, let us back out the "overall voting" factor: we will calculate the population-weighted average change in voting proportion, and subtract that value from each state's change in proportion, leaving us with the proportion shifts beyond what is expected based on the overarching national voting trend. We will use the weighted average rather than the arithmetic average because each state, logically, should not be equally weighted when calculating a national factor.
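Concretely, if $\Delta_s$ is state $s$'s raw change in Democrat proportion and $w_s = \mathrm{Pop}_s / \sum_j \mathrm{Pop}_j$ is its population weight, the adjustment computed below is $\Delta_s^{\text{adj}} = \Delta_s - \sum_i w_i \Delta_i$.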
#Calculate the overall voting factor
election_results_overallfactor = election_results_df.merge(us_populations_tidy, how='left', left_on='State', right_on='state')
election_results_overallfactor = election_results_overallfactor[['Democrat Proportion Change','Population']] #Keep only the columns needed for the weighted average
total_population = election_results_overallfactor['Population'].sum()
election_results_overallfactor['Contribution to Overall Factor'] = election_results_overallfactor['Population'] / total_population
election_results_overallfactor['Contribution to Overall Factor'] = election_results_overallfactor['Contribution to Overall Factor'] * election_results_overallfactor['Democrat Proportion Change']
election_results_overallfactor = election_results_overallfactor[['Contribution to Overall Factor']].sum()
election_results_overallfactor
Contribution to Overall Factor    0.007143
dtype: float64
Now that we have calculated the "overall voting" factor as 0.0071 (0.71 percentage points), meaning that, on average (population-weighted), a state's Democrat voting proportion increased by 0.71 percentage points, we can back this out of our original calculation of the change in Democrat and Republican voting proportions.
#Create columns to store our new adjusted calculations
election_results_df['Democrat Proportion Change Adjusted'] = election_results_df['Democrat Proportion Change'] - election_results_overallfactor.iloc[0]
election_results_df['Republican Proportion Change Adjusted'] = election_results_df['Republican Proportion Change'] + election_results_overallfactor.iloc[0]
#Clean up the DF by dropping unnecessary columns
election_results_df_clean = election_results_df[['State','Democrat Proportion Change Adjusted','Republican Proportion Change Adjusted']]
election_results_df_clean.head(10)
 | State | Democrat Proportion Change Adjusted | Republican Proportion Change Adjusted
---|---|---|---
16 | Colorado | 0.033774 | -0.033774 |
13 | Delaware | 0.028990 | -0.028990 |
40 | Nebraska | 0.028789 | -0.028789 |
17 | Maine | 0.028580 | -0.028580 |
20 | New Hampshire | 0.026959 | -0.026959 |
37 | Kansas | 0.026304 | -0.026304 |
50 | Wyoming | 0.025055 | -0.025055 |
11 | Connecticut | 0.023698 | -0.023698 |
38 | Montana | 0.020595 | -0.020595 |
19 | Minnesota | 0.020438 | -0.020438 |
Cool, now we have a clean dataframe with the variables we want (pertaining to the 2016 and 2020 US Presidential Elections).
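As a quick sanity check (a sketch I am adding here, not part of the original pipeline), the population-weighted mean of the adjusted changes should now be approximately zero, since we subtracted exactly that weighted mean:
#Sanity check: the population-weighted mean of the adjusted changes should be ~0
check = election_results_df_clean.merge(us_populations_tidy, how='left', left_on='State', right_on='state')
weights = check['Population'] / check['Population'].sum()
print((weights * check['Democrat Proportion Change Adjusted']).sum()) #Expect a value very close to 0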
Now, let's recreate that same visualization of the change in Democrat voting proportion, this time with the overall factor included to adjust the values.
election_results_df_clean = election_results_df_clean.sort_values(by='Democrat Proportion Change Adjusted', ascending = False)
states = election_results_df_clean['State']
dem_change = election_results_df_clean['Democrat Proportion Change Adjusted']
fig, axes = plt.subplots(nrows=1, ncols=1, figsize=(10, 8))
# Plotting Democrat Proportion Change
sns.barplot(x=states, y=dem_change, ax=axes, color="blue")
axes.set_title('Change in Democrat Voting Proportion, Adjusted (2016-2020)')
axes.set_ylabel('Proportion Change')
axes.set_xlabel('State')
axes.set_xticklabels(states, rotation=90)
plt.tight_layout()
plt.show()
So, very similar, but now more indicative of changes in a state (rather than at a national level).
Now that most of our data is assembled, let's think about two ways we could possibly proceed:
Correlation Analysis: We could proceed with a correlation analysis, which would calculate the correlations between the (adjusted) change in Democrat/Republican voting proportion in a state and the per capita mass shooting deaths (or injuries) in that state. This approach would be a fairly high-level way to analyze whether there appears to be a linear correlation between the two statistics. This type of analysis could allow us to draw conclusions regarding the impact of mass shooting casualties on changes in voting trends, but would not be very useful for generating future predictions.
Regression Analysis: Another possible model we could build would be a regression model that uses the (adjusted) change in voting trends (either Democrat or Republican, since the two are mirror images) as the dependent variable, and the # of deaths per capita, # of injuries per capita, and potentially other statistics we could find (to reduce omitted variable bias) as independent variables. Such a model would allow us to check for statistical significance, as well as make future predictions (assuming our model has any predictive power, in the statistical sense).
Ultimately, I decided to pursue regression analysis as opposed to correlation analysis because it offers greater specificity regarding the relationship between variables. Namely, regression analysis gives us a model with greater predictive power.
Given the available data in our datasets, the regression will use the per capita mass shooting statistics (from between the 2016 and 2020 election dates) and the adjusted change in Democrat voting proportion in each state. Thus, the regression will have up to 51 observations (50 states + Washington D.C.; in practice, states with no recorded mass shootings in the window drop out of the merged data). The decision to use the absolute prevalence of mass shooting casualties (as opposed to calculating the change in shooting prevalence from 2012-2016 to 2016-2020) was made on the basis of the data available to us. I believe this simplification is okay because our dependent variable, the change in Democrat voting proportion, implicitly "bakes in" any mass shootings that occurred prior to the 2016 election (those shootings would have theoretically impacted the Democrat vote share in 2016); thus, any changes from 2016-2020 (above and beyond those expected based on the country's change in preference at large) should be related strictly to mass shootings that occurred between 2016 and 2020, not to the change in prevalence from a previous period.
All the following analysis uses the mass shootings dataframe that includes all shootings...
First, we need to create a DataFrame that combines all our dependent and independent features:
regression_data = mass_shootings_clean_normalized.merge(election_results_df_clean,how = 'left', left_on='state', right_on = 'State')
regression_data = regression_data[['State','# Killed per Capita','# Injured per Capita','# Killed or Injured per Capita','Democrat Proportion Change Adjusted']] #Keep only the columns we want, reordered so the dependent variable is last
regression_data.head(10)
 | State | # Killed per Capita | # Injured per Capita | # Killed or Injured per Capita | Democrat Proportion Change Adjusted
---|---|---|---|---|---
0 | Alabama | 0.000010 | 0.000037 | 0.000047 | 0.006923 |
1 | Alaska | 0.000001 | 0.000011 | 0.000012 | -0.062082 |
2 | Arizona | 0.000003 | 0.000008 | 0.000011 | 0.013972 |
3 | Arkansas | 0.000006 | 0.000040 | 0.000045 | -0.008421 |
4 | California | 0.000005 | 0.000017 | 0.000021 | -0.009154 |
5 | Colorado | 0.000004 | 0.000018 | 0.000022 | 0.033774 |
6 | Connecticut | 0.000003 | 0.000016 | 0.000019 | 0.023698 |
7 | Delaware | 0.000008 | 0.000025 | 0.000033 | 0.028990 |
8 | District of Columbia | 0.000019 | 0.000190 | 0.000209 | -0.016769 |
9 | Florida | 0.000005 | 0.000019 | 0.000024 | -0.017960 |
Great! Now we have a centralized dataframe that has all our independent and dependent variables combined in a clean, easy-to-use way!
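Before the regressions, a quick correlation pass (the high-level approach described earlier) is a cheap sanity check; a minimal sketch:
#Pairwise Pearson correlations between the per capita shooting metrics and the adjusted vote-share change
corr_cols = ['# Killed per Capita','# Injured per Capita','# Killed or Injured per Capita','Democrat Proportion Change Adjusted']
regression_data[corr_cols].corr()['Democrat Proportion Change Adjusted']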
Now, let's run some single-variable regressions to get an early idea of how our dependent and independent datapoints are related.
x_features = ['# Killed per Capita', '# Injured per Capita', '# Killed or Injured per Capita']
#From Demo 8
def regress_with_stats(df_penrose, observations):
fig, ax = plt.subplots(1, 3, figsize=(15,5), sharex=False)
for i,o in enumerate(observations):
slope, intercept, r_value, p_value, std_err = stats.linregress(df_penrose[o],
df_penrose['Democrat Proportion Change Adjusted'])
# Pack these into a nice title
diag_str = "p-value=%.1g\nr-value=%.3f\nstd err.=%.3f\nslope=%.3f\nintercept=%.3f" % (p_value, r_value, std_err, slope, intercept)
df_penrose.plot.scatter(x=o, y='Democrat Proportion Change Adjusted', title=diag_str, ax=ax[i])
y_pred = df_penrose[o] * slope + intercept
# Make points and line
pts = np.linspace(df_penrose[o].min(), df_penrose[o].max(), 500)
line = slope * pts + intercept
ax[i].plot(pts, line, lw=1, color='red')
regress_with_stats(regression_data, x_features)
That is very interesting--it appears that all our independent features (the ones relating to mass shootings) are negatively related to the increase in vote share Democrats received between the 2016 and 2020 Presidential elections. In other words, states that had a greater per capita prevalence of mass shooting casualties seem to have had a greater DECREASE in Democrat voting share.
However, let's run an actual multi-variable linear regression before we expand too much on this phenomenon. Let's run a regression with '# Killed per Capita' and '# Injured per Capita' as our two independent variables. I will not be scaling these independent features because they are already on comparable scales (directly related per capita numbers), so I do not see the need for scaling.
df_ind = regression_data[['# Killed per Capita','# Injured per Capita']]
df_target = regression_data['Democrat Proportion Change Adjusted']
X = df_ind
y = df_target
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model
# Print out the statistics
model.summary()
Dep. Variable: | Democrat Proportion Change Adjusted | R-squared (uncentered): | 0.083 |
---|---|---|---|
Model: | OLS | Adj. R-squared (uncentered): | 0.041 |
Method: | Least Squares | F-statistic: | 1.980 |
Date: | Sun, 10 Dec 2023 | Prob (F-statistic): | 0.150 |
Time: | 16:54:44 | Log-Likelihood: | 115.91 |
No. Observations: | 46 | AIC: | -227.8 |
Df Residuals: | 44 | BIC: | -224.2 |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
 | coef | std err | t | P>\|t\| | [0.025 | 0.975]
---|---|---|---|---|---|---
# Killed per Capita | 1653.4746 | 842.391 | 1.963 | 0.056 | -44.253 | 3351.202 |
# Injured per Capita | -273.8772 | 145.056 | -1.888 | 0.066 | -566.218 | 18.463 |
Omnibus: | 23.754 | Durbin-Watson: | 1.900 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 38.468 |
Skew: | -1.588 | Prob(JB): | 4.43e-09 |
Kurtosis: | 6.159 | Cond. No. | 12.7 |
Interesting. So our multi-variable linear regression model assigns a positive coefficient to '# Killed per Capita' and a negative coefficient to '# Injured per Capita'. At surface level, this does not seem to make much real-world sense. It fits our hypothesis that an increase in # Killed per Capita would increase the Democrat vote share, but it does not seem to follow that # Injured per Capita would decrease that vote share.
The coefficients on both independent features are close to statistical significance at the 5% level (P = 0.056 and P = 0.066), which is encouraging.
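A side note on the model setup: sm.OLS(y, X) with no constant term fits a regression through the origin, which is why the summary reports an 'uncentered' R-squared. For comparison only, a sketch of the intercept variant (not the model used in this project):
#Variant with an intercept (for comparison only): sm.add_constant prepends a column of ones,
#so the fit becomes y = b0 + b1*x1 + b2*x2 + e
X_const = sm.add_constant(regression_data[['# Killed per Capita','# Injured per Capita']])
model_const = sm.OLS(regression_data['Democrat Proportion Change Adjusted'], X_const).fit()
print(model_const.summary())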
Thinking abstractly, I suppose it is possible that mass shooting deaths are usually more memorable (and the statistics related to deaths are more often broadcast in the media) which could have an impact on Democrat vote share, while the # Injured may be more of a byproduct of states where guns are more widely circulated and possessed (which tend to be more Republican states), and those states may have become less Democrat leaning as a result of increased political polarization nationwide.
To try to get to the bottom of this, let's run a single-variable linear regression with '# Killed or Injured per Capita' as the lone independent variable.
df_ind = regression_data[['# Killed or Injured per Capita']]
df_target = regression_data['Democrat Proportion Change Adjusted']
X = df_ind
y = df_target
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model
# Print out the statistics
model.summary()
Dep. Variable: | Democrat Proportion Change Adjusted | R-squared (uncentered): | 0.001 |
---|---|---|---|
Model: | OLS | Adj. R-squared (uncentered): | -0.021 |
Method: | Least Squares | F-statistic: | 0.03366 |
Date: | Sun, 10 Dec 2023 | Prob (F-statistic): | 0.855 |
Time: | 16:54:44 | Log-Likelihood: | 113.94 |
No. Observations: | 46 | AIC: | -225.9 |
Df Residuals: | 45 | BIC: | -224.1 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
 | coef | std err | t | P>\|t\| | [0.025 | 0.975]
---|---|---|---|---|---|---
# Killed or Injured per Capita | -11.1120 | 60.564 | -0.183 | 0.855 | -133.094 | 110.870 |
Omnibus: | 27.820 | Durbin-Watson: | 1.878 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 53.646 |
Skew: | -1.754 | Prob(JB): | 2.24e-12 |
Kurtosis: | 6.960 | Cond. No. | 1.00 |
Okay, so this regression has a negative coefficient, but is hardly significant (P = 0.855) so we are going to disregard this regression in terms of coming to a conclusion.
Now, let's run a single-variable regression using only '# Killed per Capita' as the independent feature.
df_ind = regression_data[['# Killed per Capita']]
df_target = regression_data['Democrat Proportion Change Adjusted']
X = df_ind
y = df_target
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model
# Print out the statistics
model.summary()
Dep. Variable: | Democrat Proportion Change Adjusted | R-squared (uncentered): | 0.008 |
---|---|---|---|
Model: | OLS | Adj. R-squared (uncentered): | -0.014 |
Method: | Least Squares | F-statistic: | 0.3734 |
Date: | Sun, 10 Dec 2023 | Prob (F-statistic): | 0.544 |
Time: | 16:54:44 | Log-Likelihood: | 114.12 |
No. Observations: | 46 | AIC: | -226.2 |
Df Residuals: | 45 | BIC: | -224.4 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
 | coef | std err | t | P>\|t\| | [0.025 | 0.975]
---|---|---|---|---|---|---
# Killed per Capita | 247.3217 | 404.723 | 0.611 | 0.544 | -567.832 | 1062.475 |
Omnibus: | 23.555 | Durbin-Watson: | 1.988 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 37.667 |
Skew: | -1.584 | Prob(JB): | 6.62e-09 |
Kurtosis: | 6.100 | Cond. No. | 1.00 |
So, the coefficient on # Killed per Capita is indeed positive, but the statistical significance is still lacking (P = 0.544).
Thus, I cannot confidently draw any conclusions from this single-variable regression either.
Given the results of both of these single-variable linear regressions, I am disappointed in the lack of viable conclusions I can draw. However, the original multi-variable analysis (in which both coefficients were close to statistical significance) continues to intrigue me, as does the abstract explanation I tentatively put forth in the subsequent commentary.
My desire to build off that model (and its issues) naturally follows...
What if we added the original Republican proportion as an independent feature alongside '# Killed per Capita' to try to assess whether our uncompelling results are attributable to greater polarization in certain states (which may inadvertently be captured by the '# Injured per Capita' feature in the original multi-variable linear regression)?
To do so, let's add that feature into our 'regression_data' dataframe. Further, since we now have a variable that does not share the same (or an extremely similar) distribution as '# Killed per Capita', let's scale our features.
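For reference, z-score standardization rescales each feature to mean 0 and standard deviation 1 via $z_i = (x_i - \bar{x}) / s_x$, which puts the regression coefficients on comparable scales.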
from scipy.stats import zscore
#Add 2016 Republican Proportion to regression data dataframe
regression_data_new = regression_data.merge(election_results_2016_tidy, how = 'left', on='State')
regression_data_new = regression_data_new[['# Killed per Capita','# Injured per Capita','Democrat Proportion Change Adjusted','Republican Proportion']]
regression_data_new = regression_data_new.rename(columns = {'Republican Proportion' : '2016 Republican Proportion'})
regression_data_scaled = regression_data_new.apply(zscore) #Scale our regression data using Z-Score standardization
regression_data_scaled.head(10)
 | # Killed per Capita | # Injured per Capita | Democrat Proportion Change Adjusted | 2016 Republican Proportion
---|---|---|---|---
0 | 0.968178 | 0.304632 | 0.048431 | 0.942122 |
1 | -0.918970 | -0.439247 | -3.502862 | 0.460929 |
2 | -0.647380 | -0.520192 | 0.411214 | -0.067733 |
3 | -0.019845 | 0.397921 | -0.741233 | 0.933193 |
4 | -0.190833 | -0.277030 | -0.778971 | -1.518397 |
5 | -0.405121 | -0.223702 | 1.430293 | -0.433021 |
6 | -0.622933 | -0.281479 | 0.911743 | -0.794337 |
7 | 0.491780 | -0.024243 | 1.184073 | -0.703673 |
8 | 2.753904 | 4.741135 | -1.170850 | -3.900255 |
9 | -0.113279 | -0.216209 | -1.232157 | -0.167092 |
Now we have data that is properly formatted and scaled to run our regression analysis:
df_ind = regression_data_scaled[['# Killed per Capita', '2016 Republican Proportion']]
df_target = regression_data_scaled['Democrat Proportion Change Adjusted']
X = df_ind
y = df_target
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model
# Print out the statistics
model.summary()
Dep. Variable: | Democrat Proportion Change Adjusted | R-squared (uncentered): | 0.099 |
---|---|---|---|
Model: | OLS | Adj. R-squared (uncentered): | 0.058 |
Method: | Least Squares | F-statistic: | 2.405 |
Date: | Sun, 10 Dec 2023 | Prob (F-statistic): | 0.102 |
Time: | 16:54:44 | Log-Likelihood: | -62.885 |
No. Observations: | 46 | AIC: | 129.8 |
Df Residuals: | 44 | BIC: | 133.4 |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
 | coef | std err | t | P>\|t\| | [0.025 | 0.975]
---|---|---|---|---|---|---
# Killed per Capita | -0.1592 | 0.149 | -1.070 | 0.291 | -0.459 | 0.141 |
2016 Republican Proportion | 0.2304 | 0.149 | 1.548 | 0.129 | -0.070 | 0.530 |
Omnibus: | 35.854 | Durbin-Watson: | 2.022 |
---|---|---|---|
Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 99.764 |
Skew: | -2.073 | Prob(JB): | 2.17e-22 |
Kurtosis: | 8.905 | Cond. No. | 1.33 |
Frustrating! Now the coefficient on '# Killed per Capita' is negative (suggesting greater mass gun violence predicts a decrease in Democrat vote share), though the statistical significance is greater than in our previous model that did not include the 2016 Republican vote proportion (P = 0.291 versus P = 0.544).
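One way to probe this sign flip (a quick check I am adding, not part of the original analysis) is to look at how correlated the two regressors are, since collinearity can swing coefficient signs and inflate standard errors:
#Correlation between the two standardized regressors; a strong correlation
#would help explain the unstable sign on '# Killed per Capita'
regression_data_scaled[['# Killed per Capita','2016 Republican Proportion']].corr()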
NOW, let's run these regressions again, but only look at mass shootings with 8 or more deaths (trying to separate out indiscriminate mass shootings from more run-of-the-mill shootings).
regression_data = mass_shootings_clean_normalized_minimum8.merge(election_results_df_clean,how = 'left', left_on='state', right_on = 'State')
regression_data = regression_data[['State','# Killed per Capita','# Injured per Capita','# Killed or Injured per Capita','Democrat Proportion Change Adjusted']] #Keep only the columns we want, reordered so the dependent variable is last
regression_data.head(10)
 | State | # Killed per Capita | # Injured per Capita | # Killed or Injured per Capita | Democrat Proportion Change Adjusted
---|---|---|---|---|---
0 | California | 3.287958e-07 | 5.058396e-08 | 3.793797e-07 | -0.009154 |
1 | Florida | 7.892958e-07 | 7.892958e-07 | 1.578592e-06 | -0.017960 |
2 | Mississippi | 2.701535e-06 | 3.376919e-07 | 3.039227e-06 | -0.020849 |
3 | Nevada | 1.900397e-05 | 1.420466e-04 | 1.610506e-04 | -0.006744 |
4 | Ohio | 8.474973e-07 | 1.440745e-06 | 2.288243e-06 | -0.005255 |
5 | Pennsylvania | 8.459781e-07 | 5.383497e-07 | 1.384328e-06 | -0.000096 |
6 | Texas | 2.641917e-06 | 2.744849e-06 | 5.386765e-06 | 0.010151 |
7 | Virginia | 1.506130e-06 | 4.634246e-07 | 1.969555e-06 | 0.013921 |
Now, let's run some single-variable regressions (reusing the regress_with_stats helper defined above) to get an early idea of how our dependent and independent datapoints are related.
regress_with_stats(regression_data, x_features)
Again, it appears that all our independent features (the ones relating to mass shootings) are negatively related to the increase in vote share Democrats received between the 2016 and 2020 Presidential elections. In other words, states that had a greater per capita prevalence of mass shooting casualties seem to have had a greater DECREASE in Democrat voting share.
Let's run a regression with '# Killed per Capita' as our independent variable.
df_ind = regression_data[['# Killed per Capita']]
df_target = regression_data['Democrat Proportion Change Adjusted']
X = df_ind
y = df_target
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model
# Print out the statistics
model.summary()
/usr/local/lib/python3.10/dist-packages/scipy/stats/_stats_py.py:1806: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=8
  warnings.warn("kurtosistest only valid for n>=20 ... continuing ")
Dep. Variable: | Democrat Proportion Change Adjusted | R-squared (uncentered): | 0.055 |
---|---|---|---|
Model: | OLS | Adj. R-squared (uncentered): | -0.080 |
Method: | Least Squares | F-statistic: | 0.4040 |
Date: | Sun, 10 Dec 2023 | Prob (F-statistic): | 0.545 |
Time: | 16:54:47 | Log-Likelihood: | 24.056 |
No. Observations: | 8 | AIC: | -46.11 |
Df Residuals: | 7 | BIC: | -46.03 |
Df Model: | 1 | ||
Covariance Type: | nonrobust |
 | coef | std err | t | P>\|t\| | [0.025 | 0.975]
---|---|---|---|---|---|---
# Killed per Capita | -417.0750 | 656.184 | -0.636 | 0.545 | -1968.703 | 1134.553 |
Omnibus: | 0.500 | Durbin-Watson: | 0.621 |
---|---|---|---|
Prob(Omnibus): | 0.779 | Jarque-Bera (JB): | 0.465 |
Skew: | 0.031 | Prob(JB): | 0.793 |
Kurtosis: | 1.821 | Cond. No. | 1.00 |
So, only including mass shootings with 8+ deaths, the coefficient on '# Killed per Capita' is negative, but the P-value is far from statistically significant (P = 0.545).
Now, let's again add in the 2016 Republican vote share to make sure we do not ignore potential effects of polarization.
from scipy.stats import zscore
#Add 2016 Republican Proportion to regression data dataframe
regression_data_new = regression_data.merge(election_results_2016_tidy, how = 'left', on='State')
regression_data_new = regression_data_new[['# Killed per Capita','# Injured per Capita','Democrat Proportion Change Adjusted','Republican Proportion']]
regression_data_new = regression_data_new.rename(columns = {'Republican Proportion' : '2016 Republican Proportion'})
regression_data_scaled = regression_data_new.apply(zscore) #Scale our regression data using Z-Score standardization
regression_data_scaled.head(10)
 | # Killed per Capita | # Injured per Capita | Democrat Proportion Change Adjusted | 2016 Republican Proportion
---|---|---|---|---
0 | -0.552904 | -0.396303 | -0.406560 | -2.279994 |
1 | -0.474667 | -0.380480 | -1.175525 | 0.111112 |
2 | -0.149782 | -0.390153 | -1.427815 | 1.318644 |
3 | 2.619958 | 2.645367 | -0.196133 | -0.160686 |
4 | -0.464778 | -0.366525 | -0.066067 | 0.627843 |
5 | -0.465037 | -0.385855 | 0.384399 | 0.074417 |
6 | -0.159911 | -0.338590 | 1.279250 | 0.691584 |
7 | -0.352878 | -0.387460 | 1.608451 | -0.382920 |
df_ind = regression_data_scaled[['# Killed per Capita', '2016 Republican Proportion']]
df_target = regression_data_scaled['Democrat Proportion Change Adjusted']
X = df_ind
y = df_target
model = sm.OLS(y, X).fit()
predictions = model.predict(X) # make the predictions by the model
# Print out the statistics
model.summary()
/usr/local/lib/python3.10/dist-packages/scipy/stats/_stats_py.py:1806: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=8
  warnings.warn("kurtosistest only valid for n>=20 ... continuing ")
Dep. Variable: | Democrat Proportion Change Adjusted | R-squared (uncentered): | 0.013 |
---|---|---|---|
Model: | OLS | Adj. R-squared (uncentered): | -0.317 |
Method: | Least Squares | F-statistic: | 0.03825 |
Date: | Sun, 10 Dec 2023 | Prob (F-statistic): | 0.963 |
Time: | 16:54:47 | Log-Likelihood: | -11.301 |
No. Observations: | 8 | AIC: | 26.60 |
Df Residuals: | 6 | BIC: | 26.76 |
Df Model: | 2 | ||
Covariance Type: | nonrobust |
 | coef | std err | t | P>\|t\| | [0.025 | 0.975]
---|---|---|---|---|---|---
# Killed per Capita | -0.0512 | 0.406 | -0.126 | 0.904 | -1.044 | 0.942 |
2016 Republican Proportion | -0.0980 | 0.406 | -0.242 | 0.817 | -1.091 | 0.895 |
Omnibus: | 0.702 | Durbin-Watson: | 0.374 |
---|---|---|---|
Prob(Omnibus): | 0.704 | Jarque-Bera (JB): | 0.556 |
Skew: | 0.255 | Prob(JB): | 0.757 |
Kurtosis: | 1.813 | Cond. No. | 1.04 |
So, even trying to drill down to only the more "traditional" mass shootings, my findings still dispute the hypothesis that an increase in mass shootings leads to a greater Democrat vote share.
Given the results of my models, the only logical conclusion I can draw is that an increased prevalence of mass gun violence (regardless of how it's measured) does NOT suggest a greater increase in the Democrat vote share in the next presidential election. In fact, my findings, although not statistically significant, actually indicate the opposite: Republicans tend to see a relative increase in their vote share in states that experience more mass shooting violence. While that does not (to me, at least) make much intuitive sense, the numbers are the numbers. A real-world takeaway from this analysis could be that Democrats should focus less on gun control in their political rhetoric, while Republicans may be wise to remain fixed on their pro-gun-rights stances.
For this project, I plan to work solo, so I will not need to collaborate with a partner. Yet, it is still crucial to formulate a plan to ensure that I stay on track while working on the project. As such, I plan to set aside a specific block of time each week to work on this project, thereby ensuring that I do not fall behind and risk submitting a project without the adequate level of detail expected.
#Download as HTML...
from google.colab import drive
drive.mount('/content/drive')
Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
%cd /content/drive/MyDrive/'Colab Notebooks'
/content/drive/MyDrive/Colab Notebooks
%%shell
jupyter nbconvert --to html 'CMPS3160 Voting Project.ipynb'
[NbConvertApp] Converting notebook CMPS3160 Voting Project.ipynb to html
[NbConvertApp] Writing 725785 bytes to CMPS3160 Voting Project.html