The code aims to provide transparency for the merging and treatment of variables for the Multidimensional Measures of Militarization (m3) Dataset (version 1). Please reach out to rodrigo.bolanos.suarez@bicc.de in case of any doubts or feedback. The dataset contains information for 159 countries spanning the years 1990 to 2020.
check.packages <- function(pkg){
new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
if (length(new.pkg))
install.packages(new.pkg, dependencies = TRUE,
repos = "https://cran.rstudio.com")
sapply(pkg, require, character.only = TRUE)
}
#installing the packages
check.packages(c("tidyverse","countrycode","readxl",
"WDI","haven","ggplot2","lubridate",
"car","olsrr","writexl","DBI","RSQLite"))
## tidyverse countrycode readxl WDI haven ggplot2
## TRUE TRUE TRUE TRUE TRUE TRUE
## lubridate car olsrr writexl DBI RSQLite
## TRUE TRUE TRUE TRUE TRUE TRUE
#The first file loads the detailed equipment per country
#conscription ----
#define the path
path_conscription<-"Conscription_for_merge.xlsx"
#read the data
# this dataset has Northen Cyprus
conscription_data <- read_excel(path_conscription,1)
#m3 dataset -----
#define the path
path_m3<-"PRM3_master.xlsx"
#read the data
m3_data <- read_excel(path_m3,1)
#gmi dataset 2021 -----
path_gmi<-"GMI_2021.xlsx"
gmi_data <- read_excel(path_gmi,1)
##milpol dataset ----
path_milpol<-"02MilPol_final_updated_june2023.dta"
milpol_data<-read_dta(path_milpol)
#economic_m3 dataset ---
ecom3_data <- read_dta("Rdataset_3M_filtered_clean.dta")
#economic_m3 dataset NEW ----
path_ecom3new<-"ecom3_data_new.xlsx"
ecom3_data_new <- read_excel(path_ecom3new,1)
#this dataset is the same as ecom3_data after running all the cleaning stated here but manually changing some values
#dissagreggated gmi dataset ----
dsg_gmi_data<-read_csv("disaggregate_gmi_2021.csv")
#conscription ----
conscription_names<-c("country_iso"="ISO",
"year"="Year",
"country.name"="Country",
"com_mil_serv"="ComMilServ",
"com_mil_serv_gen"="ComMilServ_Gen",
"com_mil_serv_sur_min" ="ComMilServ_Dur_min",
"com_mil_serv_dur_max" = "ComMilServ_Dur_max",
"com_mil_serv_dur_min"="ComMilServ_Dur_min",
"alt_civ_serv"="AltCivServ",
"mobilisation"="Mobilisation")
#com_mil_serv_sur_min should not exist, however will drop it at the end (not to mess w/rest of the code)
conscription_data<-conscription_data%>%
select(all_of(conscription_names))
#m3 dataset -----
m3_names<-c("country.name"="country_text",
"country_iso"="country_id",
"country_cowcode"="cowcode",
"year","mil_origin","mil_leader",
"mil_mod","mil_veto","mil_repress",
"mil_repress_count","mil_impun")
m3_data<-m3_data%>%
select(all_of(m3_names))
#gmi dataset -----
gmi_names<-c("country_iso","country.name","region",
"gmi_2000","gmi_2001","gmi_2002",
"gmi_2003","gmi_2004","gmi_2005",
"gmi_2006","gmi_2007","gmi_2008",
"gmi_2009","gmi_2010","gmi_2011",
"gmi_2012","gmi_2013","gmi_2014",
"gmi_2015","gmi_2016","gmi_2017",
"gmi_2018","gmi_2019","gmi_2020")
gmi_data<-gmi_data%>%
select(all_of(gmi_names))
##milpol dataset ----
milpol_names <- c("year" = "year",
"country.name" = "countryname_english",
"milpol_cripre" ="MilPol_CriPre" ,
"max_conf_milpol_cripre"="Max_Conf_MilPol_CriPre",
"p_a_milpol_cripre"="p_a_MilPol_CriPre",
"milpol_lawenf" = "MilPol_LawEnf" ,
"max_conf_milpol_lawenf" ="Max_Conf_MilPol_LawEnf" ,
"p_a_milpol_lawenf" ="p_a_MilPol_LawEnf" ,
"milpol_peapre" ="MilPol_PeaPre" ,
"max_conf_milpol_peapre" = "Max_Conf_MilPol_PeaPre",
"p_a_milpol_peapre" = "p_a_MilPol_PeaPre",
"milpol_dummy" ="MilPol_Dummy" ,
"milpol_sum" = "MilPol_Sum"
)
milpol_data<-milpol_data%>%
select(all_of(milpol_names))
#economic_m3 dataset ---
ecom3_names<- c("state_abb" = "StateAbb",
"ccode", "country.name" = "StateName",
"year", "pop_total" = "PopTotal",
"overmillion_bool"="OverMillion",
"military_bool" = "Military",
"dup", "dup2", "mil_eco_dummy",
"mil_eco_inf", "mil_eco_own", "mil_eco_share",
"mil_eco_dom", "mil_eco_small", "mil_eco_medium",
"mil_eco_large", "mil_eco_vlarge")
ecom3_data<-ecom3_data %>%
select(all_of(ecom3_names))
#disaggregated gmi ----
dsg_gmi_names <- c("bicc_code", "country.name" = "country_en",
"region", "year", "milex_gdp", "milex_health",
"milpara_pop", "milpara_phy", "reserve_pop",
"weapons_pop", "milex_gdp_norm", "milex_health_norm",
"milpara_pop_norm","milpara_phy_norm",
"reserve_pop_norm","weapons_pop_norm")
dsg_gmi_data<-dsg_gmi_data %>%
select(all_of(dsg_gmi_names)) %>%
select(-c("bicc_code","region")) #"weapons_pop" include again
#conscription ----
#define a vector of codes that do not appear in the countrycode package
#matched unambiguously: Czechoslovakia, Kosovo, Republic of Serbia and Montenero, Yemen, North, Yemen, South, Yugoslavia
cust_name_iso <- c("Kosovo" = "XKX","Czechoslovakia" ="CSK",
"Yugoslavia" = "YUG", "Yemen, North" = "YAR",
"Yemen, South" = "YMD", "Republic of Serbia and Montenegro" = "SCG",
"Soviet Union"="SUN", "Northern Cyprus" = "CYN")
#conscription -----
conscription_data<- conscription_data %>%
mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
select(country_iso3c, everything())
#m3 -----
##matched unambiguously: Czechoslovakia, Kosovo
m3_data<-m3_data %>%
mutate(country.name=ifelse((year>1992 & year<=2006 & country.name=="Yugoslavia/Serbia"),"Republic of Serbia and Montenegro",country.name)) %>%
mutate(country.name = ifelse(year<=1992 & country.name=="Yugoslavia/Serbia","Yugoslavia",country.name)) %>%
mutate(country.name=ifelse(year>=2007 & country.name=="Yugoslavia/Serbia","Republic of Serbia",country.name))
m3_data<- m3_data %>%
mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
select(country_iso3c, everything())
#gmi -----
gmi_data<-gmi_data%>%
mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
select(country_iso3c, everything())
#milpol ----
milpol_data<-milpol_data%>%
mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
select(country_iso3c, everything())
#economic_m3 ----
ecom3_data<-ecom3_data%>%
mutate(country.name=ifelse((year>1992 & year<=2006 & country.name=="Yugoslavia/Serbia"),"Republic of Serbia and Montenegro",country.name)) %>%
mutate(country.name = ifelse(year<=1992 & country.name=="Yugoslavia/Serbia","Yugoslavia",country.name)) %>%
mutate(country.name=ifelse(year>=2007 & country.name=="Yugoslavia/Serbia","Republic of Serbia",country.name))
ecom3_data<-ecom3_data%>%
mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
select(country_iso3c, everything())
#disaggregated gmi ----
dsg_gmi_data<-dsg_gmi_data%>%
mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
select(country_iso3c, everything())
#conscription ----
#conscription_data<-conscription_data%>%
# mutate(across(where(is.numeric), ~na_if(., -999)))
conscription_data<-conscription_data%>%
mutate(no_military_service=ifelse(com_mil_serv_gen==-888,1,0)) %>%
mutate(no_civil_service=ifelse(alt_civ_serv==-888,1,0))
#m3 ----
#m3_data<-m3_data%>%
# mutate(across(where(is.numeric), ~na_if(., -999)))
# cleaning new ecom3data set
ecom3_data_new<-ecom3_data_new %>%
select(-c("countryname_standard"))
# Convert "NA" (as text) to actual NA in character columns
ecom3_data_new <- ecom3_data_new %>%
mutate(across(where(is.character), ~na_if(., "NA")))
#This will later help verify the merging is correct
#gmi ----
gmi_data<-gmi_data%>%
rename(.,countryname_gmi=country.name) %>%
rename(.,country_iso_gmi=country_iso)
#conscription ----
conscription_data<-conscription_data%>%
rename(.,countryname_conscription=country.name)%>%
rename(.,country_iso_conscription=country_iso)
#m3 ----
m3_data<-m3_data%>%
rename(.,countryname_m3=country.name)%>%
rename(.,country_iso_m3=country_iso)
milpol_data<-milpol_data%>%
rename(.,countryname_milpol=country.name)
#economic m3 ----
ecom3_data<-ecom3_data %>%
rename(.,countryname_ecom3 = country.name)
#dissagregated gmi----
dsg_gmi_data<-dsg_gmi_data %>%
rename(.,countryname_dsg_gmi = country.name)
#For conscription data ----
#Serbia & Montenegro (1993-2006) ; Serbia (2007-2021)
conscription_selection<-conscription_data%>%
filter(str_detect(countryname_conscription,"Serbia"))
conscription_selection <- conscription_selection %>%
filter((countryname_conscription == "Republic of Serbia" & year >= 2007) | (countryname_conscription == "Republic of Serbia and Montenegro" & year <= 2006))%>%
mutate(countryname_conscription="Republic of Serbia") %>%
mutate(country_iso3c="SRB")
conscription_data <- conscription_data %>%
filter(!str_detect(countryname_conscription, "Serbia"))
conscription_data <- rbind(conscription_data, conscription_selection)
#Yemen before 1995
columns_to_set_to_na <- c("com_mil_serv",
"com_mil_serv_gen",
"com_mil_serv_sur_min",
"com_mil_serv_dur_max",
"com_mil_serv_dur_min",
"alt_civ_serv",
"mobilisation",
"no_military_service",
"no_civil_service")
conscription_data <- conscription_data %>%
mutate(across(all_of(columns_to_set_to_na), ~ifelse(year < 1995 & countryname_conscription == "Yemen", NA, .)))
#For M3 data ----
m3_selection<-m3_data%>%
filter(str_detect(countryname_m3,"Serbia"))
m3_selection<-m3_selection%>%
filter((countryname_m3 == "Republic of Serbia" & year >= 2007) | (countryname_m3 == "Republic of Serbia and Montenegro" & year <= 2006))%>%
mutate(countryname_m3="Republic of Serbia") %>%
mutate(country_iso3c="SRB")
m3_data <- m3_data %>%
filter(!str_detect(countryname_m3, "Serbia"))
m3_data <- rbind(m3_data, m3_selection)
#Yemens
m3_variables <- c("country_iso_m3","country_cowcode","mil_origin", "mil_leader",
"mil_mod", "mil_veto", "mil_repress",
"mil_repress_count", "mil_impun")
m3_yemens_df<- expand.grid(
countryname_m3 = c("Yemen, North", "Yemen, South"),
year = 1990:2021
)
m3_yemens_df<-cbind(m3_yemens_df, setNames( lapply(m3_variables, function(x) x=NA), m3_variables))
m3_yemens_df$country_iso3c <- ifelse(m3_yemens_df$countryname_m3 == "Yemen, North", "YAR",
ifelse(m3_yemens_df$countryname_m3 == "Yemen, South", "YMD", NA))
m3_data <- rbind(m3_data, m3_yemens_df)
#For GMI ----
#Nothing needed
#ForMilpol----
#Solving Yugoslavia/Serbia issue for 1991 and 1992
milpol_selection<-milpol_data%>%
filter(str_detect(countryname_milpol,"Serbia"))
milpol_selection<-milpol_selection%>%
mutate(countryname_milpol=ifelse(countryname_milpol == "Republic of Serbia" & year <= 1992, "Yugoslavia" ,countryname_milpol)) %>%
mutate(country_iso3c=ifelse(countryname_milpol == "Yugoslavia", "YUG",country_iso3c))
milpol_data <- milpol_data %>%
filter(!str_detect(countryname_milpol, "Serbia"))
milpol_data <- rbind(milpol_data, milpol_selection)
#Solving Yemen before 1991
#"Yemen, North" = "YAR", "Yemen, South" = "YMD"
columns_to_update <- c("milpol_cripre", "max_conf_milpol_cripre", "p_a_milpol_cripre",
"milpol_lawenf", "max_conf_milpol_lawenf", "p_a_milpol_lawenf",
"milpol_peapre",
"max_conf_milpol_peapre",
"p_a_milpol_peapre", "milpol_dummy", "milpol_sum")
milpol_data <- milpol_data %>%
mutate(across(all_of(columns_to_update), ~ifelse(countryname_milpol=="Yemen" & year < 1991, NA, .)))
milpol_yemens_df<- expand.grid(
countryname_milpol = c("Yemen, North", "Yemen, South"),
year = 1990:2021
)
milpol_yemens_df<-cbind(milpol_yemens_df, setNames( lapply(columns_to_update, function(x) x=NA), columns_to_update))
milpol_yemens_df$country_iso3c <- ifelse(milpol_yemens_df$countryname_milpol == "Yemen, North", "YAR",
ifelse(milpol_yemens_df$countryname_milpol == "Yemen, South", "YMD", NA))
milpol_data <- rbind(milpol_data, milpol_yemens_df)
#For economics M3 data ----
ecom3_selection<-ecom3_data%>%
filter(str_detect(countryname_ecom3,"Serbia"))
ecom3_selection<-ecom3_selection%>%
filter((countryname_ecom3 == "Republic of Serbia" & year >= 2007) | (countryname_ecom3 == "Republic of Serbia and Montenegro" & year <= 2006))%>%
mutate(countryname_ecom3="Republic of Serbia") %>%
mutate(country_iso3c="SRB")
ecom3_data <- ecom3_data %>%
filter(!str_detect(countryname_ecom3, "Serbia"))
ecom3_data <- rbind(ecom3_data, ecom3_selection)
#For dissagregated GMI ----
#Nothing needed
#economic m3 -----
# Variables to standarize
vars_to_standardize <- c("mil_eco_inf", "mil_eco_own", "mil_eco_share",
"mil_eco_dom", "mil_eco_small", "mil_eco_medium",
"mil_eco_large", "mil_eco_vlarge"
)
# Mutate() to create new variables with "_std" suffix and apply norm() function -defined before-
ecom3_data <- ecom3_data %>%
mutate(across(all_of(vars_to_standardize), ~norm(.), .names = "{col}_std"))
# STD for ecom3_data_new
# Convert specified variables to numeric
ecom3_data_new <- ecom3_data_new %>%
mutate(across(all_of(vars_to_standardize), as.numeric))
ecom3_data_new <- ecom3_data_new %>%
mutate(across(all_of(vars_to_standardize), ~norm(.), .names = "{col}_std"))
#Setting simple reference table for countries and years
#Select the countries ----
countryyears_df<- m3_data%>%
select(countryname_standard=countryname_m3,country_iso3c) %>%
distinct(country_iso3c, .keep_all = TRUE)
#Add country_cown and country_cowc ----
custom_cowc=c()
custom_cown=c()
countryyears_df<- countryyears_df %>%
mutate(country_cowc=countrycode(country_iso3c,origin='iso3c',destination='cowc', custom_match = custom_cowc)) %>%
mutate(country_cown =countrycode(country_iso3c,origin='iso3c',destination='cown', custom_match = custom_cown))
# Make cown an integer variable
countryyears_df$country_cown <- as.integer(countryyears_df$country_cown)
#Set the reference years ----
years <- 1990:2021
#create the dataframe ----
countryyears_df <- countryyears_df %>%
crossing(year = years)
#Add region ----
countryyears_df<- countryyears_df %>%
mutate(region_wb=countrycode(country_iso3c,origin='iso3c',destination='region')) %>%
mutate(region_wb=replace_na(region_wb,"Europe & Central Asia"))
#Add continent ----
countryyears_df<- countryyears_df %>%
mutate(continent=countrycode(country_iso3c,origin='iso3c',destination='continent')) %>%
mutate(continent=ifelse(country_iso3c=="SUN","Europe/Asia",continent)) %>%
mutate(continent=replace_na(continent,"Europe"))
# Merge by left join
merged_df<-countryyears_df %>%
left_join(conscription_data,by=c("country_iso3c","year")) %>%
left_join(m3_data, by=c("country_iso3c","year")) %>%
left_join(gmi_data,by=c("country_iso3c","year")) %>%
left_join(milpol_data,by=c("country_iso3c","year")) %>%
left_join(ecom3_data_new,by=c("country_iso3c","year")) %>% #using new corrected dataset
left_join(dsg_gmi_data, by=c("country_iso3c","year"))
# Make year an integer variable
merged_df$year <- as.integer(merged_df$year)
# un.name.en: United Nations English country name
# Names without match: ABW: Aruba, CSK: Czechoslovakia, CUW: Curacao, GGY: Guernsey, GRL: Greenland, HKG: Hong Kong, IMN: Isle of Man, JEY: Jersey, MAC:Macao, SCG: Republic of Serbia and Montenegro, SXM: Sint Maarten, TWN: Republic of China (Taiwan), VAT: Vatican, XKX: Republic of Kosovo, YAR: North Yemen, YMD: South Yemen, YUG: Yugoslavia
un_countrynames=c("YUG"="Socialist Federal Republic of Yugoslavia",
"SUN"=" Union of Soviet Socialist Republics",
"YMD" = "Democratic Yemen",
"YAR" ="Yemen Arab Republic",
"TUR" = "Türkiye",
"CSK"="Czechoslovakia",
"CZE" = "Czechia")
merged_df<-merged_df%>%
mutate(countryname_un=countrycode(country_iso3c,origin='iso3c',destination='un.name.en',custom_match = un_countrynames))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `countryname_un = countrycode(...)`.
## Caused by warning:
## ! Some values were not matched unambiguously: TWN, XKX
# Die Daten der Sowjetunion werden für das Jahr 1992 auf Russland übertragen.
merged_df$com_mil_serv <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "1", merged_df$com_mil_serv)
merged_df$com_mil_serv_gen <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$com_mil_serv_gen)
merged_df$com_mil_serv_sur_min <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "24", merged_df$com_mil_serv_sur_min)
merged_df$com_mil_serv_dur_max <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "24", merged_df$com_mil_serv_dur_max)
merged_df$com_mil_serv_dur_min <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "24", merged_df$com_mil_serv_dur_min)
merged_df$alt_civ_serv <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$alt_civ_serv)
merged_df$mobilisation <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "0", merged_df$mobilisation)
merged_df$no_military_service <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$no_military_service)
merged_df$no_civil_service <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$no_civil_service)
# Drop observations based on conditions
merged_df <- merged_df[!(merged_df$countryname_standard == "Yemen, North" & merged_df$year > 1990), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Yemen, South" & merged_df$year > 1990), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Yugoslavia" & merged_df$year > 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Czechoslovakia" & merged_df$year > 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Soviet Union" & merged_df$year > 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Yemen" & merged_df$year < 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Republic of Serbia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Bosnia and Herzegovina" & merged_df$year < 2006), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Croatia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Slovenia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Macedonia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Czech Republic" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Slovakia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Lithuania" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Georgia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Estonia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Latvia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Ukraine" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Moldova" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Kyrgyzstan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Uzbekistan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Tajikistan" & merged_df$year < 1994), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Armenia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Azerbaijan" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Turkmenistan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Belarus" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Russia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Kazakhstan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Eritrea" & merged_df$year < 1994), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Kuwait" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Namibia" & merged_df$year < 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Somalia" & merged_df$year < 2009), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Cyprus" & merged_df$year < 2003), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Ethiopia" & merged_df$year %in% 1991:1996), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Gabon" & merged_df$year < 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Equatorial Guinea" & merged_df$year < 2009), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Haiti" & merged_df$year %in% 1995:2017), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Qatar" & merged_df$year < 2006), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Timor-Leste" & merged_df$year < 2007), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Bahrain" & merged_df$year < 2007), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Angola" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Eswatini" & merged_df$year < 1998), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Guinea-Bissau" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "South Sudan" & merged_df$year < 2012), ]
# Create a named vector for renaming
change_variables <- c("milpol_crime"="milpol_cripre",
"p_a_milpol_crime" = "p_a_milpol_cripre",
"milpol_law"="milpol_lawenf",
"p_a_milpol_law"="p_a_milpol_lawenf",
"milpol_peace"="milpol_peapre",
"p_a_milpol_peace"="p_a_milpol_peapre",
"milex_healthexp"="milex_health",
"pers_to_pop"="milpara_pop",
"pers_to_phy"="milpara_phy",
"hwi_norm"="weapons_pop_norm",
"hwi"="weapons_pop")
# Rename
merged_df <- merged_df %>%
rename(., all_of(change_variables))
# Drop variables
drop_variables <- c("com_mil_serv_sur_min","no_military_service","no_civil_service","gmi",
"overmillion_bool","military_bool") #"dup","dup2
merged_df <- merged_df %>%
select(-one_of(drop_variables))
# Variables to drop in version 1 but include in version 2
drop_variables_v1 <- c("milpol_sum","mobilisation","mil_eco_inf")
merged_df <- merged_df %>%
select(-one_of(drop_variables_v1))
convert_to_numeric <- c("com_mil_serv",
"com_mil_serv_gen",
"com_mil_serv_dur_max",
"com_mil_serv_dur_min",
"alt_civ_serv",
#"mobilisation", #dropped for version 1
"pop_total")
merged_df <- merged_df %>%
mutate(across(all_of(convert_to_numeric), ~ifelse(. == "NA", NA, .))) %>%
mutate(across(all_of(convert_to_numeric), as.numeric))
merged_df <- merged_df %>%
mutate_if(is.numeric, ~ifelse(. == -999, NA, .))
#Select columns
merged_df<-merged_df%>%
select(-c("countryname_m3","countryname_gmi","country_iso_conscription",
"country_iso_m3","country_iso_gmi","region","countryname_milpol",
"countryname_conscription","country_cowcode","countryname_dsg_gmi"))
#this are of the old ecom3 dataset: "countryname_ecom3","state_abb", #"ccode"
#Arrange columns
merged_df<-merged_df%>%
select(year,country_iso3c,everything())
#Filter rows without un-name
merged_df<-merged_df%>%
filter(!is.na(countryname_un))
#Filter the years
merged_df<-merged_df%>%
filter(year!="2021")
#create a file to verify manually
name_of_file=str_c("merged_military_dataset",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".csv")
write_csv(merged_df,name_of_file)
db_file=str_c("merged_military_dataset",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".sqlite")
# Create an SQLite connection
con <- dbConnect(RSQLite::SQLite(), dbname = db_file)
# Specify column types
#column_types <- c(year = "INTEGER", everything())
# Write the data frame to an SQLite table
dbWriteTable(con, "m3_dataset", merged_df, overwrite = TRUE) #, col_types = column_types)
# Close the connection
dbDisconnect(con)
#evaluation turned false, turn on when needed
collinearity_tests <- function(response_vars, data) {
vif_tol_results <- list()
eigen_cindex_results <- list()
for (response_var in response_vars) {
# Fit the linear regression model
formula <- paste(response_var, "~ .")
lm_model <- lm(formula, data = data)
# Calculate VIF and tolerance
vif_tol_results[[response_var]] <- olsrr::ols_vif_tol(lm_model)
# Calculate eigenvalues and condition index
eigen_cindex_results[[response_var]] <- olsrr::ols_eigen_cindex(lm_model)
}
return(list(vif_tol_results, eigen_cindex_results))
}
#evaluation turned false, turn on when needed
#change names and get rid of -888 and -999
# Non standarized vars
variables_list_a <- c("milex_gdp","milex_health","milpara_pop","milpara_phy","reserve_pop",
"weapons_pop_norm", #this is the HWI
"mil_leader", "mil_origin", "mil_mod", "mil_impun", "mil_veto",
"mil_repress", "mil_police", "com_mil_serv", "mil_eco_dummy", "mil_eco_inf")
collinearity_df_a<-merged_df[variables_list_a]
collinearity_df_a$com_mil_serv <- as.numeric(collinearity_df_a$com_mil_serv)
# Standarized vars
variables_list_b <- c("milex_gdp_norm","milex_health_norm","milpara_pop_norm","milpara_phy_norm",
"reserve_pop_norm",
"weapons_pop_norm", #this is the HWI
"mil_leader", "mil_origin", "mil_mod", "mil_impun", "mil_veto",
"mil_repress", "mil_police", "com_mil_serv", "mil_eco_dummy", "mil_eco_inf_std")
collinearity_df_b<-merged_df[variables_list_b]
collinearity_df_b$com_mil_serv <- as.numeric(collinearity_df_a$com_mil_serv)
#evaluation turned false, turn on when needed
file_name=str_c("collinearity_results_non",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".xlsx")
# Create an Excel workbook
wb <- openxlsx::createWorkbook()
# Loop through the first list in results
for (i in 1:length(results[[1]])) {
# Get the name of the variable for the sheet
var_name <- names(results[[1]])[i]
# Create a new sheet for each entry in the first list
sheet_name <- paste("viftol", var_name, sep = "_")
openxlsx::addWorksheet(wb, sheetName =sheet_name) #sheetName = paste("Sheet_1_", i, sep = "")
# Convert the list of results to a data frame and transpose it
results_df <- as.data.frame(results[[1]][[i]])
# Write the results of the variable to the sheet
openxlsx::writeData(wb, sheet = i, x = results_df, startCol = 1, startRow = 1)
}
# Loop through the second list in results
for (i in 1:length(results[[2]])) {
# Get the name of the variable for the sheet
var_name <- names(results[[2]])[i]
# Create a new sheet with the variable name and index
sheet_name <- paste("eigenindex", var_name, sep = "_")
openxlsx::addWorksheet(wb, sheetName =sheet_name)
# Convert the list of results to a data frame and transpose it
results_df <- as.data.frame(results[[2]][[i]])
# Write the results of the variable to the sheet
openxlsx::writeData(wb, sheet = i + length(results[[1]]), x = results_df, startCol = 1, startRow = 1)
}
# Save the Excel workbook to a file
openxlsx::saveWorkbook(wb, file_name)
#evaluation turned false, turn on when needed
file_name=str_c("collinearity_results_std",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".xlsx")
# Create an Excel workbook
wb <- openxlsx::createWorkbook()
# Loop through the first list in results
for (i in 1:length(results_b[[1]])) {
# Get the name of the variable for the sheet
var_name <- names(results_b[[1]])[i]
# Create a new sheet for each entry in the first list
sheet_name <- paste("viftol", var_name, sep = "_")
openxlsx::addWorksheet(wb, sheetName =sheet_name) #sheetName = paste("Sheet_1_", i, sep = "")
# Convert the list of results to a data frame and transpose it
results_df <- as.data.frame(results_b[[1]][[i]])
# Write the results of the variable to the sheet
openxlsx::writeData(wb, sheet = i, x = results_df, startCol = 1, startRow = 1)
}
# Loop through the second list in results
for (i in 1:length(results_b[[2]])) {
# Get the name of the variable for the sheet
var_name <- names(results_b[[2]])[i]
# Create a new sheet with the variable name and index
sheet_name <- paste("eigenindex", var_name, sep = "_")
openxlsx::addWorksheet(wb, sheetName =sheet_name)
# Convert the list of results to a data frame and transpose it
results_df <- as.data.frame(results_b[[2]][[i]])
# Write the results of the variable to the sheet
openxlsx::writeData(wb, sheet = i + length(results_b[[1]]), x = results_df, startCol = 1, startRow = 1)
}
# Save the Excel workbook to a file
openxlsx::saveWorkbook(wb, file_name)