ABOUT

The code aims to provide transparency for the merging and treatment of variables for the Multidimensional Measures of Militarization (m3) Dataset (version 1). Please reach out to rodrigo.bolanos.suarez@bicc.de in case of any doubts or feedback. The dataset contains information for 159 countries spanning the years 1990 to 2020.

SETUP

#.rs.restartR() to restart session in R

rm(list = ls())
cat("\014") #for clearing console

check.packages <- function(pkg){
  new.pkg <- pkg[!(pkg %in% installed.packages()[, "Package"])]
  if (length(new.pkg))
    install.packages(new.pkg, dependencies = TRUE,
                     repos = "https://cran.rstudio.com")
  sapply(pkg, require, character.only = TRUE)
}
#installing the packages 
check.packages(c("tidyverse","countrycode","readxl",
                 "WDI","haven","ggplot2","lubridate",
                 "car","olsrr","writexl","DBI","RSQLite"))

##   tidyverse countrycode      readxl         WDI       haven     ggplot2 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE 
##   lubridate         car       olsrr     writexl         DBI     RSQLite 
##        TRUE        TRUE        TRUE        TRUE        TRUE        TRUE

## Normalization function
norm <- function(v) {
    max <- max(v, na.rm = TRUE)
    min <- min(v, na.rm = TRUE)
    return((v - min) / (max - min))
}

PREPARING DATA

Loading the files

#The first file loads the detailed equipment per country

#conscription ----

#define the path
path_conscription<-"Conscription_for_merge.xlsx"

#read the data
# this dataset has Northen Cyprus
conscription_data <- read_excel(path_conscription,1)  

#m3 dataset -----

#define the path
path_m3<-"PRM3_master.xlsx"

#read the data
m3_data <- read_excel(path_m3,1)

#gmi dataset 2021 -----

path_gmi<-"GMI_2021.xlsx"

gmi_data <- read_excel(path_gmi,1)

##milpol dataset ----

path_milpol<-"02MilPol_final_updated_june2023.dta"

milpol_data<-read_dta(path_milpol)

#economic_m3 dataset ---

ecom3_data <- read_dta("Rdataset_3M_filtered_clean.dta")

#economic_m3 dataset NEW ----

path_ecom3new<-"ecom3_data_new.xlsx"
ecom3_data_new <- read_excel(path_ecom3new,1)
#this dataset is the same as ecom3_data after running all the cleaning stated here but manually changing some values

#dissagreggated gmi dataset ----

dsg_gmi_data<-read_csv("disaggregate_gmi_2021.csv")

Standarize column names

#conscription ----

conscription_names<-c("country_iso"="ISO",
                      "year"="Year",
                      "country.name"="Country",
                      "com_mil_serv"="ComMilServ",
                      "com_mil_serv_gen"="ComMilServ_Gen",
                      "com_mil_serv_sur_min" ="ComMilServ_Dur_min",
                      "com_mil_serv_dur_max" = "ComMilServ_Dur_max",
                      "com_mil_serv_dur_min"="ComMilServ_Dur_min",
                      "alt_civ_serv"="AltCivServ",
                      "mobilisation"="Mobilisation")   

#com_mil_serv_sur_min should not exist, however will drop it at the end (not to mess w/rest of the code)

conscription_data<-conscription_data%>%
  select(all_of(conscription_names))

#m3 dataset -----

m3_names<-c("country.name"="country_text",
            "country_iso"="country_id",
            "country_cowcode"="cowcode",
            "year","mil_origin","mil_leader",
            "mil_mod","mil_veto","mil_repress",
            "mil_repress_count","mil_impun")

m3_data<-m3_data%>%
  select(all_of(m3_names))

#gmi dataset  -----

gmi_names<-c("country_iso","country.name","region",
             "gmi_2000","gmi_2001","gmi_2002",
             "gmi_2003","gmi_2004","gmi_2005",
             "gmi_2006","gmi_2007","gmi_2008",
             "gmi_2009","gmi_2010","gmi_2011",
             "gmi_2012","gmi_2013","gmi_2014",
             "gmi_2015","gmi_2016","gmi_2017",
             "gmi_2018","gmi_2019","gmi_2020")

gmi_data<-gmi_data%>%
  select(all_of(gmi_names))

##milpol dataset ----

milpol_names <- c("year" = "year",
                  "country.name" = "countryname_english",
                   "milpol_cripre" ="MilPol_CriPre" ,
                   "max_conf_milpol_cripre"="Max_Conf_MilPol_CriPre",
                   "p_a_milpol_cripre"="p_a_MilPol_CriPre",
                   "milpol_lawenf" = "MilPol_LawEnf" ,
                    "max_conf_milpol_lawenf" ="Max_Conf_MilPol_LawEnf" ,
                   "p_a_milpol_lawenf" ="p_a_MilPol_LawEnf" ,
                    "milpol_peapre" ="MilPol_PeaPre" ,
                    "max_conf_milpol_peapre" = "Max_Conf_MilPol_PeaPre",
                    "p_a_milpol_peapre" = "p_a_MilPol_PeaPre",
                    "milpol_dummy" ="MilPol_Dummy" ,
                   "milpol_sum" = "MilPol_Sum"
                  )


milpol_data<-milpol_data%>%
  select(all_of(milpol_names))

#economic_m3 dataset ---

ecom3_names<- c("state_abb" = "StateAbb",
                "ccode", "country.name" = "StateName", 
                "year", "pop_total" = "PopTotal",  
                "overmillion_bool"="OverMillion",
                "military_bool" = "Military",
                "dup", "dup2", "mil_eco_dummy", 
                "mil_eco_inf", "mil_eco_own", "mil_eco_share",
                "mil_eco_dom", "mil_eco_small", "mil_eco_medium",
                "mil_eco_large", "mil_eco_vlarge")

ecom3_data<-ecom3_data %>%
  select(all_of(ecom3_names))

#disaggregated gmi ----

dsg_gmi_names <- c("bicc_code", "country.name" = "country_en",
                   "region", "year", "milex_gdp", "milex_health",
                   "milpara_pop", "milpara_phy", "reserve_pop",
                   "weapons_pop", "milex_gdp_norm", "milex_health_norm",
                   "milpara_pop_norm","milpara_phy_norm",
                   "reserve_pop_norm","weapons_pop_norm")

dsg_gmi_data<-dsg_gmi_data %>%
  select(all_of(dsg_gmi_names)) %>%
  select(-c("bicc_code","region")) #"weapons_pop" include again

Setting the types

conscription_data<-type_convert(conscription_data)
m3_data<-type_convert(m3_data)

Setting country codes

#conscription ----

#define a vector of codes that do not appear in the countrycode package
#matched unambiguously: Czechoslovakia, Kosovo, Republic of Serbia and Montenero, Yemen, North, Yemen, South, Yugoslavia

cust_name_iso <- c("Kosovo" = "XKX","Czechoslovakia" ="CSK",
                   "Yugoslavia" = "YUG", "Yemen, North" = "YAR",
                   "Yemen, South" = "YMD", "Republic of Serbia and Montenegro" = "SCG",
                   "Soviet Union"="SUN", "Northern Cyprus" = "CYN") 

#conscription -----
conscription_data<- conscription_data %>% 
  mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
  select(country_iso3c, everything())


#m3 -----
##matched unambiguously:  Czechoslovakia, Kosovo

m3_data<-m3_data %>%
  mutate(country.name=ifelse((year>1992 & year<=2006 & country.name=="Yugoslavia/Serbia"),"Republic of Serbia and Montenegro",country.name)) %>%
  mutate(country.name = ifelse(year<=1992 & country.name=="Yugoslavia/Serbia","Yugoslavia",country.name)) %>%
  mutate(country.name=ifelse(year>=2007 & country.name=="Yugoslavia/Serbia","Republic of Serbia",country.name))


m3_data<- m3_data %>% 
  mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
  select(country_iso3c, everything())

#gmi -----

gmi_data<-gmi_data%>%
  mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
  select(country_iso3c, everything())

#milpol ----

milpol_data<-milpol_data%>%
  mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
  select(country_iso3c, everything())

#economic_m3 ----

ecom3_data<-ecom3_data%>%
  mutate(country.name=ifelse((year>1992 & year<=2006 & country.name=="Yugoslavia/Serbia"),"Republic of Serbia and Montenegro",country.name)) %>%
  mutate(country.name = ifelse(year<=1992 & country.name=="Yugoslavia/Serbia","Yugoslavia",country.name)) %>%
  mutate(country.name=ifelse(year>=2007 & country.name=="Yugoslavia/Serbia","Republic of Serbia",country.name))

ecom3_data<-ecom3_data%>%
  mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
  select(country_iso3c, everything())

#disaggregated gmi ----

dsg_gmi_data<-dsg_gmi_data%>%
  mutate(country_iso3c=countrycode(country.name,origin='country.name',destination='iso3c', custom_match = cust_name_iso))%>%
  select(country_iso3c, everything())

Adjusting format

#gmi -----

#pivot to long
cols.num <- gmi_names[-c(1,2,3)]

gmi_data<-gmi_data%>%
    tidyr::pivot_longer(
      cols = all_of(cols.num), 
      names_to = "year", 
      names_prefix = "gmi_",
      values_to = "gmi" 
    )

#convert year to numeric
gmi_data<-gmi_data%>%
  mutate(year=as.numeric(year))

Cleaning the data

#conscription ----

#conscription_data<-conscription_data%>% 
#  mutate(across(where(is.numeric), ~na_if(., -999))) 

conscription_data<-conscription_data%>%
  mutate(no_military_service=ifelse(com_mil_serv_gen==-888,1,0)) %>%
  mutate(no_civil_service=ifelse(alt_civ_serv==-888,1,0))


#m3 ----

#m3_data<-m3_data%>% 
#  mutate(across(where(is.numeric), ~na_if(., -999))) 

# cleaning new ecom3data set

ecom3_data_new<-ecom3_data_new %>%
  select(-c("countryname_standard"))

# Convert "NA" (as text) to actual NA in character columns
ecom3_data_new <- ecom3_data_new %>%
  mutate(across(where(is.character), ~na_if(., "NA")))

Identify country names per dataset

#This will later help verify the merging is correct

#gmi ----
gmi_data<-gmi_data%>%
  rename(.,countryname_gmi=country.name) %>%
  rename(.,country_iso_gmi=country_iso)

#conscription ----

conscription_data<-conscription_data%>%
  rename(.,countryname_conscription=country.name)%>%
  rename(.,country_iso_conscription=country_iso)

#m3 ----

m3_data<-m3_data%>%
  rename(.,countryname_m3=country.name)%>%
  rename(.,country_iso_m3=country_iso)

milpol_data<-milpol_data%>%
  rename(.,countryname_milpol=country.name)

#economic m3 ----

ecom3_data<-ecom3_data %>%
  rename(.,countryname_ecom3 = country.name)

#dissagregated gmi----

dsg_gmi_data<-dsg_gmi_data %>%
  rename(.,countryname_dsg_gmi = country.name)

Solving country continuity

#For conscription data ----
#Serbia & Montenegro (1993-2006) ; Serbia (2007-2021)

conscription_selection<-conscription_data%>%
  filter(str_detect(countryname_conscription,"Serbia"))

conscription_selection <- conscription_selection %>%
  filter((countryname_conscription == "Republic of Serbia" & year >= 2007) | (countryname_conscription == "Republic of Serbia and Montenegro" & year <= 2006))%>%
  mutate(countryname_conscription="Republic of Serbia")  %>%
  mutate(country_iso3c="SRB")

conscription_data <- conscription_data %>%
  filter(!str_detect(countryname_conscription, "Serbia")) 

conscription_data <- rbind(conscription_data, conscription_selection)

#Yemen before 1995
columns_to_set_to_na <- c("com_mil_serv", 
                          "com_mil_serv_gen", 
                          "com_mil_serv_sur_min", 
                          "com_mil_serv_dur_max",
                          "com_mil_serv_dur_min", 
                          "alt_civ_serv", 
                          "mobilisation",
                          "no_military_service",
                           "no_civil_service")

conscription_data <- conscription_data %>% 
  mutate(across(all_of(columns_to_set_to_na), ~ifelse(year < 1995 & countryname_conscription == "Yemen", NA, .)))


#For M3 data ----

m3_selection<-m3_data%>%
  filter(str_detect(countryname_m3,"Serbia"))

m3_selection<-m3_selection%>%
  filter((countryname_m3 == "Republic of Serbia" & year >= 2007) | (countryname_m3 == "Republic of Serbia and Montenegro" & year <= 2006))%>%
  mutate(countryname_m3="Republic of Serbia")  %>%
  mutate(country_iso3c="SRB")

m3_data <- m3_data %>%
  filter(!str_detect(countryname_m3, "Serbia"))

m3_data <- rbind(m3_data, m3_selection)

#Yemens

m3_variables <- c("country_iso_m3","country_cowcode","mil_origin", "mil_leader",
                  "mil_mod", "mil_veto", "mil_repress", 
                  "mil_repress_count", "mil_impun")

m3_yemens_df<- expand.grid(
  countryname_m3 = c("Yemen, North", "Yemen, South"),
  year = 1990:2021
)

m3_yemens_df<-cbind(m3_yemens_df, setNames( lapply(m3_variables, function(x) x=NA), m3_variables))

m3_yemens_df$country_iso3c <- ifelse(m3_yemens_df$countryname_m3 == "Yemen, North", "YAR",
                                         ifelse(m3_yemens_df$countryname_m3 == "Yemen, South", "YMD", NA))

m3_data <- rbind(m3_data, m3_yemens_df)

#For GMI ----
#Nothing needed 

#ForMilpol----
#Solving Yugoslavia/Serbia issue for 1991 and 1992

milpol_selection<-milpol_data%>%
  filter(str_detect(countryname_milpol,"Serbia"))

milpol_selection<-milpol_selection%>%
  mutate(countryname_milpol=ifelse(countryname_milpol == "Republic of Serbia" & year <= 1992, "Yugoslavia" ,countryname_milpol)) %>%
  mutate(country_iso3c=ifelse(countryname_milpol == "Yugoslavia", "YUG",country_iso3c))
         
milpol_data <- milpol_data %>%
  filter(!str_detect(countryname_milpol, "Serbia"))

milpol_data <- rbind(milpol_data, milpol_selection)
    

#Solving Yemen before 1991
#"Yemen, North" = "YAR", "Yemen, South" = "YMD"

columns_to_update <- c("milpol_cripre", "max_conf_milpol_cripre", "p_a_milpol_cripre",
                       "milpol_lawenf", "max_conf_milpol_lawenf", "p_a_milpol_lawenf",
                       "milpol_peapre",
                       "max_conf_milpol_peapre",
                       "p_a_milpol_peapre", "milpol_dummy", "milpol_sum")

milpol_data <- milpol_data %>% 
  mutate(across(all_of(columns_to_update), ~ifelse(countryname_milpol=="Yemen" & year < 1991, NA, .)))


milpol_yemens_df<- expand.grid(
  countryname_milpol = c("Yemen, North", "Yemen, South"),
  year = 1990:2021
)

milpol_yemens_df<-cbind(milpol_yemens_df, setNames( lapply(columns_to_update, function(x) x=NA), columns_to_update))

milpol_yemens_df$country_iso3c <- ifelse(milpol_yemens_df$countryname_milpol == "Yemen, North", "YAR",
                                         ifelse(milpol_yemens_df$countryname_milpol == "Yemen, South", "YMD", NA))

milpol_data <- rbind(milpol_data, milpol_yemens_df)


#For economics M3 data ----

ecom3_selection<-ecom3_data%>%
  filter(str_detect(countryname_ecom3,"Serbia"))

ecom3_selection<-ecom3_selection%>%
  filter((countryname_ecom3 == "Republic of Serbia" & year >= 2007) | (countryname_ecom3 == "Republic of Serbia and Montenegro" & year <= 2006))%>%
  mutate(countryname_ecom3="Republic of Serbia")  %>%
  mutate(country_iso3c="SRB")

ecom3_data <- ecom3_data %>%
  filter(!str_detect(countryname_ecom3, "Serbia"))

ecom3_data <- rbind(ecom3_data, ecom3_selection)


#For dissagregated GMI ----
#Nothing needed

Min-max standarization

#economic m3 -----

# Variables to standarize
vars_to_standardize <- c("mil_eco_inf", "mil_eco_own", "mil_eco_share",
                         "mil_eco_dom", "mil_eco_small", "mil_eco_medium",
                         "mil_eco_large", "mil_eco_vlarge"
                         )

# Mutate() to create new variables with "_std" suffix and apply norm() function -defined before-
ecom3_data <- ecom3_data %>%
  mutate(across(all_of(vars_to_standardize), ~norm(.), .names = "{col}_std"))


# STD for ecom3_data_new

# Convert specified variables to numeric
ecom3_data_new <- ecom3_data_new %>%
  mutate(across(all_of(vars_to_standardize), as.numeric))

ecom3_data_new <- ecom3_data_new %>%
  mutate(across(all_of(vars_to_standardize), ~norm(.), .names = "{col}_std"))

MERGING

Setting reference countries

#Setting simple reference table for countries and years 

#Select the countries ----
countryyears_df<- m3_data%>%
  select(countryname_standard=countryname_m3,country_iso3c)  %>%
  distinct(country_iso3c, .keep_all = TRUE)

#Add country_cown and country_cowc ----

custom_cowc=c()
custom_cown=c()

countryyears_df<- countryyears_df %>% 
  mutate(country_cowc=countrycode(country_iso3c,origin='iso3c',destination='cowc', custom_match = custom_cowc)) %>%
  mutate(country_cown =countrycode(country_iso3c,origin='iso3c',destination='cown', custom_match = custom_cown))

# Make cown an integer variable
countryyears_df$country_cown <- as.integer(countryyears_df$country_cown)

#Set the reference years ----
years <- 1990:2021

#create the dataframe ----
countryyears_df <- countryyears_df %>%
  crossing(year = years)

#Add region  ----
countryyears_df<- countryyears_df %>% 
  mutate(region_wb=countrycode(country_iso3c,origin='iso3c',destination='region')) %>%
  mutate(region_wb=replace_na(region_wb,"Europe & Central Asia"))

#Add continent ----
countryyears_df<- countryyears_df %>% 
  mutate(continent=countrycode(country_iso3c,origin='iso3c',destination='continent')) %>%
  mutate(continent=ifelse(country_iso3c=="SUN","Europe/Asia",continent)) %>%
  mutate(continent=replace_na(continent,"Europe"))

Do the merging

# Merge by left join

merged_df<-countryyears_df %>%
  left_join(conscription_data,by=c("country_iso3c","year")) %>%
  left_join(m3_data, by=c("country_iso3c","year")) %>%
  left_join(gmi_data,by=c("country_iso3c","year"))  %>%
  left_join(milpol_data,by=c("country_iso3c","year")) %>%
  left_join(ecom3_data_new,by=c("country_iso3c","year")) %>%   #using new corrected dataset
  left_join(dsg_gmi_data, by=c("country_iso3c","year"))

# Make year an integer variable
merged_df$year <- as.integer(merged_df$year)

CLEANING

Clean and arrange

# un.name.en: United Nations English country name
# Names without match: ABW: Aruba, CSK: Czechoslovakia, CUW: Curacao, GGY: Guernsey, GRL: Greenland, HKG: Hong Kong, IMN: Isle of Man, JEY: Jersey, MAC:Macao, SCG: Republic of Serbia and Montenegro, SXM: Sint Maarten, TWN: Republic of China (Taiwan),  VAT: Vatican,  XKX: Republic of Kosovo,  YAR: North Yemen,  YMD: South Yemen, YUG: Yugoslavia

un_countrynames=c("YUG"="Socialist Federal Republic of Yugoslavia",
                  "SUN"=" Union of Soviet Socialist Republics",
                  "YMD" = "Democratic Yemen", 
                  "YAR" ="Yemen Arab Republic", 
                  "TUR" = "Türkiye",
                  "CSK"="Czechoslovakia", 
                  "CZE" = "Czechia")


merged_df<-merged_df%>%
  mutate(countryname_un=countrycode(country_iso3c,origin='iso3c',destination='un.name.en',custom_match = un_countrynames))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `countryname_un = countrycode(...)`.
## Caused by warning:
## ! Some values were not matched unambiguously: TWN, XKX

#Rearrange the columns
merged_df <- merged_df %>%
  select(starts_with("countryname_"), starts_with("country_iso_"), everything())

#Order the columns
merged_df<-merged_df%>%
   arrange(-desc(country_iso3c))

#Rename variable

merged_df <- merged_df %>%
  rename(mil_police = milpol_dummy)

Partner dataset clean

# Die Daten der Sowjetunion werden für das Jahr 1992 auf Russland übertragen.
merged_df$com_mil_serv <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "1", merged_df$com_mil_serv)
merged_df$com_mil_serv_gen <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$com_mil_serv_gen)
merged_df$com_mil_serv_sur_min <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "24", merged_df$com_mil_serv_sur_min)
merged_df$com_mil_serv_dur_max <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "24", merged_df$com_mil_serv_dur_max)
merged_df$com_mil_serv_dur_min <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "24", merged_df$com_mil_serv_dur_min)
merged_df$alt_civ_serv <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$alt_civ_serv)
merged_df$mobilisation <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "0", merged_df$mobilisation)
merged_df$no_military_service <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$no_military_service)
merged_df$no_civil_service <- ifelse(merged_df$countryname_standard == "Russia" & merged_df$year == 1992, "NA", merged_df$no_civil_service)

# Drop observations based on conditions
merged_df <- merged_df[!(merged_df$countryname_standard == "Yemen, North" & merged_df$year > 1990), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Yemen, South" & merged_df$year > 1990), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Yugoslavia" & merged_df$year > 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Czechoslovakia" & merged_df$year > 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Soviet Union" & merged_df$year > 1991), ]

merged_df <- merged_df[!(merged_df$countryname_standard == "Yemen" & merged_df$year < 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Republic of Serbia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Bosnia and Herzegovina" & merged_df$year < 2006), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Croatia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Slovenia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Macedonia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Czech Republic" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Slovakia" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Lithuania" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Georgia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Estonia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Latvia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Ukraine" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Moldova" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Kyrgyzstan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Uzbekistan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Tajikistan" & merged_df$year < 1994), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Armenia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Azerbaijan" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Turkmenistan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Belarus" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Russia" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Kazakhstan" & merged_df$year < 1993), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Eritrea" & merged_df$year < 1994), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Kuwait" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Namibia" & merged_df$year < 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Somalia" & merged_df$year < 2009), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Cyprus" & merged_df$year < 2003), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Ethiopia" & merged_df$year %in% 1991:1996), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Gabon" & merged_df$year < 1991), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Equatorial Guinea" & merged_df$year < 2009), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Haiti" & merged_df$year %in% 1995:2017), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Qatar" & merged_df$year < 2006), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Timor-Leste" & merged_df$year < 2007), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Bahrain" & merged_df$year < 2007), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Angola" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Eswatini" & merged_df$year < 1998), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "Guinea-Bissau" & merged_df$year < 1992), ]
merged_df <- merged_df[!(merged_df$countryname_standard == "South Sudan" & merged_df$year < 2012), ]

Recode specific values

# Find the rows that meet the conditions
rows_to_reset <- merged_df$country_iso3c == "PER" & merged_df$year %in% c(2012, 2013, 2014, 2015, 2016)

# Set the values to 0 for the selected rows
merged_df$mil_leader[rows_to_reset] <- 0

Change variable and drop

# Create a named vector for renaming
change_variables <- c("milpol_crime"="milpol_cripre",
                  "p_a_milpol_crime" = "p_a_milpol_cripre",
                  "milpol_law"="milpol_lawenf",
                  "p_a_milpol_law"="p_a_milpol_lawenf",
                  "milpol_peace"="milpol_peapre",
                  "p_a_milpol_peace"="p_a_milpol_peapre",
                  "milex_healthexp"="milex_health",
                  "pers_to_pop"="milpara_pop",  
                  "pers_to_phy"="milpara_phy", 
                  "hwi_norm"="weapons_pop_norm",
                  "hwi"="weapons_pop")

# Rename
merged_df <- merged_df %>%
  rename(., all_of(change_variables))


# Drop variables

drop_variables <- c("com_mil_serv_sur_min","no_military_service","no_civil_service","gmi",
                    "overmillion_bool","military_bool") #"dup","dup2

merged_df <- merged_df %>%
  select(-one_of(drop_variables))

# Variables to drop in version 1 but include in version 2

drop_variables_v1 <- c("milpol_sum","mobilisation","mil_eco_inf")

merged_df <- merged_df %>%
  select(-one_of(drop_variables_v1))

Transform NA

convert_to_numeric <- c("com_mil_serv",
                        "com_mil_serv_gen",
                        "com_mil_serv_dur_max",
                        "com_mil_serv_dur_min",
                        "alt_civ_serv",
                        #"mobilisation",  #dropped for version 1
                        "pop_total")

merged_df <- merged_df %>%
  mutate(across(all_of(convert_to_numeric), ~ifelse(. == "NA", NA, .))) %>%
  mutate(across(all_of(convert_to_numeric), as.numeric))

merged_df <- merged_df %>%
  mutate_if(is.numeric, ~ifelse(. == -999, NA, .))

FILE BUILDING

Create CSV file

#Select columns

merged_df<-merged_df%>%
  select(-c("countryname_m3","countryname_gmi","country_iso_conscription",
            "country_iso_m3","country_iso_gmi","region","countryname_milpol",
            "countryname_conscription","country_cowcode","countryname_dsg_gmi")) 
            #this are of the old ecom3 dataset: "countryname_ecom3","state_abb", #"ccode"

#Arrange columns
merged_df<-merged_df%>%
  select(year,country_iso3c,everything())

#Filter rows without un-name

merged_df<-merged_df%>%
  filter(!is.na(countryname_un))

#Filter the years 
merged_df<-merged_df%>%
  filter(year!="2021")

#create a file to verify manually

name_of_file=str_c("merged_military_dataset",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".csv")

write_csv(merged_df,name_of_file)

Create SQLite file

db_file=str_c("merged_military_dataset",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".sqlite")

# Create an SQLite connection
con <- dbConnect(RSQLite::SQLite(), dbname = db_file)

# Specify column types
#column_types <- c(year = "INTEGER", everything())

# Write the data frame to an SQLite table
dbWriteTable(con, "m3_dataset", merged_df, overwrite = TRUE) #, col_types = column_types)

# Close the connection
dbDisconnect(con)

COLINEARITY TEST

Prepare collinearlity algorithm

#evaluation turned false, turn on when needed

collinearity_tests <- function(response_vars, data) {
  vif_tol_results <- list()
  eigen_cindex_results <- list()

  for (response_var in response_vars) {
    # Fit the linear regression model
    formula <- paste(response_var, "~ .")
    lm_model <- lm(formula, data = data)

    # Calculate VIF and tolerance
    vif_tol_results[[response_var]] <- olsrr::ols_vif_tol(lm_model)

    # Calculate eigenvalues and condition index
    eigen_cindex_results[[response_var]] <- olsrr::ols_eigen_cindex(lm_model)
  }

  return(list(vif_tol_results, eigen_cindex_results))
}

#evaluation turned false, turn on when needed
#change names and get rid of -888 and -999

# Non standarized vars
variables_list_a <- c("milex_gdp","milex_health","milpara_pop","milpara_phy","reserve_pop",
                      "weapons_pop_norm", #this is the HWI
                      "mil_leader", "mil_origin", "mil_mod", "mil_impun", "mil_veto",
                      "mil_repress", "mil_police", "com_mil_serv", "mil_eco_dummy", "mil_eco_inf")



collinearity_df_a<-merged_df[variables_list_a]

collinearity_df_a$com_mil_serv <- as.numeric(collinearity_df_a$com_mil_serv)

# Standarized vars

variables_list_b <- c("milex_gdp_norm","milex_health_norm","milpara_pop_norm","milpara_phy_norm",
                      "reserve_pop_norm",
                      "weapons_pop_norm", #this is the HWI
                      "mil_leader", "mil_origin", "mil_mod", "mil_impun", "mil_veto",
                      "mil_repress", "mil_police", "com_mil_serv", "mil_eco_dummy", "mil_eco_inf_std")

collinearity_df_b<-merged_df[variables_list_b]

collinearity_df_b$com_mil_serv <- as.numeric(collinearity_df_a$com_mil_serv)

Run the tests

#evaluation turned false, turn on when needed

#Perform the collinearity test

#non standarized vars
results <- collinearity_tests(response_vars = variables_list_a, data = collinearity_df_a)

#standarized vars
results_b <- collinearity_tests(response_vars = variables_list_b, data = collinearity_df_b)

Create the file

#evaluation turned false, turn on when needed

file_name=str_c("collinearity_results_non",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".xlsx")

# Create an Excel workbook
wb <- openxlsx::createWorkbook()

# Loop through the first list in results
for (i in 1:length(results[[1]])) {
  # Get the name of the variable for the sheet
  var_name <- names(results[[1]])[i]
  
  # Create a new sheet for each entry in the first list
  sheet_name <- paste("viftol", var_name, sep = "_")
  openxlsx::addWorksheet(wb, sheetName =sheet_name)   #sheetName = paste("Sheet_1_", i, sep = "")
  
  # Convert the list of results to a data frame and transpose it
  results_df <- as.data.frame(results[[1]][[i]])
  
  # Write the results of the variable to the sheet
  openxlsx::writeData(wb, sheet = i, x = results_df, startCol = 1, startRow = 1)
}

# Loop through the second list in results
for (i in 1:length(results[[2]])) {
  # Get the name of the variable for the sheet
  var_name <- names(results[[2]])[i]
  
  
  # Create a new sheet with the variable name and index
  sheet_name <- paste("eigenindex", var_name, sep = "_")
  openxlsx::addWorksheet(wb, sheetName =sheet_name)
  
  # Convert the list of results to a data frame and transpose it
  results_df <- as.data.frame(results[[2]][[i]])
  
  # Write the results of the variable to the sheet
  openxlsx::writeData(wb, sheet = i + length(results[[1]]), x = results_df, startCol = 1, startRow = 1)
}

# Save the Excel workbook to a file
openxlsx::saveWorkbook(wb, file_name)

#evaluation turned false, turn on when needed


file_name=str_c("collinearity_results_std",sep = "_",Sys.Date(),lubridate::hour(Sys.time()),lubridate::minute(Sys.time()),".xlsx")

# Create an Excel workbook
wb <- openxlsx::createWorkbook()

# Loop through the first list in results
for (i in 1:length(results_b[[1]])) {
  # Get the name of the variable for the sheet
  var_name <- names(results_b[[1]])[i]
  
  # Create a new sheet for each entry in the first list
  sheet_name <- paste("viftol", var_name, sep = "_")
  openxlsx::addWorksheet(wb, sheetName =sheet_name)   #sheetName = paste("Sheet_1_", i, sep = "")
  
  # Convert the list of results to a data frame and transpose it
  results_df <- as.data.frame(results_b[[1]][[i]])
  
  # Write the results of the variable to the sheet
  openxlsx::writeData(wb, sheet = i, x = results_df, startCol = 1, startRow = 1)
}

# Loop through the second list in results
for (i in 1:length(results_b[[2]])) {
  # Get the name of the variable for the sheet
  var_name <- names(results_b[[2]])[i]
  
  
  # Create a new sheet with the variable name and index
  sheet_name <- paste("eigenindex", var_name, sep = "_")
  openxlsx::addWorksheet(wb, sheetName =sheet_name)
  
  # Convert the list of results to a data frame and transpose it
  results_df <- as.data.frame(results_b[[2]][[i]])
  
  # Write the results of the variable to the sheet
  openxlsx::writeData(wb, sheet = i + length(results_b[[1]]), x = results_df, startCol = 1, startRow = 1)
}

# Save the Excel workbook to a file
openxlsx::saveWorkbook(wb, file_name)

Multidimensional Measures of Militarization (m3) Dataset

BICC | Bonn, Germany

Rodrigo Bolaños Suárez

November 17, 2023 | 10:22:14 | CET