## Tested on R version 4.1.2 (2021-11-01) ########################################################### ## Libraries. Install with install.packages("") if needed. ########################################################### library(stringr) library(slam) ################################################################################## ## Load relevant objects for PART 1. ## ## unigrams_dictionary has three columns: ## 1. unigram ## 2. MNIR loading (fitted on earning calls 2006-2014). ## 3. frequency counts (across full corpora) ## ## bigrams_dictionary has three columns: ## 1. bigram ## 2. MNIR loading (fitted on earning calls 2006-2014). ## 3. frequency counts (across full corpora) ## ## LM_pos_words and LM_neg_words are the Loughran and McDonald positive ## and negative word lists (2017). ## ## ML_unigrams_pos and ML_unigrams_neg are the plain money English dictionaries ## for unigrams estimated over 2006-2019. ## ML_bigrams_pos and ML_bigrams_neg are the plain money English dictionaries ## for bigrams estimated over 2006-2019. ## These are the dictionaries provided in the Appendix of the paper. ## ## ML_unigrams_pos_2014 and ML_unigrams_neg_2014 are the plain money English dictionaries ## for unigrams estimated over 2006-2014. ## ML_bigrams_pos_2014 and ML_bigrams_neg_2014 are the plain money English dictionaries ## for bigrams estimated over 2006-2014. ## These are the dictionaries used for in Table 10 (OOS). ################################################################################## load("Color.Part1.20211201.RData") objects() ################################################################################## ## Simple comparisons of ML unigrams and LM dictionaries. ## Grab positive/negative/neutrals unigrams (main-spec, 2006-2014 fit, for Tables 1+). ## These calculations roughly correspond to Table 3 in the paper. ################################################################################## ML_uni_pos_main_spec <- unigrams_dictionary[unigrams_dictionary[,2]>0,1] ML_uni_neg_main_spec <- unigrams_dictionary[unigrams_dictionary[,2]<0,1] ML_uni_neu_main_spec <- unigrams_dictionary[unigrams_dictionary[,2]==0,1] flag_pos_ml_neg <- ML_uni_neg_main_spec %in% LM_pos_words flag_neg_ml_neg <- ML_uni_neg_main_spec %in% LM_neg_words flag_pos_ml_pos <- ML_uni_pos_main_spec %in% LM_pos_words flag_neg_ml_pos <- ML_uni_pos_main_spec %in% LM_neg_words flag_pos_ml_neu <- ML_uni_neu_main_spec %in% LM_pos_words flag_neg_ml_neu <- ML_uni_neu_main_spec %in% LM_neg_words length(ML_pos_unigrams) length(LM_pos_words) length(ML_neg_unigrams) length(LM_neg_words) ## Numbers for Panel A of Table 3 in the paper. sum(flag_pos_ml_pos) sum(flag_neg_ml_pos) sum(flag_pos_ml_neg) sum(flag_neg_ml_neg) sum(flag_pos_ml_neu) sum(flag_neg_ml_neu) ################################################################################## ## Simple comparisons of ML bigrams and LM dictionaries ################################################################################## ## Grab positive/negative/neutrals bigrams (2006-2014 fit, Tables 1 and 2) ML_bi_pos_main_spec <- bigrams_dictionary[bigrams_dictionary[,2]>0,1] ML_bi_neg_main_spec <- bigrams_dictionary[bigrams_dictionary[,2]<0,1] ML_bi_neu_main_spec <- bigrams_dictionary[bigrams_dictionary[,2]==0,1] regex_lm_pos <- paste("\\b(", paste(LM_pos_words, collapse="|"), ")\\b", sep="") regex_lm_neg <- paste("\\b(", paste(LM_neg_words, collapse="|"), ")\\b", sep="") flag_pos_ml_pos <- grepl(regex_lm_pos, ML_bi_pos_main_spec) flag_neg_ml_pos <- grepl(regex_lm_neg, ML_bi_pos_main_spec) flag_pos_ml_neg <- grepl(regex_lm_pos, ML_bi_neg_main_spec) flag_neg_ml_neg <- grepl(regex_lm_neg, ML_bi_neg_main_spec) length(ML_bi_pos_main_spec) length(ML_bi_neg_main_spec) sum(flag_pos_ml_pos) sum(flag_neg_ml_pos) sum(flag_pos_ml_neg) sum(flag_neg_ml_neg) sum(flag_neg_ml_neg & flag_pos_ml_neg) sum(flag_neg_ml_pos & flag_pos_ml_pos) ################################################################################## ## Simple comparisons of ML plain money English and LM dictionaries ################################################################################## ## Unigrams flag_pos_ml_neg <- ML_neg_unigrams %in% LM_pos_words flag_neg_ml_neg <- ML_neg_unigrams %in% LM_neg_words flag_pos_ml_pos <- ML_pos_unigrams %in% LM_pos_words flag_neg_ml_pos <- ML_pos_unigrams %in% LM_neg_words length(ML_pos_unigrams) length(LM_pos_words) length(ML_neg_unigrams) length(LM_neg_words) sum(flag_neg_ml_pos) sum(flag_pos_ml_pos) sum(flag_neg_ml_neg) sum(flag_pos_ml_neg) ## Bigrams regex_lm_pos <- paste("\\b(", paste(LM_pos_words, collapse="|"), ")\\b", sep="") regex_lm_neg <- paste("\\b(", paste(LM_neg_words, collapse="|"), ")\\b", sep="") flag_pos_ml_neg <- grepl(regex_lm_pos, ML_neg_bigrams) flag_neg_ml_neg <- grepl(regex_lm_neg, ML_neg_bigrams) flag_pos_ml_pos <- grepl(regex_lm_pos, ML_pos_bigrams) flag_neg_ml_pos <- grepl(regex_lm_neg, ML_pos_bigrams) length(ML_pos_bigrams) length(ML_neg_bigrams) sum(flag_neg_ml_pos) sum(flag_pos_ml_pos) sum(flag_neg_ml_neg) sum(flag_pos_ml_neg) ## Frequency counts flag_lm_pos <- unigrams_dictionary[,1] %in% LM_pos_words flag_lm_neg <- unigrams_dictionary[,1] %in% LM_neg_words sum(unigrams_dictionary[flag_lm_pos,3])/sum(unigrams_dictionary[,3]) sum(unigrams_dictionary[flag_lm_neg,3])/sum(unigrams_dictionary[,3]) flag_ml_pos <- unigrams_dictionary[,1] %in% ML_pos_unigrams flag_ml_neg <- unigrams_dictionary[,1] %in% ML_neg_unigrams sum(unigrams_dictionary[flag_ml_pos,3])/sum(unigrams_dictionary[,3]) sum(unigrams_dictionary[flag_ml_neg,3])/sum(unigrams_dictionary[,3]) flag_ml_pos <- bigrams_dictionary[,1] %in% ML_pos_bigrams flag_ml_neg <- bigrams_dictionary[,1] %in% ML_neg_bigrams sum(bigrams_dictionary[flag_ml_pos,3])/sum(unigrams_dictionary[,3]) sum(bigrams_dictionary[flag_ml_neg,3])/sum(unigrams_dictionary[,3]) ################################################################################## ## Disambiguation function, replicating Table 6 in the paper. ## I takes a single word as an input, with an optional variable ## restricting how much to print out in the output (30 is the default). ## ## The output, if the word is associated with a bigram in our 65K dtm, is ## a list with five items: ## 1. Number of bigrams, positive/neutral/negative. ## 2. Total frequency counts of all bigrams, percent positive/neutral/negative. ## 3. Top positive bigrams associated with that term (frequency ranked). ## 4. Top neutral bigrams associated with that term (frequency ranked). ## 5. Top negative bigrams associated with that term (frequency ranked). ################################################################################## bigram_colors <- function(word, top_no = 30){ regex_word <- paste("\\b", word, "\\b", sep="") bigram_out <- "No bigrams associated with this name in top 65K." regex_hit_2 <- grepl(regex_word, as.character(unlist(bigrams_dictionary[,1]))) uni_temp_count <- unigrams_dictionary[word==unigrams_dictionary[,1],3] if(sum(regex_hit_2)>0){ bigram_temp <- bigrams_dictionary[regex_hit_2,] bigram_temp[,4] <- bigram_temp[,3]/uni_temp_count no_bigrams <- dim(bigram_temp)[1] no_bigrams_pos <- sum(bigram_temp[,2]>0) no_bigrams_neu <- sum(bigram_temp[,2]==0) no_bigrams_neg <- sum(bigram_temp[,2]<0) sum_bigrams <- sum(bigram_temp[,3]) sum_bigrams_pos <- sum(bigram_temp[bigram_temp[,2]>0,3])/sum_bigrams sum_bigrams_neu <- sum(bigram_temp[bigram_temp[,2]==0,3])/sum_bigrams sum_bigrams_neg <- sum(bigram_temp[bigram_temp[,2]<0,3])/sum_bigrams bigrams_pos <- bigram_temp[bigram_temp[,2]>0,c(1,3,4)] bigrams_neu <- bigram_temp[bigram_temp[,2]==0,c(1,3,4)] bigrams_neg <- bigram_temp[bigram_temp[,2]<0,c(1,3,4)] bigrams_pos <- bigrams_pos[order(bigrams_pos[,2], decreasing=TRUE),] bigrams_neu <- bigrams_neu[order(bigrams_neu[,2], decreasing=TRUE),] bigrams_neg <- bigrams_neg[order(bigrams_neg[,2], decreasing=TRUE),] if(dim(bigrams_pos)[1]>top_no){ bigrams_pos <- bigrams_pos[1:top_no,]} if(dim(bigrams_neu)[1]>top_no){ bigrams_neu <- bigrams_neu[1:top_no,]} if(dim(bigrams_neg)[1]>top_no){ bigrams_neg <- bigrams_neg[1:top_no,]} no_bigrams_all <- c(no_bigrams, no_bigrams_pos, no_bigrams_neu, no_bigrams_neg) sum_bigrams_all <- c(sum_bigrams, sum_bigrams_pos, sum_bigrams_neu, sum_bigrams_neg) bigram_out <- list(no_bigrams_all, sum_bigrams_all, bigrams_pos, bigrams_neu, bigrams_neg) } return(bigram_out) } ####################### bigram_colors("demand") bigram_colors("solid") bigram_colors("softer") bigram_colors("not") bigram_colors("dead") bigram_colors("leverage") bigram_colors("sheet") bigram_colors("cash") bigram_colors("confident")