diff --git a/DESCRIPTION b/DESCRIPTION index 492af92..1cd8f70 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: lexicon Title: Lexicons for Text Analysis -Version: 1.1.2 +Version: 1.1.3 Maintainer: Tyler Rinker Description: A collection of lexical hash tables, dictionaries, and word lists. Depends: R (>= 3.2.2) diff --git a/NEWS b/NEWS index 2d78613..d0ebcf5 100644 --- a/NEWS +++ b/NEWS @@ -27,13 +27,17 @@ BUG FIXES * `hash_lemmas` had Spaces before 2 tokens (" furtherst", " skilled") meaning. This extra white space has been stripped. + +* The `hash_sentiment_senticnett` dictionary contained "sparsely" which is also + contained in `hash_valence_shifters`. This term has been dropped from the + `hash_sentiment_senticnett` dictionary. See # 12 for more info. NEW FEATURES * `profanity_zac_anger` added to provide a longer list of profane words. * `profanity_racist` added to provide a profane list that is specific for - dtecting racist terms. + detecting racist terms. * `key_regressive_imagery` added to provide R users with access to Colin Martindale's (1975, 1990) English Regressive Imagery Dictionary (RID). The diff --git a/NEWS.md b/NEWS.md index b3ea6b8..8f83ba7 100644 --- a/NEWS.md +++ b/NEWS.md @@ -27,19 +27,26 @@ lexicon 1.0.1 - * `hash_lemmas` had Spaces before 2 tokens (" furtherst", " skilled") meaning. This extra white space has been stripped. + +* The `hash_sentiment_senticnett` dictionary contained "sparsely" which is also + contained in `hash_valence_shifters`. This term has been dropped from the + `hash_sentiment_senticnett` dictionary. See # 12 for more info. **NEW FEATURES** * `profanity_zac_anger` added to provide a longer list of profane words. * `profanity_racist` added to provide a profane list that is specific for - dtecting racist terms. + detecting racist terms. * `key_regressive_imagery` added to provide R users with access to Colin Martindale's (1975, 1990) English Regressive Imagery Dictionary (RID). 
The Regressive Imagery Dictionary (RID) is a text analysis coding taxonomy that can be used to measure the degree to which a text is *primordial* vs. *conceptual*. + +* `key_corporate_social_responsibility` added to provide R users with access to + Pencle & Mălăescu's Corporate Social Responsibility (CSR) Dictionary. **MINOR FEATURES** diff --git a/R/hash_sentiment_senticnet.R b/R/hash_sentiment_senticnet.R index 132e480..b310b1f 100644 --- a/R/hash_sentiment_senticnet.R +++ b/R/hash_sentiment_senticnet.R @@ -24,7 +24,7 @@ #' @keywords datasets #' @name hash_sentiment_senticnet #' @usage data(hash_sentiment_senticnet) -#' @format A data frame with 23,627 rows and 2 variables +#' @format A data frame with 23,626 rows and 2 variables #' @references Cambria, E., Poria, S., Bajpai, R. and Schuller, B. SenticNet 4: #' A semantic resource for sentiment analysis based on conceptual primitives. #' In: COLING, pp. 2666-2677, Osaka (2016) diff --git a/README.md b/README.md index 4167040..a715a65 100644 --- a/README.md +++ b/README.md @@ -69,10 +69,10 @@ word lists. The data prefixes help to categorize the data types: Data ==== - +
--++ @@ -182,110 +182,114 @@ Data + + + + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + - + diff --git a/data/hash_sentiment_senticnet.rda b/data/hash_sentiment_senticnet.rda index 44b6897..450a316 100644 Binary files a/data/hash_sentiment_senticnet.rda and b/data/hash_sentiment_senticnet.rda differ diff --git a/inst/CITATION b/inst/CITATION index 4e3cb8d..bc6c08c 100644 --- a/inst/CITATION +++ b/inst/CITATION @@ -5,11 +5,11 @@ citEntry(entry = "manual", title = "{lexicon}: Lexicon Data", author = "Tyler W. Rinker", address = "Buffalo, New York", - note = "version 1.1.2", + note = "version 1.1.3", year = "2018", url = "http://github.com/trinker/lexicon", textVersion = paste("Rinker, T. W. (2018).", "lexicon: Lexicon Data", - "version 1.1.2.", + "version 1.1.3.", "http://github.com/trinker/lexicon") ) diff --git a/inst/dev_kit/test_valence_shifter.R b/inst/dev_kit/test_valence_shifter.R new file mode 100644 index 0000000..5f4b2b3 --- /dev/null +++ b/inst/dev_kit/test_valence_shifter.R @@ -0,0 +1,51 @@ +if (!require("pacman")) install.packages("pacman") +pacman::p_load(lexicon) + + +test_valence_shifter <- function( + valence_shifter_table = lexicon::hash_valence_shifters, + sentiment_tables = lexicon::available_data('hash_sentiment')[['Data']] + ){ + + valence_words <- valence_shifter_table[['x']] + + overlaps <- lapply(sentiment_tables, function(x){ + + sent_hash <- eval(parse(text = paste0('lexicon::', x))) + intersect(sent_hash[['x']], valence_words) + + }) + + names(overlaps) <- sentiment_tables + + class(overlaps) <- 'test_valence_shifter' + overlaps + +} + +print.test_valence_shifter <- function(x, ...){ + + class(x) <- 'list' + bads <- x[lengths(x) > 0] + + if (length(bads) == 0) { + textclean:::all_good() + } + + intersecting <- Map(function(x, y){ + + paste0(x,':\n\n - ', y, '\n') + + }, names(bads), lapply(bads, function(x) paste(shQuote(x), collapse = ', '))) + + cat(paste0( + "The following tables 
contained these words\n", + "overlapping with the supplied valence shifter table:\n\n" + )) + cat(paste(unlist(intersecting), collapse = '\n\n\n')) +} + + + + +test_valence_shifter(lexicon::hash_valence_shifters) \ No newline at end of file diff --git a/inst/scraping_scripts/sentiment Lexicon Scripts/senticnet.R b/inst/scraping_scripts/sentiment Lexicon Scripts/senticnet.R index 8849ac4..1382a8b 100644 --- a/inst/scraping_scripts/sentiment Lexicon Scripts/senticnet.R +++ b/inst/scraping_scripts/sentiment Lexicon Scripts/senticnet.R @@ -24,4 +24,7 @@ senticnet <- senticnet %>% hash_sentiment_senticnet <- sentimentr::update_polarity_table(senticnet) + +hash_sentiment_senticnet <- sentimentr::update_key(hash_sentiment_senticnet, drop = "sparsely") + pax::new_data(hash_sentiment_senticnet, , stand.alone = TRUE) diff --git a/man/hash_sentiment_senticnet.Rd b/man/hash_sentiment_senticnet.Rd index 3c07484..d429e9b 100644 --- a/man/hash_sentiment_senticnet.Rd +++ b/man/hash_sentiment_senticnet.Rd @@ -4,7 +4,7 @@ \name{hash_sentiment_senticnet} \alias{hash_sentiment_senticnet} \title{Augmented SenticNet Polarity Table} -\format{A data frame with 23,627 rows and 2 variables} +\format{A data frame with 23,626 rows and 2 variables} \usage{ data(hash_sentiment_senticnet) }

Contraction Conversions

key_corporate_social_responsibility

Nadra Pencle and Irina Mălăescu's Corporate Social Responsibility Dictionary

key_grade

Grades Data Set

key_rating

Ratings Data Set

key_regressive_imagery

Colin Martindale's English Regressive Imagery Dictionary

key_sentiment_jockers

Jockers Sentiment Data Set

modal_loughran_mcdonald

Loughran-McDonald Modal List

nrc_emotions

NRC Emotions

pos_action_verb

Action Word List

pos_df_irregular_nouns

Irregular Nouns Word Dataframe

pos_df_pronouns

Pronouns

pos_interjections

Interjections

pos_preposition

Preposition Words

profanity_alvarez

Alejandro U. Alvarez's List of Profane Words

profanity_arr_bad

Stackoverflow user2592414's List of Profane Words

profanity_banned

bannedwordlist.com's List of Profane Words

profanity_racist

Titus Wormer's List of Racist Words

profanity_zac_anger

Zac Anger's List of Profane Words

sw_dolch

Leveled Dolch List of 220 Common Words

sw_fry_100

Fry's 100 Most Commonly Used English Words

sw_fry_1000

Fry's 1000 Most Commonly Used English Words

sw_fry_200

Fry's 200 Most Commonly Used English Words

sw_fry_25

Fry's 25 Most Commonly Used English Words

sw_jockers

Matthew Jockers's Expanded Topic Modeling Stopword List

sw_loughran_mcdonald_long

Loughran-McDonald Long Stopword List

sw_loughran_mcdonald_short

Loughran-McDonald Short Stopword List

sw_lucene

Lucene Stopword List

sw_mallet

MALLET Stopword List

sw_python

Python Stopword List