Commit ae5779e

* The `hash_sentiment_senticnet` dictionary contained "sparsely", which is
  also contained in `hash_valence_shifters`. This term has been dropped from
  the `hash_sentiment_senticnet` dictionary. See #12 for more info.

Tyler Rinker committed Oct 19, 2018
1 parent 657f750 commit ae5779e
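The overlap that motivated this commit is easy to reproduce; a minimal R sketch, assuming a pre-commit `lexicon` (version 1.1.2), where both hash tables key their terms on the `x` column:

```r
# Find terms that appear in both the senticnet polarity table and the
# valence shifter table; prior to this commit the result includes "sparsely".
library(lexicon)

intersect(
    hash_sentiment_senticnet[["x"]],
    hash_valence_shifters[["x"]]
)
```

After updating to 1.1.3 the same call should no longer return "sparsely".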
Showing 10 changed files with 105 additions and 36 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
@@ -1,6 +1,6 @@
Package: lexicon
Title: Lexicons for Text Analysis
Version: 1.1.2
Version: 1.1.3
Maintainer: Tyler Rinker <[email protected]>
Description: A collection of lexical hash tables, dictionaries, and word lists.
Depends: R (>= 3.2.2)
6 changes: 5 additions & 1 deletion NEWS
@@ -27,13 +27,17 @@ BUG FIXES

* `hash_lemmas` had spaces before 2 tokens (" furtherst", " skilled").
  This extra white space has been stripped.

* The `hash_sentiment_senticnet` dictionary contained "sparsely" which is also
  contained in `hash_valence_shifters`. This term has been dropped from the
  `hash_sentiment_senticnet` dictionary. See #12 for more info.

NEW FEATURES

* `profanity_zac_anger` added to provide a longer list of profane words.

* `profanity_racist` added to provide a profane list that is specific for
dtecting racist terms.
detecting racist terms.

* `key_regressive_imagery` added to provide R users with access to Colin
Martindale's (1975, 1990) English Regressive Imagery Dictionary (RID). The
9 changes: 8 additions & 1 deletion NEWS.md
@@ -27,19 +27,26 @@ lexicon 1.0.1 -

* `hash_lemmas` had spaces before 2 tokens (" furtherst", " skilled").
  This extra white space has been stripped.

* The `hash_sentiment_senticnet` dictionary contained "sparsely" which is also
  contained in `hash_valence_shifters`. This term has been dropped from the
  `hash_sentiment_senticnet` dictionary. See #12 for more info.

**NEW FEATURES**

* `profanity_zac_anger` added to provide a longer list of profane words.

* `profanity_racist` added to provide a profane list that is specific for
dtecting racist terms.
detecting racist terms.

* `key_regressive_imagery` added to provide R users with access to Colin
Martindale's (1975, 1990) English Regressive Imagery Dictionary (RID). The
Regressive Imagery Dictionary (RID) is a text analysis coding taxonomy that
can be used to measure the degree to which a text is *primordial* vs.
*conceptual*.

* `key_corporate_social_responsibility` added to provide R users with access to
Pencle & Mălăescu's Corporate Social Responsibility (CSR) Dictionary.

**MINOR FEATURES**

2 changes: 1 addition & 1 deletion R/hash_sentiment_senticnet.R
@@ -24,7 +24,7 @@
#' @keywords datasets
#' @name hash_sentiment_senticnet
#' @usage data(hash_sentiment_senticnet)
#' @format A data frame with 23,627 rows and 2 variables
#' @format A data frame with 23,626 rows and 2 variables
#' @references Cambria, E., Poria, S., Bajpai, R. and Schuller, B. SenticNet 4:
#' A semantic resource for sentiment analysis based on conceptual primitives.
#' In: COLING, pp. 2666-2677, Osaka (2016)
62 changes: 33 additions & 29 deletions README.md
@@ -69,10 +69,10 @@ word lists. The data prefixes help to categorize the data types:
Data
====

<table style="width:97%;">
<table style="width:99%;">
<colgroup>
<col width="48%" />
<col width="48%" />
<col width="52%" />
<col width="45%" />
</colgroup>
<thead>
<tr class="header">
@@ -182,110 +182,114 @@ Data
<td><p>Contraction Conversions</p></td>
</tr>
<tr class="even">
<td><p>key_corporate_social_responsibility</p></td>
<td><p>Nadra Pencle and Irina Malaescu's Corporate Social Responsibility Dictionary</p></td>
</tr>
<tr class="odd">
<td><p>key_grade</p></td>
<td><p>Grades Data Set</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>key_rating</p></td>
<td><p>Ratings Data Set</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>key_regressive_imagery</p></td>
<td><p>Colin Martindale's English Regressive Imagery Dictionary</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>key_sentiment_jockers</p></td>
<td><p>Jockers Sentiment Data Set</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>modal_loughran_mcdonald</p></td>
<td><p>Loughran-McDonald Modal List</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>nrc_emotions</p></td>
<td><p>NRC Emotions</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>pos_action_verb</p></td>
<td><p>Action Word List</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>pos_df_irregular_nouns</p></td>
<td><p>Irregular Nouns Word Dataframe</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>pos_df_pronouns</p></td>
<td><p>Pronouns</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>pos_interjections</p></td>
<td><p>Interjections</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>pos_preposition</p></td>
<td><p>Preposition Words</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>profanity_alvarez</p></td>
<td><p>Alejandro U. Alvarez's List of Profane Words</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>profanity_arr_bad</p></td>
<td><p>Stackoverflow user2592414's List of Profane Words</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>profanity_banned</p></td>
<td><p>bannedwordlist.com's List of Profane Words</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>profanity_racist</p></td>
<td><p>Titus Wormer's List of Racist Words</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>profanity_zac_anger</p></td>
<td><p>Zac Anger's List of Profane Words</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>sw_dolch</p></td>
<td><p>Leveled Dolch List of 220 Common Words</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>sw_fry_100</p></td>
<td><p>Fry's 100 Most Commonly Used English Words</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>sw_fry_1000</p></td>
<td><p>Fry's 1000 Most Commonly Used English Words</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>sw_fry_200</p></td>
<td><p>Fry's 200 Most Commonly Used English Words</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>sw_fry_25</p></td>
<td><p>Fry's 25 Most Commonly Used English Words</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>sw_jockers</p></td>
<td><p>Matthew Jocker's Expanded Topic Modeling Stopword List</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>sw_loughran_mcdonald_long</p></td>
<td><p>Loughran-McDonald Long Stopword List</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>sw_loughran_mcdonald_short</p></td>
<td><p>Loughran-McDonald Short Stopword List</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>sw_lucene</p></td>
<td><p>Lucene Stopword List</p></td>
</tr>
<tr class="odd">
<tr class="even">
<td><p>sw_mallet</p></td>
<td><p>MALLET Stopword List</p></td>
</tr>
<tr class="even">
<tr class="odd">
<td><p>sw_python</p></td>
<td><p>Python Stopword List</p></td>
</tr>
Binary file modified data/hash_sentiment_senticnet.rda
Binary file not shown.
4 changes: 2 additions & 2 deletions inst/CITATION
@@ -5,11 +5,11 @@ citEntry(entry = "manual",
title = "{lexicon}: Lexicon Data",
author = "Tyler W. Rinker",
address = "Buffalo, New York",
note = "version 1.1.2",
note = "version 1.1.3",
year = "2018",
url = "http://github.com/trinker/lexicon",
textVersion = paste("Rinker, T. W. (2018).",
"lexicon: Lexicon Data",
"version 1.1.2.",
"version 1.1.3.",
"http://github.com/trinker/lexicon")
)
51 changes: 51 additions & 0 deletions inst/dev_kit/test_valence_shifter.R
@@ -0,0 +1,51 @@
if (!require("pacman")) install.packages("pacman")
pacman::p_load(lexicon)


test_valence_shifter <- function(
valence_shifter_table = lexicon::hash_valence_shifters,
sentiment_tables = lexicon::available_data('hash_sentiment')[['Data']]
){

valence_words <- valence_shifter_table[['x']]

overlaps <- lapply(sentiment_tables, function(x){

sent_hash <- eval(parse(text = paste0('lexicon::', x)))
intersect(sent_hash[['x']], valence_words)

})

names(overlaps) <- sentiment_tables

class(overlaps) <- 'test_valence_shifter'
overlaps

}

print.test_valence_shifter <- function(x, ...){

    class(x) <- 'list'
    bads <- x[lengths(x) > 0]

    if (length(bads) == 0) {
        textclean:::all_good()
        return(invisible())  # nothing overlaps; stop before printing the report
    }

    ## Format each table's overlapping terms as "table:\n - 'a', 'b'"
    intersecting <- Map(function(x, y){

        paste0(x, ':\n\n - ', y, '\n')

    }, names(bads), lapply(bads, function(x) paste(shQuote(x), collapse = ', ')))

    cat(paste0(
        "The following tables contained these words\n",
        "overlapping with the supplied valence shifter table:\n\n"
    ))
    cat(paste(unlist(intersecting), collapse = '\n\n\n'))
}




test_valence_shifter(lexicon::hash_valence_shifters)
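The helper can also be pointed at a subset of the shifter table. A hypothetical usage sketch, assuming the package convention that `y == 1` marks negators (the filter is illustrative only):

```r
# Hypothetical: check only the negator valence shifters against every
# bundled hash_sentiment_* polarity table.
negators <- lexicon::hash_valence_shifters[
    lexicon::hash_valence_shifters[["y"]] == 1,
]
test_valence_shifter(valence_shifter_table = negators)
```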
3 changes: 3 additions & 0 deletions inst/scraping_scripts/sentiment Lexicon Scripts/senticnet.R
@@ -24,4 +24,7 @@ senticnet <- senticnet %>%

hash_sentiment_senticnet <- sentimentr::update_polarity_table(senticnet)


hash_sentiment_senticnet <- sentimentr::update_key(hash_sentiment_senticnet, drop = "sparsely")

pax::new_data(hash_sentiment_senticnet, , stand.alone = TRUE)
2 changes: 1 addition & 1 deletion man/hash_sentiment_senticnet.Rd

Some generated files are not rendered by default.