chore: refactored and linted

jocmp · Sep 13, 2016 · 7e2a349 · 7e2a349
1 parent 9906bd3
commit 7e2a349
Show file tree

Hide file tree

Showing 193 changed files with 4,177 additions and 4,315 deletions.
diff --git a/.eslintignore b/.eslintignore
@@ -0,0 +1 @@
+**/fixtures/*
diff --git a/.eslintrc b/.eslintrc
@@ -0,0 +1,39 @@
+// Use this file as a starting point for your project's .eslintrc.
+// Copy this file, and add rule overrides as needed.
+{
+  "parser": "babel-eslint",
+  "extends": "airbnb",
+  "plugins": [
+    "babel"
+  ],
+  "globals": {
+    /* mocha */
+    "describe",
+    "it"
+  },
+  "rules": {
+    "no-param-reassign": 0,
+    /* TODO fix this; this should work w/import/resolver below, but doesn't */
+    "import/no-extraneous-dependencies": 0,
+    "import/no-unresolved": 0,
+    "no-control-regex": 0,
+    "import/prefer-default-export": 0,
+    "generator-star-spacing": 0,
+    "babel/generator-star-spacing": 0,
+    "func-names": 0,
+    "no-useless-escape": 0,
+    "no-confusing-arrow": 0,
+  },
+  "settings": {
+    "import/resolver": {
+      "babel-module": {
+        "extensions": [".js"]
+      }
+    }
+  },
+  "parserOptions":{
+    "ecmaFeatures": {
+      "experimentalObjectRestSpread": true
+    }
+  }
+}
diff --git a/package.json b/package.json
@@ -5,14 +5,17 @@
   "main": "index.js",
   "scripts": {
     "start": "node ./build",
-    "build": "rollup -c",
+    "lint": "eslint src/**",
+    "build": "eslint src/** && rollup -c",
     "test": "./test-runner"
   },
   "author": "",
   "license": "ISC",
   "devDependencies": {
+    "babel-eslint": "^6.1.2",
     "babel-plugin-external-helpers": "^6.8.0",
     "babel-plugin-module-alias": "^1.6.0",
+    "babel-plugin-module-resolver": "^2.2.0",
     "babel-plugin-transform-async-to-generator": "^6.8.0",
     "babel-plugin-transform-es2015-destructuring": "^6.9.0",
     "babel-plugin-transform-object-rest-spread": "^6.8.0",
@@ -21,6 +24,14 @@
     "babel-preset-es2015-rollup": "^1.2.0",
     "babel-register": "^6.11.6",
     "babelrc-rollup": "^3.0.0",
+    "eslint": "^3.5.0",
+    "eslint-config-airbnb": "^11.1.0",
+    "eslint-import-resolver-babel-module": "^2.0.1",
+    "eslint-plugin-async": "^0.1.1",
+    "eslint-plugin-babel": "^3.3.0",
+    "eslint-plugin-import": "^1.15.0",
+    "eslint-plugin-jsx-a11y": "^2.2.2",
+    "eslint-plugin-react": "^6.2.1",
     "mocha": "^3.0.2",
     "rollup": "^0.34.13",
     "rollup-plugin-babel": "^2.6.1",

diff --git a/score-move b/score-move
@@ -0,0 +1,21 @@
+#!/usr/bin/env fish
+# Scaffold a next-page-url scoring util: score-move <file> <function>
+
+if test (count $argv) -ne 2
+    echo "usage: score-move <file> <function>" >&2
+    exit 1
+end
+
+set file $argv[1]
+set function $argv[2]
+set dir src/extractors/generic/next-page-url/scoring/utils
+touch $dir/index.js $dir/$file.js
+# Seed the test file with its imports, then register the export in index.js.
+printf "import assert from 'assert';\n\nimport %s from './%s';\n\n" $function $file > $dir/$file.test.js
+echo "export { default as $function } from './$file'" >> $dir/index.js
+
+echo "Now make it a default export"
+echo "Move it to its file"
+echo "Move its tests to its test file"
+echo "import in score-links"
+echo "Test it."
diff --git a/src/cleaners/author.js b/src/cleaners/author.js
@@ -1,7 +1,7 @@
-import { CLEAN_AUTHOR_RE } from './constants'
+import { CLEAN_AUTHOR_RE } from './constants';
 
 // Take an author string (like 'By David Smith ') and clean it to
 // just the name(s): 'David Smith'.
 export default function cleanAuthor(author) {
-  return author.replace(CLEAN_AUTHOR_RE, '$2').trim()
+  return author.replace(CLEAN_AUTHOR_RE, '$2').trim();
 }
diff --git a/src/cleaners/author.test.js b/src/cleaners/author.test.js
@@ -1,21 +1,21 @@
-import assert from 'assert'
+import assert from 'assert';
 
-import cleanAuthor from './author'
+import cleanAuthor from './author';
 
 describe('cleanAuthor(author)', () => {
   it('removes the By from an author string', () => {
-    const author = cleanAuthor('By Bob Dylan')
+    const author = cleanAuthor('By Bob Dylan');
 
-    assert.equal(author, 'Bob Dylan')
-  })
+    assert.equal(author, 'Bob Dylan');
+  });
 
   it('trims trailing whitespace and line breaks', () => {
     const text = `
       written by
       Bob Dylan
-    `
-    const author = cleanAuthor(text)
+    `;
+    const author = cleanAuthor(text);
 
-    assert.equal(author, 'Bob Dylan')
-  })
-})
+    assert.equal(author, 'Bob Dylan');
+  });
+});
diff --git a/src/cleaners/constants.js b/src/cleaners/constants.js
@@ -1,9 +1,9 @@
 // CLEAN AUTHOR CONSTANTS
-export const CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i
+export const CLEAN_AUTHOR_RE = /^\s*(posted |written )?by\s*:?\s*(.*)/i;
     //     author = re.sub(r'^\s*(posted |written )?by\s*:?\s*(.*)(?i)',
 
 // CLEAN DEK CONSTANTS
-export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i')
+export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i');
 // An ordered list of meta tag names that denote likely article deks.
 // From most distinct to least distinct.
 //
@@ -14,7 +14,7 @@ export const TEXT_LINK_RE = new RegExp('http(s)?://', 'i')
 // However, these tags often have SEO-specific junk in them that's not
 // header-worthy like a dek is. Excerpt material at best.
 export const DEK_META_TAGS = [
-]
+];
 
 // An ordered list of Selectors to find likely article deks. From
 // most explicit to least explicit.
@@ -23,18 +23,36 @@ export const DEK_META_TAGS = [
 // detrimental to the aesthetics of an article.
 export const DEK_SELECTORS = [
   '.entry-summary',
-]
+];
 
 // CLEAN DATE PUBLISHED CONSTANTS
-export const CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i
-export const TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i
-export const TIME_MERIDIAN_DOTS_RE = /\.m\./i
-export const SPLIT_DATE_STRING = /(\d{1,2}:\d{2,2}(\s?[ap]\.?m\.?)?)|(\d{1,2}[\/-]\d{1,2}[\/-]\d{2,4})|(\d{1,4})|(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)/ig
+export const CLEAN_DATE_STRING_RE = /^\s*published\s*:?\s*(.*)/i;
+export const TIME_MERIDIAN_SPACE_RE = /(.*\d)(am|pm)(.*)/i;
+export const TIME_MERIDIAN_DOTS_RE = /\.m\./i;
+// Month abbreviations recognised as date tokens (matched case-insensitively).
+const months = [
+  'jan', 'feb', 'mar', 'apr', 'may', 'jun',
+  'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
+];
+const allMonths = months.join('|');
+// The pieces below are RegExp source strings, so every regex escape needs a
+// doubled backslash to survive the string literal.
+// timestamp1: a clock time with an optional am/pm/a.m./p.m. suffix. The dots
+// are escaped so they match only a literal '.', not any character, and \s
+// allows any whitespace before the meridian.
+const timestamp1 = '[0-9]{1,2}:[0-9]{2}(\\s?[ap]\\.?m\\.?)?';
+// timestamp2: a numeric date separated by '/' or '-', e.g. 9/13/2016.
+const timestamp2 = '[0-9]{1,2}[/-][0-9]{1,2}[/-][0-9]{2,4}';
+// Global, case-insensitive matcher for clock times, numeric dates, bare
+// 1-4 digit numbers and month abbreviations. Because of the `g` flag, use it
+// with String#match rather than repeated test()/exec() calls.
+export const SPLIT_DATE_STRING =
+  new RegExp(`(${timestamp1})|(${timestamp2})|([0-9]{1,4})|(${allMonths})`, 'ig');
 
 // CLEAN TITLE CONSTANTS
 // A regular expression that will match separating characters on a
 // title, that usually denote breadcrumbs or something similar.
-export const TITLE_SPLITTERS_RE = /(: | - | \| )/g
+export const TITLE_SPLITTERS_RE = /(: | - | \| )/g;
 
 export const DOMAIN_ENDINGS_RE =
-  new RegExp('\.com$|\.net$|\.org$|\.co\.uk$', 'g')
+  /\.com$|\.net$|\.org$|\.co\.uk$/g; // literal keeps '.' escaped; '\.' in a string is just '.'
diff --git a/src/cleaners/content.js b/src/cleaners/content.js
@@ -8,54 +8,52 @@ import {
   rewriteTopLevel,
   stripJunkTags,
   makeLinksAbsolute,
-} from 'utils/dom'
-
-import { convertNodeTo } from 'utils/dom'
+} from 'utils/dom';
 
 // Clean our article content, returning a new, cleaned node.
 export default function extractCleanNode(
   article,
   {
     $,
-    cleanConditionally=true,
-    title='',
-    url='',
+    cleanConditionally = true,
+    title = '',
+    url = '',
   }
 ) {
   // Rewrite the tag name to div if it's a top level node like body or
   // html to avoid later complications with multiple body tags.
-  rewriteTopLevel(article, $)
+  rewriteTopLevel(article, $);
 
   // Drop small images and spacer images
-  cleanImages(article, $)
+  cleanImages(article, $);
 
   // Drop certain tags like <title>, etc
   // This is -mostly- for cleanliness, not security.
-  stripJunkTags(article, $)
+  stripJunkTags(article, $);
 
   // H1 tags are typically the article title, which should be extracted
   // by the title extractor instead. If there's less than 3 of them (<3),
   // strip them. Otherwise, turn 'em into H2s.
-  cleanHOnes(article, $)
+  cleanHOnes(article, $);
 
   // Clean headers
-  cleanHeaders(article, $, title)
+  cleanHeaders(article, $, title);
 
   // Make links absolute
-  makeLinksAbsolute(article, $, url)
+  makeLinksAbsolute(article, $, url);
 
   // Remove style or align attributes
-  cleanAttributes(article, $)
+  cleanAttributes(article);
 
   // We used to clean UL's and OL's here, but it was leading to
   // too many in-article lists being removed. Consider a better
   // way to detect menus particularly and remove them.
-  cleanTags(article, $, cleanConditionally)
+  cleanTags(article, $, cleanConditionally);
 
   // Remove empty paragraph nodes
-  removeEmpty(article, $)
+  removeEmpty(article, $);
 
-  return article
+  return article;
 }
     //     headers = doc.xpath('.//h2 | .//h3 | .//h4 | .//h5 | .//h6')
     //     for header in headers:

diff --git a/src/cleaners/content.test.js b/src/cleaners/content.test.js
@@ -1,32 +1,32 @@
-import assert from 'assert'
-import cheerio from 'cheerio'
-import fs from 'fs'
+import assert from 'assert';
+import cheerio from 'cheerio';
+import fs from 'fs';
 
-import extractCleanNode from './content'
-import extractBestNode from 'extractors/generic/content/extract-best-node'
+import extractBestNode from 'extractors/generic/content/extract-best-node';
+import extractCleanNode from './content';
 
 describe('extractCleanNode(article, { $, cleanConditionally, title } })', () => {
-  it("cleans cruft out of a DOM node", () => {
-    const html = fs.readFileSync('./fixtures/wired.html', 'utf-8')
-    let $ = cheerio.load(html)
+  it('cleans cruft out of a DOM node', () => {
+    const html = fs.readFileSync('./fixtures/wired.html', 'utf-8');
+    const $ = cheerio.load(html);
 
     const opts = {
-                    stripUnlikelyCandidates: true,
-                    weightNodes: true,
-                    cleanConditionally: true,
-                 }
+      stripUnlikelyCandidates: true,
+      weightNodes: true,
+      cleanConditionally: true,
+    };
 
-    const bestNode = extractBestNode($, opts)
-    let result = $.html(bestNode)
-    // console.log(result)
-    // console.log(result.length)
-    const cleanNode = extractCleanNode(bestNode, { $, opts })
-    result = $.html(cleanNode)
-    // console.log(result.length)
-    // console.log(result)
-    // console.log(bestNode.html())
+    const bestNode = extractBestNode($, opts);
+
+    // extractCleanNode destructures its settings ($, cleanConditionally,
+    // title, url) from the top level of its second argument. Nesting opts
+    // under an `opts` key would hide cleanConditionally from it and leave
+    // the cleaner on its defaults, so spread opts into the options object.
+    // stripUnlikelyCandidates and weightNodes are not among the names it
+    // destructures, so they are ignored.
+    const cleanNode = extractCleanNode(bestNode, { $, ...opts });
 
-    assert.equal($(bestNode).text().length, 2687)
-  })
-})
+    assert.equal($(cleanNode).text().length, 2687);
+  });
+});
 
diff --git a/src/cleaners/date-published.js b/src/cleaners/date-published.js
@@ -1,4 +1,4 @@
-import moment from 'moment'
+import moment from 'moment';
 // Is there a compelling reason to use moment here?
 // Mostly only being used for the isValid() method,
 // but could just check for 'Invalid Date' string.
@@ -7,27 +7,27 @@ import {
   CLEAN_DATE_STRING_RE,
   SPLIT_DATE_STRING,
   TIME_MERIDIAN_SPACE_RE,
-  TIME_MERIDIAN_DOTS_RE
-} from './constants'
+  TIME_MERIDIAN_DOTS_RE,
+} from './constants';
+
+export function cleanDateString(dateString) {
+  // Keep only the tokens SPLIT_DATE_STRING matches, collapse ".m." to "m",
+  // space out a meridian glued to a digit, then strip a "published" prefix.
+  const tokens = dateString.match(SPLIT_DATE_STRING) || [];
+  const joined = tokens.join(' ').replace(TIME_MERIDIAN_DOTS_RE, 'm');
+  const spaced = joined.replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3');
+  return spaced.replace(CLEAN_DATE_STRING_RE, '$1').trim();
+}
 
 // Take a date published string, and hopefully return a date out of
 // it. Return none if we fail.
 export default function cleanDatePublished(dateString) {
-  let date = moment(new Date(dateString))
+  let date = moment(new Date(dateString));
 
   if (!date.isValid()) {
-    dateString = cleanDateString(dateString)
-    date = moment(new Date(dateString))
+    dateString = cleanDateString(dateString);
+    date = moment(new Date(dateString));
   }
 
-  return date.isValid() ? date.toISOString() : null
-}
-
-export function cleanDateString(dateString) {
-  return (dateString.match(SPLIT_DATE_STRING) || [])
-                   .join(' ')
-                   .replace(TIME_MERIDIAN_DOTS_RE, 'm')
-                   .replace(TIME_MERIDIAN_SPACE_RE, '$1 $2 $3')
-                   .replace(CLEAN_DATE_STRING_RE, '$1')
-                   .trim()
+  return date.isValid() ? date.toISOString() : null;
 }