Skip to content

Commit

Permalink
Refactor
Browse files Browse the repository at this point in the history
No longer accepts a string as the first argument.

Signed-off-by: Richie Bendall <[email protected]>
  • Loading branch information
Richienb committed Aug 31, 2020
1 parent bacc534 commit a4efa37
Show file tree
Hide file tree
Showing 11 changed files with 106 additions and 4,534 deletions.
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
dist/*
docs/*
# Build directories
dist/
docs/

# Lock files
package-lock.json
yarn.lock

# Created by https://www.gitignore.io/api/node,linux,macos,windows
# Edit at https://www.gitignore.io/?templates=node,linux,macos,windows
Expand Down
1 change: 1 addition & 0 deletions .npmrc
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
package-lock=false
46 changes: 16 additions & 30 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@
"version": "0.0.0",
"main": "dist/index.js",
"files": [
"src/**/*",
"dist/**/*"
"dist"
],
"engines": {
"node": ">=10"
Expand All @@ -22,46 +21,33 @@
"license": "MIT",
"scripts": {
"docs": "typedoc",
"build": "tsc && yarn docs",
"dev": "yarn build --watch",
"build": "tsc && typedoc",
"dev": "tsc --watch",
"lint": "xo",
"test": "yarn lint && ava"
"test": "xo && ava"
},
"dependencies": {
"@sindresorhus/is": "^2.0.0",
"cheerio": "^1.0.0-rc.3",
"content-type": "^1.0.4",
"iconv-lite": "^0.5.0",
"nice-try": "^2.0.0"
"iconv-lite": "^0.6.2",
"nice-try": "^2.0.1"
},
"devDependencies": {
"@types/cheerio": "^0.22.13",
"@richienb/tsconfig": "^0.1.1",
"@richienb/typedoc": "^0.1.1",
"@types/cheerio": "^0.22.21",
"@types/content-type": "^1.1.3",
"@types/nice-try": "^2.0.0",
"ava": "^3.3.0",
"eslint-config-richienb": "^0.3.0",
"ava": "^3.12.1",
"eslint-config-richienb": "^0.4.2",
"node-fetch": "^2.6.0",
"ts-node": "^8.5.4",
"typedoc": "^0.16.4",
"typescript": "^3.7.4",
"xo": "^0.26.0"
},
"resolutions": {
"eslint": "^6.8.0"
"ts-node": "^9.0.0",
"typedoc": "^0.19.0",
"typescript": "^4.0.2",
"xo": "^0.33.0"
},
"xo": {
"extends": "richienb/ts",
"overrides": [
{
"files": "test.js",
"rules": {
"import/default": 0,
"node/no-missing-import": 0,
"import/no-unresolved": 0,
"node/no-unsupported-features/node-builtins": 0
}
}
]
"extends": "richienb"
},
"ava": {
"extensions": [
Expand Down
19 changes: 19 additions & 0 deletions source/index.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import getCharset from "./utils/get-charset"
import {decode} from "iconv-lite"

/**
* Decode a raw body into a string using the charset reported by `getCharset`.
* ref: http://www.w3.org/TR/2011/WD-html5-20110113/parsing.html#determining-the-character-encoding
*
* @param content The content to convert.
* @param headers HTTP Headers provided with a request.
* @returns The decoded text.
*/
function convertBody(content: Buffer, headers?: Headers): string {
	// Work out which charset the bytes are in first...
	const charset = getCharset(content, headers)

	// ...then let iconv-lite decode the bytes with it
	return decode(content, charset)
}

export = convertBody
42 changes: 42 additions & 0 deletions source/utils/get-charset.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
import {load} from "cheerio"
import parseContentType from "./parse-content-type"

/**
Get the charset of content.

The `Content-Type` header is consulted first. Only when it yields no charset are the first 1024 bytes of the body inspected for an HTML5 `<meta charset>`, an HTML4 `<meta http-equiv content>` or an XML declaration `encoding`.

@param content The content to convert.
@param headers HTTP Headers provided with the request.
@returns The detected charset, or `"utf-8"` when none could be determined.
*/
function getCharset(content: Buffer, headers?: Headers): string {
	// Resulting charset; stays undefined until some source declares one
	let charset: string | undefined

	// Try to extract content-type header
	const contentType = headers?.get("content-type")
	if (contentType) {
		charset = parseContentType(contentType)
	}

	// No charset in content type, peek at response body for at most 1024 bytes
	if (!charset) {
		const data = content.slice(0, 1024).toString()

		// HTML5, HTML4 and XML
		if (data) {
			const $ = load(data)

			const declared =
				$("meta[charset]").attr("charset") || // HTML5
				$("meta[http-equiv][content]").attr("content") || // HTML4
				load(data.replace(/<\?(.*)\?>/im, "<$1>"), {xmlMode: true}).root().find("xml").attr("encoding") // XML

			// Only parse when the document actually declared something
			if (declared) {
				charset = parseContentType(declared)
			}
		}
	}

	// Prevent decode issues when sites use incorrect encoding
	// ref: https://hsivonen.fi/encoding-menu/
	// Applied to header-declared charsets as well as sniffed ones, since the mislabelling concern is the same for both
	if (charset && ["gb2312", "gbk"].includes(charset.toLowerCase())) {
		charset = "gb18030"
	}

	return charset || "utf-8"
}

export = getCharset
12 changes: 12 additions & 0 deletions source/utils/parse-content-type.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
import {parse} from "content-type"
import niceTry from "nice-try"

/**
Get the character set from a Content-Type header.

Values that cannot be parsed as a media type (for example a bare `utf-8` taken from a `<meta charset>` attribute) are returned unchanged, on the assumption that they already are a charset name.

@param contentType The Content-Type HTTP header.
@returns The `charset` parameter when the value parses and has one, `undefined` when it parses but declares no charset, otherwise the input unchanged.
*/
function parseContentType(contentType: string): string | undefined {
	// `parse` throws on anything that is not a valid media type; niceTry turns that into `undefined`
	const parsed = niceTry(() => parse(contentType))

	// Unparseable input is treated as a bare charset name
	if (!parsed) {
		return contentType
	}

	// A valid media type without a charset parameter (e.g. `text/html`) must not be mistaken for a charset
	return parsed.parameters?.charset
}

export = parseContentType
49 changes: 0 additions & 49 deletions src/index.ts

This file was deleted.

15 changes: 0 additions & 15 deletions src/utils/get-charset.ts

This file was deleted.

17 changes: 7 additions & 10 deletions tsconfig.json
Original file line number Diff line number Diff line change
@@ -1,12 +1,9 @@
{
"compilerOptions": {
"esModuleInterop": true,
"sourceMap": true,
"declaration": true,
"outDir": "dist/",
"target": "es5"
},
"include": [
"src/**/*"
]
"extends": "@richienb/tsconfig",
"compilerOptions": {
"outDir": "dist/"
},
"include": [
"source"
]
}
6 changes: 2 additions & 4 deletions typedoc.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
{
"out": "./docs",
"mode": "file",
"target": "ES6",
"ignoreCompilerErrors": true
"extends": "node_modules/@richienb/typedoc",
"out": "./docs"
}
Loading

0 comments on commit a4efa37

Please sign in to comment.