Skip to content

Commit

Permalink
scrape.js: Use GraphQL API
Browse files Browse the repository at this point in the history
Use GitHub GraphQL API instead of REST API

Closes #111
  • Loading branch information
li-boxuan committed Oct 29, 2018
1 parent 2ea74bc commit 8c704e9
Show file tree
Hide file tree
Showing 4 changed files with 60 additions and 23 deletions.
9 changes: 9 additions & 0 deletions lib/queries/github_search_org.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
# Returns at most one search hit for the given search string.
# `login` is selected through an inline fragment, so it is only present
# when the returned node is an Organization; any other node type
# contributes no fields.
query ($query: String!) {
  search(type: USER, query: $query, first: 1) {
    nodes {
      ... on Organization {
        login
      }
    }
  }
}
6 changes: 6 additions & 0 deletions lib/queries/github_user_info.graphql
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
# Looks up a single user by login and selects that user's
# `login` and `updatedAt` fields.
query ($user: String!) {
  user(login: $user) {
    login
    updatedAt
  }
}
2 changes: 2 additions & 0 deletions lib/queries/index.js
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
const { loadQuery } = require('../utils')

// Every export is the result of loadQuery for the query name given,
// published under the constant name the rest of the code imports.
Object.assign(module.exports, {
  GITHUB_REPO_INFO_QUERY: loadQuery('github_repo_info'),
  GITHUB_SEARCH_ORG_QUERY: loadQuery('github_search_org'),
  GITHUB_USER_INFO_QUERY: loadQuery('github_user_info'),
})
66 changes: 43 additions & 23 deletions lib/scrape.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,19 @@ const validUsername = require('valid-github-username')
const wdk = require('wikidata-sdk')
const cheerio = require('cheerio')

const { GITHUB_REPO_INFO_QUERY } = require('./queries')
const {
GITHUB_REPO_INFO_QUERY,
GITHUB_SEARCH_ORG_QUERY,
GITHUB_USER_INFO_QUERY,
} = require('./queries')
const { getLatestCommitMessage } = require('./utils')

const GH_BASE = 'https://github.com'
const GH_USER_BASE = `${GH_BASE}/users`
const GH_ORG_BASE = `${GH_BASE}/orgs`
const GH_API_BASE = 'https://api.github.com'
const GH_GQL_BASE = 'https://api.github.com/graphql'
const GCI_API_BASE = 'https://codein.withgoogle.com/api'

const MIN_SEARCH_SCORE = 10

// The time to cache GitHub usernames for in milliseconds
const GITHUB_CACHE_TIME = 2 * 24 * 60 * 60 * 1000

Expand Down Expand Up @@ -53,12 +54,6 @@ const CHAT_IMAGES = {
OTHER: 'images/chat.png',
}

const GH_API_OPTIONS = {
headers: process.env.GITHUB_TOKEN
? { Authorization: `token ${process.env.GITHUB_TOKEN}` }
: {},
}

const GH_GQL_OPTIONS = {
url: GH_GQL_BASE,
headers: process.env.GITHUB_TOKEN
Expand Down Expand Up @@ -137,6 +132,7 @@ async function fetchRepositoryInfo(org) {
;({ data, errors } = await client.query(GITHUB_REPO_INFO_QUERY, { org }))
} catch (error) {
console.warn(`GitHub query for org ${org} fails, error: ${error}`)
return []
}

if (data && data.organization) {
Expand Down Expand Up @@ -209,12 +205,24 @@ async function checkGitHubUserExists(user) {
}

/**
 * Search GitHub for entries matching `query`.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so two
 * implementations appear back to back inside one body. The `fetch` lines hit
 * the REST `/search/users` endpoint and return `items` (or an empty array);
 * the `client.query` lines run GITHUB_SEARCH_ORG_QUERY and return
 * `data.search.nodes`. Presumably the REST lines are the removed version and
 * the GraphQL lines are the replacement — confirm against the repository.
 *
 * @param {string} query - search text sent to GitHub
 * @returns {Promise<Array>} the entries found, or `[]` when the request
 *   throws or the response carries no usable result
 */
async function searchGitHubOrgs(query) {
const res = await fetch(
`${GH_API_BASE}/search/users?q=${query}%20type:org`,
GH_API_OPTIONS
)
const { items } = await res.json()
return items || []
let data, errors
try {
// Leading semicolon keeps the parenthesized destructuring assignment from
// being parsed as a call on the previous line in this semicolon-free style.
;({ data, errors } = await client.query(GITHUB_SEARCH_ORG_QUERY, { query }))
} catch (error) {
// A thrown request error is logged and reported to the caller as "no results".
console.warn(`GitHub query ${query} fails, error: ${error}`)
return []
}

if (data && data.search && data.search.nodes) {
return data.search.nodes
} else {
// No usable payload: log the first reported error message, if there is one.
const errorMessage =
errors && errors.length ? errors[0].message : 'unknown error'
console.warn(
`Cannot query ${query} from GitHub, error message: ${errorMessage}`
)
return []
}
}

async function getGitHubUserHistory(user, from, to) {
Expand Down Expand Up @@ -262,12 +270,24 @@ function findMatches(input, pattern) {
}

/**
 * Fetch information about the GitHub user `user`.
 *
 * NOTE(review): this listing is a rendered diff without +/- markers, so two
 * implementations are interleaved and the braces do not balance as shown.
 * The `fetch` / `response` lines read `/users/<user>` over REST and discard
 * any response that carries a `message`; the `client.query` lines run
 * GITHUB_USER_INFO_QUERY and return `data.user`. Presumably the REST lines
 * are the removed version — confirm against the repository.
 *
 * @param {string} user - GitHub login to look up
 * @returns {Promise<Object|undefined>} the user object, or `undefined` when
 *   the request throws or no user is returned
 */
async function getGitHubUser(user) {
const res = await fetch(`${GH_API_BASE}/users/${user}`, GH_API_OPTIONS)
let response = await res.json()
if (response && response.message) {
response = undefined
let data, errors
try {
// Leading semicolon keeps the parenthesized destructuring assignment from
// being parsed as a call on the previous line in this semicolon-free style.
;({ data, errors } = await client.query(GITHUB_USER_INFO_QUERY, { user }))
} catch (error) {
// A thrown request error is logged and reported to the caller as "no user".
console.warn(`GitHub query for user ${user} fails, error: ${error}`)
return undefined
}

if (data && data.user) {
return data.user
} else {
// No user in the payload: log the first reported error message, if there is one.
const errorMessage =
errors && errors.length ? errors[0].message : 'unknown error'
console.warn(
`Cannot fetch user ${user} from GitHub, error message: ${errorMessage}`
)
return undefined
}
return response
}

async function findOrganization({
Expand Down Expand Up @@ -307,10 +327,10 @@ async function findOrganization({
)

const removePattern = /the|project|\([a-zA-Z]+\)/gi
const searchQuery = name.replace(removePattern, '').trim()
const searchQuery = name.replace(removePattern, '').trim() + ' type:org'
const searchResults = await searchGitHubOrgs(searchQuery)

if (searchResults.length > 0 && searchResults[0].score > MIN_SEARCH_SCORE) {
if (searchResults.length > 0) {
return searchResults[0].login
}

Expand Down

0 comments on commit 8c704e9

Please sign in to comment.