-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New flag added to skip the verification of the certificate
- Loading branch information
midir99
committed
Jul 16, 2022
1 parent
edbc73e
commit 2f2241d
Showing
4 changed files
with
40 additions
and
19 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,33 +1,47 @@ | ||
package ws | ||
|
||
import ( | ||
"crypto/tls" | ||
"log" | ||
"net/http" | ||
|
||
"github.com/PuerkitoBio/goquery" | ||
"github.com/midir99/rastreadora/mpp" | ||
) | ||
|
||
func RetrieveDocument(url string) (*goquery.Document, error) { | ||
res, err := http.Get(url) | ||
// MakeClient returns the HTTP client used to fetch pages.
//
// When skipCert is false it returns http.DefaultClient. When skipCert is true
// it returns a new client whose transport has InsecureSkipVerify enabled, so
// the server's certificate chain and host name are not verified. That makes
// connections vulnerable to man-in-the-middle attacks; enable it only for
// hosts with known-broken certificates.
//
// Each call with skipCert set builds a new transport, so callers should create
// the client once and reuse it.
func MakeClient(skipCert bool) *http.Client {
	if !skipCert {
		return http.DefaultClient
	}

	// Start from a copy of the default transport: a bare http.Transport{} has
	// no proxy function, no dial/handshake timeouts and no idle-connection
	// limits. Cloning also keeps http.DefaultTransport itself untouched.
	var tr *http.Transport
	if base, ok := http.DefaultTransport.(*http.Transport); ok {
		tr = base.Clone()
	} else {
		// DefaultTransport was replaced by something that is not an
		// *http.Transport; fall back to an empty transport.
		tr = &http.Transport{}
	}

	if tr.TLSClientConfig == nil {
		tr.TLSClientConfig = &tls.Config{}
	}
	tr.TLSClientConfig.InsecureSkipVerify = true

	return &http.Client{Transport: tr}
}
|
||
func RetrieveDocument(url string, client *http.Client) (*goquery.Document, error) { | ||
resp, err := client.Get(url) | ||
if err != nil { | ||
return nil, err | ||
} | ||
defer res.Body.Close() | ||
if res.StatusCode != http.StatusOK { | ||
defer resp.Body.Close() | ||
if resp.StatusCode != http.StatusOK { | ||
return nil, err | ||
} | ||
doc, err := goquery.NewDocumentFromReader(res.Body) | ||
doc, err := goquery.NewDocumentFromReader(resp.Body) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return doc, nil | ||
} | ||
|
||
func Scrape(pageUrl string, scraper func(*goquery.Document) []mpp.MissingPersonPoster, ch chan []mpp.MissingPersonPoster) { | ||
doc, err := RetrieveDocument(pageUrl) | ||
func Scrape(pageUrl string, client *http.Client, scraper func(*goquery.Document, *http.Client) []mpp.MissingPersonPoster, ch chan []mpp.MissingPersonPoster) { | ||
doc, err := RetrieveDocument(pageUrl, client) | ||
if err != nil { | ||
log.Printf("Error: %s\n", err) | ||
ch <- []mpp.MissingPersonPoster{} | ||
return | ||
} | ||
ch <- scraper(doc) | ||
ch <- scraper(doc, client) | ||
} |