Skip to content

Commit

Permalink
Web UI added
Browse files Browse the repository at this point in the history
  • Loading branch information
gosom committed Oct 16, 2024
1 parent b1f564c commit 1cc78c8
Show file tree
Hide file tree
Showing 14 changed files with 196 additions and 160 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
APP_NAME := google_maps_scraper
VERSION := 1.3.6
VERSION := 1.4.0

default: help

Expand Down
18 changes: 16 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -39,17 +39,31 @@

## 🚀 Please [vote](https://github.com/gosom/google-maps-scraper/discussions/61) for the next features

A command line google maps scraper build using
A command-line and web-based Google Maps scraper built using

[scrapemate](https://github.com/gosom/scrapemate) web crawling framework.

You can use this repository either as is, or you can use its code as a base and
customize it to your needs.

**Update:** Added support for extracting emails from business websites


## Try it

### Web UI:

![Example GIF](img/example.gif)


```
mkdir -p gmapsdata && docker run -v $PWD/gmapsdata:/gmapsdata -p 8080:8080 gosom/google-maps-scraper -web -data-folder /gmapsdata
```

Or download the [binary](https://github.com/gosom/google-maps-scraper/releases) for your platform and run it with the `-web` command line argument.


### Command line:

```
touch results.csv && docker run -v $PWD/example-queries.txt:/example-queries -v $PWD/results.csv:/results.csv gosom/google-maps-scraper -depth 1 -input /example-queries -results /results.csv -exit-on-inactivity 3m
```
Expand Down
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,15 @@ require (
github.com/google/uuid v1.6.0
github.com/gosom/scrapemate v0.6.0
github.com/jackc/pgx/v5 v5.7.1
github.com/mattn/go-runewidth v0.0.16
github.com/mcnijman/go-emailaddress v1.1.1
github.com/playwright-community/playwright-go v0.4701.0
github.com/posthog/posthog-go v1.2.24
github.com/shirou/gopsutil/v4 v4.24.9
github.com/stretchr/testify v1.9.0
golang.org/x/exp v0.0.0-20240909161429-701f63a606c0
golang.org/x/sync v0.8.0
golang.org/x/term v0.24.0
modernc.org/sqlite v1.33.1
)

Expand Down Expand Up @@ -135,7 +137,6 @@ require (
github.com/matoous/godox v0.0.0-20230222163458-006bad1f9d26 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.9 // indirect
github.com/matttproud/golang_protobuf_extensions v1.0.1 // indirect
github.com/mgechev/revive v1.3.9 // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect
Expand All @@ -162,6 +163,7 @@ require (
github.com/quasilyte/regex/syntax v0.0.0-20210819130434-b3f0c404a727 // indirect
github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/rs/zerolog v1.33.0 // indirect
github.com/ryancurrah/gomodguard v1.3.5 // indirect
github.com/ryanrolds/sqlclosecheck v0.5.1 // indirect
Expand Down
7 changes: 6 additions & 1 deletion go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -424,8 +424,9 @@ github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/
github.com/mattn/go-isatty v0.0.19/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY=
github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y=
github.com/mattn/go-runewidth v0.0.9 h1:Lm995f3rfxdpd6TSmuVCHVb/QhupuXlYr8sCI/QdE+0=
github.com/mattn/go-runewidth v0.0.9/go.mod h1:H031xJmbD/WCDINGzjvQ9THkh0rPKHF+m2gUSrubnMI=
github.com/mattn/go-runewidth v0.0.16 h1:E5ScNMtiwvlvB5paMFdw9p4kSQzbXFikJ5SQO6TULQc=
github.com/mattn/go-runewidth v0.0.16/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/matttproud/golang_protobuf_extensions v1.0.1 h1:4hp9jkHxhMHkqkrB3Ix0jegS5sx/RkqARlsWZ6pIwiU=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/mcnijman/go-emailaddress v1.1.1 h1:AGhgVDG3tCDaL0/Vc6erlPQjDuDN3dAT7rRdgFtetr0=
Expand Down Expand Up @@ -528,6 +529,8 @@ github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567 h1:M8mH9eK4OUR4l
github.com/quasilyte/stdinfo v0.0.0-20220114132959-f7386bf02567/go.mod h1:DWNGW8A4Y+GyBgPuaQJuWiy0XYftx4Xm/y5Jqk9I6VQ=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94icq4NjY3clb7Lk8O1qJ8BdBEF8z0ibU0rE=
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
github.com/rivo/uniseg v0.2.0 h1:S1pD9weZBuJdFmowNwbpi7BJ8TNftyUImj/0WQi72jY=
github.com/rivo/uniseg v0.2.0/go.mod h1:J6wj4VEh+S6ZtnVlnTBMWIodfgj8LQOQFoIToxlJtxc=
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
Expand Down Expand Up @@ -864,6 +867,8 @@ golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
golang.org/x/term v0.24.0 h1:Mh5cbb+Zk2hqqXNO7S1iTjEphVL+jb8ZWaqh/g+JWkM=
golang.org/x/term v0.24.0/go.mod h1:lOBK/LVxemqiMij05LGJ0tzNr8xlmwBRJ81PX6wVLH8=
golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
Expand Down
Binary file added img/example.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
8 changes: 7 additions & 1 deletion main.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@ import (

func main() {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()

runner.Banner()

sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
Expand All @@ -34,6 +35,7 @@ func main() {

runnerInstance, err := runnerFactory(cfg)
if err != nil {
cancel()
os.Stderr.WriteString(err.Error() + "\n")

runner.Telemetry().Close()
Expand All @@ -47,12 +49,16 @@ func main() {
_ = runnerInstance.Close(ctx)
runner.Telemetry().Close()

cancel()

os.Exit(1)
}

_ = runnerInstance.Close(ctx)
runner.Telemetry().Close()

cancel()

os.Exit(0)
}

Expand Down
79 changes: 79 additions & 0 deletions runner/runner.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,16 @@ import (
"context"
"errors"
"flag"
"fmt"
"os"
"runtime"
"strings"
"sync"
"time"

"github.com/mattn/go-runewidth"
"golang.org/x/term"

"github.com/gosom/google-maps-scraper/tlmt"
"github.com/gosom/google-maps-scraper/tlmt/gonoop"
"github.com/gosom/google-maps-scraper/tlmt/goposthog"
Expand Down Expand Up @@ -143,3 +148,77 @@ func Telemetry() tlmt.Telemetry {

return telemetry
}

// wrapText splits text into consecutive chunks whose display width, as
// reported by runewidth.RuneWidth, does not exceed width columns.
//
// Breaks are made between runes, not at word boundaries, so long words and
// URLs may be split mid-token. An empty text yields a nil slice. A rune that
// is wider than width on its own is still emitted (it cannot be split any
// further), but no empty line is produced in front of it.
func wrapText(text string, width int) []string {
	var (
		lines        []string
		current      strings.Builder
		currentWidth int
	)

	for _, r := range text {
		runeWidth := runewidth.RuneWidth(r)

		// Start a new line when r would overflow the current one. The Len
		// check avoids flushing an empty line when r alone is wider than
		// width (width <= 0, or a double-width rune with width == 1).
		if currentWidth+runeWidth > width && current.Len() > 0 {
			lines = append(lines, current.String())
			current.Reset()
			currentWidth = 0
		}

		current.WriteRune(r)
		currentWidth += runeWidth
	}

	// Flush the trailing, partially filled line.
	if current.Len() > 0 {
		lines = append(lines, current.String())
	}

	return lines
}

// banner renders messages inside a box drawn with Unicode box-drawing
// characters and returns the result as a newline-terminated string.
//
// width is the total width of the box in terminal columns, borders included.
// When width is <= 0 the width of the terminal attached to stderr is used,
// falling back to 80 columns if it cannot be determined. Widths below 20 are
// raised to 20. Each message is wrapped with wrapText to fit inside the box
// and right-padded with spaces so the right border lines up.
func banner(messages []string, width int) string {
	if width <= 0 {
		var err error

		// Banner writes the result to stderr, so measure that stream rather
		// than stdin: stdin is frequently a pipe or file, which would force
		// the 80-column fallback even on a narrower terminal.
		width, _, err = term.GetSize(int(os.Stderr.Fd()))
		if err != nil {
			width = 80
		}
	}

	if width < 20 {
		width = 20
	}

	// Two border characters plus one space of padding on each side.
	contentWidth := width - 4

	var wrappedLines []string
	for _, message := range messages {
		wrappedLines = append(wrappedLines, wrapText(message, contentWidth)...)
	}

	horizontal := strings.Repeat("═", width-2)

	var builder strings.Builder

	builder.WriteString("╔" + horizontal + "╗\n")

	for _, line := range wrappedLines {
		// Pad by display width, not byte or rune count, so lines containing
		// double-width runes (such as emoji) still align with the border.
		paddingRight := contentWidth - runewidth.StringWidth(line)
		if paddingRight < 0 {
			paddingRight = 0
		}

		fmt.Fprintf(&builder, "║ %s%s ║\n", line, strings.Repeat(" ", paddingRight))
	}

	builder.WriteString("╚" + horizontal + "╝\n")

	return builder.String()
}

// Banner writes the startup banner to standard error: the project name, a
// request to star the repository on GitHub, and a sponsorship link.
func Banner() {
	lines := []string{
		"🌍 Google Maps Scraper",
		"⭐ If you find this project useful, please star it on GitHub: https://github.com/gosom/google-maps-scraper",
		"💖 Consider sponsoring to support development: https://github.com/sponsors/gosom",
	}

	// A width of 0 lets banner choose the box width itself.
	fmt.Fprintln(os.Stderr, banner(lines, 0))
}
49 changes: 25 additions & 24 deletions runner/webrunner/webrunner.go
Original file line number Diff line number Diff line change
Expand Up @@ -62,20 +62,20 @@ func New(cfg *runner.Config) (runner.Runner, error) {
}

func (w *webrunner) Run(ctx context.Context) error {
errgroup, ctx := errgroup.WithContext(ctx)
egroup, ctx := errgroup.WithContext(ctx)

errgroup.Go(func() error {
egroup.Go(func() error {
return w.work(ctx)
})

errgroup.Go(func() error {
egroup.Go(func() error {
return w.srv.Start(ctx)
})

return errgroup.Wait()
return egroup.Wait()
}

func (w *webrunner) Close(ctx context.Context) error {
func (w *webrunner) Close(context.Context) error {
return nil
}

Expand All @@ -93,35 +93,34 @@ func (w *webrunner) work(ctx context.Context) error {
return err
}

for _, job := range jobs {
for i := range jobs {
select {
case <-ctx.Done():
return nil
default:
log.Printf("scraping job %s", job.ID)
if err := w.scrapeJob(ctx, job); err != nil {
log.Printf("error scraping job %s: %v", job.ID, err)
if err := w.scrapeJob(ctx, &jobs[i]); err != nil {
log.Printf("error scraping job %s: %v", jobs[i].ID, err)
} else {
log.Printf("job %s scraped successfully", job.ID)
log.Printf("job %s scraped successfully", jobs[i].ID)
}
}
}
}
}
}

func (w *webrunner) scrapeJob(ctx context.Context, job web.Job) error {
func (w *webrunner) scrapeJob(ctx context.Context, job *web.Job) error {
job.Status = web.StatusWorking

err := w.svc.Update(ctx, &job)
err := w.svc.Update(ctx, job)
if err != nil {
return err
}

if len(job.Data.Keywords) == 0 {
job.Status = web.StatusFailed

return w.svc.Update(ctx, &job)
return w.svc.Update(ctx, job)
}

outpath := filepath.Join(w.cfg.DataFolder, job.ID+".csv")
Expand All @@ -135,11 +134,11 @@ func (w *webrunner) scrapeJob(ctx context.Context, job web.Job) error {
_ = outfile.Close()
}()

mate, err := w.setupMate(ctx, job, outfile)
mate, err := w.setupMate(ctx, outfile)
if err != nil {
job.Status = web.StatusFailed

err2 := w.svc.Update(ctx, &job)
err2 := w.svc.Update(ctx, job)
if err2 != nil {
log.Printf("failed to update job status: %v", err2)
}
Expand All @@ -163,7 +162,7 @@ func (w *webrunner) scrapeJob(ctx context.Context, job web.Job) error {
job.Data.Zoom,
)
if err != nil {
err2 := w.svc.Update(ctx, &job)
err2 := w.svc.Update(ctx, job)
if err2 != nil {
log.Printf("failed to update job status: %v", err2)
}
Expand All @@ -172,12 +171,13 @@ func (w *webrunner) scrapeJob(ctx context.Context, job web.Job) error {
}

if len(seedJobs) > 0 {
var allowedSeconds int = max(180, len(seedJobs)*10*job.Data.Depth/50+120)
if job.Data.MaxSeconds > 0 {
if job.Data.MaxSeconds < 180 {
allowedSeconds = 180
allowedSeconds := max(60, len(seedJobs)*10*job.Data.Depth/50+120)

if job.Data.MaxTime > 0 {
if job.Data.MaxTime.Seconds() < 60 {
allowedSeconds = 60
} else {
allowedSeconds = job.Data.MaxSeconds
allowedSeconds = int(job.Data.MaxTime.Seconds())
}
}

Expand All @@ -190,7 +190,7 @@ func (w *webrunner) scrapeJob(ctx context.Context, job web.Job) error {
if err != nil && !errors.Is(err, context.DeadlineExceeded) {
cancel()

err2 := w.svc.Update(ctx, &job)
err2 := w.svc.Update(ctx, job)
if err2 != nil {
log.Printf("failed to update job status: %v", err2)
}
Expand All @@ -205,13 +205,14 @@ func (w *webrunner) scrapeJob(ctx context.Context, job web.Job) error {

job.Status = web.StatusOK

return w.svc.Update(ctx, &job)
return w.svc.Update(ctx, job)
}

func (w *webrunner) setupMate(_ context.Context, _ web.Job, writer io.Writer) (*scrapemateapp.ScrapemateApp, error) {
func (w *webrunner) setupMate(_ context.Context, writer io.Writer) (*scrapemateapp.ScrapemateApp, error) {
opts := []func(*scrapemateapp.Config) error{
scrapemateapp.WithConcurrency(w.cfg.Concurrency),
scrapemateapp.WithJS(scrapemateapp.DisableImages()),
scrapemateapp.WithExitOnInactivity(time.Second * 20),
}

csvWriter := csvwriter.NewCsvWriter(csv.NewWriter(writer))
Expand Down
Loading

0 comments on commit 1cc78c8

Please sign in to comment.