Skip to content

Commit

Permalink
run from web ui
Browse files Browse the repository at this point in the history
  • Loading branch information
lizongying committed Nov 13, 2023
1 parent d690f89 commit 0c4a8ca
Show file tree
Hide file tree
Showing 22 changed files with 340 additions and 24 deletions.
13 changes: 10 additions & 3 deletions pkg/api/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@ import (
// UrlSpider is the "/spider" URL path.
// NOTE(review): presumably the path RouteSpider is served under; the
// registration is not visible here — confirm.
const UrlSpider = "/spider"

// Req holds an id and a name, serialized under the JSON keys "id" and "name".
type Req struct {
Id string `json:"id"`
Name string `json:"name"`
}
type Spider struct {
Name string `json:"name"`
Name string `json:"name,omitempty"`
Funcs []string `json:"funcs,omitempty"`
}
type RouteSpider struct {
Request
Expand All @@ -32,15 +34,20 @@ func (h *RouteSpider) ServeHTTP(w http.ResponseWriter, r *http.Request) {
if req.Name == "" {
for _, v := range h.crawler.GetSpiders() {
if v.Name() == req.Name {
var funcs []string
for k1, _ := range v.CallBacks() {
funcs = append(funcs, k1)
}
spider = Spider{
Name: v.Name(),
Name: v.Name(),
Funcs: funcs,
}
break
}
}
}

h.OutJson(w, 0, "", spider)
h.OutJson(w, 0, "", &spider)
}

func (h *RouteSpider) FromCrawler(crawler pkg.Crawler) pkg.Route {
Expand Down
2 changes: 2 additions & 0 deletions pkg/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@ type ContextCrawler interface {
}

type ContextSpider interface {
GetSpider() Spider
WithSpider(Spider) ContextSpider
GetId() uint64
WithId(uint64) ContextSpider
GetName() string
Expand Down
8 changes: 8 additions & 0 deletions pkg/context/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import (
)

type Spider struct {
Spider pkg.Spider `json:"-"`
Context context.Context `json:"-"`
Id uint64 `json:"id,omitempty"`
Name string `json:"name,omitempty"`
Expand All @@ -17,6 +18,13 @@ type Spider struct {
UpdateTime utils.Timestamp `json:"update_time,omitempty"`
}

// GetSpider returns the pkg.Spider stored in the Spider field.
func (c *Spider) GetSpider() pkg.Spider {
return c.Spider
}
// WithSpider stores spider in the Spider field and returns the receiver,
// so the call can be chained with the other With* setters.
func (c *Spider) WithSpider(spider pkg.Spider) pkg.ContextSpider {
c.Spider = spider
return c
}
// GetId returns the value of the Id field.
func (c *Spider) GetId() uint64 {
return c.Id
}
Expand Down
1 change: 1 addition & 0 deletions pkg/request.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,7 @@ type Request interface {

// CallBack is a function that takes a Context and a Response and returns an error.
type CallBack func(Context, Response) error
// ErrBack is a function that takes a Context, a Response and an error and returns nothing.
type ErrBack func(Context, Response, error)
// StartFunc is a function that takes a Context and a string and returns an error.
// NOTE(review): the meaning of the string argument is not visible here — confirm.
type StartFunc func(Context, string) error

type RequestStatus uint8

Expand Down
4 changes: 2 additions & 2 deletions pkg/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,10 +25,10 @@ type Spider interface {
SetSpider(spider Spider) Spider
CallBacks() map[string]CallBack
CallBack(name string) (callback CallBack)
SetCallBacks(map[string]CallBack) Spider
ErrBacks() map[string]ErrBack
ErrBack(name string) (errBack ErrBack)
SetErrBacks(map[string]ErrBack) Spider
StartFuncs() map[string]StartFunc
StartFunc(name string) (startFunc StartFunc)
GetAllowedDomains() []string
ReplaceAllowedDomains([]string) error
SetAllowedDomain(string)
Expand Down
31 changes: 20 additions & 11 deletions pkg/spider/base_spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ type BaseSpider struct {
browsers map[pkg.Browser]struct{}
callBacks map[string]pkg.CallBack
errBacks map[string]pkg.ErrBack
startFuncs map[string]pkg.StartFunc
defaultAllowedDomains map[string]struct{}
allowedDomains map[string]struct{}
retryMaxTimes uint8
Expand Down Expand Up @@ -197,7 +198,7 @@ func (s *BaseSpider) GetSpider() pkg.Spider {
}
func (s *BaseSpider) SetSpider(spider pkg.Spider) pkg.Spider {
s.spider = spider
s.registerParser()
s.registerFuncs()
return s
}
func (s *BaseSpider) CallBacks() map[string]pkg.CallBack {
Expand All @@ -212,10 +213,6 @@ func (s *BaseSpider) CallBack(name string) (callback pkg.CallBack) {
}
return
}
func (s *BaseSpider) SetCallBacks(callBacks map[string]pkg.CallBack) pkg.Spider {
s.callBacks = callBacks
return s
}
// ErrBacks returns the errBacks map (name -> pkg.ErrBack) held by the spider.
// The map itself is returned, not a copy.
func (s *BaseSpider) ErrBacks() map[string]pkg.ErrBack {
return s.errBacks
}
Expand All @@ -228,9 +225,14 @@ func (s *BaseSpider) ErrBack(name string) (errBack pkg.ErrBack) {
}
return
}
func (s *BaseSpider) SetErrBacks(errBacks map[string]pkg.ErrBack) pkg.Spider {
s.errBacks = errBacks
return s
// StartFuncs returns the startFuncs map (name -> pkg.StartFunc) held by the spider.
// The map itself is returned, not a copy.
func (s *BaseSpider) StartFuncs() map[string]pkg.StartFunc {
return s.startFuncs
}
// StartFunc looks up the start function stored under name.
// It returns nil when name is empty or when no entry exists for name.
func (s *BaseSpider) StartFunc(name string) (startFunc pkg.StartFunc) {
	if name == "" {
		return nil
	}
	// Indexing a missing key (or a nil map) yields the zero value, nil.
	return s.startFuncs[name]
}
func (s *BaseSpider) GetCrawler() pkg.Crawler {
return s.Crawler
Expand All @@ -252,9 +254,10 @@ func (s *BaseSpider) WithOptions(options ...pkg.SpiderOption) pkg.Spider {
s.options = options
return s
}
func (s *BaseSpider) registerParser() {
func (s *BaseSpider) registerFuncs() {
callBacks := make(map[string]pkg.CallBack)
errBacks := make(map[string]pkg.ErrBack)
startFuncs := make(map[string]pkg.StartFunc)
rv := reflect.ValueOf(s.spider)
rt := rv.Type()
l := rt.NumMethod()
Expand All @@ -268,9 +271,14 @@ func (s *BaseSpider) registerParser() {
if ok {
errBacks[name] = errBack
}
startFunc, ok := rv.Method(i).Interface().(func(pkg.Context, string) error)
if ok {
startFuncs[name] = startFunc
}
}
s.SetCallBacks(callBacks)
s.SetErrBacks(errBacks)
s.callBacks = callBacks
s.errBacks = errBacks
s.startFuncs = startFuncs
}

func (s *BaseSpider) Request(ctx pkg.Context, request pkg.Request) (response pkg.Response, err error) {
Expand Down Expand Up @@ -575,6 +583,7 @@ func (s *BaseSpider) FromCrawler(crawler pkg.Crawler) pkg.Spider {
s.WithContext(new(crawlerContext.Context).
WithCrawler(crawler.GetContext().GetCrawler()).
WithSpider(new(crawlerContext.Spider).
WithSpider(s.spider).
WithId(s.Crawler.GenUid()).
WithName(s.spider.Name()).
WithStatus(pkg.SpiderStatusReady)))
Expand Down
9 changes: 9 additions & 0 deletions pkg/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ type StatisticsNode interface {
DecJob()
IncTask()
DecTask()
IncRequest()
DecRequest()
IncRecord()
DecRecord()
Marshal() (bytes []byte, err error)
Expand All @@ -20,11 +22,14 @@ type StatisticsSpider interface {
WithId(id uint64) StatisticsSpider
GetSpider() string
WithSpider(spider string) StatisticsSpider
WithFuncs(funcs []string) StatisticsSpider
WithNode(node string) StatisticsSpider
IncJob()
DecJob()
IncTask()
DecTask()
IncRequest()
DecRequest()
IncRecord()
DecRecord()
GetLastTaskId() string
Expand All @@ -43,6 +48,8 @@ type StatisticsJob interface {
WithSpider(spider string) StatisticsJob
IncTask()
DecTask()
IncRequest()
DecRequest()
IncRecord()
DecRecord()
WithEnable(enable bool) StatisticsJob
Expand All @@ -52,6 +59,8 @@ type StatisticsTask interface {
WithStatus(status TaskStatus) StatisticsTask
GetId() string
WithId(id string) StatisticsTask
IncRequest()
DecRequest()
IncRecord()
DecRecord()
GetNode() string
Expand Down
7 changes: 7 additions & 0 deletions pkg/statistics/job/job.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ type Job struct {
Node string `json:"node,omitempty"`
Spider string `json:"spider,omitempty"`
Task uint32 `json:"task,omitempty"`
Request uint32 `json:"request,omitempty"`
Record uint32 `json:"record,omitempty"`
Enable bool `json:"enable,omitempty"`
StartTime utils.Timestamp `json:"start_time,omitempty"`
Expand Down Expand Up @@ -73,6 +74,12 @@ func (s *Job) IncTask() {
// DecTask atomically subtracts 1 from the job's Task counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (s *Job) DecTask() {
atomic.AddUint32(&s.Task, ^uint32(0))
}
// IncRequest atomically adds 1 to the job's Request counter.
func (s *Job) IncRequest() {
atomic.AddUint32(&s.Request, 1)
}
// DecRequest atomically subtracts 1 from the job's Request counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (s *Job) DecRequest() {
atomic.AddUint32(&s.Request, ^uint32(0))
}
// IncRecord atomically adds 1 to the job's Record counter.
func (s *Job) IncRecord() {
atomic.AddUint32(&s.Record, 1)
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/statistics/node/node.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ type Node struct {
Spider uint32 `json:"spider,omitempty"`
Job uint32 `json:"job,omitempty"`
Task uint32 `json:"task,omitempty"`
Request uint32 `json:"request,omitempty"`
Record uint32 `json:"record,omitempty"`
StartTime utils.Timestamp `json:"start_time"`
FinishTime utils.Timestamp `json:"finish_time"`
Expand Down Expand Up @@ -71,6 +72,12 @@ func (n *Node) IncTask() {
// DecTask atomically subtracts 1 from the node's Task counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (n *Node) DecTask() {
atomic.AddUint32(&n.Task, ^uint32(0))
}
// IncRequest atomically adds 1 to the node's Request counter.
func (n *Node) IncRequest() {
atomic.AddUint32(&n.Request, 1)
}
// DecRequest atomically subtracts 1 from the node's Request counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (n *Node) DecRequest() {
atomic.AddUint32(&n.Request, ^uint32(0))
}
// IncRecord atomically adds 1 to the node's Record counter.
func (n *Node) IncRecord() {
atomic.AddUint32(&n.Record, 1)
}
Expand Down
15 changes: 15 additions & 0 deletions pkg/statistics/spider/spider.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ type Spider struct {
Id *utils.Uint64 `json:"id,omitempty"`
Node string `json:"node,omitempty"`
Spider string `json:"spider,omitempty"`
Funcs []string `json:"funcs,omitempty"`
Job uint32 `json:"job,omitempty"`
Task uint32 `json:"task,omitempty"`
Request uint32 `json:"request,omitempty"`
Record uint32 `json:"record,omitempty"`
StartTime utils.Timestamp `json:"start_time,omitempty"`
FinishTime utils.Timestamp `json:"finish_time,omitempty"`
Expand Down Expand Up @@ -57,6 +59,13 @@ func (s *Spider) WithSpider(spider string) pkg.StatisticsSpider {
s.Spider = spider
return s
}
// GetFuncs returns the Funcs slice as stored (not a copy).
func (s *Spider) GetFuncs() []string {
return s.Funcs
}
// WithFuncs stores funcs in the Funcs field and returns the receiver so
// calls can be chained. The slice is kept as passed, without copying.
func (s *Spider) WithFuncs(funcs []string) pkg.StatisticsSpider {
s.Funcs = funcs
return s
}
// GetNode returns the value of the Node field.
func (s *Spider) GetNode() string {
return s.Node
}
Expand All @@ -76,6 +85,12 @@ func (s *Spider) IncTask() {
// DecTask atomically subtracts 1 from the spider's Task counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (s *Spider) DecTask() {
atomic.AddUint32(&s.Task, ^uint32(0))
}
// IncRequest atomically adds 1 to the spider's Request counter.
func (s *Spider) IncRequest() {
atomic.AddUint32(&s.Request, 1)
}
// DecRequest atomically subtracts 1 from the spider's Request counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (s *Spider) DecRequest() {
atomic.AddUint32(&s.Request, ^uint32(0))
}
// IncRecord atomically adds 1 to the spider's Record counter.
func (s *Spider) IncRecord() {
atomic.AddUint32(&s.Record, 1)
}
Expand Down
5 changes: 5 additions & 0 deletions pkg/statistics/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,14 @@ func (s *Statistics) spiderChanged(ctx pkg.Context) {
spiderOne, ok := s.Spiders[ctx.GetSpiderName()]
if !ok {
s.Nodes[ctx.GetCrawlerId()].IncSpider()
var funcs []string
for k1, _ := range ctx.GetSpider().GetSpider().StartFuncs() {
funcs = append(funcs, k1)
}
spiderOne = new(statisticsSpider.Spider).
WithId(ctx.GetSpider().GetId()).
WithSpider(ctx.GetSpiderName()).
WithFuncs(funcs).
WithNode(ctx.GetCrawlerId())
s.Spiders[ctx.GetSpiderName()] = spiderOne
}
Expand Down
7 changes: 7 additions & 0 deletions pkg/statistics/task/task.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ type Task struct {
Spider string `json:"spider,omitempty"`
Job string `json:"job,omitempty"`
Node string `json:"node,omitempty"`
Request uint32 `json:"request,omitempty"`
Record uint32 `json:"record,omitempty"`
StartTime utils.Timestamp `json:"start_time"`
FinishTime utils.Timestamp `json:"finish_time"`
Expand Down Expand Up @@ -57,6 +58,12 @@ func (t *Task) WithJob(job string) pkg.StatisticsTask {
t.Job = job
return t
}
// IncRequest atomically adds 1 to the task's Request counter.
func (t *Task) IncRequest() {
atomic.AddUint32(&t.Request, 1)
}
// DecRequest atomically subtracts 1 from the task's Request counter.
// Adding ^uint32(0) (all bits set) wraps modulo 2^32, which is the
// sync/atomic idiom for decrementing an unsigned value; there is no
// guard at zero, so decrementing 0 wraps to the maximum uint32.
func (t *Task) DecRequest() {
atomic.AddUint32(&t.Request, ^uint32(0))
}
// IncRecord atomically adds 1 to the task's Record counter.
func (t *Task) IncRecord() {
atomic.AddUint32(&t.Record, 1)
}
Expand Down
7 changes: 6 additions & 1 deletion web/ui/src/requests/api.js
Original file line number Diff line number Diff line change
Expand Up @@ -61,4 +61,9 @@ const getRecords = async data => {
return axios.post(host + '/records', data, config);
};

export {getUser, getNodes, getSpiders, getJobs, runJob, rerunJob, stopJob, getTasks, getRecords}
// POST `data` to the `/spider` endpoint and resolve with the axios response.
// The host and request config come from the shared `api()` helper.
const getSpider = async data => {
    const settings = await api()
    const url = settings.host + '/spider'
    return axios.post(url, data, settings.config);
};

export {getUser, getNodes, getSpiders, getJobs, runJob, rerunJob, stopJob, getTasks, getRecords, getSpider}
1 change: 1 addition & 0 deletions web/ui/src/stores/jobs.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export const useJobsStore = defineStore('jobs', () => {
getJobs().then(resp => {
console.log(resp.data.data)
if (resp.data.data === null) {
jobs.splice(0, jobs.length)
return
}
jobs.splice(0, jobs.length, ...resp.data.data)
Expand Down
1 change: 1 addition & 0 deletions web/ui/src/stores/nodes.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ export const useNodesStore = defineStore('nodes', () => {
getNodes().then(resp => {
console.log(resp.data.data)
if (resp.data.data === null) {
nodes.splice(0, nodes.length)
return
}
nodes.splice(0, nodes.length, ...resp.data.data)
Expand Down
1 change: 1 addition & 0 deletions web/ui/src/stores/records.js
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ export const useRecordsStore = defineStore('records', () => {
getRecords().then(resp => {
console.log(resp.data.data)
if (resp.data.data === null) {
records.splice(0, records.length)
return
}
records.splice(0, records.length, ...resp.data.data)
Expand Down
28 changes: 28 additions & 0 deletions web/ui/src/stores/spider.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import {defineStore} from 'pinia'
import {reactive} from 'vue';
import {getSpider} from "@/requests/api";

// Numeric spider status codes, 0 (unknown) through 6 (stopped).
// NOTE(review): presumably these mirror the backend's SpiderStatus* values
// (e.g. pkg.SpiderStatusReady) — confirm the numbering against the Go side.
export const SpiderStatusUnknown = 0
export const SpiderStatusReady = 1
export const SpiderStatusStarting = 2
export const SpiderStatusRunning = 3
export const SpiderStatusIdle = 4
export const SpiderStatusStopping = 5
export const SpiderStatusStopped = 6

// Pinia store exposing the details of a single spider (`name` and `funcs`)
// together with the action that loads them from the backend.
export const useSpiderStore = defineStore('spider', () => {
    // Reactive holder; starts empty and is filled in by GetSpider.
    const spider = reactive({})

    // Fetch the spider from the `/spider` endpoint and mirror the response
    // payload into `spider`.
    const GetSpider = () => {
        getSpider().then(resp => {
            const payload = resp.data.data
            console.log(payload)
            // `== null` also covers an undefined payload, which would otherwise
            // throw on the property reads below.
            if (payload == null) {
                // Return to the initial empty state instead of keeping stale
                // details, the same way the jobs/nodes/records stores empty
                // themselves when the backend sends no data.
                delete spider.name
                delete spider.funcs
                return
            }
            spider.name = payload.name
            spider.funcs = payload.funcs
        })
    }

    return {spider, GetSpider}
})
Loading

0 comments on commit 0c4a8ca

Please sign in to comment.