generated from cloudwego/.github
-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add loader, parser examples (#10)
- Loading branch information
1 parent
ff3a7a6
commit 846f144
Showing
9 changed files
with
296 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,82 @@ | ||
/* | ||
* Copyright 2025 CloudWeGo Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package main | ||
|
||
import ( | ||
"context" | ||
"io" | ||
|
||
"github.com/cloudwego/eino/components/document/parser" | ||
"github.com/cloudwego/eino/schema" | ||
) | ||
|
||
// options holds the implementation-specific settings of CustomParser.
// Values are filled from the parser defaults and may then be overridden per
// call through WithEncoding and WithMaxSize.
type options struct {
	// Encoding is the encoding name selected for the parse call.
	Encoding string
	// MaxSize is the size limit selected for the parse call.
	// NOTE(review): presumably a byte count; nothing visible enforces it yet.
	MaxSize int64
}
|
||
// WithEncoding | ||
// 定制实现自主定义的 Option 方法 | ||
func WithEncoding(encoding string) parser.Option { | ||
return parser.WrapImplSpecificOptFn(func(o *options) { | ||
o.Encoding = encoding | ||
}) | ||
} | ||
|
||
func WithMaxSize(size int64) parser.Option { | ||
return parser.WrapImplSpecificOptFn(func(o *options) { | ||
o.MaxSize = size | ||
}) | ||
} | ||
|
||
// Config carries the construction-time defaults for a CustomParser.
type Config struct {
	// DefaultEncoding is used when no WithEncoding option is passed to Parse.
	DefaultEncoding string
	// DefaultMaxSize is used when no WithMaxSize option is passed to Parse.
	DefaultMaxSize int64
}
|
||
// CustomParser is an example document parser that demonstrates how to combine
// construction-time defaults with per-call options.
type CustomParser struct {
	// defaultEncoding seeds options.Encoding at the start of every Parse call.
	defaultEncoding string
	// defaultMaxSize seeds options.MaxSize at the start of every Parse call.
	defaultMaxSize int64
}
|
||
func NewCustomParser(config *Config) (*CustomParser, error) { | ||
return &CustomParser{ | ||
defaultEncoding: config.DefaultEncoding, | ||
defaultMaxSize: config.DefaultMaxSize, | ||
}, nil | ||
} | ||
|
||
func (p *CustomParser) Parse(ctx context.Context, reader io.Reader, opts ...parser.Option) ([]*schema.Document, error) { | ||
// 1. 处理通用选项 | ||
commonOpts := parser.GetCommonOptions(&parser.Options{}, opts...) | ||
_ = commonOpts | ||
|
||
// 2. 处理特定选项 | ||
myOpts := &options{ | ||
Encoding: p.defaultEncoding, | ||
MaxSize: p.defaultMaxSize, | ||
} | ||
myOpts = parser.GetImplSpecificOptions(myOpts, opts...) | ||
_ = myOpts | ||
// 3. 实现解析逻辑 | ||
|
||
return []*schema.Document{{ | ||
Content: "Hello World", | ||
}}, nil | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
/* | ||
* Copyright 2024 CloudWeGo Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package main | ||
|
||
import ( | ||
"context" | ||
|
||
"github.com/cloudwego/eino-examples/internal/logs" | ||
) | ||
|
||
func main() { | ||
ctx := context.Background() | ||
|
||
customParser, err := NewCustomParser(&Config{ | ||
DefaultEncoding: "default", | ||
DefaultMaxSize: 1024, | ||
}) | ||
if err != nil { | ||
logs.Errorf("NewCustomParser failed, err=%v", err) | ||
return | ||
} | ||
|
||
docs, err := customParser.Parse(ctx, nil, | ||
WithMaxSize(2048), | ||
) | ||
if err != nil { | ||
logs.Errorf("customParser.Parse, err=%v", err) | ||
return | ||
} | ||
|
||
for idx, doc := range docs { | ||
logs.Infof("doc_%v content: %v", idx, doc.Content) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
/* | ||
* Copyright 2024 CloudWeGo Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package main | ||
|
||
import ( | ||
"context" | ||
"os" | ||
|
||
"github.com/cloudwego/eino-ext/components/document/parser/html" | ||
"github.com/cloudwego/eino-ext/components/document/parser/pdf" | ||
"github.com/cloudwego/eino/components/document/parser" | ||
|
||
"github.com/cloudwego/eino-examples/internal/gptr" | ||
"github.com/cloudwego/eino-examples/internal/logs" | ||
) | ||
|
||
func main() { | ||
ctx := context.Background() | ||
|
||
textParser := parser.TextParser{} | ||
|
||
htmlParser, err := html.NewParser(ctx, &html.Config{ | ||
Selector: gptr.Of("body"), | ||
}) | ||
if err != nil { | ||
logs.Errorf("html.NewParser failed, err=%v", err) | ||
return | ||
} | ||
|
||
pdfParser, err := pdf.NewPDFParser(ctx, &pdf.Config{}) | ||
if err != nil { | ||
logs.Errorf("pdf.NewPDFParser failed, err=%v", err) | ||
return | ||
} | ||
|
||
// 创建扩展解析器 | ||
extParser, err := parser.NewExtParser(ctx, &parser.ExtParserConfig{ | ||
// 注册特定扩展名的解析器 | ||
Parsers: map[string]parser.Parser{ | ||
".html": htmlParser, | ||
".pdf": pdfParser, | ||
}, | ||
// 设置默认解析器,用于处理未知格式 | ||
FallbackParser: textParser, | ||
}) | ||
if err != nil { | ||
|
||
return | ||
} | ||
|
||
// 使用解析器 | ||
filePath := "./testdata/test.html" | ||
file, err := os.Open(filePath) | ||
if err != nil { | ||
logs.Errorf("os.Open failed, file=%v, err=%v", filePath, err) | ||
return | ||
} | ||
docs, err := extParser.Parse(ctx, file, | ||
// 必须提供 URI ExtParser 选择正确的解析器进行解析 | ||
parser.WithURI(filePath), | ||
parser.WithExtraMeta(map[string]any{ | ||
"source": "local", | ||
}), | ||
) | ||
if err != nil { | ||
logs.Errorf("extParser.Parse, err=%v", err) | ||
return | ||
} | ||
|
||
for idx, doc := range docs { | ||
logs.Infof("doc_%v content: %v", idx, doc.Content) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
<!DOCTYPE html> | ||
<!-- saved from url=(0064)http://10.37.87.106:1313/zh/docs/eino/overview/eino_open_source/ --> | ||
<html lang="zh" class="no-js"> | ||
<head> | ||
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> | ||
<title>大语言模型应用开发框架 —— Eino 正式开源! | CloudWeGo</title> | ||
</head> | ||
<body> | ||
Hello World For Eino | ||
</body> | ||
</html> |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Copyright 2024 CloudWeGo Authors | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
package main | ||
|
||
import ( | ||
"context" | ||
"strings" | ||
|
||
"github.com/cloudwego/eino/components/document/parser" | ||
|
||
"github.com/cloudwego/eino-examples/internal/logs" | ||
) | ||
|
||
func main() { | ||
ctx := context.Background() | ||
|
||
textParser := parser.TextParser{} | ||
docs, err := textParser.Parse(ctx, strings.NewReader("hello world")) | ||
if err != nil { | ||
logs.Errorf("TextParser{}.Parse failed, err=%v", err) | ||
return | ||
} | ||
|
||
logs.Infof("text content: %v", docs[0].Content) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.