From 7d2ad223c9431e81834578ef610248912533f543 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Wed, 9 Oct 2024 17:21:21 +0100 Subject: [PATCH 01/28] feat(googledrive): add the tasks for google drive --- pkg/component/data/googledrive/v0/README.mdx | 178 +++++++++++++++ .../googledrive/v0/config/definition.json | 22 ++ .../data/googledrive/v0/config/setup.json | 45 ++++ .../data/googledrive/v0/config/tasks.json | 216 ++++++++++++++++++ pkg/component/data/googledrive/v0/main.go | 97 ++++++++ pkg/component/store/store.go | 2 + 6 files changed, 560 insertions(+) create mode 100644 pkg/component/data/googledrive/v0/README.mdx create mode 100644 pkg/component/data/googledrive/v0/config/definition.json create mode 100644 pkg/component/data/googledrive/v0/config/setup.json create mode 100644 pkg/component/data/googledrive/v0/config/tasks.json create mode 100644 pkg/component/data/googledrive/v0/main.go diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx new file mode 100644 index 000000000..165aac213 --- /dev/null +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -0,0 +1,178 @@ +--- +title: "Google Drive" +lang: "en-US" +draft: false +description: "Learn about how to set up a VDP Google Drive component https://github.com/instill-ai/instill-core" +--- + +The Google Drive component is a data component that allows users to google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files.. 
+It can carry out the following tasks: +- [Read File](#read-file) +- [Read Files](#read-files) +- [Read Drive](#read-drive) + +## Release Stage + +`Alpha` + +## Configuration + +The component definition and tasks are defined in the [definition.json](https://github.com/instill-ai/pipeline-backend/blob/main/pkg/component/data/googledrive/v0/config/definition.json) and [tasks.json](https://github.com/instill-ai/pipeline-backend/blob/main/pkg/component/data/googledrive/v0/config/tasks.json) files respectively. + +## Setup + + +In order to communicate with Google, the following connection details need to be +provided. You may specify them directly in a pipeline recipe as key-value pairs +within the component's `setup` block, or you can create a **Connection** from +the [**Integration Settings**](https://www.instill.tech/docs/vdp/integration) +page and reference the whole `setup` as `setup: +${connection.<my-connection-id>}`. + +
+ +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Access Token | `access-token` | string | Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | +| Refresh Token | `refresh-token` | string | Refresh token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | + +
+ + + + +## Supported Tasks + +### Read File + +Read a file from Google Drive. It is recommended to use this task to read a single file when you watch a specific file in Google Drive. + +
+ +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_READ_FILE` | +| File ID (required) | `file-id` | string | ID of the file to read. | +
+ + + + + + +
+ +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| [File](#read-file-file) | `file` | object | File in Google Drive. | +
+ +
+ Output Objects in Read File + +

File

+ +
+ +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Content | `content` | string | Content of the file. | +| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | +| MIME type | `mime-type` | string | MIME type of the file. | +| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| Name | `name` | string | Name of the file. | +| Size | `size` | integer | Size of the file in bytes. | +| Version | `version` | string | Version of the file. | +
+
+ +### Read Files + +Read files from Google Drive. + +
+ +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_READ_FILES` | +| File Names (required) | `file-names` | array[string] | List of file names to read. | +| Read Content (required) | `read-content` | boolean | Read content of the files. | +
+ + + + + + +
+ +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| [Files](#read-files-files) | `files` | array[object] | List of files read from Google Drive. | +
+ +
+ Output Objects in Read Files + +

Files

+ +
+ +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Content | `content` | string | Content of the file. | +| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | +| MIME type | `mime-type` | string | MIME type of the file. | +| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| Name | `name` | string | Name of the file. | +| Size | `size` | integer | Size of the file in bytes. | +| Version | `version` | string | Version of the file. | +
+
+ +### Read Drive + +Read the metadata of files and folders in Google Drive. It is recommended to use this task to read the metadata of files and folders in Google Drive when you watch a change in Google Drive. + +
+ +| Input | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| Task ID (required) | `task` | string | `TASK_READ_DRIVE` | +| Order By | `order-by` | string | The keys of the properties to sort the results by, separated by commas. The default sort order is ascending. e.g. modifiedTime desc, name | +| Limit | `limit` | integer | The maximum number of files to return. Default is 10. | +
+ + + + + + +
+ +| Output | ID | Type | Description | +| :--- | :--- | :--- | :--- | +| [Files](#read-drive-files) | `files` | array[object] | List of files and folders in Google Drive. | +
+ +
+ Output Objects in Read Drive + +

Files

+ +
+ +| Field | Field ID | Type | Note | +| :--- | :--- | :--- | :--- | +| Content | `content` | string | Content of the file. | +| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | +| MIME type | `mime-type` | string | MIME type of the file. | +| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| Name | `name` | string | Name of the file. | +| Size | `size` | integer | Size of the file in bytes. | +| Version | `version` | string | Version of the file. | +
+
diff --git a/pkg/component/data/googledrive/v0/config/definition.json b/pkg/component/data/googledrive/v0/config/definition.json new file mode 100644 index 000000000..738b302a1 --- /dev/null +++ b/pkg/component/data/googledrive/v0/config/definition.json @@ -0,0 +1,22 @@ +{ + "availableTasks": [ + "TASK_READ_FILE", + "TASK_READ_FILES", + "TASK_READ_DRIVE" + ], + "custom": false, + "documentationUrl": "https://www.instill.tech/docs/component/data/googledrive", + "icon": "assets/google-drive.svg", + "id": "google-drive", + "public": true, + "title": "Google Drive", + "description": "Google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files.", + "tombstone": false, + "type": "COMPONENT_TYPE_DATA", + "uid": "cd220d2d-3d19-468e-8b95-37dd6a57c15f", + "vendor": "Google", + "vendorAttributes": {}, + "version": "0.1.0", + "sourceUrl": "https://github.com/instill-ai/pipeline-backend/blob/main/pkg/component/data/googledrive/v0", + "releaseStage": "RELEASE_STAGE_ALPHA" +} diff --git a/pkg/component/data/googledrive/v0/config/setup.json b/pkg/component/data/googledrive/v0/config/setup.json new file mode 100644 index 000000000..069230769 --- /dev/null +++ b/pkg/component/data/googledrive/v0/config/setup.json @@ -0,0 +1,45 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "properties": { + "access-token": { + "description": "Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", + "instillUpstreamTypes": [ + "reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillSecret": true, + "instillUIOrder": 0, + "title": "Access Token", + "type": "string" + }, + "refresh-token": { + "description": "Refresh token for the Google Drive API. 
For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", + "instillUpstreamTypes": [ + "reference" + ], + "instillAcceptFormats": [ + "string" + ], + "instillSecret": true, + "instillUIOrder": 1, + "title": "Refresh Token", + "type": "string" + } + }, + "required": [], + "instillEditOnNodeFields": [ + "access-token", + "refresh-token" + ], + "instillOAuthConfig": { + "authUrl": "https://accounts.google.com/o/oauth2/auth", + "accessUrl": "https://oauth2.googleapis.com/token", + "scopes": [ + "https://www.googleapis.com/auth/drive.readonly" + ] + }, + "title": "Google Drive Connection", + "type": "object" +} diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json new file mode 100644 index 000000000..00332a2e3 --- /dev/null +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -0,0 +1,216 @@ +{ + "$defs": { + "file": { + "description": "File in Google Drive.", + "instillUIOrder": 0, + "properties": { + "name": { + "description": "Name of the file.", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "Name", + "type": "string" + }, + "content": { + "description": "Content of the file.", + "instillFormat": "string", + "instillUIMultiline": true, + "instillUIOrder": 1, + "title": "Content", + "type": "string" + }, + "created-time": { + "description": "Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ`", + "instillFormat": "string", + "instillUIOrder": 2, + "title": "Created time", + "type": "string" + }, + "modified-time": { + "description": "Time when the file was last modified. 
Format: `YYYY-MM-DDTHH:MM:SSZ`", + "instillFormat": "string", + "instillUIOrder": 3, + "title": "Modified time", + "type": "string" + }, + "size": { + "description": "Size of the file in bytes.", + "instillFormat": "integer", + "instillUIOrder": 4, + "title": "Size", + "type": "integer" + }, + "mime-type": { + "description": "MIME type of the file.", + "instillFormat": "string", + "instillUIOrder": 5, + "title": "MIME type", + "type": "string" + }, + "md5-checksum": { + "description": "MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user.", + "instillFormat": "string", + "instillUIOrder": 6, + "title": "MD5 checksum", + "type": "string" + }, + "version": { + "description": "Version of the file.", + "instillFormat": "string", + "instillUIOrder": 7, + "title": "Version", + "type": "string" + } + }, + "required": [ + "name", + "created-time", + "modified-time", + "size", + "mime-type", + "version" + ], + "title": "File", + "type": "object" + } + }, + "TASK_READ_FILE": { + "instillShortDescription": "Read a file from Google Drive. 
It is recommended to use this task to read a single file when you watch a specific file in Google Drive.", + "input": { + "description": "Please provide the name of the file to read from Google Drive.", + "instillUIOrder": 0, + "properties": { + "file-id": { + "description": "ID of the file to read.", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "File ID", + "type": "string" + } + }, + "required": [ + "file-id" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 1, + "properties": { + "file": { + "$ref": "#/$defs/file" + } + }, + "required": [ + "file" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_READ_FILES": { + "instillShortDescription": "Read files from Google Drive.", + "input": { + "description": "Please provide the list of file names to read from Google Drive.", + "instillUIOrder": 0, + "properties": { + "file-names": { + "description": "List of file names to read.", + "instillAcceptFormats": [ + "array:string" + ], + "items": { + "title": "File Name", + "type": "string" + }, + "instillUIOrder": 0, + "title": "File Names", + "type": "array" + }, + "read-content": { + "description": "Read content of the files.", + "instillShortDescription": "Read content of the files.", + "instillAcceptFormats": [ + "boolean" + ], + "instillUIOrder": 1, + "title": "Read Content", + "type": "boolean" + } + }, + "required": [ + "file-names", + "read-content" + ], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 1, + "properties": { + "files": { + "description": "List of files read from Google Drive.", + "instillUIOrder": 0, + "instillFormat": "array", + "items": { + "$ref": "#/$defs/file" + }, + "title": "Files", + "type": "array" + } + }, + "required": [ + "files" + ], + "title": "Output", + "type": "object" + } + }, + "TASK_READ_DRIVE": { + "instillShortDescription": "Read the metadata of files and folders in Google Drive. 
It is recommended to use this task to read the metadata of files and folders in Google Drive when you watch a change in Google Drive.", + "input": { + "description": "Please input the query params to read the metadata of files and folders in Google Drive.", + "instillUIOrder": 0, + "properties": { + "order-by": { + "description": "The keys of the properties to sort the results by, separated by commas. The default sort order is ascending. e.g. modifiedTime desc, name", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "Order By", + "type": "string" + }, + "limit": { + "default": 10, + "description": "The maximum number of files to return. Default is 10.", + "instillFormat": "integer", + "instillUIOrder": 1, + "title": "Limit", + "type": "integer" + } + }, + "required": [], + "title": "Input", + "type": "object" + }, + "output": { + "instillUIOrder": 1, + "properties": { + "files": { + "description": "List of files and folders in Google Drive.", + "instillUIOrder": 0, + "instillFormat": "array", + "items": { + "$ref": "#/$defs/file" + }, + "title": "Files", + "type": "array" + } + }, + "required": [ + "files" + ], + "title": "Output", + "type": "object" + } + } +} diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go new file mode 100644 index 000000000..0050fe2c4 --- /dev/null +++ b/pkg/component/data/googledrive/v0/main.go @@ -0,0 +1,97 @@ +//go:generate compogen readme ./config ./README.mdx +package googledrive + +import ( + "context" + "fmt" + "sync" + + _ "embed" + + "google.golang.org/protobuf/types/known/structpb" + + "github.com/instill-ai/pipeline-backend/pkg/component/base" + "github.com/instill-ai/x/errmsg" +) + +const ( +// taskReadFile = "TASK_READ_FILE" +// taskReadFiles = "TASK_READ_FILES" +// taskReadDrive = "TASK_READ_DRIVE" +) + +var ( + //go:embed config/definition.json + definitionJSON []byte + //go:embed config/setup.json + setupJSON []byte + //go:embed config/tasks.json + tasksJSON []byte 
+ + once sync.Once + comp *component +) + +type component struct { + base.Component +} + +type execution struct { + base.ComponentExecution + execute func(context.Context, *structpb.Struct) (*structpb.Struct, error) +} + +// Init returns an implementation of IComponent that interacts with Slack. +func Init(bc base.Component) *component { + once.Do(func() { + comp = &component{Component: bc} + err := comp.LoadDefinition(definitionJSON, setupJSON, tasksJSON, nil) + if err != nil { + panic(err) + } + }) + + return comp +} + +// CreateExecution initializes a component executor that can be used in a +// pipeline trigger. +func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, error) { + + // e := &execution{ + // ComponentExecution: x, + // } + + switch x.Task { + + default: + return nil, errmsg.AddMessage( + fmt.Errorf("not supported task: %s", x.Task), + fmt.Sprintf("%s task is not supported.", x.Task), + ) + } +} + +func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { + for _, job := range jobs { + input, err := job.Input.Read(ctx) + if err != nil { + job.Error.Error(ctx, err) + continue + } + + output, err := e.execute(ctx, input) + if err != nil { + job.Error.Error(ctx, err) + continue + } + + err = job.Output.Write(ctx, output) + if err != nil { + job.Error.Error(ctx, err) + continue + } + } + + return nil +} diff --git a/pkg/component/store/store.go b/pkg/component/store/store.go index bbd1b9363..29b9f0095 100644 --- a/pkg/component/store/store.go +++ b/pkg/component/store/store.go @@ -36,6 +36,7 @@ import ( "github.com/instill-ai/pipeline-backend/pkg/component/data/chroma/v0" "github.com/instill-ai/pipeline-backend/pkg/component/data/elasticsearch/v0" "github.com/instill-ai/pipeline-backend/pkg/component/data/googlecloudstorage/v0" + "github.com/instill-ai/pipeline-backend/pkg/component/data/googledrive/v0" "github.com/instill-ai/pipeline-backend/pkg/component/data/instillartifact/v0" 
"github.com/instill-ai/pipeline-backend/pkg/component/data/milvus/v0" "github.com/instill-ai/pipeline-backend/pkg/component/data/mongodb/v0" @@ -208,6 +209,7 @@ func Init( compStore.Import(whatsapp.Init(baseComp)) compStore.Import(freshdesk.Init(baseComp)) compStore.Import(asana.Init(baseComp)) + compStore.Import(googledrive.Init(baseComp)) }) return compStore } From b8f07b8d5c0635f22a538bdccf5805325e246a9d Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Wed, 9 Oct 2024 17:39:58 +0100 Subject: [PATCH 02/28] chore: update doc --- pkg/component/data/googledrive/v0/README.mdx | 4 ++-- pkg/component/data/googledrive/v0/config/setup.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index 165aac213..be4717797 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -33,8 +33,8 @@ ${connection.}`. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Access Token | `access-token` | string | Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | -| Refresh Token | `refresh-token` | string | Refresh token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | +| Access Token | `access-token` | string | Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | +| Refresh Token | `refresh-token` | string | Refresh token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. 
| diff --git a/pkg/component/data/googledrive/v0/config/setup.json b/pkg/component/data/googledrive/v0/config/setup.json index 069230769..8e0f23568 100644 --- a/pkg/component/data/googledrive/v0/config/setup.json +++ b/pkg/component/data/googledrive/v0/config/setup.json @@ -2,7 +2,7 @@ "$schema": "http://json-schema.org/draft-07/schema#", "properties": { "access-token": { - "description": "Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", + "description": "Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", "instillUpstreamTypes": [ "reference" ], @@ -15,7 +15,7 @@ "type": "string" }, "refresh-token": { - "description": "Refresh token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", + "description": "Refresh token for the Google Drive API. 
For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", "instillUpstreamTypes": [ "reference" ], From d6aecaa9aeebb2202a05c17fea57817d967f9e07 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Thu, 10 Oct 2024 09:15:26 +0100 Subject: [PATCH 03/28] chore(googledrive): add icon --- .../googledrive/v0/assets/google-drive.svg | 171 ++++++++++++++++++ 1 file changed, 171 insertions(+) create mode 100644 pkg/component/data/googledrive/v0/assets/google-drive.svg diff --git a/pkg/component/data/googledrive/v0/assets/google-drive.svg b/pkg/component/data/googledrive/v0/assets/google-drive.svg new file mode 100644 index 000000000..773008293 --- /dev/null +++ b/pkg/component/data/googledrive/v0/assets/google-drive.svg @@ -0,0 +1,171 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + From 5176ad3388c5f6be56bbc7b89ca367ecac54e627 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Thu, 10 Oct 2024 16:28:42 +0100 Subject: [PATCH 04/28] feat(googledrive): design the structure about how to implement google drive details --- pkg/component/data/googledrive/v0/README.mdx | 2 +- .../data/googledrive/v0/config/tasks.json | 3 +- .../data/googledrive/v0/drive_service.go | 22 +++++ pkg/component/data/googledrive/v0/main.go | 84 +++++++++++++++++-- .../data/googledrive/v0/read_operation.go | 56 +++++++++++++ 5 files changed, 157 insertions(+), 10 deletions(-) create mode 100644 pkg/component/data/googledrive/v0/drive_service.go create mode 100644 pkg/component/data/googledrive/v0/read_operation.go diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index be4717797..f35bfd6ba 100644 
--- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -142,7 +142,7 @@ Read the metadata of files and folders in Google Drive. It is recommended to use | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_READ_DRIVE` | | Order By | `order-by` | string | The keys of the properties to sort the results by, separated by commas. The default sort order is ascending. e.g. modifiedTime desc, name | -| Limit | `limit` | integer | The maximum number of files to return. Default is 10. | +| Limit | `limit` | integer | The maximum number of files to return. Default is 10. Maximum is 100. | diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json index 00332a2e3..dccbc581a 100644 --- a/pkg/component/data/googledrive/v0/config/tasks.json +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -181,7 +181,8 @@ }, "limit": { "default": 10, - "description": "The maximum number of files to return. Default is 10.", + "maximum": 100, + "description": "The maximum number of files to return. Default is 10. 
Maximum is 100.", "instillFormat": "integer", "instillUIOrder": 1, "title": "Limit", diff --git a/pkg/component/data/googledrive/v0/drive_service.go b/pkg/component/data/googledrive/v0/drive_service.go new file mode 100644 index 000000000..aab6dd5a0 --- /dev/null +++ b/pkg/component/data/googledrive/v0/drive_service.go @@ -0,0 +1,22 @@ +package googledrive + +import "google.golang.org/api/drive/v3" + +type IDriveService interface { + readFile() + readFiles() + readDrive() +} + +type driveService struct { + service *drive.Service +} + +func (d *driveService) readFile() { +} + +func (d *driveService) readFiles() { +} + +func (d *driveService) readDrive() { +} diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 0050fe2c4..5081b427f 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -8,6 +8,10 @@ import ( _ "embed" + "golang.org/x/oauth2" + "golang.org/x/oauth2/google" + "google.golang.org/api/drive/v3" + "google.golang.org/api/option" "google.golang.org/protobuf/types/known/structpb" "github.com/instill-ai/pipeline-backend/pkg/component/base" @@ -15,9 +19,9 @@ import ( ) const ( -// taskReadFile = "TASK_READ_FILE" -// taskReadFiles = "TASK_READ_FILES" -// taskReadDrive = "TASK_READ_DRIVE" + taskReadFile = "TASK_READ_FILE" + taskReadFiles = "TASK_READ_FILES" + taskReadDrive = "TASK_READ_DRIVE" ) var ( @@ -39,9 +43,11 @@ type component struct { type execution struct { base.ComponentExecution execute func(context.Context, *structpb.Struct) (*structpb.Struct, error) + + service IDriveService } -// Init returns an implementation of IComponent that interacts with Slack. +// Init returns an implementation of IComponent that interacts with Google Drive. func Init(bc base.Component) *component { once.Do(func() { comp = &component{Component: bc} @@ -58,20 +64,82 @@ func Init(bc base.Component) *component { // pipeline trigger. 
func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, error) { - // e := &execution{ - // ComponentExecution: x, - // } + ctx := context.Background() - switch x.Task { + drive, err := getDriveService(ctx, x.Setup) + + if err != nil { + return nil, fmt.Errorf("failed to get drive service: %w", err) + } + + e := &execution{ + ComponentExecution: x, + service: &driveService{service: drive}, + } + switch x.Task { + case taskReadFile: + e.execute = e.readFile + case taskReadFiles: + e.execute = e.readFiles + case taskReadDrive: + e.execute = e.readDrive default: return nil, errmsg.AddMessage( fmt.Errorf("not supported task: %s", x.Task), fmt.Sprintf("%s task is not supported.", x.Task), ) } + return e, nil +} + +func getDriveService(ctx context.Context, setup *structpb.Struct) (*drive.Service, error) { + accessToken := setup.GetFields()["access-token"].GetStringValue() + refreshToken := setup.GetFields()["refresh-token"].GetStringValue() + + config := &oauth2.Config{ + ClientID: getClientID(), + ClientSecret: getClientSecret(), + Scopes: getScopes(setup), + Endpoint: google.Endpoint, + } + + tok := &oauth2.Token{ + AccessToken: accessToken, + RefreshToken: refreshToken, + } + + client := config.Client(ctx, tok) + + srv, err := drive.NewService(ctx, option.WithHTTPClient(client)) + + if err != nil { + return nil, err + } + + return srv, nil +} + +// TODO: Need to get from env variables +func getClientID() string { + return "" +} + +// TODO: Need to get from env variables +func getClientSecret() string { + return "" +} + +// TODO: Need to get the scopes from the token.json +// Temporarily, it will be same as the scopes in setup.json. +// So, we get it from setup.json first. Later, we will get it from token.json +// after we confirm how we retrieve the scopes from token.json. +func getScopes(setup *structpb.Struct) []string { + return []string{} } +// Execute reads the input from the job, executes the task, and writes the output +// to the job. 
func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { for _, job := range jobs { input, err := job.Input.Read(ctx) diff --git a/pkg/component/data/googledrive/v0/read_operation.go b/pkg/component/data/googledrive/v0/read_operation.go new file mode 100644 index 000000000..595e383ca --- /dev/null +++ b/pkg/component/data/googledrive/v0/read_operation.go @@ -0,0 +1,56 @@ +package googledrive + +import ( + "context" + + "google.golang.org/protobuf/types/known/structpb" +) + +type readFileInput struct { + FileID string `json:"file-id"` +} + +type readFileOutput struct { + File file `json:"file"` +} + +type file struct { + Name string `json:"name"` + Content string `json:"content"` + CreatedTime string `json:"created-time"` + ModifiedTime string `json:"modified-time"` + Size int64 `json:"size"` + MimeType string `json:"mime-type"` + Md5Checksum string `json:"md5-checksum,omitempty"` + Version string `json:"version"` +} + +func (e *execution) readFile(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { + return nil, nil +} + +type readFilesInput struct { + FileNames []string `json:"file-names"` + ReadContent bool `json:"read-content"` +} + +type readFilesOutput struct { + Files []file `json:"files"` +} + +func (e *execution) readFiles(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { + return nil, nil +} + +type readDriveInput struct { + OrderBy string `json:"order-by"` + Limit int `json:"limit"` +} + +type readDriveOutput struct { + Files []file `json:"files"` +} + +func (e *execution) readDrive(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { + return nil, nil +} From a2fe5723c466e695ae0819bc3e2be8eb1a63d503 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Thu, 10 Oct 2024 18:11:57 +0100 Subject: [PATCH 05/28] feat(googledrive): add credential and token setting --- pkg/component/data/googledrive/v0/README.mdx | 1 - .../data/googledrive/v0/config/setup.json | 14 ----- 
pkg/component/data/googledrive/v0/main.go | 61 ++++++++++++------- pkg/component/store/store.go | 13 +++- 4 files changed, 50 insertions(+), 39 deletions(-) diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index f35bfd6ba..a17d29bc8 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -33,7 +33,6 @@ ${connection.}`. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Access Token | `access-token` | string | Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | | Refresh Token | `refresh-token` | string | Refresh token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation. | diff --git a/pkg/component/data/googledrive/v0/config/setup.json b/pkg/component/data/googledrive/v0/config/setup.json index 8e0f23568..112a9cab6 100644 --- a/pkg/component/data/googledrive/v0/config/setup.json +++ b/pkg/component/data/googledrive/v0/config/setup.json @@ -1,19 +1,6 @@ { "$schema": "http://json-schema.org/draft-07/schema#", "properties": { - "access-token": { - "description": "Access token for the Google Drive API. For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", - "instillUpstreamTypes": [ - "reference" - ], - "instillAcceptFormats": [ - "string" - ], - "instillSecret": true, - "instillUIOrder": 0, - "title": "Access Token", - "type": "string" - }, "refresh-token": { "description": "Refresh token for the Google Drive API. 
For more information about how to create tokens, please refer to the Google Drive API documentation and OAuth 2.0 documentation.", "instillUpstreamTypes": [ @@ -30,7 +17,6 @@ }, "required": [], "instillEditOnNodeFields": [ - "access-token", "refresh-token" ], "instillOAuthConfig": { diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 5081b427f..7c758d0dc 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -3,6 +3,8 @@ package googledrive import ( "context" + "encoding/base64" + "encoding/json" "fmt" "sync" @@ -19,9 +21,10 @@ import ( ) const ( - taskReadFile = "TASK_READ_FILE" - taskReadFiles = "TASK_READ_FILES" - taskReadDrive = "TASK_READ_DRIVE" + taskReadFile = "TASK_READ_FILE" + taskReadFiles = "TASK_READ_FILES" + taskReadDrive = "TASK_READ_DRIVE" + cfgOAuthCredential = "oauth-credentials" ) var ( @@ -38,6 +41,8 @@ var ( type component struct { base.Component + // The JSON string of OAuth credentials encoded by base64. 
+ instillAICredentials string } type execution struct { @@ -66,7 +71,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, ctx := context.Background() - drive, err := getDriveService(ctx, x.Setup) + drive, err := getDriveService(ctx, x.Setup, c) if err != nil { return nil, fmt.Errorf("failed to get drive service: %w", err) @@ -93,19 +98,23 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, return e, nil } -func getDriveService(ctx context.Context, setup *structpb.Struct) (*drive.Service, error) { - accessToken := setup.GetFields()["access-token"].GetStringValue() - refreshToken := setup.GetFields()["refresh-token"].GetStringValue() +func getDriveService(ctx context.Context, setup *structpb.Struct, c *component) (*drive.Service, error) { + + decodedBytes, err := base64.StdEncoding.DecodeString(c.instillAICredentials) - config := &oauth2.Config{ - ClientID: getClientID(), - ClientSecret: getClientSecret(), - Scopes: getScopes(setup), - Endpoint: google.Endpoint, + if err != nil { + return nil, fmt.Errorf("failed to decode Instill AI credentials: %w", err) + } + + config, err := google.ConfigFromJSON(decodedBytes, getConfigScopes()...) 
+ + if err != nil { + return nil, fmt.Errorf("failed to get Google config from JSON: %w", err) } + refreshToken := setup.GetFields()["refresh-token"].GetStringValue() + tok := &oauth2.Token{ - AccessToken: accessToken, RefreshToken: refreshToken, } @@ -120,22 +129,23 @@ func getDriveService(ctx context.Context, setup *structpb.Struct) (*drive.Servic return srv, nil } -// TODO: Need to get from env variables -func getClientID() string { - return "" -} - -// TODO: Need to get from env variables -func getClientSecret() string { - return "" +func getConfigScopes() []string { + type setupConfig struct { + InstillOAuthConfig struct { + Scopes []string `json:"scopes"` + } `json:"instillOAuthConfig"` + } + var setup setupConfig + json.Unmarshal(setupJSON, &setup) + return setup.InstillOAuthConfig.Scopes } -// TODO: Need to get the scopes from the token.json +// Need to get the scopes from the token.json that received from the OAuth2 // Temporarily, it will be same as the scopes in setup.json. // So, we get it from setup.json first. Later, we will get it from token.json // after we confirm how we retrieve the scopes from token.json. 
func getScopes(setup *structpb.Struct) []string { - return []string{} + return getConfigScopes() } // Execute reads the input from the job, executes the task, and writes the output @@ -163,3 +173,8 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { return nil } + +func (c *component) WithOAuthCredentials(s map[string]any) *component { + c.instillAICredentials = base.ReadFromGlobalConfig(cfgOAuthCredential, s) + return c +} diff --git a/pkg/component/store/store.go b/pkg/component/store/store.go index 29b9f0095..5122acae9 100644 --- a/pkg/component/store/store.go +++ b/pkg/component/store/store.go @@ -172,6 +172,18 @@ func Init( compStore.Import(conn) } + { + conn := googledrive.Init(baseComp) + conn = conn.WithOAuthCredentials(secrets["google"]) + compStore.Import(conn) + } + + { + conn := googledrive.Init(baseComp) + conn = conn.WithOAuthCredentials(secrets["google"]) + compStore.Import(conn) + } + compStore.Import(instillapp.Init(baseComp)) compStore.Import(bigquery.Init(baseComp)) compStore.Import(googlecloudstorage.Init(baseComp)) @@ -209,7 +221,6 @@ func Init( compStore.Import(whatsapp.Init(baseComp)) compStore.Import(freshdesk.Init(baseComp)) compStore.Import(asana.Init(baseComp)) - compStore.Import(googledrive.Init(baseComp)) }) return compStore } From 3fc92ee6a6571448cf47cb43cb42a8d5e3f9e4b2 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Thu, 17 Oct 2024 13:33:58 +0100 Subject: [PATCH 06/28] feat(googledrive): update input output design --- pkg/component/data/googledrive/v0/README.mdx | 74 ++------- .../googledrive/v0/config/definition.json | 5 +- .../data/googledrive/v0/config/tasks.json | 146 +++++++----------- .../data/googledrive/v0/drive_service.go | 8 +- pkg/component/data/googledrive/v0/main.go | 9 +- .../data/googledrive/v0/read_operation.go | 44 ++---- 6 files changed, 97 insertions(+), 189 deletions(-) diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index 
a17d29bc8..2e06d37fd 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -8,8 +8,7 @@ description: "Learn about how to set up a VDP Google Drive component https://git The Google Drive component is a data component that allows users to google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files.. It can carry out the following tasks: - [Read File](#read-file) -- [Read Files](#read-files) -- [Read Drive](#read-drive) +- [Read Folder](#read-folder) ## Release Stage @@ -44,14 +43,14 @@ ${connection.}`. ### Read File -Read a file from Google Drive. It is recommended to use this task to read a single file when you watch a specific file in Google Drive. +Read a file content and metadata from Google Drive.
| Input | ID | Type | Description | | :--- | :--- | :--- | :--- | | Task ID (required) | `task` | string | `TASK_READ_FILE` | -| File ID (required) | `file-id` | string | ID of the file to read. | +| File ID (required) | `shared-link` | string | Shared link of the file. You can get the shared link by right-clicking on the file and selecting `Copy link`. |
@@ -77,26 +76,29 @@ Read a file from Google Drive. It is recommended to use this task to read a sing | :--- | :--- | :--- | :--- | | Content | `content` | string | Content of the file. | | Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| ID | `id` | string | ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | | MIME type | `mime-type` | string | MIME type of the file. | | Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | | Name | `name` | string | Name of the file. | | Size | `size` | integer | Size of the file in bytes. | | Version | `version` | string | Version of the file. | +| Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | +| Web View Link | `web-view-link` | string | Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link. | -### Read Files +### Read Folder -Read files from Google Drive. +Read metadata and content of files under the specified folder in Google Drive.
| Input | ID | Type | Description | | :--- | :--- | :--- | :--- | -| Task ID (required) | `task` | string | `TASK_READ_FILES` | -| File Names (required) | `file-names` | array[string] | List of file names to read. | -| Read Content (required) | `read-content` | boolean | Read content of the files. | +| Task ID (required) | `task` | string | `TASK_READ_FOLDER` | +| File ID (required) | `shared-link` | string | Shared link of the file. You can get the shared link by right-clicking on the file and selecting `Copy link`. | +| Read Content | `read-content` | boolean | Whether to read the content of the files under the folder. |
@@ -108,58 +110,13 @@ Read files from Google Drive. | Output | ID | Type | Description | | :--- | :--- | :--- | :--- | -| [Files](#read-files-files) | `files` | array[object] | List of files read from Google Drive. | +| [Files](#read-folder-files) | `files` | array[object] | List of files under the specified folder. |
- Output Objects in Read Files + Output Objects in Read Folder -

Files

- -
- -| Field | Field ID | Type | Note | -| :--- | :--- | :--- | :--- | -| Content | `content` | string | Content of the file. | -| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | -| MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | -| MIME type | `mime-type` | string | MIME type of the file. | -| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | -| Name | `name` | string | Name of the file. | -| Size | `size` | integer | Size of the file in bytes. | -| Version | `version` | string | Version of the file. | -
-
- -### Read Drive - -Read the metadata of files and folders in Google Drive. It is recommended to use this task to read the metadata of files and folders in Google Drive when you watch a change in Google Drive. - -
- -| Input | ID | Type | Description | -| :--- | :--- | :--- | :--- | -| Task ID (required) | `task` | string | `TASK_READ_DRIVE` | -| Order By | `order-by` | string | The keys of the properties to sort the results by, separated by commas. The default sort order is ascending. e.g. modifiedTime desc, name | -| Limit | `limit` | integer | The maximum number of files to return. Default is 10. Maximum is 100. | -
- - - - - - -
- -| Output | ID | Type | Description | -| :--- | :--- | :--- | :--- | -| [Files](#read-drive-files) | `files` | array[object] | List of files and folders in Google Drive. | -
- -
- Output Objects in Read Drive - -

Files

+

Files

@@ -167,11 +124,14 @@ Read the metadata of files and folders in Google Drive. It is recommended to use | :--- | :--- | :--- | :--- | | Content | `content` | string | Content of the file. | | Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| ID | `id` | string | ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | | MIME type | `mime-type` | string | MIME type of the file. | | Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | | Name | `name` | string | Name of the file. | | Size | `size` | integer | Size of the file in bytes. | | Version | `version` | string | Version of the file. | +| Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | +| Web View Link | `web-view-link` | string | Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link. |
diff --git a/pkg/component/data/googledrive/v0/config/definition.json b/pkg/component/data/googledrive/v0/config/definition.json index 738b302a1..1e92c3002 100644 --- a/pkg/component/data/googledrive/v0/config/definition.json +++ b/pkg/component/data/googledrive/v0/config/definition.json @@ -1,11 +1,10 @@ { "availableTasks": [ "TASK_READ_FILE", - "TASK_READ_FILES", - "TASK_READ_DRIVE" + "TASK_READ_FOLDER" ], "custom": false, - "documentationUrl": "https://www.instill.tech/docs/component/data/googledrive", + "documentationUrl": "https://www.instill.tech/docs/component/data/google-drive", "icon": "assets/google-drive.svg", "id": "google-drive", "public": true, diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json index dccbc581a..382b67fb8 100644 --- a/pkg/component/data/googledrive/v0/config/tasks.json +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -4,10 +4,17 @@ "description": "File in Google Drive.", "instillUIOrder": 0, "properties": { + "id": { + "description": "ID of the file.", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "ID", + "type": "string" + }, "name": { "description": "Name of the file.", "instillFormat": "string", - "instillUIOrder": 0, + "instillUIOrder": 1, "title": "Name", "type": "string" }, @@ -15,81 +22,100 @@ "description": "Content of the file.", "instillFormat": "string", "instillUIMultiline": true, - "instillUIOrder": 1, + "instillUIOrder": 2, "title": "Content", "type": "string" }, "created-time": { "description": "Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ`", "instillFormat": "string", - "instillUIOrder": 2, + "instillUIOrder": 3, "title": "Created time", "type": "string" }, "modified-time": { "description": "Time when the file was last modified. 
Format: `YYYY-MM-DDTHH:MM:SSZ`", "instillFormat": "string", - "instillUIOrder": 3, + "instillUIOrder": 4, "title": "Modified time", "type": "string" }, "size": { "description": "Size of the file in bytes.", "instillFormat": "integer", - "instillUIOrder": 4, + "instillUIOrder": 5, "title": "Size", "type": "integer" }, "mime-type": { "description": "MIME type of the file.", "instillFormat": "string", - "instillUIOrder": 5, + "instillUIOrder": 6, "title": "MIME type", "type": "string" }, "md5-checksum": { "description": "MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user.", "instillFormat": "string", - "instillUIOrder": 6, + "instillUIOrder": 7, "title": "MD5 checksum", "type": "string" }, "version": { "description": "Version of the file.", "instillFormat": "string", - "instillUIOrder": 7, + "instillUIOrder": 8, "title": "Version", "type": "string" + }, + "web-view-link": { + "description": "Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link.", + "instillFormat": "string", + "instillUIOrder": 9, + "title": "Web View Link", + "type": "string" + }, + "web-content-link": { + "description": "Link for downloading the content of the file in a browser.", + "instillFormat": "string", + "instillUIOrder": 10, + "title": "Web Content Link", + "type": "string" } }, "required": [ + "id", "name", "created-time", "modified-time", "size", "mime-type", - "version" + "version", + "web-view-link" ], "title": "File", "type": "object" + }, + "shared-link": { + "description": "Shared link of the file. You can get the shared link by right-clicking on the file and selecting `Copy link`.", + "instillFormat": "string", + "instillUIOrder": 0, + "title": "File ID", + "type": "string" } }, "TASK_READ_FILE": { - "instillShortDescription": "Read a file from Google Drive. 
It is recommended to use this task to read a single file when you watch a specific file in Google Drive.", + "instillShortDescription": "Read a file content and metadata from Google Drive.", "input": { - "description": "Please provide the name of the file to read from Google Drive.", + "description": "Please provide the shared link of the file to read from Google Drive.", "instillUIOrder": 0, "properties": { - "file-id": { - "description": "ID of the file to read.", - "instillFormat": "string", - "instillUIOrder": 0, - "title": "File ID", - "type": "string" + "shared-link": { + "$ref": "#/$defs/shared-link" } }, "required": [ - "file-id" + "shared-link" ], "title": "Input", "type": "object" @@ -108,101 +134,41 @@ "type": "object" } }, - "TASK_READ_FILES": { - "instillShortDescription": "Read files from Google Drive.", + "TASK_READ_FOLDER": { + "instillShortDescription": "Read metadata and content of files under the specified folder in Google Drive.", "input": { - "description": "Please provide the list of file names to read from Google Drive.", + "description": "Please provide the shared link of the folder to read from Google Drive.", "instillUIOrder": 0, "properties": { - "file-names": { - "description": "List of file names to read.", - "instillAcceptFormats": [ - "array:string" - ], - "items": { - "title": "File Name", - "type": "string" - }, - "instillUIOrder": 0, - "title": "File Names", - "type": "array" + "shared-link": { + "$ref": "#/$defs/shared-link" }, "read-content": { - "description": "Read content of the files.", - "instillShortDescription": "Read content of the files.", - "instillAcceptFormats": [ - "boolean" - ], + "description": "Whether to read the content of the files under the folder.", + "instillFormat": "boolean", "instillUIOrder": 1, "title": "Read Content", "type": "boolean" } }, "required": [ - "file-names", - "read-content" + "shared-link" ], "title": "Input", "type": "object" }, "output": { + "description": "Output", "instillUIOrder": 1, 
"properties": { "files": { - "description": "List of files read from Google Drive.", - "instillUIOrder": 0, - "instillFormat": "array", - "items": { - "$ref": "#/$defs/file" - }, - "title": "Files", - "type": "array" - } - }, - "required": [ - "files" - ], - "title": "Output", - "type": "object" - } - }, - "TASK_READ_DRIVE": { - "instillShortDescription": "Read the metadata of files and folders in Google Drive. It is recommended to use this task to read the metadata of files and folders in Google Drive when you watch a change in Google Drive.", - "input": { - "description": "Please input the query params to read the metadata of files and folders in Google Drive.", - "instillUIOrder": 0, - "properties": { - "order-by": { - "description": "The keys of the properties to sort the results by, separated by commas. The default sort order is ascending. e.g. modifiedTime desc, name", - "instillFormat": "string", - "instillUIOrder": 0, - "title": "Order By", - "type": "string" - }, - "limit": { - "default": 10, - "maximum": 100, - "description": "The maximum number of files to return. Default is 10. 
Maximum is 100.", - "instillFormat": "integer", - "instillUIOrder": 1, - "title": "Limit", - "type": "integer" - } - }, - "required": [], - "title": "Input", - "type": "object" - }, - "output": { - "instillUIOrder": 1, - "properties": { - "files": { - "description": "List of files and folders in Google Drive.", + "description": "List of files under the specified folder.", + "instillFormat": "array:object", "instillUIOrder": 0, - "instillFormat": "array", "items": { "$ref": "#/$defs/file" }, + "required": [], "title": "Files", "type": "array" } diff --git a/pkg/component/data/googledrive/v0/drive_service.go b/pkg/component/data/googledrive/v0/drive_service.go index aab6dd5a0..bf9d12fbc 100644 --- a/pkg/component/data/googledrive/v0/drive_service.go +++ b/pkg/component/data/googledrive/v0/drive_service.go @@ -4,8 +4,7 @@ import "google.golang.org/api/drive/v3" type IDriveService interface { readFile() - readFiles() - readDrive() + readFolder() } type driveService struct { @@ -15,8 +14,5 @@ type driveService struct { func (d *driveService) readFile() { } -func (d *driveService) readFiles() { -} - -func (d *driveService) readDrive() { +func (d *driveService) readFolder() { } diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 7c758d0dc..76d925de1 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -22,8 +22,7 @@ import ( const ( taskReadFile = "TASK_READ_FILE" - taskReadFiles = "TASK_READ_FILES" - taskReadDrive = "TASK_READ_DRIVE" + taskReadFolder = "TASK_READ_FOLDER" cfgOAuthCredential = "oauth-credentials" ) @@ -85,10 +84,8 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, switch x.Task { case taskReadFile: e.execute = e.readFile - case taskReadFiles: - e.execute = e.readFiles - case taskReadDrive: - e.execute = e.readDrive + case taskReadFolder: + e.execute = e.readFolder default: return nil, errmsg.AddMessage( fmt.Errorf("not 
supported task: %s", x.Task), diff --git a/pkg/component/data/googledrive/v0/read_operation.go b/pkg/component/data/googledrive/v0/read_operation.go index 595e383ca..e646fc830 100644 --- a/pkg/component/data/googledrive/v0/read_operation.go +++ b/pkg/component/data/googledrive/v0/read_operation.go @@ -7,7 +7,7 @@ import ( ) type readFileInput struct { - FileID string `json:"file-id"` + SharedLink string `json:"shared-link"` } type readFileOutput struct { @@ -15,42 +15,32 @@ type readFileOutput struct { } type file struct { - Name string `json:"name"` - Content string `json:"content"` - CreatedTime string `json:"created-time"` - ModifiedTime string `json:"modified-time"` - Size int64 `json:"size"` - MimeType string `json:"mime-type"` - Md5Checksum string `json:"md5-checksum,omitempty"` - Version string `json:"version"` + ID string `json:"id"` + Name string `json:"name"` + Content string `json:"content"` + CreatedTime string `json:"created-time"` + ModifiedTime string `json:"modified-time"` + Size int64 `json:"size"` + MimeType string `json:"mime-type"` + Md5Checksum string `json:"md5-checksum,omitempty"` + Version string `json:"version"` + WebViewLink string `json:"web-view-link"` + WebContentLink string `json:"web-content-link,omitempty"` } func (e *execution) readFile(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { return nil, nil } -type readFilesInput struct { - FileNames []string `json:"file-names"` - ReadContent bool `json:"read-content"` +type readFolderInput struct { + SharedLink string `json:"shared-link"` + ReadContent bool `json:"read-content"` } -type readFilesOutput struct { +type readFolderOutput struct { Files []file `json:"files"` } -func (e *execution) readFiles(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { - return nil, nil -} - -type readDriveInput struct { - OrderBy string `json:"order-by"` - Limit int `json:"limit"` -} - -type readDriveOutput struct { - Files []file `json:"files"` -} - -func (e 
*execution) readDrive(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { +func (e *execution) readFolder(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { return nil, nil } From 8c36316f280b1139ed1d06e631d2b1fdcf94cea4 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Mon, 21 Oct 2024 14:18:09 +0100 Subject: [PATCH 07/28] chore(googledrive): add scope to read users' basic info --- pkg/component/data/googledrive/v0/config/setup.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/component/data/googledrive/v0/config/setup.json b/pkg/component/data/googledrive/v0/config/setup.json index 112a9cab6..cc59e05ca 100644 --- a/pkg/component/data/googledrive/v0/config/setup.json +++ b/pkg/component/data/googledrive/v0/config/setup.json @@ -23,7 +23,9 @@ "authUrl": "https://accounts.google.com/o/oauth2/auth", "accessUrl": "https://oauth2.googleapis.com/token", "scopes": [ - "https://www.googleapis.com/auth/drive.readonly" + "https://www.googleapis.com/auth/drive.readonly", + "https://www.googleapis.com/auth/userinfo.email", + "https://www.googleapis.com/auth/userinfo.profile" ] }, "title": "Google Drive Connection", From 8de0bd13ca409f9c40179cdaadeaf13387629c68 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Mon, 21 Oct 2024 16:40:23 +0100 Subject: [PATCH 08/28] feat(googledrive): add client function --- .../data/googledrive/v0/drive_service.go | 161 +++++++++++++++++- 1 file changed, 156 insertions(+), 5 deletions(-) diff --git a/pkg/component/data/googledrive/v0/drive_service.go b/pkg/component/data/googledrive/v0/drive_service.go index bf9d12fbc..b1a6fa592 100644 --- a/pkg/component/data/googledrive/v0/drive_service.go +++ b/pkg/component/data/googledrive/v0/drive_service.go @@ -1,18 +1,169 @@ package googledrive -import "google.golang.org/api/drive/v3" +import ( + "encoding/base64" + "fmt" + "io" + "net/http" + + "google.golang.org/api/drive/v3" +) type IDriveService interface { - readFile() - 
readFolder() + readFile(fileUID string) (*file, error) + readFolder(folderUID string, readContent bool) ([]*file, error) } type driveService struct { service *drive.Service } -func (d *driveService) readFile() { +// Google Drive API only can support downloading the binary data. +// So, when the file is not binary, we need to export the file as PDF/CSV first. +// For example: +// Google Sheets -> Export as CSV +// Google Slides -> Export as PDF +// Google Docs -> Export as PDF +func (d *driveService) readFile(fileUID string) (*file, error) { + + srv := d.service + + driveFile, err := srv.Files.Get(fileUID). + // We will need to confirm if we want to support all drives. + // By setting SupportsAllDrives to true, the API can return the file from the shared drive, which is not owned by the user but shared with the user. + SupportsAllDrives(true). + Fields("id, name, createdTime, modifiedTime, size, mimeType, md5Checksum, version, webViewLink, webContentLink"). + Do() + + if err != nil { + return nil, fmt.Errorf("fetch fetch metadata of file: %w", err) + } + + file := convertDriveFileToComponentFile(driveFile) + + base64Content, err := readFileContent(srv, driveFile) + + if err != nil { + return nil, fmt.Errorf("read file content: %w", err) + } + + file.Content = base64Content + + return file, nil +} + +func (d *driveService) readFolder(folderUID string, readContent bool) ([]*file, error) { + srv := d.service + + q := fmt.Sprintf("'%s' in parents", folderUID) + + var allFiles []*drive.File + + pageToken := "" + + for { + fileList, err := srv.Files.List(). + Q(q). + Fields("id, name, createdTime, modifiedTime, size, mimeType, md5Checksum, version, webViewLink, webContentLink"). + PageToken(pageToken). + Do() + + if err != nil { + return nil, fmt.Errorf("fetch metadata of files: %w", err) + } + + allFiles = append(allFiles, fileList.Files...) 
+ + pageToken = fileList.NextPageToken + + if pageToken == "" { + break + } + } + + files := make([]*file, 0, len(allFiles)) + + for _, f := range allFiles { + file := convertDriveFileToComponentFile(f) + + if readContent { + base64Content, err := readFileContent(srv, f) + + if err != nil { + return nil, fmt.Errorf("read file content: %w", err) + } + + file.Content = base64Content + } + + files = append(files, file) + } + + return files, nil + +} + +func convertDriveFileToComponentFile(driveFile *drive.File) *file { + return &file{ + ID: driveFile.Id, + Name: driveFile.Name, + CreatedTime: driveFile.CreatedTime, + ModifiedTime: driveFile.ModifiedTime, + Size: driveFile.Size, + MimeType: driveFile.MimeType, + Md5Checksum: driveFile.Md5Checksum, + Version: driveFile.Version, + WebViewLink: driveFile.WebViewLink, + WebContentLink: driveFile.WebContentLink, + } +} + +func readFileContent(srv *drive.Service, driveFile *drive.File) (string, error) { + exportFormat := exportFormat(driveFile) + + var resp *http.Response + var err error + if exportFormat == "" { + resp, err = srv.Files.Get(driveFile.Id).SupportsAllDrives(true).Download() + if err != nil { + return "", fmt.Errorf("download file: %w", err) + } + } else { + resp, err = srv.Files.Export(driveFile.Id, exportFormat).Download() + if err != nil { + return "", fmt.Errorf("export file: %w", err) + } + } + + defer resp.Body.Close() + + b, err := io.ReadAll(resp.Body) + + if err != nil { + return "", fmt.Errorf("read file content: %w", err) + } + + return base64.StdEncoding.EncodeToString(b), nil +} + +func exportFormat(file *drive.File) string { + switch file.MimeType { + case "application/vnd.google-apps.spreadsheet": + return "text/csv" + case "application/vnd.google-apps.presentation", "application/vnd.google-apps.document": + return "application/pdf" + default: + return "" + } } -func (d *driveService) readFolder() { +func exportFileExtension(mimeType string) string { + switch mimeType { + case 
"application/vnd.google-apps.spreadsheet": + return ".csv" + case "application/vnd.google-apps.presentation", "application/vnd.google-apps.document": + return ".pdf" + default: + return "" + } } From 06f4ae9076f1a40e8ec3c5971e9eba443cac3dfa Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Mon, 21 Oct 2024 16:41:08 +0100 Subject: [PATCH 09/28] feat(googledrive): add implementation code --- pkg/component/data/googledrive/v0/main.go | 24 +--- .../data/googledrive/v0/read_operation.go | 113 +++++++++++++++++- 2 files changed, 109 insertions(+), 28 deletions(-) diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 76d925de1..fb0c98259 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -46,7 +46,7 @@ type component struct { type execution struct { base.ComponentExecution - execute func(context.Context, *structpb.Struct) (*structpb.Struct, error) + execute func(*structpb.Struct, *base.Job, context.Context) (*structpb.Struct, error) service IDriveService } @@ -148,27 +148,7 @@ func getScopes(setup *structpb.Struct) []string { // Execute reads the input from the job, executes the task, and writes the output // to the job. 
func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { - for _, job := range jobs { - input, err := job.Input.Read(ctx) - if err != nil { - job.Error.Error(ctx, err) - continue - } - - output, err := e.execute(ctx, input) - if err != nil { - job.Error.Error(ctx, err) - continue - } - - err = job.Output.Write(ctx, output) - if err != nil { - job.Error.Error(ctx, err) - continue - } - } - - return nil + return base.ConcurrentExecutor(ctx, jobs, e.execute) } func (c *component) WithOAuthCredentials(s map[string]any) *component { diff --git a/pkg/component/data/googledrive/v0/read_operation.go b/pkg/component/data/googledrive/v0/read_operation.go index e646fc830..72e31955f 100644 --- a/pkg/component/data/googledrive/v0/read_operation.go +++ b/pkg/component/data/googledrive/v0/read_operation.go @@ -2,8 +2,12 @@ package googledrive import ( "context" + "fmt" + "strings" "google.golang.org/protobuf/types/known/structpb" + + "github.com/instill-ai/pipeline-backend/pkg/component/base" ) type readFileInput struct { @@ -23,13 +27,44 @@ type file struct { Size int64 `json:"size"` MimeType string `json:"mime-type"` Md5Checksum string `json:"md5-checksum,omitempty"` - Version string `json:"version"` + Version int64 `json:"version"` WebViewLink string `json:"web-view-link"` WebContentLink string `json:"web-content-link,omitempty"` } -func (e *execution) readFile(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { - return nil, nil +func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { + + inputStruct := readFileInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, fmt.Errorf("convert input to struct: %w", err) + } + + fileUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) + + if err != nil { + return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) + } + + file, err := e.service.readFile(fileUID) + + if err != 
nil { + return nil, fmt.Errorf("read file from Google Drive: %w", err) + } + + output := readFileOutput{ + File: *file, + } + + outputStruct, err := base.ConvertToStructpb(output) + + if err != nil { + return nil, fmt.Errorf("convert output to struct: %w", err) + } + + return outputStruct, nil } type readFolderInput struct { @@ -38,9 +73,75 @@ type readFolderInput struct { } type readFolderOutput struct { - Files []file `json:"files"` + Files []*file `json:"files"` +} + +func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { + inputStruct := readFolderInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, fmt.Errorf("convert input to struct: %w", err) + } + + folderUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) + + if err != nil { + return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) + } + + files, err := e.service.readFolder(folderUID, inputStruct.ReadContent) + + if err != nil { + return nil, fmt.Errorf("read folder from Google Drive: %w", err) + } + + output := readFolderOutput{ + Files: files, + } + + outputStruct, err := base.ConvertToStructpb(output) + + if err != nil { + return nil, fmt.Errorf("convert output to struct: %w", err) + } + + return outputStruct, nil } -func (e *execution) readFolder(ctx context.Context, input *structpb.Struct) (*structpb.Struct, error) { - return nil, nil +// Now, we support the following types of Google Drive links: +// 1. Folder: https://drive.google +// 2. File: https://drive.google.com/file/d/ +// 3. Spreadsheet: https://docs.google.com/spreadsheets/d/ +// 4. Document: https://docs.google.com/document/d/ +// 5. Presentation: https://docs.google.com/presentation/d/ +// 6. 
Colab: https://colab.research.google.com/drive/ +// So, it means the Google Form, Google Map and other types of links are not supported +func extractUIDFromSharedLink(driveLink string) (string, error) { + patterns := map[string]string{ + "file": "/file/d/", + "folder": "/drive/folders/", + "spreadsheet": "/spreadsheets/d/", + "document": "/document/d/", + "presentation": "/presentation/d/", + "colab": "colab.research.google.com/drive/", + } + + // Iterate over the patterns to find a match + for _, pattern := range patterns { + if strings.Contains(driveLink, pattern) { + parts := strings.Split(driveLink, pattern) + if len(parts) < 2 { + return "", fmt.Errorf("invalid Google Drive link") + } + // Sample link: https://drive.google.com/drive/folders/xxxxxx?usp=drive_link + // Sample link: https://drive.google.com/file/d/xxxxxx/view?usp=drive_link + uidParts := strings.SplitN(parts[1], "?", 2) + uidParts = strings.SplitN(uidParts[0], "/", 2) + return uidParts[0], nil + } + } + + return "", fmt.Errorf("unrecognized Google Drive link format") } From be0f77a6a5a9dee693c0d474baac9737c30b5476 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Mon, 21 Oct 2024 16:41:57 +0100 Subject: [PATCH 10/28] chore(googledrive): update doc --- pkg/component/data/googledrive/v0/README.mdx | 4 ++-- pkg/component/data/googledrive/v0/config/tasks.json | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index 2e06d37fd..c6198763f 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -74,7 +74,7 @@ Read a file content and metadata from Google Drive. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Content | `content` | string | Content of the file. | +| Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. 
| | Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | | ID | `id` | string | ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | @@ -122,7 +122,7 @@ Read metadata and content of files under the specified folder in Google Drive. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Content | `content` | string | Content of the file. | +| Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. | | Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | | ID | `id` | string | ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json index 382b67fb8..9b64026e4 100644 --- a/pkg/component/data/googledrive/v0/config/tasks.json +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -19,7 +19,7 @@ "type": "string" }, "content": { - "description": "Content of the file.", + "description": "Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix.", "instillFormat": "string", "instillUIMultiline": true, "instillUIOrder": 2, @@ -63,7 +63,7 @@ }, "version": { "description": "Version of the file.", - "instillFormat": "string", + "instillFormat": "integer", "instillUIOrder": 8, "title": "Version", "type": "string" From 4e5c9d8bf3cface31c4b5a01e06eb8bde1d8540f Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Mon, 21 Oct 2024 17:27:56 +0100 Subject: [PATCH 11/28] fix(googledrive): fix bug when fetching list from Google Drive --- pkg/component/data/googledrive/v0/README.mdx | 13 +++++++++++-- 
.../data/googledrive/v0/config/tasks.json | 4 ++-- .../data/googledrive/v0/drive_service.go | 18 +++++++++++------- 3 files changed, 24 insertions(+), 11 deletions(-) diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index c6198763f..2eaf066fc 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -10,14 +10,21 @@ It can carry out the following tasks: - [Read File](#read-file) - [Read Folder](#read-folder) + + ## Release Stage `Alpha` + + ## Configuration The component definition and tasks are defined in the [definition.json](https://github.com/instill-ai/pipeline-backend/blob/main/pkg/component/data/googledrive/v0/config/definition.json) and [tasks.json](https://github.com/instill-ai/pipeline-backend/blob/main/pkg/component/data/googledrive/v0/config/tasks.json) files respectively. + + + ## Setup @@ -82,7 +89,7 @@ Read a file content and metadata from Google Drive. | Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | | Name | `name` | string | Name of the file. | | Size | `size` | integer | Size of the file in bytes. | -| Version | `version` | string | Version of the file. | +| Version | `version` | integer | Version of the file. | | Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | | Web View Link | `web-view-link` | string | Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link. | @@ -130,8 +137,10 @@ Read metadata and content of files under the specified folder in Google Drive. | Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | | Name | `name` | string | Name of the file. | | Size | `size` | integer | Size of the file in bytes. | -| Version | `version` | string | Version of the file. 
| +| Version | `version` | integer | Version of the file. | | Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | | Web View Link | `web-view-link` | string | Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link. | + + diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json index 9b64026e4..867ca8394 100644 --- a/pkg/component/data/googledrive/v0/config/tasks.json +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -66,7 +66,7 @@ "instillFormat": "integer", "instillUIOrder": 8, "title": "Version", - "type": "string" + "type": "integer" }, "web-view-link": { "description": "Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link.", @@ -180,4 +180,4 @@ "type": "object" } } -} +} \ No newline at end of file diff --git a/pkg/component/data/googledrive/v0/drive_service.go b/pkg/component/data/googledrive/v0/drive_service.go index b1a6fa592..de4ab537a 100644 --- a/pkg/component/data/googledrive/v0/drive_service.go +++ b/pkg/component/data/googledrive/v0/drive_service.go @@ -18,12 +18,6 @@ type driveService struct { service *drive.Service } -// Google Drive API only can support downloading the binary data. -// So, when the file is not binary, we need to export the file as PDF/CSV first. -// For example: -// Google Sheets -> Export as CSV -// Google Slides -> Export as PDF -// Google Docs -> Export as PDF func (d *driveService) readFile(fileUID string) (*file, error) { srv := d.service @@ -64,7 +58,11 @@ func (d *driveService) readFolder(folderUID string, readContent bool) ([]*file, for { fileList, err := srv.Files.List(). Q(q). - Fields("id, name, createdTime, modifiedTime, size, mimeType, md5Checksum, version, webViewLink, webContentLink"). 
+ // To fetch file from shared drive, we need to set SupportsAllDrives to true. + SupportsAllDrives(true). + // To fetch file from shared drive, we need to set IncludeItemsFromAllDrives to true. + IncludeItemsFromAllDrives(true). + Fields("files(id, name, createdTime, modifiedTime, size, mimeType, md5Checksum, version, webViewLink, webContentLink)"). PageToken(pageToken). Do() @@ -118,6 +116,12 @@ func convertDriveFileToComponentFile(driveFile *drive.File) *file { } } +// Google Drive API only can support downloading the binary data. +// So, when the file is not binary, we need to export the file as PDF/CSV first. +// For example: +// Google Sheets -> Export as CSV +// Google Slides -> Export as PDF +// Google Docs -> Export as PDF func readFileContent(srv *drive.Service, driveFile *drive.File) (string, error) { exportFormat := exportFormat(driveFile) From f26539b1a499625410e1f027ccc6deb803e9f80a Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Mon, 21 Oct 2024 17:55:47 +0100 Subject: [PATCH 12/28] chore(googledrive): fix golangcilint --- pkg/component/data/googledrive/v0/README.mdx | 8 ++++---- pkg/component/data/googledrive/v0/config/tasks.json | 6 +++--- pkg/component/data/googledrive/v0/drive_service.go | 8 ++++++++ pkg/component/data/googledrive/v0/main.go | 11 ++--------- 4 files changed, 17 insertions(+), 16 deletions(-) diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index 2eaf066fc..41bc022d4 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -81,13 +81,13 @@ Read a file content and metadata from Google Drive. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. | +| Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. 
Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is not a Google file, the content will be the same as the original file. | | Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | | ID | `id` | string | ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | | MIME type | `mime-type` | string | MIME type of the file. | | Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | -| Name | `name` | string | Name of the file. | +| Name | `name` | string | Name of the file. The file extension will be added automatically based on the exported MIME type. For example, Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is a Google Sheet and the name is `MySheet`, the exported file will be `MySheet.csv`. If the file is not a Google file, the name will be used as is. | | Size | `size` | integer | Size of the file in bytes. | | Version | `version` | integer | Version of the file. | | Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | @@ -129,13 +129,13 @@ Read metadata and content of files under the specified folder in Google Drive. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | -| Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. | +| Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is not a Google file, the content will be the same as the original file. | | Created time | `created-time` | string | Time when the file was created. 
Format: `YYYY-MM-DDTHH:MM:SSZ` | | ID | `id` | string | ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | | MIME type | `mime-type` | string | MIME type of the file. | | Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | -| Name | `name` | string | Name of the file. | +| Name | `name` | string | Name of the file. The file extension will be added automatically based on the exported MIME type. For example, Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is a Google Sheet and the name is `MySheet`, the exported file will be `MySheet.csv`. If the file is not a Google file, the name will be used as is. | | Size | `size` | integer | Size of the file in bytes. | | Version | `version` | integer | Version of the file. | | Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json index 867ca8394..965f0a66d 100644 --- a/pkg/component/data/googledrive/v0/config/tasks.json +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -12,14 +12,14 @@ "type": "string" }, "name": { - "description": "Name of the file.", + "description": "Name of the file. The file extension will be added automatically based on the exported MIME type. For example, Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is a Google Sheet and the name is `MySheet`, the exported file will be `MySheet.csv`. 
If the file is not a Google file, the name will be used as is.", "instillFormat": "string", "instillUIOrder": 1, "title": "Name", "type": "string" }, "content": { - "description": "Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix.", + "description": "Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is not a Google file, the content will be the same as the original file.", "instillFormat": "string", "instillUIMultiline": true, "instillUIOrder": 2, @@ -180,4 +180,4 @@ "type": "object" } } -} \ No newline at end of file +} diff --git a/pkg/component/data/googledrive/v0/drive_service.go b/pkg/component/data/googledrive/v0/drive_service.go index de4ab537a..803b09364 100644 --- a/pkg/component/data/googledrive/v0/drive_service.go +++ b/pkg/component/data/googledrive/v0/drive_service.go @@ -102,6 +102,14 @@ func (d *driveService) readFolder(folderUID string, readContent bool) ([]*file, } func convertDriveFileToComponentFile(driveFile *drive.File) *file { + // Google Drive API only can support downloading the binary data. + // So, when the file is not binary, we need to export the file as PDF/CSV first. + // To make Google Drive Component can seamlessly work with other components, we need to add the file extension to the file name. 
+ fileExtension := exportFileExtension(driveFile.MimeType) + if fileExtension != "" { + driveFile.Name = driveFile.Name + exportFileExtension(driveFile.MimeType) + } + return &file{ ID: driveFile.Id, Name: driveFile.Name, diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index fb0c98259..db2774523 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -133,24 +133,17 @@ func getConfigScopes() []string { } `json:"instillOAuthConfig"` } var setup setupConfig - json.Unmarshal(setupJSON, &setup) + _ = json.Unmarshal(setupJSON, &setup) return setup.InstillOAuthConfig.Scopes } -// Need to get the scopes from the token.json that received from the OAuth2 -// Temporarily, it will be same as the scopes in setup.json. -// So, we get it from setup.json first. Later, we will get it from token.json -// after we confirm how we retrieve the scopes from token.json. -func getScopes(setup *structpb.Struct) []string { - return getConfigScopes() -} - // Execute reads the input from the job, executes the task, and writes the output // to the job. func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { return base.ConcurrentExecutor(ctx, jobs, e.execute) } +// WithOAuthCredentials sets the OAuth credentials for the component. 
func (c *component) WithOAuthCredentials(s map[string]any) *component { c.instillAICredentials = base.ReadFromGlobalConfig(cfgOAuthCredential, s) return c From cc56a03ba8eb64cc791efbbb1210bea4501a5ebf Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 18:31:44 +0100 Subject: [PATCH 13/28] feat(googledrive): isolate google drive client --- pkg/component/internal/mock/generator.go | 1 + .../internal/mock/i_drive_service_mock.gen.go | 742 ++++++++++++++++++ .../googledriveclient/googledriveclient.go} | 95 +-- 3 files changed, 776 insertions(+), 62 deletions(-) create mode 100644 pkg/component/internal/mock/i_drive_service_mock.gen.go rename pkg/component/{data/googledrive/v0/drive_service.go => internal/util/googledriveclient/googledriveclient.go} (54%) diff --git a/pkg/component/internal/mock/generator.go b/pkg/component/internal/mock/generator.go index ee4430000..fa0036772 100644 --- a/pkg/component/internal/mock/generator.go +++ b/pkg/component/internal/mock/generator.go @@ -7,6 +7,7 @@ package mock //go:generate minimock -g -i github.com/instill-ai/pipeline-backend/pkg/component/operator/document/v0.commandRunner -o ./ -s "_mock.gen.go" //go:generate minimock -g -i io.WriteCloser -o ./ -s "_mock.gen.go" //go:generate minimock -g -i github.com/instill-ai/protogen-go/artifact/artifact/v1alpha.ArtifactPublicServiceClient -o ./ -s "_mock.gen.go" +//go:generate minimock -g -i github.com/instill-ai/pipeline-backend/pkg/component/internal/util/googledriveclient.IDriveService -o ./ -s "_mock.gen.go" // Ollama mock is generated in the source package to avoid import cycles. 
//go:generate minimock -i github.com/instill-ai/pipeline-backend/pkg/component/ai/ollama/v0.OllamaClientInterface -o ../../ai/ollama/v0 -s "_mock.gen.go" -p ollama diff --git a/pkg/component/internal/mock/i_drive_service_mock.gen.go b/pkg/component/internal/mock/i_drive_service_mock.gen.go new file mode 100644 index 000000000..239159bc1 --- /dev/null +++ b/pkg/component/internal/mock/i_drive_service_mock.gen.go @@ -0,0 +1,742 @@ +// Code generated by http://github.com/gojuno/minimock (v3.4.0). DO NOT EDIT. + +package mock + +import ( + "sync" + mm_atomic "sync/atomic" + mm_time "time" + + "github.com/gojuno/minimock/v3" + "google.golang.org/api/drive/v3" +) + +// IDriveServiceMock implements mm_googledriveclient.IDriveService +type IDriveServiceMock struct { + t minimock.Tester + finishOnce sync.Once + + funcReadFile func(fileUID string) (fp1 *drive.File, sp1 *string, err error) + funcReadFileOrigin string + inspectFuncReadFile func(fileUID string) + afterReadFileCounter uint64 + beforeReadFileCounter uint64 + ReadFileMock mIDriveServiceMockReadFile + + funcReadFolder func(folderUID string, readContent bool) (fpa1 []*drive.File, spa1 []*string, err error) + funcReadFolderOrigin string + inspectFuncReadFolder func(folderUID string, readContent bool) + afterReadFolderCounter uint64 + beforeReadFolderCounter uint64 + ReadFolderMock mIDriveServiceMockReadFolder +} + +// NewIDriveServiceMock returns a mock for mm_googledriveclient.IDriveService +func NewIDriveServiceMock(t minimock.Tester) *IDriveServiceMock { + m := &IDriveServiceMock{t: t} + + if controller, ok := t.(minimock.MockController); ok { + controller.RegisterMocker(m) + } + + m.ReadFileMock = mIDriveServiceMockReadFile{mock: m} + m.ReadFileMock.callArgs = []*IDriveServiceMockReadFileParams{} + + m.ReadFolderMock = mIDriveServiceMockReadFolder{mock: m} + m.ReadFolderMock.callArgs = []*IDriveServiceMockReadFolderParams{} + + t.Cleanup(m.MinimockFinish) + + return m +} + +type mIDriveServiceMockReadFile struct 
{ + optional bool + mock *IDriveServiceMock + defaultExpectation *IDriveServiceMockReadFileExpectation + expectations []*IDriveServiceMockReadFileExpectation + + callArgs []*IDriveServiceMockReadFileParams + mutex sync.RWMutex + + expectedInvocations uint64 + expectedInvocationsOrigin string +} + +// IDriveServiceMockReadFileExpectation specifies expectation struct of the IDriveService.ReadFile +type IDriveServiceMockReadFileExpectation struct { + mock *IDriveServiceMock + params *IDriveServiceMockReadFileParams + paramPtrs *IDriveServiceMockReadFileParamPtrs + expectationOrigins IDriveServiceMockReadFileExpectationOrigins + results *IDriveServiceMockReadFileResults + returnOrigin string + Counter uint64 +} + +// IDriveServiceMockReadFileParams contains parameters of the IDriveService.ReadFile +type IDriveServiceMockReadFileParams struct { + fileUID string +} + +// IDriveServiceMockReadFileParamPtrs contains pointers to parameters of the IDriveService.ReadFile +type IDriveServiceMockReadFileParamPtrs struct { + fileUID *string +} + +// IDriveServiceMockReadFileResults contains results of the IDriveService.ReadFile +type IDriveServiceMockReadFileResults struct { + fp1 *drive.File + sp1 *string + err error +} + +// IDriveServiceMockReadFileOrigins contains origins of expectations of the IDriveService.ReadFile +type IDriveServiceMockReadFileExpectationOrigins struct { + origin string + originFileUID string +} + +// Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning +// the test will fail minimock's automatic final call check if the mocked method was not called at least once. +// Optional() makes method check to work in '0 or more' mode. +// It is NOT RECOMMENDED to use this option unless you really need it, as default behaviour helps to +// catch the problems when the expected method call is totally skipped during test run. 
+func (mmReadFile *mIDriveServiceMockReadFile) Optional() *mIDriveServiceMockReadFile { + mmReadFile.optional = true + return mmReadFile +} + +// Expect sets up expected params for IDriveService.ReadFile +func (mmReadFile *mIDriveServiceMockReadFile) Expect(fileUID string) *mIDriveServiceMockReadFile { + if mmReadFile.mock.funcReadFile != nil { + mmReadFile.mock.t.Fatalf("IDriveServiceMock.ReadFile mock is already set by Set") + } + + if mmReadFile.defaultExpectation == nil { + mmReadFile.defaultExpectation = &IDriveServiceMockReadFileExpectation{} + } + + if mmReadFile.defaultExpectation.paramPtrs != nil { + mmReadFile.mock.t.Fatalf("IDriveServiceMock.ReadFile mock is already set by ExpectParams functions") + } + + mmReadFile.defaultExpectation.params = &IDriveServiceMockReadFileParams{fileUID} + mmReadFile.defaultExpectation.expectationOrigins.origin = minimock.CallerInfo(1) + for _, e := range mmReadFile.expectations { + if minimock.Equal(e.params, mmReadFile.defaultExpectation.params) { + mmReadFile.mock.t.Fatalf("Expectation set by When has same params: %#v", *mmReadFile.defaultExpectation.params) + } + } + + return mmReadFile +} + +// ExpectFileUIDParam1 sets up expected param fileUID for IDriveService.ReadFile +func (mmReadFile *mIDriveServiceMockReadFile) ExpectFileUIDParam1(fileUID string) *mIDriveServiceMockReadFile { + if mmReadFile.mock.funcReadFile != nil { + mmReadFile.mock.t.Fatalf("IDriveServiceMock.ReadFile mock is already set by Set") + } + + if mmReadFile.defaultExpectation == nil { + mmReadFile.defaultExpectation = &IDriveServiceMockReadFileExpectation{} + } + + if mmReadFile.defaultExpectation.params != nil { + mmReadFile.mock.t.Fatalf("IDriveServiceMock.ReadFile mock is already set by Expect") + } + + if mmReadFile.defaultExpectation.paramPtrs == nil { + mmReadFile.defaultExpectation.paramPtrs = &IDriveServiceMockReadFileParamPtrs{} + } + mmReadFile.defaultExpectation.paramPtrs.fileUID = &fileUID + 
mmReadFile.defaultExpectation.expectationOrigins.originFileUID = minimock.CallerInfo(1) + + return mmReadFile +} + +// Inspect accepts an inspector function that has same arguments as the IDriveService.ReadFile +func (mmReadFile *mIDriveServiceMockReadFile) Inspect(f func(fileUID string)) *mIDriveServiceMockReadFile { + if mmReadFile.mock.inspectFuncReadFile != nil { + mmReadFile.mock.t.Fatalf("Inspect function is already set for IDriveServiceMock.ReadFile") + } + + mmReadFile.mock.inspectFuncReadFile = f + + return mmReadFile +} + +// Return sets up results that will be returned by IDriveService.ReadFile +func (mmReadFile *mIDriveServiceMockReadFile) Return(fp1 *drive.File, sp1 *string, err error) *IDriveServiceMock { + if mmReadFile.mock.funcReadFile != nil { + mmReadFile.mock.t.Fatalf("IDriveServiceMock.ReadFile mock is already set by Set") + } + + if mmReadFile.defaultExpectation == nil { + mmReadFile.defaultExpectation = &IDriveServiceMockReadFileExpectation{mock: mmReadFile.mock} + } + mmReadFile.defaultExpectation.results = &IDriveServiceMockReadFileResults{fp1, sp1, err} + mmReadFile.defaultExpectation.returnOrigin = minimock.CallerInfo(1) + return mmReadFile.mock +} + +// Set uses given function f to mock the IDriveService.ReadFile method +func (mmReadFile *mIDriveServiceMockReadFile) Set(f func(fileUID string) (fp1 *drive.File, sp1 *string, err error)) *IDriveServiceMock { + if mmReadFile.defaultExpectation != nil { + mmReadFile.mock.t.Fatalf("Default expectation is already set for the IDriveService.ReadFile method") + } + + if len(mmReadFile.expectations) > 0 { + mmReadFile.mock.t.Fatalf("Some expectations are already set for the IDriveService.ReadFile method") + } + + mmReadFile.mock.funcReadFile = f + mmReadFile.mock.funcReadFileOrigin = minimock.CallerInfo(1) + return mmReadFile.mock +} + +// When sets expectation for the IDriveService.ReadFile which will trigger the result defined by the following +// Then helper +func (mmReadFile 
*mIDriveServiceMockReadFile) When(fileUID string) *IDriveServiceMockReadFileExpectation { + if mmReadFile.mock.funcReadFile != nil { + mmReadFile.mock.t.Fatalf("IDriveServiceMock.ReadFile mock is already set by Set") + } + + expectation := &IDriveServiceMockReadFileExpectation{ + mock: mmReadFile.mock, + params: &IDriveServiceMockReadFileParams{fileUID}, + expectationOrigins: IDriveServiceMockReadFileExpectationOrigins{origin: minimock.CallerInfo(1)}, + } + mmReadFile.expectations = append(mmReadFile.expectations, expectation) + return expectation +} + +// Then sets up IDriveService.ReadFile return parameters for the expectation previously defined by the When method +func (e *IDriveServiceMockReadFileExpectation) Then(fp1 *drive.File, sp1 *string, err error) *IDriveServiceMock { + e.results = &IDriveServiceMockReadFileResults{fp1, sp1, err} + return e.mock +} + +// Times sets number of times IDriveService.ReadFile should be invoked +func (mmReadFile *mIDriveServiceMockReadFile) Times(n uint64) *mIDriveServiceMockReadFile { + if n == 0 { + mmReadFile.mock.t.Fatalf("Times of IDriveServiceMock.ReadFile mock can not be zero") + } + mm_atomic.StoreUint64(&mmReadFile.expectedInvocations, n) + mmReadFile.expectedInvocationsOrigin = minimock.CallerInfo(1) + return mmReadFile +} + +func (mmReadFile *mIDriveServiceMockReadFile) invocationsDone() bool { + if len(mmReadFile.expectations) == 0 && mmReadFile.defaultExpectation == nil && mmReadFile.mock.funcReadFile == nil { + return true + } + + totalInvocations := mm_atomic.LoadUint64(&mmReadFile.mock.afterReadFileCounter) + expectedInvocations := mm_atomic.LoadUint64(&mmReadFile.expectedInvocations) + + return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) +} + +// ReadFile implements mm_googledriveclient.IDriveService +func (mmReadFile *IDriveServiceMock) ReadFile(fileUID string) (fp1 *drive.File, sp1 *string, err error) { + mm_atomic.AddUint64(&mmReadFile.beforeReadFileCounter, 
1) + defer mm_atomic.AddUint64(&mmReadFile.afterReadFileCounter, 1) + + mmReadFile.t.Helper() + + if mmReadFile.inspectFuncReadFile != nil { + mmReadFile.inspectFuncReadFile(fileUID) + } + + mm_params := IDriveServiceMockReadFileParams{fileUID} + + // Record call args + mmReadFile.ReadFileMock.mutex.Lock() + mmReadFile.ReadFileMock.callArgs = append(mmReadFile.ReadFileMock.callArgs, &mm_params) + mmReadFile.ReadFileMock.mutex.Unlock() + + for _, e := range mmReadFile.ReadFileMock.expectations { + if minimock.Equal(*e.params, mm_params) { + mm_atomic.AddUint64(&e.Counter, 1) + return e.results.fp1, e.results.sp1, e.results.err + } + } + + if mmReadFile.ReadFileMock.defaultExpectation != nil { + mm_atomic.AddUint64(&mmReadFile.ReadFileMock.defaultExpectation.Counter, 1) + mm_want := mmReadFile.ReadFileMock.defaultExpectation.params + mm_want_ptrs := mmReadFile.ReadFileMock.defaultExpectation.paramPtrs + + mm_got := IDriveServiceMockReadFileParams{fileUID} + + if mm_want_ptrs != nil { + + if mm_want_ptrs.fileUID != nil && !minimock.Equal(*mm_want_ptrs.fileUID, mm_got.fileUID) { + mmReadFile.t.Errorf("IDriveServiceMock.ReadFile got unexpected parameter fileUID, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmReadFile.ReadFileMock.defaultExpectation.expectationOrigins.originFileUID, *mm_want_ptrs.fileUID, mm_got.fileUID, minimock.Diff(*mm_want_ptrs.fileUID, mm_got.fileUID)) + } + + } else if mm_want != nil && !minimock.Equal(*mm_want, mm_got) { + mmReadFile.t.Errorf("IDriveServiceMock.ReadFile got unexpected parameters, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmReadFile.ReadFileMock.defaultExpectation.expectationOrigins.origin, *mm_want, mm_got, minimock.Diff(*mm_want, mm_got)) + } + + mm_results := mmReadFile.ReadFileMock.defaultExpectation.results + if mm_results == nil { + mmReadFile.t.Fatal("No results are set for the IDriveServiceMock.ReadFile") + } + return (*mm_results).fp1, (*mm_results).sp1, (*mm_results).err + } + if mmReadFile.funcReadFile != nil { + 
return mmReadFile.funcReadFile(fileUID) + } + mmReadFile.t.Fatalf("Unexpected call to IDriveServiceMock.ReadFile. %v", fileUID) + return +} + +// ReadFileAfterCounter returns a count of finished IDriveServiceMock.ReadFile invocations +func (mmReadFile *IDriveServiceMock) ReadFileAfterCounter() uint64 { + return mm_atomic.LoadUint64(&mmReadFile.afterReadFileCounter) +} + +// ReadFileBeforeCounter returns a count of IDriveServiceMock.ReadFile invocations +func (mmReadFile *IDriveServiceMock) ReadFileBeforeCounter() uint64 { + return mm_atomic.LoadUint64(&mmReadFile.beforeReadFileCounter) +} + +// Calls returns a list of arguments used in each call to IDriveServiceMock.ReadFile. +// The list is in the same order as the calls were made (i.e. recent calls have a higher index) +func (mmReadFile *mIDriveServiceMockReadFile) Calls() []*IDriveServiceMockReadFileParams { + mmReadFile.mutex.RLock() + + argCopy := make([]*IDriveServiceMockReadFileParams, len(mmReadFile.callArgs)) + copy(argCopy, mmReadFile.callArgs) + + mmReadFile.mutex.RUnlock() + + return argCopy +} + +// MinimockReadFileDone returns true if the count of the ReadFile invocations corresponds +// the number of defined expectations +func (m *IDriveServiceMock) MinimockReadFileDone() bool { + if m.ReadFileMock.optional { + // Optional methods provide '0 or more' call count restriction. 
+ return true + } + + for _, e := range m.ReadFileMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + return false + } + } + + return m.ReadFileMock.invocationsDone() +} + +// MinimockReadFileInspect logs each unmet expectation +func (m *IDriveServiceMock) MinimockReadFileInspect() { + for _, e := range m.ReadFileMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFile at\n%s with params: %#v", e.expectationOrigins.origin, *e.params) + } + } + + afterReadFileCounter := mm_atomic.LoadUint64(&m.afterReadFileCounter) + // if default expectation was set then invocations count should be greater than zero + if m.ReadFileMock.defaultExpectation != nil && afterReadFileCounter < 1 { + if m.ReadFileMock.defaultExpectation.params == nil { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFile at\n%s", m.ReadFileMock.defaultExpectation.returnOrigin) + } else { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFile at\n%s with params: %#v", m.ReadFileMock.defaultExpectation.expectationOrigins.origin, *m.ReadFileMock.defaultExpectation.params) + } + } + // if func was set then invocations count should be greater than zero + if m.funcReadFile != nil && afterReadFileCounter < 1 { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFile at\n%s", m.funcReadFileOrigin) + } + + if !m.ReadFileMock.invocationsDone() && afterReadFileCounter > 0 { + m.t.Errorf("Expected %d calls to IDriveServiceMock.ReadFile at\n%s but found %d calls", + mm_atomic.LoadUint64(&m.ReadFileMock.expectedInvocations), m.ReadFileMock.expectedInvocationsOrigin, afterReadFileCounter) + } +} + +type mIDriveServiceMockReadFolder struct { + optional bool + mock *IDriveServiceMock + defaultExpectation *IDriveServiceMockReadFolderExpectation + expectations []*IDriveServiceMockReadFolderExpectation + + callArgs []*IDriveServiceMockReadFolderParams + mutex sync.RWMutex + + expectedInvocations uint64 + expectedInvocationsOrigin string +} 
+ +// IDriveServiceMockReadFolderExpectation specifies expectation struct of the IDriveService.ReadFolder +type IDriveServiceMockReadFolderExpectation struct { + mock *IDriveServiceMock + params *IDriveServiceMockReadFolderParams + paramPtrs *IDriveServiceMockReadFolderParamPtrs + expectationOrigins IDriveServiceMockReadFolderExpectationOrigins + results *IDriveServiceMockReadFolderResults + returnOrigin string + Counter uint64 +} + +// IDriveServiceMockReadFolderParams contains parameters of the IDriveService.ReadFolder +type IDriveServiceMockReadFolderParams struct { + folderUID string + readContent bool +} + +// IDriveServiceMockReadFolderParamPtrs contains pointers to parameters of the IDriveService.ReadFolder +type IDriveServiceMockReadFolderParamPtrs struct { + folderUID *string + readContent *bool +} + +// IDriveServiceMockReadFolderResults contains results of the IDriveService.ReadFolder +type IDriveServiceMockReadFolderResults struct { + fpa1 []*drive.File + spa1 []*string + err error +} + +// IDriveServiceMockReadFolderOrigins contains origins of expectations of the IDriveService.ReadFolder +type IDriveServiceMockReadFolderExpectationOrigins struct { + origin string + originFolderUID string + originReadContent string +} + +// Marks this method to be optional. The default behavior of any method with Return() is '1 or more', meaning +// the test will fail minimock's automatic final call check if the mocked method was not called at least once. +// Optional() makes method check to work in '0 or more' mode. +// It is NOT RECOMMENDED to use this option unless you really need it, as default behaviour helps to +// catch the problems when the expected method call is totally skipped during test run. 
+func (mmReadFolder *mIDriveServiceMockReadFolder) Optional() *mIDriveServiceMockReadFolder { + mmReadFolder.optional = true + return mmReadFolder +} + +// Expect sets up expected params for IDriveService.ReadFolder +func (mmReadFolder *mIDriveServiceMockReadFolder) Expect(folderUID string, readContent bool) *mIDriveServiceMockReadFolder { + if mmReadFolder.mock.funcReadFolder != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by Set") + } + + if mmReadFolder.defaultExpectation == nil { + mmReadFolder.defaultExpectation = &IDriveServiceMockReadFolderExpectation{} + } + + if mmReadFolder.defaultExpectation.paramPtrs != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by ExpectParams functions") + } + + mmReadFolder.defaultExpectation.params = &IDriveServiceMockReadFolderParams{folderUID, readContent} + mmReadFolder.defaultExpectation.expectationOrigins.origin = minimock.CallerInfo(1) + for _, e := range mmReadFolder.expectations { + if minimock.Equal(e.params, mmReadFolder.defaultExpectation.params) { + mmReadFolder.mock.t.Fatalf("Expectation set by When has same params: %#v", *mmReadFolder.defaultExpectation.params) + } + } + + return mmReadFolder +} + +// ExpectFolderUIDParam1 sets up expected param folderUID for IDriveService.ReadFolder +func (mmReadFolder *mIDriveServiceMockReadFolder) ExpectFolderUIDParam1(folderUID string) *mIDriveServiceMockReadFolder { + if mmReadFolder.mock.funcReadFolder != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by Set") + } + + if mmReadFolder.defaultExpectation == nil { + mmReadFolder.defaultExpectation = &IDriveServiceMockReadFolderExpectation{} + } + + if mmReadFolder.defaultExpectation.params != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by Expect") + } + + if mmReadFolder.defaultExpectation.paramPtrs == nil { + mmReadFolder.defaultExpectation.paramPtrs = 
&IDriveServiceMockReadFolderParamPtrs{} + } + mmReadFolder.defaultExpectation.paramPtrs.folderUID = &folderUID + mmReadFolder.defaultExpectation.expectationOrigins.originFolderUID = minimock.CallerInfo(1) + + return mmReadFolder +} + +// ExpectReadContentParam2 sets up expected param readContent for IDriveService.ReadFolder +func (mmReadFolder *mIDriveServiceMockReadFolder) ExpectReadContentParam2(readContent bool) *mIDriveServiceMockReadFolder { + if mmReadFolder.mock.funcReadFolder != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by Set") + } + + if mmReadFolder.defaultExpectation == nil { + mmReadFolder.defaultExpectation = &IDriveServiceMockReadFolderExpectation{} + } + + if mmReadFolder.defaultExpectation.params != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by Expect") + } + + if mmReadFolder.defaultExpectation.paramPtrs == nil { + mmReadFolder.defaultExpectation.paramPtrs = &IDriveServiceMockReadFolderParamPtrs{} + } + mmReadFolder.defaultExpectation.paramPtrs.readContent = &readContent + mmReadFolder.defaultExpectation.expectationOrigins.originReadContent = minimock.CallerInfo(1) + + return mmReadFolder +} + +// Inspect accepts an inspector function that has same arguments as the IDriveService.ReadFolder +func (mmReadFolder *mIDriveServiceMockReadFolder) Inspect(f func(folderUID string, readContent bool)) *mIDriveServiceMockReadFolder { + if mmReadFolder.mock.inspectFuncReadFolder != nil { + mmReadFolder.mock.t.Fatalf("Inspect function is already set for IDriveServiceMock.ReadFolder") + } + + mmReadFolder.mock.inspectFuncReadFolder = f + + return mmReadFolder +} + +// Return sets up results that will be returned by IDriveService.ReadFolder +func (mmReadFolder *mIDriveServiceMockReadFolder) Return(fpa1 []*drive.File, spa1 []*string, err error) *IDriveServiceMock { + if mmReadFolder.mock.funcReadFolder != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is 
already set by Set") + } + + if mmReadFolder.defaultExpectation == nil { + mmReadFolder.defaultExpectation = &IDriveServiceMockReadFolderExpectation{mock: mmReadFolder.mock} + } + mmReadFolder.defaultExpectation.results = &IDriveServiceMockReadFolderResults{fpa1, spa1, err} + mmReadFolder.defaultExpectation.returnOrigin = minimock.CallerInfo(1) + return mmReadFolder.mock +} + +// Set uses given function f to mock the IDriveService.ReadFolder method +func (mmReadFolder *mIDriveServiceMockReadFolder) Set(f func(folderUID string, readContent bool) (fpa1 []*drive.File, spa1 []*string, err error)) *IDriveServiceMock { + if mmReadFolder.defaultExpectation != nil { + mmReadFolder.mock.t.Fatalf("Default expectation is already set for the IDriveService.ReadFolder method") + } + + if len(mmReadFolder.expectations) > 0 { + mmReadFolder.mock.t.Fatalf("Some expectations are already set for the IDriveService.ReadFolder method") + } + + mmReadFolder.mock.funcReadFolder = f + mmReadFolder.mock.funcReadFolderOrigin = minimock.CallerInfo(1) + return mmReadFolder.mock +} + +// When sets expectation for the IDriveService.ReadFolder which will trigger the result defined by the following +// Then helper +func (mmReadFolder *mIDriveServiceMockReadFolder) When(folderUID string, readContent bool) *IDriveServiceMockReadFolderExpectation { + if mmReadFolder.mock.funcReadFolder != nil { + mmReadFolder.mock.t.Fatalf("IDriveServiceMock.ReadFolder mock is already set by Set") + } + + expectation := &IDriveServiceMockReadFolderExpectation{ + mock: mmReadFolder.mock, + params: &IDriveServiceMockReadFolderParams{folderUID, readContent}, + expectationOrigins: IDriveServiceMockReadFolderExpectationOrigins{origin: minimock.CallerInfo(1)}, + } + mmReadFolder.expectations = append(mmReadFolder.expectations, expectation) + return expectation +} + +// Then sets up IDriveService.ReadFolder return parameters for the expectation previously defined by the When method +func (e 
*IDriveServiceMockReadFolderExpectation) Then(fpa1 []*drive.File, spa1 []*string, err error) *IDriveServiceMock { + e.results = &IDriveServiceMockReadFolderResults{fpa1, spa1, err} + return e.mock +} + +// Times sets number of times IDriveService.ReadFolder should be invoked +func (mmReadFolder *mIDriveServiceMockReadFolder) Times(n uint64) *mIDriveServiceMockReadFolder { + if n == 0 { + mmReadFolder.mock.t.Fatalf("Times of IDriveServiceMock.ReadFolder mock can not be zero") + } + mm_atomic.StoreUint64(&mmReadFolder.expectedInvocations, n) + mmReadFolder.expectedInvocationsOrigin = minimock.CallerInfo(1) + return mmReadFolder +} + +func (mmReadFolder *mIDriveServiceMockReadFolder) invocationsDone() bool { + if len(mmReadFolder.expectations) == 0 && mmReadFolder.defaultExpectation == nil && mmReadFolder.mock.funcReadFolder == nil { + return true + } + + totalInvocations := mm_atomic.LoadUint64(&mmReadFolder.mock.afterReadFolderCounter) + expectedInvocations := mm_atomic.LoadUint64(&mmReadFolder.expectedInvocations) + + return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) +} + +// ReadFolder implements mm_googledriveclient.IDriveService +func (mmReadFolder *IDriveServiceMock) ReadFolder(folderUID string, readContent bool) (fpa1 []*drive.File, spa1 []*string, err error) { + mm_atomic.AddUint64(&mmReadFolder.beforeReadFolderCounter, 1) + defer mm_atomic.AddUint64(&mmReadFolder.afterReadFolderCounter, 1) + + mmReadFolder.t.Helper() + + if mmReadFolder.inspectFuncReadFolder != nil { + mmReadFolder.inspectFuncReadFolder(folderUID, readContent) + } + + mm_params := IDriveServiceMockReadFolderParams{folderUID, readContent} + + // Record call args + mmReadFolder.ReadFolderMock.mutex.Lock() + mmReadFolder.ReadFolderMock.callArgs = append(mmReadFolder.ReadFolderMock.callArgs, &mm_params) + mmReadFolder.ReadFolderMock.mutex.Unlock() + + for _, e := range mmReadFolder.ReadFolderMock.expectations { + if minimock.Equal(*e.params, 
mm_params) { + mm_atomic.AddUint64(&e.Counter, 1) + return e.results.fpa1, e.results.spa1, e.results.err + } + } + + if mmReadFolder.ReadFolderMock.defaultExpectation != nil { + mm_atomic.AddUint64(&mmReadFolder.ReadFolderMock.defaultExpectation.Counter, 1) + mm_want := mmReadFolder.ReadFolderMock.defaultExpectation.params + mm_want_ptrs := mmReadFolder.ReadFolderMock.defaultExpectation.paramPtrs + + mm_got := IDriveServiceMockReadFolderParams{folderUID, readContent} + + if mm_want_ptrs != nil { + + if mm_want_ptrs.folderUID != nil && !minimock.Equal(*mm_want_ptrs.folderUID, mm_got.folderUID) { + mmReadFolder.t.Errorf("IDriveServiceMock.ReadFolder got unexpected parameter folderUID, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmReadFolder.ReadFolderMock.defaultExpectation.expectationOrigins.originFolderUID, *mm_want_ptrs.folderUID, mm_got.folderUID, minimock.Diff(*mm_want_ptrs.folderUID, mm_got.folderUID)) + } + + if mm_want_ptrs.readContent != nil && !minimock.Equal(*mm_want_ptrs.readContent, mm_got.readContent) { + mmReadFolder.t.Errorf("IDriveServiceMock.ReadFolder got unexpected parameter readContent, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmReadFolder.ReadFolderMock.defaultExpectation.expectationOrigins.originReadContent, *mm_want_ptrs.readContent, mm_got.readContent, minimock.Diff(*mm_want_ptrs.readContent, mm_got.readContent)) + } + + } else if mm_want != nil && !minimock.Equal(*mm_want, mm_got) { + mmReadFolder.t.Errorf("IDriveServiceMock.ReadFolder got unexpected parameters, expected at\n%s:\nwant: %#v\n got: %#v%s\n", + mmReadFolder.ReadFolderMock.defaultExpectation.expectationOrigins.origin, *mm_want, mm_got, minimock.Diff(*mm_want, mm_got)) + } + + mm_results := mmReadFolder.ReadFolderMock.defaultExpectation.results + if mm_results == nil { + mmReadFolder.t.Fatal("No results are set for the IDriveServiceMock.ReadFolder") + } + return (*mm_results).fpa1, (*mm_results).spa1, (*mm_results).err + } + if mmReadFolder.funcReadFolder != nil { + 
return mmReadFolder.funcReadFolder(folderUID, readContent) + } + mmReadFolder.t.Fatalf("Unexpected call to IDriveServiceMock.ReadFolder. %v %v", folderUID, readContent) + return +} + +// ReadFolderAfterCounter returns a count of finished IDriveServiceMock.ReadFolder invocations +func (mmReadFolder *IDriveServiceMock) ReadFolderAfterCounter() uint64 { + return mm_atomic.LoadUint64(&mmReadFolder.afterReadFolderCounter) +} + +// ReadFolderBeforeCounter returns a count of IDriveServiceMock.ReadFolder invocations +func (mmReadFolder *IDriveServiceMock) ReadFolderBeforeCounter() uint64 { + return mm_atomic.LoadUint64(&mmReadFolder.beforeReadFolderCounter) +} + +// Calls returns a list of arguments used in each call to IDriveServiceMock.ReadFolder. +// The list is in the same order as the calls were made (i.e. recent calls have a higher index) +func (mmReadFolder *mIDriveServiceMockReadFolder) Calls() []*IDriveServiceMockReadFolderParams { + mmReadFolder.mutex.RLock() + + argCopy := make([]*IDriveServiceMockReadFolderParams, len(mmReadFolder.callArgs)) + copy(argCopy, mmReadFolder.callArgs) + + mmReadFolder.mutex.RUnlock() + + return argCopy +} + +// MinimockReadFolderDone returns true if the count of the ReadFolder invocations corresponds +// the number of defined expectations +func (m *IDriveServiceMock) MinimockReadFolderDone() bool { + if m.ReadFolderMock.optional { + // Optional methods provide '0 or more' call count restriction. 
+ return true + } + + for _, e := range m.ReadFolderMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + return false + } + } + + return m.ReadFolderMock.invocationsDone() +} + +// MinimockReadFolderInspect logs each unmet expectation +func (m *IDriveServiceMock) MinimockReadFolderInspect() { + for _, e := range m.ReadFolderMock.expectations { + if mm_atomic.LoadUint64(&e.Counter) < 1 { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFolder at\n%s with params: %#v", e.expectationOrigins.origin, *e.params) + } + } + + afterReadFolderCounter := mm_atomic.LoadUint64(&m.afterReadFolderCounter) + // if default expectation was set then invocations count should be greater than zero + if m.ReadFolderMock.defaultExpectation != nil && afterReadFolderCounter < 1 { + if m.ReadFolderMock.defaultExpectation.params == nil { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFolder at\n%s", m.ReadFolderMock.defaultExpectation.returnOrigin) + } else { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFolder at\n%s with params: %#v", m.ReadFolderMock.defaultExpectation.expectationOrigins.origin, *m.ReadFolderMock.defaultExpectation.params) + } + } + // if func was set then invocations count should be greater than zero + if m.funcReadFolder != nil && afterReadFolderCounter < 1 { + m.t.Errorf("Expected call to IDriveServiceMock.ReadFolder at\n%s", m.funcReadFolderOrigin) + } + + if !m.ReadFolderMock.invocationsDone() && afterReadFolderCounter > 0 { + m.t.Errorf("Expected %d calls to IDriveServiceMock.ReadFolder at\n%s but found %d calls", + mm_atomic.LoadUint64(&m.ReadFolderMock.expectedInvocations), m.ReadFolderMock.expectedInvocationsOrigin, afterReadFolderCounter) + } +} + +// MinimockFinish checks that all mocked methods have been called the expected number of times +func (m *IDriveServiceMock) MinimockFinish() { + m.finishOnce.Do(func() { + if !m.minimockDone() { + m.MinimockReadFileInspect() + + m.MinimockReadFolderInspect() + } + }) +} + +// MinimockWait 
waits for all mocked methods to be called the expected number of times +func (m *IDriveServiceMock) MinimockWait(timeout mm_time.Duration) { + timeoutCh := mm_time.After(timeout) + for { + if m.minimockDone() { + return + } + select { + case <-timeoutCh: + m.MinimockFinish() + return + case <-mm_time.After(10 * mm_time.Millisecond): + } + } +} + +func (m *IDriveServiceMock) minimockDone() bool { + done := true + return done && + m.MinimockReadFileDone() && + m.MinimockReadFolderDone() +} diff --git a/pkg/component/data/googledrive/v0/drive_service.go b/pkg/component/internal/util/googledriveclient/googledriveclient.go similarity index 54% rename from pkg/component/data/googledrive/v0/drive_service.go rename to pkg/component/internal/util/googledriveclient/googledriveclient.go index 803b09364..77ae10563 100644 --- a/pkg/component/data/googledrive/v0/drive_service.go +++ b/pkg/component/internal/util/googledriveclient/googledriveclient.go @@ -1,4 +1,4 @@ -package googledrive +package googledriveclient import ( "encoding/base64" @@ -9,18 +9,22 @@ import ( "google.golang.org/api/drive/v3" ) +// IDriveService is an interface for interacting with Google Drive. type IDriveService interface { - readFile(fileUID string) (*file, error) - readFolder(folderUID string, readContent bool) ([]*file, error) + ReadFile(fileUID string) (*drive.File, *string, error) + ReadFolder(folderUID string, readContent bool) ([]*drive.File, []*string, error) } -type driveService struct { - service *drive.Service +// DriveService is a struct that implements IDriveService. +type DriveService struct { + // Service is the Google Drive service. + Service *drive.Service } -func (d *driveService) readFile(fileUID string) (*file, error) { +// ReadFile reads a file from Google Drive and get the file content passed as base64. +func (d *DriveService) ReadFile(fileUID string) (*drive.File, *string, error) { - srv := d.service + srv := d.Service driveFile, err := srv.Files.Get(fileUID). 
// We will need to confirm if we want to support all drives. @@ -30,24 +34,21 @@ func (d *driveService) readFile(fileUID string) (*file, error) { Do() if err != nil { - return nil, fmt.Errorf("fetch fetch metadata of file: %w", err) + return nil, nil, fmt.Errorf("fetch fetch metadata of file: %w", err) } - file := convertDriveFileToComponentFile(driveFile) - base64Content, err := readFileContent(srv, driveFile) if err != nil { - return nil, fmt.Errorf("read file content: %w", err) + return nil, nil, fmt.Errorf("read file content: %w", err) } - file.Content = base64Content - - return file, nil + return driveFile, &base64Content, nil } -func (d *driveService) readFolder(folderUID string, readContent bool) ([]*file, error) { - srv := d.service +// ReadFolder reads a folder from Google Drive and get the files in the folder. If readContent is true, the file content will be passed as base64. +func (d *DriveService) ReadFolder(folderUID string, readContent bool) ([]*drive.File, []*string, error) { + srv := d.Service q := fmt.Sprintf("'%s' in parents", folderUID) @@ -67,7 +68,7 @@ func (d *driveService) readFolder(folderUID string, readContent bool) ([]*file, Do() if err != nil { - return nil, fmt.Errorf("fetch metadata of files: %w", err) + return nil, nil, fmt.Errorf("fetch metadata of files: %w", err) } allFiles = append(allFiles, fileList.Files...) 
@@ -79,49 +80,30 @@ func (d *driveService) readFolder(folderUID string, readContent bool) ([]*file, } } - files := make([]*file, 0, len(allFiles)) + files := make([]*drive.File, 0, len(allFiles)) - for _, f := range allFiles { - file := convertDriveFileToComponentFile(f) + for i, f := range allFiles { + files[i] = f + } + + if !readContent { + return files, nil, nil + } - if readContent { - base64Content, err := readFileContent(srv, f) + contents := make([]*string, 0, len(allFiles)) - if err != nil { - return nil, fmt.Errorf("read file content: %w", err) - } + for i, f := range allFiles { + content, err := readFileContent(srv, f) - file.Content = base64Content + if err != nil { + return nil, nil, fmt.Errorf("read file content: %w", err) } - files = append(files, file) + contents[i] = &content } - return files, nil - -} - -func convertDriveFileToComponentFile(driveFile *drive.File) *file { - // Google Drive API only can support downloading the binary data. - // So, when the file is not binary, we need to export the file as PDF/CSV first. - // To make Google Drive Component can seamlessly work with other components, we need to add the file extension to the file name. - fileExtension := exportFileExtension(driveFile.MimeType) - if fileExtension != "" { - driveFile.Name = driveFile.Name + exportFileExtension(driveFile.MimeType) - } + return files, contents, nil - return &file{ - ID: driveFile.Id, - Name: driveFile.Name, - CreatedTime: driveFile.CreatedTime, - ModifiedTime: driveFile.ModifiedTime, - Size: driveFile.Size, - MimeType: driveFile.MimeType, - Md5Checksum: driveFile.Md5Checksum, - Version: driveFile.Version, - WebViewLink: driveFile.WebViewLink, - WebContentLink: driveFile.WebContentLink, - } } // Google Drive API only can support downloading the binary data. 
@@ -168,14 +150,3 @@ func exportFormat(file *drive.File) string { return "" } } - -func exportFileExtension(mimeType string) string { - switch mimeType { - case "application/vnd.google-apps.spreadsheet": - return ".csv" - case "application/vnd.google-apps.presentation", "application/vnd.google-apps.document": - return ".pdf" - default: - return "" - } -} From eb062d2ab4ce981431cc256bd801c68f5818f6e9 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 18:32:38 +0100 Subject: [PATCH 14/28] feat(googledrive): modify the interface --- pkg/component/data/googledrive/v0/main.go | 5 +- .../data/googledrive/v0/read_operation.go | 63 +++++++++++++++++-- 2 files changed, 61 insertions(+), 7 deletions(-) diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index db2774523..a1486185b 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -17,6 +17,7 @@ import ( "google.golang.org/protobuf/types/known/structpb" "github.com/instill-ai/pipeline-backend/pkg/component/base" + "github.com/instill-ai/pipeline-backend/pkg/component/internal/util/googledriveclient" "github.com/instill-ai/x/errmsg" ) @@ -48,7 +49,7 @@ type execution struct { base.ComponentExecution execute func(*structpb.Struct, *base.Job, context.Context) (*structpb.Struct, error) - service IDriveService + service googledriveclient.IDriveService } // Init returns an implementation of IComponent that interacts with Google Drive. 
@@ -78,7 +79,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, e := &execution{ ComponentExecution: x, - service: &driveService{service: drive}, + service: &googledriveclient.DriveService{Service: drive}, } switch x.Task { diff --git a/pkg/component/data/googledrive/v0/read_operation.go b/pkg/component/data/googledrive/v0/read_operation.go index 72e31955f..c01ae51e2 100644 --- a/pkg/component/data/googledrive/v0/read_operation.go +++ b/pkg/component/data/googledrive/v0/read_operation.go @@ -5,6 +5,7 @@ import ( "fmt" "strings" + "google.golang.org/api/drive/v3" "google.golang.org/protobuf/types/known/structpb" "github.com/instill-ai/pipeline-backend/pkg/component/base" @@ -48,12 +49,15 @@ func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context. return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) } - file, err := e.service.readFile(fileUID) + driveFile, content, err := e.service.ReadFile(fileUID) if err != nil { return nil, fmt.Errorf("read file from Google Drive: %w", err) } + file := convertDriveFileToComponentFile(driveFile) + file.Content = *content + output := readFileOutput{ File: *file, } @@ -91,12 +95,22 @@ func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx contex return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) } - files, err := e.service.readFolder(folderUID, inputStruct.ReadContent) + driveFiles, contents, err := e.service.ReadFolder(folderUID, inputStruct.ReadContent) if err != nil { return nil, fmt.Errorf("read folder from Google Drive: %w", err) } + files := make([]*file, len(driveFiles)) + + for i, driveFile := range driveFiles { + file := convertDriveFileToComponentFile(driveFile) + if inputStruct.ReadContent { + file.Content = *contents[i] + } + files[i] = file + } + output := readFolderOutput{ Files: files, } @@ -120,15 +134,13 @@ func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx contex // So, it means 
the Google Form, Google Map and other types of links are not supported func extractUIDFromSharedLink(driveLink string) (string, error) { patterns := map[string]string{ - "file": "/file/d/", "folder": "/drive/folders/", + "file": "/file/d/", "spreadsheet": "/spreadsheets/d/", "document": "/document/d/", "presentation": "/presentation/d/", - "colab": "colab.research.google.com/drive/", } - // Iterate over the patterns to find a match for _, pattern := range patterns { if strings.Contains(driveLink, pattern) { parts := strings.Split(driveLink, pattern) @@ -145,3 +157,44 @@ func extractUIDFromSharedLink(driveLink string) (string, error) { return "", fmt.Errorf("unrecognized Google Drive link format") } + +func convertDriveFileToComponentFile(driveFile *drive.File) *file { + // Google Drive API only can support downloading the binary data. + // So, when the file is not binary, we need to export the file as PDF/CSV first. + // To make Google Drive Component can seamlessly work with other components, we need to add the file extension to the file name. 
+ fileExtension := exportFileExtension(driveFile.MimeType) + if fileExtension != "" { + driveFile.Name = addFileExtension(driveFile.Name, fileExtension) + } + + return &file{ + ID: driveFile.Id, + Name: driveFile.Name, + CreatedTime: driveFile.CreatedTime, + ModifiedTime: driveFile.ModifiedTime, + Size: driveFile.Size, + MimeType: driveFile.MimeType, + Md5Checksum: driveFile.Md5Checksum, + Version: driveFile.Version, + WebViewLink: driveFile.WebViewLink, + WebContentLink: driveFile.WebContentLink, + } +} + +func exportFileExtension(mimeType string) string { + switch mimeType { + case "application/vnd.google-apps.spreadsheet": + return ".csv" + case "application/vnd.google-apps.presentation", "application/vnd.google-apps.document": + return ".pdf" + default: + return "" + } +} + +func addFileExtension(fileName, Extension string) string { + if !strings.HasSuffix(fileName, Extension) { + return fileName + Extension + } + return fileName +} From 3f83053f91118ff3290ebc3dfb3e07ee1748be15 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 18:37:14 +0100 Subject: [PATCH 15/28] feat(googledrive): add test code --- .../data/googledrive/v0/main_test.go | 502 ++++++++++++++++++ .../googledrive/v0/testdata/credentials.json | 13 + .../googledriveclient/googledriveclient.go | 10 +- 3 files changed, 517 insertions(+), 8 deletions(-) create mode 100644 pkg/component/data/googledrive/v0/main_test.go create mode 100644 pkg/component/data/googledrive/v0/testdata/credentials.json diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go new file mode 100644 index 000000000..7000a8f48 --- /dev/null +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -0,0 +1,502 @@ +package googledrive + +import ( + "context" + "encoding/base64" + "fmt" + "os" + "testing" + + qt "github.com/frankban/quicktest" + "github.com/gojuno/minimock/v3" + "google.golang.org/api/drive/v3" + "google.golang.org/protobuf/types/known/structpb" + + 
"github.com/instill-ai/pipeline-backend/pkg/component/base" + "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" +) + +var ( + fakeID = "1SwuLagPDCuk04_EIV1qj_pzSaVX3ddEA" + + sharedSheetLink = fmt.Sprintf("https://drive.google.com/file/d/%s/view?usp=drivesdk", fakeID) + webViewSheetLink = fmt.Sprintf("https://docs.google.com/spreadsheets/d/%s/edit?usp=sharing", fakeID) + webContentSheetLink = fmt.Sprintf("https://drive.google.com/uc?id=%s&export=download", fakeID) + + sharedDocLink = fmt.Sprintf("https://docs.google.com/document/d/%s/edit?usp=drivesdk", fakeID) + webViewDocLink = fmt.Sprintf("https://docs.google.com/document/d/%s/edit?usp=drivesdk", fakeID) + webContentDocLink = "" + + sharedSlideLink = fmt.Sprintf("https://docs.google.com/presentation/d/%s/edit?usp=drivesdk", fakeID) + webViewSlideLink = fmt.Sprintf("https://docs.google.com/presentation/d/%s/edit?usp=drivesdk", fakeID) + webContentSlideLink = "" + + sharedFileLink = fmt.Sprintf("https://drive.google.com/file/d/%s/view?usp=drivesdk", fakeID) + webViewFileLink = fmt.Sprintf("https://drive.google.com/file/d/%s/view?usp=drivesdk", fakeID) + webContentFileLink = fmt.Sprintf("https://drive.google.com/uc?id=%s&export=download", fakeID) +) + +func Test_Execute_ReadFile(t *testing.T) { + + c := qt.New(t) + mc := minimock.NewController(c) + + ctx := context.Background() + + testcases := []struct { + name string + + in map[string]any + fakeDriveFile *drive.File + want map[string]any + }{ + { + name: "ok - read CSV file with file extension", + in: map[string]any{ + "shared-link": sharedSheetLink, + }, + fakeDriveFile: &drive.File{ + Id: fakeID, + Name: "testdata.csv", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, + }, + want: map[string]any{ + "file": 
map[string]any{ + "id": fakeID, + "name": "testdata.csv", + "content": "fake content", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": "2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "application/vnd.google-apps.spreadsheet", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewSheetLink, + "web-content-link": webContentSheetLink, + }, + }, + }, + { + name: "ok - read CSV file without file extension", + in: map[string]any{ + "shared-link": sharedSheetLink, + }, + fakeDriveFile: &drive.File{ + Id: fakeID, + Name: "testdata", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, + }, + want: map[string]any{ + "file": map[string]any{ + "id": fakeID, + "name": "testdata.csv", + "content": "fake content", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": "2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "application/vnd.google-apps.spreadsheet", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewSheetLink, + "web-content-link": webContentSheetLink, + }, + }, + }, + { + name: "ok - read file Google doc file", + in: map[string]any{ + "shared-link": sharedDocLink, + }, + fakeDriveFile: &drive.File{ + Id: fakeID, + Name: "testdata", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.document", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewDocLink, + WebContentLink: webContentDocLink, + }, + want: map[string]any{ + "file": map[string]any{ + "id": fakeID, + "name": "testdata.pdf", + "content": "fake content", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": 
"2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "application/vnd.google-apps.document", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewDocLink, + }, + }, + }, + { + name: "ok - read file Google slide file", + in: map[string]any{ + "shared-link": sharedSlideLink, + }, + fakeDriveFile: &drive.File{ + Id: fakeID, + Name: "testdata", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.presentation", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSlideLink, + WebContentLink: webContentSlideLink, + }, + want: map[string]any{ + "file": map[string]any{ + "id": fakeID, + "name": "testdata.pdf", + "content": "fake content", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": "2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "application/vnd.google-apps.presentation", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewSlideLink, + }, + }, + }, + { + name: "ok - read file", + in: map[string]any{ + "shared-link": sharedFileLink, + }, + fakeDriveFile: &drive.File{ + Id: fakeID, + Name: "testdata.png", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "image/jpeg", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewFileLink, + WebContentLink: webContentFileLink, + }, + want: map[string]any{ + "file": map[string]any{ + "id": fakeID, + "name": "testdata.png", + "content": "fake content", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": "2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "image/jpeg", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewFileLink, + "web-content-link": webContentFileLink, + }, + }, + }, + } + + bc := base.Component{} + component := Init(bc) + + b, err 
:= os.ReadFile("testdata/credentials.json") + + c.Assert(err, qt.IsNil) + + secrets := map[string]interface{}{ + "oauthcredentials": base64.StdEncoding.EncodeToString(b), + } + + component = component.WithOAuthCredentials(secrets) + + setup := map[string]any{ + "refresh-token": "fake-refresh-token", + } + + setupStruct, err := structpb.NewStruct(setup) + + c.Assert(err, qt.IsNil) + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + exec, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskReadFile, + Setup: setupStruct, + }) + + c.Assert(err, qt.IsNil) + + mockDriveService := mock.NewIDriveServiceMock(mc) + exec.(*execution).service = mockDriveService + + fakeDriveFile := tc.fakeDriveFile + fakeContent := "fake content" + + mockDriveService.ReadFileMock. + Expect(fakeID). + Return(fakeDriveFile, &fakeContent, nil) + + pbIn, err := structpb.NewStruct(tc.in) + c.Assert(err, qt.IsNil) + + ir, ow, eh, job := mock.GenerateMockJob(c) + ir.ReadMock.Return(pbIn, nil) + + ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { + gotJSON, err := output.MarshalJSON() + + c.Check(err, qt.IsNil) + c.Check(gotJSON, qt.JSONEquals, tc.want) + + return nil + }) + + eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { + c.Check(err, qt.IsNil) + }) + + err = exec.Execute(ctx, []*base.Job{job}) + c.Check(err, qt.IsNil) + + }) + + } +} + +func Test_Execute_ReadFolder(t *testing.T) { + c := qt.New(t) + mc := minimock.NewController(c) + + ctx := context.Background() + + testcases := []struct { + name string + + in map[string]any + fakeDriveFiles []*drive.File + fakeContents []*string + want map[string]any + }{ + { + name: "ok - read folder with content", + in: map[string]any{ + "shared-link": sharedSheetLink, + "read-content": true, + }, + fakeDriveFiles: []*drive.File{ + { + Id: fakeID, + Name: "testdata.csv", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: 
"2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, + }, + }, + fakeContents: []*string{ + stringPointer("fake content"), + }, + want: map[string]any{ + "files": []map[string]any{ + { + "id": fakeID, + "name": "testdata.csv", + "content": "fake content", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": "2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "application/vnd.google-apps.spreadsheet", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewSheetLink, + "web-content-link": webContentSheetLink, + }, + }, + }, + }, + { + name: "ok - read folder without content", + in: map[string]any{ + "shared-link": sharedSheetLink, + "read-content": false, + }, + fakeDriveFiles: []*drive.File{ + { + Id: fakeID, + Name: "testdata.csv", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, + }, + }, + fakeContents: nil, + want: map[string]any{ + "files": []map[string]any{ + { + "id": fakeID, + "name": "testdata.csv", + "content": "", + "created-time": "2021-08-09T20:25:02.312Z", + "modified-time": "2021-09-17T16:58:37.924Z", + "size": 0, + "mime-type": "application/vnd.google-apps.spreadsheet", + "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", + "version": 0, + "web-view-link": webViewSheetLink, + "web-content-link": webContentSheetLink, + }, + }, + }, + }, + } + + bc := base.Component{} + component := Init(bc) + + b, err := os.ReadFile("testdata/credentials.json") + + c.Assert(err, qt.IsNil) + + secrets := map[string]interface{}{ + "oauthcredentials": base64.StdEncoding.EncodeToString(b), + } + + 
component = component.WithOAuthCredentials(secrets) + + setup := map[string]any{ + "refresh-token": "fake-refresh-token", + } + + setupStruct, err := structpb.NewStruct(setup) + + c.Assert(err, qt.IsNil) + + for _, tc := range testcases { + c.Run(tc.name, func(c *qt.C) { + + exec, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: taskReadFolder, + Setup: setupStruct, + }) + + c.Assert(err, qt.IsNil) + + mockDriveService := mock.NewIDriveServiceMock(mc) + exec.(*execution).service = mockDriveService + + fakeDriveFiles := tc.fakeDriveFiles + fakeContents := tc.fakeContents + + readContent := tc.in["read-content"].(bool) + + mockDriveService.ReadFolderMock. + Expect(fakeID, readContent). + Return(fakeDriveFiles, fakeContents, nil) + + pbIn, err := structpb.NewStruct(tc.in) + c.Assert(err, qt.IsNil) + + ir, ow, eh, job := mock.GenerateMockJob(c) + ir.ReadMock.Return(pbIn, nil) + + ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { + gotJSON, err := output.MarshalJSON() + + c.Check(err, qt.IsNil) + c.Check(gotJSON, qt.JSONEquals, tc.want) + + return nil + }) + + eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { + c.Check(err, qt.IsNil) + }) + + err = exec.Execute(ctx, []*base.Job{job}) + c.Check(err, qt.IsNil) + }) + } +} + +func Test_CreateExecution(t *testing.T) { + c := qt.New(t) + + testcase := struct { + name string + + task string + wantErr string + }{ + name: "nok - unsupported task", + task: "FOOBAR", + + wantErr: "not supported task: FOOBAR", + } + + bc := base.Component{} + component := Init(bc) + + b, err := os.ReadFile("testdata/credentials.json") + + c.Assert(err, qt.IsNil) + + secrets := map[string]interface{}{ + "oauthcredentials": base64.StdEncoding.EncodeToString(b), + } + + component = component.WithOAuthCredentials(secrets) + + setup := map[string]any{ + "refresh-token": "fake-refresh-token", + } + + setupStruct, err := structpb.NewStruct(setup) + + 
c.Assert(err, qt.IsNil) + + c.Run(testcase.name, func(c *qt.C) { + + _, err := component.CreateExecution(base.ComponentExecution{ + Component: component, + Task: testcase.task, + Setup: setupStruct, + }) + + c.Check(err, qt.ErrorMatches, testcase.wantErr) + }) + +} + +func stringPointer(s string) *string { + return &s +} diff --git a/pkg/component/data/googledrive/v0/testdata/credentials.json b/pkg/component/data/googledrive/v0/testdata/credentials.json new file mode 100644 index 000000000..29ba5d448 --- /dev/null +++ b/pkg/component/data/googledrive/v0/testdata/credentials.json @@ -0,0 +1,13 @@ +{ + "installed": { + "client_id": "fake_client_id", + "project_id": "fake_pjt_id", + "auth_uri": "https://accounts.google.com/o/oauth2/auth", + "token_uri": "https://oauth2.googleapis.com/token", + "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", + "client_secret": "fake_secret", + "redirect_uris": [ + "http://localhost" + ] + } + } \ No newline at end of file diff --git a/pkg/component/internal/util/googledriveclient/googledriveclient.go b/pkg/component/internal/util/googledriveclient/googledriveclient.go index 77ae10563..1c209f053 100644 --- a/pkg/component/internal/util/googledriveclient/googledriveclient.go +++ b/pkg/component/internal/util/googledriveclient/googledriveclient.go @@ -80,14 +80,8 @@ func (d *DriveService) ReadFolder(folderUID string, readContent bool) ([]*drive. } } - files := make([]*drive.File, 0, len(allFiles)) - - for i, f := range allFiles { - files[i] = f - } - if !readContent { - return files, nil, nil + return allFiles, nil, nil } contents := make([]*string, 0, len(allFiles)) @@ -102,7 +96,7 @@ func (d *DriveService) ReadFolder(folderUID string, readContent bool) ([]*drive. 
contents[i] = &content } - return files, contents, nil + return allFiles, contents, nil } From bdd8ae28c734f410eeb085cece35eed9293c6bc0 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 19:12:16 +0100 Subject: [PATCH 16/28] chore: add comments to exported functions --- pkg/component/base/executionwrapper.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/component/base/executionwrapper.go b/pkg/component/base/executionwrapper.go index 39532547d..d16a8b1f7 100644 --- a/pkg/component/base/executionwrapper.go +++ b/pkg/component/base/executionwrapper.go @@ -178,7 +178,8 @@ func SequentialExecutor(ctx context.Context, jobs []*Job, execute func(*structpb return nil } -func ConcurrentExecutor(ctx context.Context, jobs []*Job, execute func(context.Context, *Job) error) error { +// ConcurrentExecutor executes the jobs concurrently. +func ConcurrentExecutor(ctx context.Context, jobs []*Job, execute func(*structpb.Struct, *Job, context.Context) (*structpb.Struct, error)) error { var wg sync.WaitGroup wg.Add(len(jobs)) for _, job := range jobs { From f9238c65d699b59f555ea0a39e9e8ed66c3d76c7 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 19:12:48 +0100 Subject: [PATCH 17/28] fix(googledrive): fix the bug about panic --- .../internal/util/googledriveclient/googledriveclient.go | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/pkg/component/internal/util/googledriveclient/googledriveclient.go b/pkg/component/internal/util/googledriveclient/googledriveclient.go index 1c209f053..6d7b3df5c 100644 --- a/pkg/component/internal/util/googledriveclient/googledriveclient.go +++ b/pkg/component/internal/util/googledriveclient/googledriveclient.go @@ -86,14 +86,13 @@ func (d *DriveService) ReadFolder(folderUID string, readContent bool) ([]*drive. 
contents := make([]*string, 0, len(allFiles)) - for i, f := range allFiles { + for _, f := range allFiles { content, err := readFileContent(srv, f) if err != nil { return nil, nil, fmt.Errorf("read file content: %w", err) } - - contents[i] = &content + contents = append(contents, &content) } return allFiles, contents, nil From 7f22a5946641b9a95157f8e838091176face1838 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 19:13:53 +0100 Subject: [PATCH 18/28] feat(googledrive): add error cases and test code --- .../data/googledrive/v0/main_test.go | 51 +++++++++++++++---- .../data/googledrive/v0/read_operation.go | 12 +++++ 2 files changed, 53 insertions(+), 10 deletions(-) diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go index 7000a8f48..6ca730090 100644 --- a/pkg/component/data/googledrive/v0/main_test.go +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -34,6 +34,8 @@ var ( sharedFileLink = fmt.Sprintf("https://drive.google.com/file/d/%s/view?usp=drivesdk", fakeID) webViewFileLink = fmt.Sprintf("https://drive.google.com/file/d/%s/view?usp=drivesdk", fakeID) webContentFileLink = fmt.Sprintf("https://drive.google.com/uc?id=%s&export=download", fakeID) + + sharedFolderLink = fmt.Sprintf("https://drive.google.com/drive/folders/%s?usp=drive_link", fakeID) ) func Test_Execute_ReadFile(t *testing.T) { @@ -49,6 +51,7 @@ func Test_Execute_ReadFile(t *testing.T) { in map[string]any fakeDriveFile *drive.File want map[string]any + wantErr string }{ { name: "ok - read CSV file with file extension", @@ -213,6 +216,13 @@ func Test_Execute_ReadFile(t *testing.T) { }, }, }, + { + name: "nok - read file with invalid shared link", + in: map[string]any{ + "shared-link": sharedFolderLink, + }, + wantErr: "the input link is a folder link, please use the read-folder operation", + }, } bc := base.Component{} @@ -252,9 +262,11 @@ func Test_Execute_ReadFile(t *testing.T) { fakeDriveFile := tc.fakeDriveFile 
fakeContent := "fake content" - mockDriveService.ReadFileMock. - Expect(fakeID). - Return(fakeDriveFile, &fakeContent, nil) + if tc.wantErr == "" { + mockDriveService.ReadFileMock. + Expect(fakeID). + Return(fakeDriveFile, &fakeContent, nil) + } pbIn, err := structpb.NewStruct(tc.in) c.Assert(err, qt.IsNil) @@ -272,7 +284,11 @@ func Test_Execute_ReadFile(t *testing.T) { }) eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { - c.Check(err, qt.IsNil) + if tc.wantErr != "" { + c.Check(err, qt.ErrorMatches, tc.wantErr) + } else { + c.Check(err, qt.IsNil) + } }) err = exec.Execute(ctx, []*base.Job{job}) @@ -296,11 +312,12 @@ func Test_Execute_ReadFolder(t *testing.T) { fakeDriveFiles []*drive.File fakeContents []*string want map[string]any + wantErr string }{ { name: "ok - read folder with content", in: map[string]any{ - "shared-link": sharedSheetLink, + "shared-link": sharedFolderLink, "read-content": true, }, fakeDriveFiles: []*drive.File{ @@ -341,7 +358,7 @@ func Test_Execute_ReadFolder(t *testing.T) { { name: "ok - read folder without content", in: map[string]any{ - "shared-link": sharedSheetLink, + "shared-link": sharedFolderLink, "read-content": false, }, fakeDriveFiles: []*drive.File{ @@ -377,6 +394,14 @@ func Test_Execute_ReadFolder(t *testing.T) { }, }, }, + { + name: "nok - read file", + in: map[string]any{ + "shared-link": sharedSheetLink, + "read-content": false, + }, + wantErr: "the input link is not a folder link, please check the link", + }, } bc := base.Component{} @@ -419,9 +444,11 @@ func Test_Execute_ReadFolder(t *testing.T) { readContent := tc.in["read-content"].(bool) - mockDriveService.ReadFolderMock. - Expect(fakeID, readContent). - Return(fakeDriveFiles, fakeContents, nil) + if tc.wantErr == "" { + mockDriveService.ReadFolderMock. + Expect(fakeID, readContent). 
+ Return(fakeDriveFiles, fakeContents, nil) + } pbIn, err := structpb.NewStruct(tc.in) c.Assert(err, qt.IsNil) @@ -439,7 +466,11 @@ func Test_Execute_ReadFolder(t *testing.T) { }) eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { - c.Check(err, qt.IsNil) + if tc.wantErr != "" { + c.Check(err, qt.ErrorMatches, tc.wantErr) + } else { + c.Check(err, qt.IsNil) + } }) err = exec.Execute(ctx, []*base.Job{job}) diff --git a/pkg/component/data/googledrive/v0/read_operation.go b/pkg/component/data/googledrive/v0/read_operation.go index c01ae51e2..7719f4255 100644 --- a/pkg/component/data/googledrive/v0/read_operation.go +++ b/pkg/component/data/googledrive/v0/read_operation.go @@ -43,6 +43,10 @@ func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context. return nil, fmt.Errorf("convert input to struct: %w", err) } + if isFolder(inputStruct.SharedLink) { + return nil, fmt.Errorf("the input link is a folder link, please use the read-folder operation") + } + fileUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) if err != nil { @@ -89,6 +93,10 @@ func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx contex return nil, fmt.Errorf("convert input to struct: %w", err) } + if !isFolder(inputStruct.SharedLink) { + return nil, fmt.Errorf("the input link is not a folder link, please check the link") + } + folderUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) if err != nil { @@ -158,6 +166,10 @@ func extractUIDFromSharedLink(driveLink string) (string, error) { return "", fmt.Errorf("unrecognized Google Drive link format") } +func isFolder(link string) bool { + return strings.Contains(link, "/drive/folders/") +} + func convertDriveFileToComponentFile(driveFile *drive.File) *file { // Google Drive API only can support downloading the binary data. // So, when the file is not binary, we need to export the file as PDF/CSV first. 
From 8b1dc85380ef57f7ac91cf47030ea10b9f8fbfc2 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 22 Oct 2024 19:14:18 +0100 Subject: [PATCH 19/28] chore: arrange pkg --- pkg/component/data/googledrive/v0/main_test.go | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go index 6ca730090..fdb2aa4b2 100644 --- a/pkg/component/data/googledrive/v0/main_test.go +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -7,11 +7,12 @@ import ( "os" "testing" - qt "github.com/frankban/quicktest" "github.com/gojuno/minimock/v3" "google.golang.org/api/drive/v3" "google.golang.org/protobuf/types/known/structpb" + qt "github.com/frankban/quicktest" + "github.com/instill-ai/pipeline-backend/pkg/component/base" "github.com/instill-ai/pipeline-backend/pkg/component/internal/mock" ) From f9e77078ec42d1bb24422b3e4426021d4d7754e5 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 29 Oct 2024 15:03:00 +0000 Subject: [PATCH 20/28] chore: move google drive client under the googledrive pkg --- .../googledrive/v0/client/client.go} | 2 +- pkg/component/data/googledrive/v0/main.go | 6 +++--- pkg/component/data/googledrive/v0/main_test.go | 2 ++ pkg/component/internal/mock/generator.go | 2 +- pkg/component/internal/mock/i_drive_service_mock.gen.go | 8 ++++---- 5 files changed, 11 insertions(+), 9 deletions(-) rename pkg/component/{internal/util/googledriveclient/googledriveclient.go => data/googledrive/v0/client/client.go} (99%) diff --git a/pkg/component/internal/util/googledriveclient/googledriveclient.go b/pkg/component/data/googledrive/v0/client/client.go similarity index 99% rename from pkg/component/internal/util/googledriveclient/googledriveclient.go rename to pkg/component/data/googledrive/v0/client/client.go index 6d7b3df5c..6ce722627 100644 --- a/pkg/component/internal/util/googledriveclient/googledriveclient.go +++ b/pkg/component/data/googledrive/v0/client/client.go @@ 
-1,4 +1,4 @@ -package googledriveclient +package client import ( "encoding/base64" diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index a1486185b..f8f060f7d 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -17,7 +17,7 @@ import ( "google.golang.org/protobuf/types/known/structpb" "github.com/instill-ai/pipeline-backend/pkg/component/base" - "github.com/instill-ai/pipeline-backend/pkg/component/internal/util/googledriveclient" + "github.com/instill-ai/pipeline-backend/pkg/component/data/googledrive/v0/client" "github.com/instill-ai/x/errmsg" ) @@ -49,7 +49,7 @@ type execution struct { base.ComponentExecution execute func(*structpb.Struct, *base.Job, context.Context) (*structpb.Struct, error) - service googledriveclient.IDriveService + service client.IDriveService } // Init returns an implementation of IComponent that interacts with Google Drive. @@ -79,7 +79,7 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, e := &execution{ ComponentExecution: x, - service: &googledriveclient.DriveService{Service: drive}, + service: &client.DriveService{Service: drive}, } switch x.Task { diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go index fdb2aa4b2..3e1832d49 100644 --- a/pkg/component/data/googledrive/v0/main_test.go +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -39,6 +39,8 @@ var ( sharedFolderLink = fmt.Sprintf("https://drive.google.com/drive/folders/%s?usp=drive_link", fakeID) ) +// We will unify the unit tests for the component development. 
+ func Test_Execute_ReadFile(t *testing.T) { c := qt.New(t) diff --git a/pkg/component/internal/mock/generator.go b/pkg/component/internal/mock/generator.go index fa0036772..efd778c5e 100644 --- a/pkg/component/internal/mock/generator.go +++ b/pkg/component/internal/mock/generator.go @@ -7,7 +7,7 @@ package mock //go:generate minimock -g -i github.com/instill-ai/pipeline-backend/pkg/component/operator/document/v0.commandRunner -o ./ -s "_mock.gen.go" //go:generate minimock -g -i io.WriteCloser -o ./ -s "_mock.gen.go" //go:generate minimock -g -i github.com/instill-ai/protogen-go/artifact/artifact/v1alpha.ArtifactPublicServiceClient -o ./ -s "_mock.gen.go" -//go:generate minimock -g -i github.com/instill-ai/pipeline-backend/pkg/component/internal/util/googledriveclient.IDriveService -o ./ -s "_mock.gen.go" +//go:generate minimock -g -i github.com/instill-ai/pipeline-backend/pkg/component/data/googledrive/v0/client.IDriveService -o ./ -s "_mock.gen.go" // Ollama mock is generated in the source package to avoid import cycles. 
//go:generate minimock -i github.com/instill-ai/pipeline-backend/pkg/component/ai/ollama/v0.OllamaClientInterface -o ../../ai/ollama/v0 -s "_mock.gen.go" -p ollama diff --git a/pkg/component/internal/mock/i_drive_service_mock.gen.go b/pkg/component/internal/mock/i_drive_service_mock.gen.go index 239159bc1..f0439cf6b 100644 --- a/pkg/component/internal/mock/i_drive_service_mock.gen.go +++ b/pkg/component/internal/mock/i_drive_service_mock.gen.go @@ -11,7 +11,7 @@ import ( "google.golang.org/api/drive/v3" ) -// IDriveServiceMock implements mm_googledriveclient.IDriveService +// IDriveServiceMock implements mm_client.IDriveService type IDriveServiceMock struct { t minimock.Tester finishOnce sync.Once @@ -31,7 +31,7 @@ type IDriveServiceMock struct { ReadFolderMock mIDriveServiceMockReadFolder } -// NewIDriveServiceMock returns a mock for mm_googledriveclient.IDriveService +// NewIDriveServiceMock returns a mock for mm_client.IDriveService func NewIDriveServiceMock(t minimock.Tester) *IDriveServiceMock { m := &IDriveServiceMock{t: t} @@ -238,7 +238,7 @@ func (mmReadFile *mIDriveServiceMockReadFile) invocationsDone() bool { return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) } -// ReadFile implements mm_googledriveclient.IDriveService +// ReadFile implements mm_client.IDriveService func (mmReadFile *IDriveServiceMock) ReadFile(fileUID string) (fp1 *drive.File, sp1 *string, err error) { mm_atomic.AddUint64(&mmReadFile.beforeReadFileCounter, 1) defer mm_atomic.AddUint64(&mmReadFile.afterReadFileCounter, 1) @@ -577,7 +577,7 @@ func (mmReadFolder *mIDriveServiceMockReadFolder) invocationsDone() bool { return totalInvocations > 0 && (expectedInvocations == 0 || expectedInvocations == totalInvocations) } -// ReadFolder implements mm_googledriveclient.IDriveService +// ReadFolder implements mm_client.IDriveService func (mmReadFolder *IDriveServiceMock) ReadFolder(folderUID string, readContent bool) (fpa1 []*drive.File, spa1 
[]*string, err error) { mm_atomic.AddUint64(&mmReadFolder.beforeReadFolderCounter, 1) defer mm_atomic.AddUint64(&mmReadFolder.afterReadFolderCounter, 1) From d017c477c08fad518d61418d647f1fccf3603bc0 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 29 Oct 2024 15:03:30 +0000 Subject: [PATCH 21/28] chore: update contribution guideline for unit test chore: update contribution guideline for unit test --- pkg/component/CONTRIBUTING.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/pkg/component/CONTRIBUTING.md b/pkg/component/CONTRIBUTING.md index 9ad24ad10..c571ed71d 100644 --- a/pkg/component/CONTRIBUTING.md +++ b/pkg/component/CONTRIBUTING.md @@ -537,6 +537,15 @@ func TestOperator_CreateExecution(t *testing.T) { } ``` + +In our testing methodology, we use two main approaches for mocking external services: + +1. In some components, we mock only the interface and skip testing the actual client, as with services like Slack and HubSpot. +2. In other components, we create a fake server for test purposes, as with OpenAI integrations. + +Moving forward, we plan to standardize on the second approach, integrating all components to use a fake server setup for testing. 
+ + ### Initialize the component The last step before being able to use the component in **💧 Instill VDP** is From e427e737f8c8c0af1ac3138d6c8496c1bef05eca Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 29 Oct 2024 15:06:38 +0000 Subject: [PATCH 22/28] chore: update testing comment --- pkg/component/data/googledrive/v0/main_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go index 3e1832d49..4ae236f36 100644 --- a/pkg/component/data/googledrive/v0/main_test.go +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -39,7 +39,7 @@ var ( sharedFolderLink = fmt.Sprintf("https://drive.google.com/drive/folders/%s?usp=drive_link", fakeID) ) -// We will unify the unit tests for the component development. +// To integrate the unit test methodology, we will fake the http server to return the expected response rather than mocking the interface. func Test_Execute_ReadFile(t *testing.T) { From 8b7c7d2c984d93d5b2f4407722a21008e729c994 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 29 Oct 2024 15:33:45 +0000 Subject: [PATCH 23/28] chore: update injecting credentials --- pkg/component/data/googledrive/v0/main.go | 50 +++++++++-------------- 1 file changed, 19 insertions(+), 31 deletions(-) diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index f8f060f7d..8bf715e43 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -3,15 +3,12 @@ package googledrive import ( "context" - "encoding/base64" - "encoding/json" "fmt" "sync" _ "embed" "golang.org/x/oauth2" - "golang.org/x/oauth2/google" "google.golang.org/api/drive/v3" "google.golang.org/api/option" "google.golang.org/protobuf/types/known/structpb" @@ -22,9 +19,13 @@ import ( ) const ( - taskReadFile = "TASK_READ_FILE" - taskReadFolder = "TASK_READ_FOLDER" - cfgOAuthCredential = "oauth-credentials" + 
taskReadFile = "TASK_READ_FILE" + taskReadFolder = "TASK_READ_FOLDER" + cfgOAuthClientID = "client-id" + cfgOAuthClientSecret = "client-secret" + + authURL = "https://accounts.google.com/o/oauth2/auth" + tokenURL = "https://oauth2.googleapis.com/token" ) var ( @@ -41,8 +42,9 @@ var ( type component struct { base.Component - // The JSON string of OAuth credentials encoded by base64. - instillAICredentials string + + instillAIClientID string + instillAIClientSecret string } type execution struct { @@ -97,17 +99,13 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, } func getDriveService(ctx context.Context, setup *structpb.Struct, c *component) (*drive.Service, error) { - - decodedBytes, err := base64.StdEncoding.DecodeString(c.instillAICredentials) - - if err != nil { - return nil, fmt.Errorf("failed to decode Instill AI credentials: %w", err) - } - - config, err := google.ConfigFromJSON(decodedBytes, getConfigScopes()...) - - if err != nil { - return nil, fmt.Errorf("failed to get Google config from JSON: %w", err) + config := &oauth2.Config{ + ClientID: c.instillAIClientID, + ClientSecret: c.instillAIClientSecret, + Endpoint: oauth2.Endpoint{ + AuthURL: authURL, + TokenURL: tokenURL, + }, } refreshToken := setup.GetFields()["refresh-token"].GetStringValue() @@ -127,17 +125,6 @@ func getDriveService(ctx context.Context, setup *structpb.Struct, c *component) return srv, nil } -func getConfigScopes() []string { - type setupConfig struct { - InstillOAuthConfig struct { - Scopes []string `json:"scopes"` - } `json:"instillOAuthConfig"` - } - var setup setupConfig - _ = json.Unmarshal(setupJSON, &setup) - return setup.InstillOAuthConfig.Scopes -} - // Execute reads the input from the job, executes the task, and writes the output // to the job. 
func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { @@ -146,6 +133,7 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { // WithOAuthCredentials sets the OAuth credentials for the component. func (c *component) WithOAuthCredentials(s map[string]any) *component { - c.instillAICredentials = base.ReadFromGlobalConfig(cfgOAuthCredential, s) + c.instillAIClientID = base.ReadFromGlobalConfig(cfgOAuthClientID, s) + c.instillAIClientSecret = base.ReadFromGlobalConfig(cfgOAuthClientSecret, s) return c } From dfb7d13368d5a380131483ec08d6d719bb350802 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 29 Oct 2024 15:37:06 +0000 Subject: [PATCH 24/28] fix: update loading credentials --- pkg/component/store/store.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pkg/component/store/store.go b/pkg/component/store/store.go index 5122acae9..f1da3c657 100644 --- a/pkg/component/store/store.go +++ b/pkg/component/store/store.go @@ -174,13 +174,7 @@ func Init( { conn := googledrive.Init(baseComp) - conn = conn.WithOAuthCredentials(secrets["google"]) - compStore.Import(conn) - } - - { - conn := googledrive.Init(baseComp) - conn = conn.WithOAuthCredentials(secrets["google"]) + conn = conn.WithOAuthCredentials(secrets["googledrive"]) compStore.Import(conn) } From 4371f633fd55cada0ee45e2674c1b6080fc807e2 Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Tue, 29 Oct 2024 19:02:49 +0000 Subject: [PATCH 25/28] chore: clean the googledrive pkg --- pkg/component/data/googledrive/v0/README.mdx | 22 +- .../googledrive/v0/config/definition.json | 2 +- .../data/googledrive/v0/config/tasks.json | 10 +- pkg/component/data/googledrive/v0/io.go | 34 +++ pkg/component/data/googledrive/v0/main.go | 80 +++++++ .../data/googledrive/v0/main_test.go | 23 +- .../data/googledrive/v0/read_operation.go | 212 ------------------ .../data/googledrive/v0/task_read_file.go | 52 +++++ .../data/googledrive/v0/task_read_folder.go | 58 +++++ 
.../googledrive/v0/testdata/credentials.json | 13 -- 10 files changed, 247 insertions(+), 259 deletions(-) create mode 100644 pkg/component/data/googledrive/v0/io.go delete mode 100644 pkg/component/data/googledrive/v0/read_operation.go create mode 100644 pkg/component/data/googledrive/v0/task_read_file.go create mode 100644 pkg/component/data/googledrive/v0/task_read_folder.go delete mode 100644 pkg/component/data/googledrive/v0/testdata/credentials.json diff --git a/pkg/component/data/googledrive/v0/README.mdx b/pkg/component/data/googledrive/v0/README.mdx index 41bc022d4..49c1135f3 100644 --- a/pkg/component/data/googledrive/v0/README.mdx +++ b/pkg/component/data/googledrive/v0/README.mdx @@ -5,7 +5,7 @@ draft: false description: "Learn about how to set up a VDP Google Drive component https://github.com/instill-ai/instill-core" --- -The Google Drive component is a data component that allows users to google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files.. +The Google Drive component is a data component that allows users to google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files. It can carry out the following tasks: - [Read File](#read-file) - [Read Folder](#read-folder) @@ -82,14 +82,14 @@ Read a file content and metadata from Google Drive. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | | Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is not a Google file, the content will be the same as the original file. | -| Created time | `created-time` | string | Time when the file was created. 
Format: `YYYY-MM-DDTHH:MM:SSZ` | -| ID | `id` | string | ID of the file. | +| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ`. | +| ID | `id` | string | Unique ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | -| MIME type | `mime-type` | string | MIME type of the file. | -| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| MIME type | `mime-type` | string | MIME type of the file. For example, `application/pdf`, `text/csv`, `image/jpeg`, etc. | +| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ`. | | Name | `name` | string | Name of the file. The file extension will be added automatically based on the exported MIME type. For example, Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is a Google Sheet and the name is `MySheet`, the exported file will be `MySheet.csv`. If the file is not a Google file, the name will be used as is. | | Size | `size` | integer | Size of the file in bytes. | -| Version | `version` | integer | Version of the file. | +| Version | `version` | integer | Version of the file in Google Drive. | | Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | | Web View Link | `web-view-link` | string | Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link. | @@ -130,14 +130,14 @@ Read metadata and content of files under the specified folder in Google Drive. | Field | Field ID | Type | Note | | :--- | :--- | :--- | :--- | | Content | `content` | string | Base64 encoded content of the binary file without the `data:[MIME_TYPE];base64,` prefix. 
Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is not a Google file, the content will be the same as the original file. | -| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ` | -| ID | `id` | string | ID of the file. | +| Created time | `created-time` | string | Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ`. | +| ID | `id` | string | Unique ID of the file. | | MD5 checksum | `md5-checksum` | string | MD5 checksum of the file. This reflects every change made to the file on the server, even those not visible to the user. | -| MIME type | `mime-type` | string | MIME type of the file. | -| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ` | +| MIME type | `mime-type` | string | MIME type of the file. For example, `application/pdf`, `text/csv`, `image/jpeg`, etc. | +| Modified time | `modified-time` | string | Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ`. | | Name | `name` | string | Name of the file. The file extension will be added automatically based on the exported MIME type. For example, Google Sheets will be exported as CSV, Google Docs as PDF, and Google Slides as PDF. If the file is a Google Sheet and the name is `MySheet`, the exported file will be `MySheet.csv`. If the file is not a Google file, the name will be used as is. | | Size | `size` | integer | Size of the file in bytes. | -| Version | `version` | integer | Version of the file. | +| Version | `version` | integer | Version of the file in Google Drive. | | Web Content Link | `web-content-link` | string | Link for downloading the content of the file in a browser. | | Web View Link | `web-view-link` | string | Link for opening the file in a relevant Google editor or viewer in a browser. Usually, web view link is same as shared link. 
| diff --git a/pkg/component/data/googledrive/v0/config/definition.json b/pkg/component/data/googledrive/v0/config/definition.json index 1e92c3002..c0e55d34d 100644 --- a/pkg/component/data/googledrive/v0/config/definition.json +++ b/pkg/component/data/googledrive/v0/config/definition.json @@ -9,7 +9,7 @@ "id": "google-drive", "public": true, "title": "Google Drive", - "description": "Google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files.", + "description": "Google Drive is a file storage and synchronization service developed by Google. It allows users to store files in the cloud, synchronize files across devices, and share files", "tombstone": false, "type": "COMPONENT_TYPE_DATA", "uid": "cd220d2d-3d19-468e-8b95-37dd6a57c15f", diff --git a/pkg/component/data/googledrive/v0/config/tasks.json b/pkg/component/data/googledrive/v0/config/tasks.json index 965f0a66d..eaab95594 100644 --- a/pkg/component/data/googledrive/v0/config/tasks.json +++ b/pkg/component/data/googledrive/v0/config/tasks.json @@ -5,7 +5,7 @@ "instillUIOrder": 0, "properties": { "id": { - "description": "ID of the file.", + "description": "Unique ID of the file.", "instillFormat": "string", "instillUIOrder": 0, "title": "ID", @@ -27,14 +27,14 @@ "type": "string" }, "created-time": { - "description": "Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ`", + "description": "Time when the file was created. Format: `YYYY-MM-DDTHH:MM:SSZ`.", "instillFormat": "string", "instillUIOrder": 3, "title": "Created time", "type": "string" }, "modified-time": { - "description": "Time when the file was last modified. Format: `YYYY-MM-DDTHH:MM:SSZ`", + "description": "Time when the file was last modified. 
Format: `YYYY-MM-DDTHH:MM:SSZ`.", "instillFormat": "string", "instillUIOrder": 4, "title": "Modified time", @@ -48,7 +48,7 @@ "type": "integer" }, "mime-type": { - "description": "MIME type of the file.", + "description": "MIME type of the file. For example, `application/pdf`, `text/csv`, `image/jpeg`, etc.", "instillFormat": "string", "instillUIOrder": 6, "title": "MIME type", @@ -62,7 +62,7 @@ "type": "string" }, "version": { - "description": "Version of the file.", + "description": "Version of the file in Google Drive.", "instillFormat": "integer", "instillUIOrder": 8, "title": "Version", diff --git a/pkg/component/data/googledrive/v0/io.go b/pkg/component/data/googledrive/v0/io.go new file mode 100644 index 000000000..6676a12f1 --- /dev/null +++ b/pkg/component/data/googledrive/v0/io.go @@ -0,0 +1,34 @@ +package googledrive + +// TODO: Change to Instill Format. + +type readFileInput struct { + SharedLink string `json:"shared-link"` +} + +type readFileOutput struct { + File file `json:"file"` +} + +type file struct { + ID string `json:"id"` + Name string `json:"name"` + Content string `json:"content"` + CreatedTime string `json:"created-time"` + ModifiedTime string `json:"modified-time"` + Size int64 `json:"size"` + MimeType string `json:"mime-type"` + Md5Checksum string `json:"md5-checksum,omitempty"` + Version int64 `json:"version"` + WebViewLink string `json:"web-view-link"` + WebContentLink string `json:"web-content-link,omitempty"` +} + +type readFolderInput struct { + SharedLink string `json:"shared-link"` + ReadContent bool `json:"read-content"` +} + +type readFolderOutput struct { + Files []*file `json:"files"` +} diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 8bf715e43..be57aa934 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -4,6 +4,7 @@ package googledrive import ( "context" "fmt" + "strings" "sync" _ "embed" @@ -137,3 +138,82 @@ func 
(c *component) WithOAuthCredentials(s map[string]any) *component { c.instillAIClientSecret = base.ReadFromGlobalConfig(cfgOAuthClientSecret, s) return c } + +// The following types of Google Drive links are supported: +// 1. Folder: https://drive.google.com/drive/folders/ +// 2. File: https://drive.google.com/file/d/ +// 3. Spreadsheet: https://docs.google.com/spreadsheets/d/ +// 4. Document: https://docs.google.com/document/d/ +// 5. Presentation: https://docs.google.com/presentation/d/ +// 6. Colab: https://colab.research.google.com/drive/ (NOTE(review): no matching pattern below; confirm) +// Other link types, such as Google Forms and Google Maps, are not supported. +func extractUIDFromSharedLink(driveLink string) (string, error) { + patterns := map[string]string{ + "folder": "/drive/folders/", + "file": "/file/d/", + "spreadsheet": "/spreadsheets/d/", + "document": "/document/d/", + "presentation": "/presentation/d/", + } + + for _, pattern := range patterns { + if strings.Contains(driveLink, pattern) { + parts := strings.Split(driveLink, pattern) + if len(parts) < 2 { + return "", fmt.Errorf("invalid Google Drive link") + } + // Sample link: https://drive.google.com/drive/folders/xxxxxx?usp=drive_link + // Sample link: https://drive.google.com/file/d/xxxxxx/view?usp=drive_link + uidParts := strings.SplitN(parts[1], "?", 2) + uidParts = strings.SplitN(uidParts[0], "/", 2) + return uidParts[0], nil + } + } + + return "", fmt.Errorf("unrecognized Google Drive link format") +} + +func isFolder(link string) bool { + return strings.Contains(link, "/drive/folders/") +} + +func convertDriveFileToComponentFile(driveFile *drive.File) *file { + // Google Drive API only can support downloading the binary data. + // So, when the file is not binary, we need to export the file as PDF/CSV first. + // To make Google Drive Component can seamlessly work with other components, we need to add the file extension to the file name. 
+ fileExtension := exportFileExtension(driveFile.MimeType) + if fileExtension != "" { + driveFile.Name = addFileExtension(driveFile.Name, fileExtension) + } + + return &file{ + ID: driveFile.Id, + Name: driveFile.Name, + CreatedTime: driveFile.CreatedTime, + ModifiedTime: driveFile.ModifiedTime, + Size: driveFile.Size, + MimeType: driveFile.MimeType, + Md5Checksum: driveFile.Md5Checksum, + Version: driveFile.Version, + WebViewLink: driveFile.WebViewLink, + WebContentLink: driveFile.WebContentLink, + } +} + +func exportFileExtension(mimeType string) string { + switch mimeType { + case "application/vnd.google-apps.spreadsheet": + return ".csv" + case "application/vnd.google-apps.presentation", "application/vnd.google-apps.document": + return ".pdf" + default: + return "" + } +} + +func addFileExtension(fileName, Extension string) string { + if !strings.HasSuffix(fileName, Extension) { + return fileName + Extension + } + return fileName +} diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go index 4ae236f36..c21c131fc 100644 --- a/pkg/component/data/googledrive/v0/main_test.go +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -2,9 +2,7 @@ package googledrive import ( "context" - "encoding/base64" "fmt" - "os" "testing" "github.com/gojuno/minimock/v3" @@ -231,12 +229,9 @@ func Test_Execute_ReadFile(t *testing.T) { bc := base.Component{} component := Init(bc) - b, err := os.ReadFile("testdata/credentials.json") - - c.Assert(err, qt.IsNil) - secrets := map[string]interface{}{ - "oauthcredentials": base64.StdEncoding.EncodeToString(b), + "clientid": "fake-client-id", + "clientsecret": "fake-client-secret", } component = component.WithOAuthCredentials(secrets) @@ -410,12 +405,9 @@ func Test_Execute_ReadFolder(t *testing.T) { bc := base.Component{} component := Init(bc) - b, err := os.ReadFile("testdata/credentials.json") - - c.Assert(err, qt.IsNil) - secrets := map[string]interface{}{ - "oauthcredentials": 
base64.StdEncoding.EncodeToString(b), + "clientid": "fake-client-id", + "clientsecret": "fake-client-secret", } component = component.WithOAuthCredentials(secrets) @@ -500,12 +492,9 @@ func Test_CreateExecution(t *testing.T) { bc := base.Component{} component := Init(bc) - b, err := os.ReadFile("testdata/credentials.json") - - c.Assert(err, qt.IsNil) - secrets := map[string]interface{}{ - "oauthcredentials": base64.StdEncoding.EncodeToString(b), + "clientid": "fake-client-id", + "clientsecret": "fake-client-secret", } component = component.WithOAuthCredentials(secrets) diff --git a/pkg/component/data/googledrive/v0/read_operation.go b/pkg/component/data/googledrive/v0/read_operation.go deleted file mode 100644 index 7719f4255..000000000 --- a/pkg/component/data/googledrive/v0/read_operation.go +++ /dev/null @@ -1,212 +0,0 @@ -package googledrive - -import ( - "context" - "fmt" - "strings" - - "google.golang.org/api/drive/v3" - "google.golang.org/protobuf/types/known/structpb" - - "github.com/instill-ai/pipeline-backend/pkg/component/base" -) - -type readFileInput struct { - SharedLink string `json:"shared-link"` -} - -type readFileOutput struct { - File file `json:"file"` -} - -type file struct { - ID string `json:"id"` - Name string `json:"name"` - Content string `json:"content"` - CreatedTime string `json:"created-time"` - ModifiedTime string `json:"modified-time"` - Size int64 `json:"size"` - MimeType string `json:"mime-type"` - Md5Checksum string `json:"md5-checksum,omitempty"` - Version int64 `json:"version"` - WebViewLink string `json:"web-view-link"` - WebContentLink string `json:"web-content-link,omitempty"` -} - -func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { - - inputStruct := readFileInput{} - - err := base.ConvertFromStructpb(input, &inputStruct) - - if err != nil { - return nil, fmt.Errorf("convert input to struct: %w", err) - } - - if isFolder(inputStruct.SharedLink) { - return nil, 
fmt.Errorf("the input link is a folder link, please use the read-folder operation") - } - - fileUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) - - if err != nil { - return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) - } - - driveFile, content, err := e.service.ReadFile(fileUID) - - if err != nil { - return nil, fmt.Errorf("read file from Google Drive: %w", err) - } - - file := convertDriveFileToComponentFile(driveFile) - file.Content = *content - - output := readFileOutput{ - File: *file, - } - - outputStruct, err := base.ConvertToStructpb(output) - - if err != nil { - return nil, fmt.Errorf("convert output to struct: %w", err) - } - - return outputStruct, nil -} - -type readFolderInput struct { - SharedLink string `json:"shared-link"` - ReadContent bool `json:"read-content"` -} - -type readFolderOutput struct { - Files []*file `json:"files"` -} - -func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { - inputStruct := readFolderInput{} - - err := base.ConvertFromStructpb(input, &inputStruct) - - if err != nil { - return nil, fmt.Errorf("convert input to struct: %w", err) - } - - if !isFolder(inputStruct.SharedLink) { - return nil, fmt.Errorf("the input link is not a folder link, please check the link") - } - - folderUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) - - if err != nil { - return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) - } - - driveFiles, contents, err := e.service.ReadFolder(folderUID, inputStruct.ReadContent) - - if err != nil { - return nil, fmt.Errorf("read folder from Google Drive: %w", err) - } - - files := make([]*file, len(driveFiles)) - - for i, driveFile := range driveFiles { - file := convertDriveFileToComponentFile(driveFile) - if inputStruct.ReadContent { - file.Content = *contents[i] - } - files[i] = file - } - - output := readFolderOutput{ - Files: files, - } - - outputStruct, err := 
base.ConvertToStructpb(output) - - if err != nil { - return nil, fmt.Errorf("convert output to struct: %w", err) - } - - return outputStruct, nil -} - -// Now, we support the following types of Google Drive links: -// 1. Folder: https://drive.google -// 2. File: https://drive.google.com/file/d/ -// 3. Spreadsheet: https://docs.google.com/spreadsheets/d/ -// 4. Document: https://docs.google.com/document/d/ -// 5. Presentation: https://docs.google.com/presentation/d/ -// 6. Colab: https://colab.research.google.com/drive/ -// So, it means the Google Form, Google Map and other types of links are not supported -func extractUIDFromSharedLink(driveLink string) (string, error) { - patterns := map[string]string{ - "folder": "/drive/folders/", - "file": "/file/d/", - "spreadsheet": "/spreadsheets/d/", - "document": "/document/d/", - "presentation": "/presentation/d/", - } - - for _, pattern := range patterns { - if strings.Contains(driveLink, pattern) { - parts := strings.Split(driveLink, pattern) - if len(parts) < 2 { - return "", fmt.Errorf("invalid Google Drive link") - } - // Sample link: https://drive.google.com/drive/folders/xxxxxx?usp=drive_link - // Sample link: https://drive.google.com/file/d/xxxxxx/view?usp=drive_link - uidParts := strings.SplitN(parts[1], "?", 2) - uidParts = strings.SplitN(uidParts[0], "/", 2) - return uidParts[0], nil - } - } - - return "", fmt.Errorf("unrecognized Google Drive link format") -} - -func isFolder(link string) bool { - return strings.Contains(link, "/drive/folders/") -} - -func convertDriveFileToComponentFile(driveFile *drive.File) *file { - // Google Drive API only can support downloading the binary data. - // So, when the file is not binary, we need to export the file as PDF/CSV first. - // To make Google Drive Component can seamlessly work with other components, we need to add the file extension to the file name. 
- fileExtension := exportFileExtension(driveFile.MimeType) - if fileExtension != "" { - driveFile.Name = addFileExtension(driveFile.Name, fileExtension) - } - - return &file{ - ID: driveFile.Id, - Name: driveFile.Name, - CreatedTime: driveFile.CreatedTime, - ModifiedTime: driveFile.ModifiedTime, - Size: driveFile.Size, - MimeType: driveFile.MimeType, - Md5Checksum: driveFile.Md5Checksum, - Version: driveFile.Version, - WebViewLink: driveFile.WebViewLink, - WebContentLink: driveFile.WebContentLink, - } -} - -func exportFileExtension(mimeType string) string { - switch mimeType { - case "application/vnd.google-apps.spreadsheet": - return ".csv" - case "application/vnd.google-apps.presentation", "application/vnd.google-apps.document": - return ".pdf" - default: - return "" - } -} - -func addFileExtension(fileName, Extension string) string { - if !strings.HasSuffix(fileName, Extension) { - return fileName + Extension - } - return fileName -} diff --git a/pkg/component/data/googledrive/v0/task_read_file.go b/pkg/component/data/googledrive/v0/task_read_file.go new file mode 100644 index 000000000..e0beebd30 --- /dev/null +++ b/pkg/component/data/googledrive/v0/task_read_file.go @@ -0,0 +1,52 @@ +package googledrive + +import ( + "context" + "fmt" + + "google.golang.org/protobuf/types/known/structpb" + + "github.com/instill-ai/pipeline-backend/pkg/component/base" +) + +func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { + + inputStruct := readFileInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, fmt.Errorf("convert input to struct: %w", err) + } + + if isFolder(inputStruct.SharedLink) { + return nil, fmt.Errorf("the input link is a folder link, please use the read-folder operation") + } + + fileUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) + + if err != nil { + return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) + } + + 
driveFile, content, err := e.service.ReadFile(fileUID) + + if err != nil { + return nil, fmt.Errorf("read file from Google Drive: %w", err) + } + + file := convertDriveFileToComponentFile(driveFile) + file.Content = *content + + output := readFileOutput{ + File: *file, + } + + outputStruct, err := base.ConvertToStructpb(output) + + if err != nil { + return nil, fmt.Errorf("convert output to struct: %w", err) + } + + return outputStruct, nil +} diff --git a/pkg/component/data/googledrive/v0/task_read_folder.go b/pkg/component/data/googledrive/v0/task_read_folder.go new file mode 100644 index 000000000..4cd5e6bf3 --- /dev/null +++ b/pkg/component/data/googledrive/v0/task_read_folder.go @@ -0,0 +1,58 @@ +package googledrive + +import ( + "context" + "fmt" + + "google.golang.org/protobuf/types/known/structpb" + + "github.com/instill-ai/pipeline-backend/pkg/component/base" +) + +func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { + inputStruct := readFolderInput{} + + err := base.ConvertFromStructpb(input, &inputStruct) + + if err != nil { + return nil, fmt.Errorf("convert input to struct: %w", err) + } + + if !isFolder(inputStruct.SharedLink) { + return nil, fmt.Errorf("the input link is not a folder link, please check the link") + } + + folderUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) + + if err != nil { + return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) + } + + driveFiles, contents, err := e.service.ReadFolder(folderUID, inputStruct.ReadContent) + + if err != nil { + return nil, fmt.Errorf("read folder from Google Drive: %w", err) + } + + files := make([]*file, len(driveFiles)) + + for i, driveFile := range driveFiles { + file := convertDriveFileToComponentFile(driveFile) + if inputStruct.ReadContent { + file.Content = *contents[i] + } + files[i] = file + } + + output := readFolderOutput{ + Files: files, + } + + outputStruct, err := base.ConvertToStructpb(output) 
+ + if err != nil { + return nil, fmt.Errorf("convert output to struct: %w", err) + } + + return outputStruct, nil +} diff --git a/pkg/component/data/googledrive/v0/testdata/credentials.json b/pkg/component/data/googledrive/v0/testdata/credentials.json deleted file mode 100644 index 29ba5d448..000000000 --- a/pkg/component/data/googledrive/v0/testdata/credentials.json +++ /dev/null @@ -1,13 +0,0 @@ -{ - "installed": { - "client_id": "fake_client_id", - "project_id": "fake_pjt_id", - "auth_uri": "https://accounts.google.com/o/oauth2/auth", - "token_uri": "https://oauth2.googleapis.com/token", - "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs", - "client_secret": "fake_secret", - "redirect_uris": [ - "http://localhost" - ] - } - } \ No newline at end of file From 3832ff19c4baddac68d7918aad712e928f0f110b Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Thu, 31 Oct 2024 09:37:49 +0000 Subject: [PATCH 26/28] fix: adopt new interface --- pkg/component/base/executionwrapper.go | 2 +- pkg/component/data/googledrive/v0/io.go | 34 +++++++++---------- pkg/component/data/googledrive/v0/main.go | 3 +- .../data/googledrive/v0/task_read_file.go | 20 +++++------ .../data/googledrive/v0/task_read_folder.go | 20 +++++------ 5 files changed, 36 insertions(+), 43 deletions(-) diff --git a/pkg/component/base/executionwrapper.go b/pkg/component/base/executionwrapper.go index d16a8b1f7..b58b23a3a 100644 --- a/pkg/component/base/executionwrapper.go +++ b/pkg/component/base/executionwrapper.go @@ -179,7 +179,7 @@ func SequentialExecutor(ctx context.Context, jobs []*Job, execute func(*structpb } // ConcurrentExecutor executes the jobs concurrently. 
-func ConcurrentExecutor(ctx context.Context, jobs []*Job, execute func(*structpb.Struct, *Job, context.Context) (*structpb.Struct, error)) error { +func ConcurrentExecutor(ctx context.Context, jobs []*Job, execute func(context.Context, *Job) error) error { var wg sync.WaitGroup wg.Add(len(jobs)) for _, job := range jobs { diff --git a/pkg/component/data/googledrive/v0/io.go b/pkg/component/data/googledrive/v0/io.go index 6676a12f1..c86583c32 100644 --- a/pkg/component/data/googledrive/v0/io.go +++ b/pkg/component/data/googledrive/v0/io.go @@ -1,34 +1,32 @@ package googledrive -// TODO: Change to Instill Format. - type readFileInput struct { - SharedLink string `json:"shared-link"` + SharedLink string `instill:"shared-link"` } type readFileOutput struct { - File file `json:"file"` + File file `instill:"file"` } type file struct { - ID string `json:"id"` - Name string `json:"name"` - Content string `json:"content"` - CreatedTime string `json:"created-time"` - ModifiedTime string `json:"modified-time"` - Size int64 `json:"size"` - MimeType string `json:"mime-type"` - Md5Checksum string `json:"md5-checksum,omitempty"` - Version int64 `json:"version"` - WebViewLink string `json:"web-view-link"` - WebContentLink string `json:"web-content-link,omitempty"` + ID string `instill:"id"` + Name string `instill:"name"` + Content string `instill:"content"` + CreatedTime string `instill:"created-time"` + ModifiedTime string `instill:"modified-time"` + Size int64 `instill:"size"` + MimeType string `instill:"mime-type"` + Md5Checksum string `instill:"md5-checksum,omitempty"` + Version int64 `instill:"version"` + WebViewLink string `instill:"web-view-link"` + WebContentLink string `instill:"web-content-link,omitempty"` } type readFolderInput struct { - SharedLink string `json:"shared-link"` - ReadContent bool `json:"read-content"` + SharedLink string `instill:"shared-link"` + ReadContent bool `instill:"read-content"` } type readFolderOutput struct { - Files []*file `json:"files"` + 
Files []*file `instill:"files"` } diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index be57aa934..4bef118b2 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -50,7 +50,7 @@ type component struct { type execution struct { base.ComponentExecution - execute func(*structpb.Struct, *base.Job, context.Context) (*structpb.Struct, error) + execute func(context.Context, *base.Job) error service client.IDriveService } @@ -145,7 +145,6 @@ func (c *component) WithOAuthCredentials(s map[string]any) *component { // 3. Spreadsheet: https://docs.google.com/spreadsheets/d/ // 4. Document: https://docs.google.com/document/d/ // 5. Presentation: https://docs.google.com/presentation/d/ -// 6. Colab: https://colab.research.google.com/drive/ // So, it means the Google Form, Google Map and other types of links are not supported func extractUIDFromSharedLink(driveLink string) (string, error) { patterns := map[string]string{ diff --git a/pkg/component/data/googledrive/v0/task_read_file.go b/pkg/component/data/googledrive/v0/task_read_file.go index e0beebd30..ff251cf94 100644 --- a/pkg/component/data/googledrive/v0/task_read_file.go +++ b/pkg/component/data/googledrive/v0/task_read_file.go @@ -4,35 +4,33 @@ import ( "context" "fmt" - "google.golang.org/protobuf/types/known/structpb" - "github.com/instill-ai/pipeline-backend/pkg/component/base" ) -func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { +func (e *execution) readFile(ctx context.Context, job *base.Job) error { inputStruct := readFileInput{} - err := base.ConvertFromStructpb(input, &inputStruct) + err := job.Input.ReadData(ctx, inputStruct) if err != nil { - return nil, fmt.Errorf("convert input to struct: %w", err) + return fmt.Errorf("read input data: %w", err) } if isFolder(inputStruct.SharedLink) { - return nil, fmt.Errorf("the input link is a folder link, 
please use the read-folder operation") + return fmt.Errorf("the input link is a folder link, please use the read-folder operation") } fileUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) if err != nil { - return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) + return fmt.Errorf("extract UID from Google Drive link: %w", err) } driveFile, content, err := e.service.ReadFile(fileUID) if err != nil { - return nil, fmt.Errorf("read file from Google Drive: %w", err) + return fmt.Errorf("read file from Google Drive: %w", err) } file := convertDriveFileToComponentFile(driveFile) @@ -42,11 +40,11 @@ func (e *execution) readFile(input *structpb.Struct, job *base.Job, ctx context. File: *file, } - outputStruct, err := base.ConvertToStructpb(output) + err = job.Output.WriteData(ctx, output) if err != nil { - return nil, fmt.Errorf("convert output to struct: %w", err) + return fmt.Errorf("write output data: %w", err) } - return outputStruct, nil + return nil } diff --git a/pkg/component/data/googledrive/v0/task_read_folder.go b/pkg/component/data/googledrive/v0/task_read_folder.go index 4cd5e6bf3..07d29f61e 100644 --- a/pkg/component/data/googledrive/v0/task_read_folder.go +++ b/pkg/component/data/googledrive/v0/task_read_folder.go @@ -4,34 +4,32 @@ import ( "context" "fmt" - "google.golang.org/protobuf/types/known/structpb" - "github.com/instill-ai/pipeline-backend/pkg/component/base" ) -func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx context.Context) (*structpb.Struct, error) { +func (e *execution) readFolder(ctx context.Context, job *base.Job) error { inputStruct := readFolderInput{} - err := base.ConvertFromStructpb(input, &inputStruct) + err := job.Input.ReadData(ctx, inputStruct) if err != nil { - return nil, fmt.Errorf("convert input to struct: %w", err) + return fmt.Errorf("read input data: %w", err) } if !isFolder(inputStruct.SharedLink) { - return nil, fmt.Errorf("the input link is not a folder link, please check 
the link") + return fmt.Errorf("the input link is not a folder link, please check the link") } folderUID, err := extractUIDFromSharedLink(inputStruct.SharedLink) if err != nil { - return nil, fmt.Errorf("extract UID from Google Drive link: %w", err) + return fmt.Errorf("extract UID from Google Drive link: %w", err) } driveFiles, contents, err := e.service.ReadFolder(folderUID, inputStruct.ReadContent) if err != nil { - return nil, fmt.Errorf("read folder from Google Drive: %w", err) + return fmt.Errorf("read folder from Google Drive: %w", err) } files := make([]*file, len(driveFiles)) @@ -48,11 +46,11 @@ func (e *execution) readFolder(input *structpb.Struct, job *base.Job, ctx contex Files: files, } - outputStruct, err := base.ConvertToStructpb(output) + err = job.Output.WriteData(ctx, output) if err != nil { - return nil, fmt.Errorf("convert output to struct: %w", err) + return fmt.Errorf("write output data: %w", err) } - return outputStruct, nil + return nil } From 2ebb9973203af9dbeacd5586a2cb3a1f015055bb Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Thu, 31 Oct 2024 11:34:53 +0000 Subject: [PATCH 27/28] chore: apply instill format to google drive --- pkg/component/data/googledrive/v0/main.go | 15 +- .../data/googledrive/v0/main_test.go | 296 +++++++++--------- .../data/googledrive/v0/task_read_file.go | 4 +- .../data/googledrive/v0/task_read_folder.go | 4 +- pkg/component/store/store.go | 12 +- 5 files changed, 167 insertions(+), 164 deletions(-) diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 4bef118b2..8ee67158e 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -20,10 +20,8 @@ import ( ) const ( - taskReadFile = "TASK_READ_FILE" - taskReadFolder = "TASK_READ_FOLDER" - cfgOAuthClientID = "client-id" - cfgOAuthClientSecret = "client-secret" + taskReadFile = "TASK_READ_FILE" + taskReadFolder = "TASK_READ_FOLDER" authURL = 
"https://accounts.google.com/o/oauth2/auth" tokenURL = "https://oauth2.googleapis.com/token" @@ -43,6 +41,7 @@ var ( type component struct { base.Component + base.OAuthConnector instillAIClientID string instillAIClientSecret string @@ -132,11 +131,9 @@ func (e *execution) Execute(ctx context.Context, jobs []*base.Job) error { return base.ConcurrentExecutor(ctx, jobs, e.execute) } -// WithOAuthCredentials sets the OAuth credentials for the component. -func (c *component) WithOAuthCredentials(s map[string]any) *component { - c.instillAIClientID = base.ReadFromGlobalConfig(cfgOAuthClientID, s) - c.instillAIClientSecret = base.ReadFromGlobalConfig(cfgOAuthClientSecret, s) - return c +// SupportsOAuth checks whether the component is configured to support OAuth. +func (c *component) SupportsOAuth() bool { + return c.OAuthConnector.SupportsOAuth() } // Now, we support the following types of Google Drive links: diff --git a/pkg/component/data/googledrive/v0/main_test.go b/pkg/component/data/googledrive/v0/main_test.go index c21c131fc..3c3e9ebda 100644 --- a/pkg/component/data/googledrive/v0/main_test.go +++ b/pkg/component/data/googledrive/v0/main_test.go @@ -49,15 +49,15 @@ func Test_Execute_ReadFile(t *testing.T) { testcases := []struct { name string - in map[string]any + in readFileInput fakeDriveFile *drive.File - want map[string]any + want readFileOutput wantErr string }{ { name: "ok - read CSV file with file extension", - in: map[string]any{ - "shared-link": sharedSheetLink, + in: readFileInput{ + SharedLink: sharedSheetLink, }, fakeDriveFile: &drive.File{ Id: fakeID, @@ -71,26 +71,26 @@ func Test_Execute_ReadFile(t *testing.T) { WebViewLink: webViewSheetLink, WebContentLink: webContentSheetLink, }, - want: map[string]any{ - "file": map[string]any{ - "id": fakeID, - "name": "testdata.csv", - "content": "fake content", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": 
"application/vnd.google-apps.spreadsheet", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewSheetLink, - "web-content-link": webContentSheetLink, + want: readFileOutput{ + File: file{ + ID: fakeID, + Name: "testdata.csv", + Content: "fake content", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, }, }, }, { name: "ok - read CSV file without file extension", - in: map[string]any{ - "shared-link": sharedSheetLink, + in: readFileInput{ + SharedLink: sharedSheetLink, }, fakeDriveFile: &drive.File{ Id: fakeID, @@ -104,26 +104,26 @@ func Test_Execute_ReadFile(t *testing.T) { WebViewLink: webViewSheetLink, WebContentLink: webContentSheetLink, }, - want: map[string]any{ - "file": map[string]any{ - "id": fakeID, - "name": "testdata.csv", - "content": "fake content", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": "application/vnd.google-apps.spreadsheet", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewSheetLink, - "web-content-link": webContentSheetLink, + want: readFileOutput{ + File: file{ + ID: fakeID, + Name: "testdata.csv", + Content: "fake content", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, }, }, }, { name: "ok - read file Google doc file", - in: map[string]any{ - "shared-link": sharedDocLink, + in: readFileInput{ + SharedLink: sharedDocLink, }, fakeDriveFile: &drive.File{ Id: fakeID, @@ -137,25 +137,25 @@ func 
Test_Execute_ReadFile(t *testing.T) { WebViewLink: webViewDocLink, WebContentLink: webContentDocLink, }, - want: map[string]any{ - "file": map[string]any{ - "id": fakeID, - "name": "testdata.pdf", - "content": "fake content", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": "application/vnd.google-apps.document", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewDocLink, + want: readFileOutput{ + File: file{ + ID: fakeID, + Name: "testdata.pdf", + Content: "fake content", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.document", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewDocLink, }, }, }, { name: "ok - read file Google slide file", - in: map[string]any{ - "shared-link": sharedSlideLink, + in: readFileInput{ + SharedLink: sharedSlideLink, }, fakeDriveFile: &drive.File{ Id: fakeID, @@ -169,25 +169,25 @@ func Test_Execute_ReadFile(t *testing.T) { WebViewLink: webViewSlideLink, WebContentLink: webContentSlideLink, }, - want: map[string]any{ - "file": map[string]any{ - "id": fakeID, - "name": "testdata.pdf", - "content": "fake content", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": "application/vnd.google-apps.presentation", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewSlideLink, + want: readFileOutput{ + File: file{ + ID: fakeID, + Name: "testdata.pdf", + Content: "fake content", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.presentation", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSlideLink, }, }, }, { name: "ok - read file", - in: map[string]any{ - 
"shared-link": sharedFileLink, + in: readFileInput{ + SharedLink: sharedFileLink, }, fakeDriveFile: &drive.File{ Id: fakeID, @@ -201,26 +201,26 @@ func Test_Execute_ReadFile(t *testing.T) { WebViewLink: webViewFileLink, WebContentLink: webContentFileLink, }, - want: map[string]any{ - "file": map[string]any{ - "id": fakeID, - "name": "testdata.png", - "content": "fake content", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": "image/jpeg", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewFileLink, - "web-content-link": webContentFileLink, + want: readFileOutput{ + File: file{ + ID: fakeID, + Name: "testdata.png", + Content: "fake content", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "image/jpeg", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewFileLink, + WebContentLink: webContentFileLink, }, }, }, { name: "nok - read file with invalid shared link", - in: map[string]any{ - "shared-link": sharedFolderLink, + in: readFileInput{ + SharedLink: sharedFolderLink, }, wantErr: "the input link is a folder link, please use the read-folder operation", }, @@ -230,11 +230,11 @@ func Test_Execute_ReadFile(t *testing.T) { component := Init(bc) secrets := map[string]interface{}{ - "clientid": "fake-client-id", - "clientsecret": "fake-client-secret", + "oauthclientid": "fake-client-id", + "oauthclientsecret": "fake-client-secret", } - component = component.WithOAuthCredentials(secrets) + component.WithOAuthConfig(secrets) setup := map[string]any{ "refresh-token": "fake-refresh-token", @@ -266,31 +266,34 @@ func Test_Execute_ReadFile(t *testing.T) { Return(fakeDriveFile, &fakeContent, nil) } - pbIn, err := structpb.NewStruct(tc.in) - c.Assert(err, qt.IsNil) - ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadMock.Return(pbIn, nil) - - 
ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { - gotJSON, err := output.MarshalJSON() - c.Check(err, qt.IsNil) - c.Check(gotJSON, qt.JSONEquals, tc.want) + ir.ReadDataMock.Set(func(ctx context.Context, input any) error { + switch input := input.(type) { + case *readFileInput: + *input = tc.in + } + return nil + }) + ow.WriteDataMock.Optional().Set(func(ctx context.Context, output any) (err error) { + switch output := output.(type) { + case *readFileOutput: + c.Assert(output, qt.DeepEquals, &tc.want) + } return nil }) eh.ErrorMock.Optional().Set(func(ctx context.Context, err error) { if tc.wantErr != "" { - c.Check(err, qt.ErrorMatches, tc.wantErr) + c.Assert(err, qt.ErrorMatches, tc.wantErr) } else { - c.Check(err, qt.IsNil) + c.Assert(err, qt.IsNil) } }) err = exec.Execute(ctx, []*base.Job{job}) - c.Check(err, qt.IsNil) + c.Assert(err, qt.IsNil) }) @@ -306,17 +309,17 @@ func Test_Execute_ReadFolder(t *testing.T) { testcases := []struct { name string - in map[string]any + in readFolderInput fakeDriveFiles []*drive.File fakeContents []*string - want map[string]any + want readFolderOutput wantErr string }{ { name: "ok - read folder with content", - in: map[string]any{ - "shared-link": sharedFolderLink, - "read-content": true, + in: readFolderInput{ + SharedLink: sharedFolderLink, + ReadContent: true, }, fakeDriveFiles: []*drive.File{ { @@ -335,29 +338,29 @@ func Test_Execute_ReadFolder(t *testing.T) { fakeContents: []*string{ stringPointer("fake content"), }, - want: map[string]any{ - "files": []map[string]any{ + want: readFolderOutput{ + Files: []*file{ { - "id": fakeID, - "name": "testdata.csv", - "content": "fake content", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": "application/vnd.google-apps.spreadsheet", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewSheetLink, - "web-content-link": 
webContentSheetLink, + ID: fakeID, + Name: "testdata.csv", + Content: "fake content", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, }, }, }, }, { name: "ok - read folder without content", - in: map[string]any{ - "shared-link": sharedFolderLink, - "read-content": false, + in: readFolderInput{ + SharedLink: sharedFolderLink, + ReadContent: false, }, fakeDriveFiles: []*drive.File{ { @@ -374,29 +377,29 @@ func Test_Execute_ReadFolder(t *testing.T) { }, }, fakeContents: nil, - want: map[string]any{ - "files": []map[string]any{ + want: readFolderOutput{ + Files: []*file{ { - "id": fakeID, - "name": "testdata.csv", - "content": "", - "created-time": "2021-08-09T20:25:02.312Z", - "modified-time": "2021-09-17T16:58:37.924Z", - "size": 0, - "mime-type": "application/vnd.google-apps.spreadsheet", - "md5-checksum": "7db67eab9238f9a63df30f570fda2bac", - "version": 0, - "web-view-link": webViewSheetLink, - "web-content-link": webContentSheetLink, + ID: fakeID, + Name: "testdata.csv", + Content: "", + CreatedTime: "2021-08-09T20:25:02.312Z", + ModifiedTime: "2021-09-17T16:58:37.924Z", + Size: 0, + MimeType: "application/vnd.google-apps.spreadsheet", + Md5Checksum: "7db67eab9238f9a63df30f570fda2bac", + Version: 0, + WebViewLink: webViewSheetLink, + WebContentLink: webContentSheetLink, }, }, }, }, { name: "nok - read file", - in: map[string]any{ - "shared-link": sharedSheetLink, - "read-content": false, + in: readFolderInput{ + SharedLink: sharedSheetLink, + ReadContent: false, }, wantErr: "the input link is not a folder link, please check the link", }, @@ -406,11 +409,11 @@ func Test_Execute_ReadFolder(t *testing.T) { component := Init(bc) secrets := map[string]interface{}{ - "clientid": "fake-client-id", - "clientsecret": 
"fake-client-secret", + "oauthclientid": "fake-client-id", + "oauthclientsecret": "fake-client-secret", } - component = component.WithOAuthCredentials(secrets) + component.WithOAuthConfig(secrets) setup := map[string]any{ "refresh-token": "fake-refresh-token", @@ -437,7 +440,7 @@ func Test_Execute_ReadFolder(t *testing.T) { fakeDriveFiles := tc.fakeDriveFiles fakeContents := tc.fakeContents - readContent := tc.in["read-content"].(bool) + readContent := tc.in.ReadContent if tc.wantErr == "" { mockDriveService.ReadFolderMock. @@ -445,18 +448,21 @@ func Test_Execute_ReadFolder(t *testing.T) { Return(fakeDriveFiles, fakeContents, nil) } - pbIn, err := structpb.NewStruct(tc.in) - c.Assert(err, qt.IsNil) - ir, ow, eh, job := mock.GenerateMockJob(c) - ir.ReadMock.Return(pbIn, nil) - ow.WriteMock.Optional().Set(func(ctx context.Context, output *structpb.Struct) (err error) { - gotJSON, err := output.MarshalJSON() - - c.Check(err, qt.IsNil) - c.Check(gotJSON, qt.JSONEquals, tc.want) + ir.ReadDataMock.Set(func(ctx context.Context, input any) error { + switch input := input.(type) { + case *readFolderInput: + *input = tc.in + } + return nil + }) + ow.WriteDataMock.Optional().Set(func(ctx context.Context, output any) (err error) { + switch output := output.(type) { + case *readFolderOutput: + c.Assert(output, qt.DeepEquals, &tc.want) + } return nil }) @@ -493,11 +499,11 @@ func Test_CreateExecution(t *testing.T) { component := Init(bc) secrets := map[string]interface{}{ - "clientid": "fake-client-id", - "clientsecret": "fake-client-secret", + "oauthclientid": "fake-client-id", + "oauthclientsecret": "fake-client-secret", } - component = component.WithOAuthCredentials(secrets) + component.WithOAuthConfig(secrets) setup := map[string]any{ "refresh-token": "fake-refresh-token", diff --git a/pkg/component/data/googledrive/v0/task_read_file.go b/pkg/component/data/googledrive/v0/task_read_file.go index ff251cf94..fee85b7af 100644 --- 
a/pkg/component/data/googledrive/v0/task_read_file.go +++ b/pkg/component/data/googledrive/v0/task_read_file.go @@ -9,7 +9,7 @@ import ( func (e *execution) readFile(ctx context.Context, job *base.Job) error { - inputStruct := readFileInput{} + inputStruct := &readFileInput{} err := job.Input.ReadData(ctx, inputStruct) @@ -36,7 +36,7 @@ func (e *execution) readFile(ctx context.Context, job *base.Job) error { file := convertDriveFileToComponentFile(driveFile) file.Content = *content - output := readFileOutput{ + output := &readFileOutput{ File: *file, } diff --git a/pkg/component/data/googledrive/v0/task_read_folder.go b/pkg/component/data/googledrive/v0/task_read_folder.go index 07d29f61e..757216365 100644 --- a/pkg/component/data/googledrive/v0/task_read_folder.go +++ b/pkg/component/data/googledrive/v0/task_read_folder.go @@ -8,7 +8,7 @@ import ( ) func (e *execution) readFolder(ctx context.Context, job *base.Job) error { - inputStruct := readFolderInput{} + inputStruct := &readFolderInput{} err := job.Input.ReadData(ctx, inputStruct) @@ -42,7 +42,7 @@ func (e *execution) readFolder(ctx context.Context, job *base.Job) error { files[i] = file } - output := readFolderOutput{ + output := &readFolderOutput{ Files: files, } diff --git a/pkg/component/store/store.go b/pkg/component/store/store.go index f1da3c657..82c5e091d 100644 --- a/pkg/component/store/store.go +++ b/pkg/component/store/store.go @@ -172,12 +172,6 @@ func Init( compStore.Import(conn) } - { - conn := googledrive.Init(baseComp) - conn = conn.WithOAuthCredentials(secrets["googledrive"]) - compStore.Import(conn) - } - compStore.Import(instillapp.Init(baseComp)) compStore.Import(bigquery.Init(baseComp)) compStore.Import(googlecloudstorage.Init(baseComp)) @@ -208,6 +202,12 @@ func Init( conn.WithOAuthConfig(secrets[conn.GetDefinitionID()]) compStore.Import(conn) } + { + // Google Drive + conn := googledrive.Init(baseComp) + conn.WithOAuthConfig(secrets["googledrive"]) + compStore.Import(conn) + } 
compStore.Import(email.Init(baseComp)) compStore.Import(jira.Init(baseComp)) compStore.Import(ollama.Init(baseComp)) From b7d8f7407b622ff45f1cb76c6fa1f83a77009eac Mon Sep 17 00:00:00 2001 From: chuang8511 Date: Fri, 1 Nov 2024 10:28:06 +0000 Subject: [PATCH 28/28] feat: get client info in component --- pkg/component/base/oauth.go | 10 ++++++++++ pkg/component/data/googledrive/v0/main.go | 7 ++----- 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/pkg/component/base/oauth.go b/pkg/component/base/oauth.go index 9d3602e9c..ea838c488 100644 --- a/pkg/component/base/oauth.go +++ b/pkg/component/base/oauth.go @@ -28,3 +28,13 @@ func (c *OAuthConnector) WithOAuthConfig(s map[string]any) { func (c *OAuthConnector) SupportsOAuth() bool { return c.oAuthClientID != "" && c.oAuthClientSecret != "" } + +// GetOAuthClientID returns the OAuth client ID. +func (c *OAuthConnector) GetOAuthClientID() string { + return c.oAuthClientID +} + +// GetOAuthClientSecret returns the OAuth client secret. +func (c *OAuthConnector) GetOAuthClientSecret() string { + return c.oAuthClientSecret +} diff --git a/pkg/component/data/googledrive/v0/main.go b/pkg/component/data/googledrive/v0/main.go index 8ee67158e..723e7ef34 100644 --- a/pkg/component/data/googledrive/v0/main.go +++ b/pkg/component/data/googledrive/v0/main.go @@ -42,9 +42,6 @@ var ( type component struct { base.Component base.OAuthConnector - - instillAIClientID string - instillAIClientSecret string } type execution struct { @@ -100,8 +97,8 @@ func (c *component) CreateExecution(x base.ComponentExecution) (base.IExecution, func getDriveService(ctx context.Context, setup *structpb.Struct, c *component) (*drive.Service, error) { config := &oauth2.Config{ - ClientID: c.instillAIClientID, - ClientSecret: c.instillAIClientSecret, + ClientID: c.GetOAuthClientID(), + ClientSecret: c.GetOAuthClientSecret(), Endpoint: oauth2.Endpoint{ AuthURL: authURL, TokenURL: tokenURL,