From 194349037c05de5389d04654b38899d197291c6c Mon Sep 17 00:00:00 2001
From: uguy <hugues.bretin@gmail.com>
Date: Fri, 14 Jun 2024 15:32:51 +0200
Subject: [PATCH] docs(cli): fix missing adoc tag, some precision on actions,
 rephrasing

---
 modules/cli/pages/actions-anonymization.adoc  | 113 +++++++++---------
 .../configuration-for-anonymization.adoc      |  58 ++++-----
 modules/cli/pages/index.adoc                  |  13 +-
 modules/cli/taxonomy.adoc                     |   6 +-
 4 files changed, 102 insertions(+), 88 deletions(-)

diff --git a/modules/cli/pages/actions-anonymization.adoc b/modules/cli/pages/actions-anonymization.adoc
index 4c94c01..e05991e 100644
--- a/modules/cli/pages/actions-anonymization.adoc
+++ b/modules/cli/pages/actions-anonymization.adoc
@@ -3,6 +3,7 @@
 :javase-javadoc-base-url: https://docs.oracle.com/en/java/javase/21/docs/api
 
 == Overview
+
 Action types are mechanisms used to anonymize specific values in the exported data. A value can have one or multiple actions associated with it.
 If no actions are explicitly specified in the configuration for a particular value, it will default to a predefined action as described in the page
 xref:configuration-for-anonymization.adoc[Configure the anonymization].
@@ -19,23 +20,22 @@ If neither of these actions match, the fallback action `REMOVE_LINE` is executed
 
 [source,yaml]
 ----
-  arch_process_comment:
-      content:
-        actions:
-          - action: REGEX_REPLACE
-            pattern: [a-zA-Z0-9._-]+(@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)
-            value: '***$1'
-          - action: REGEX_REPLACE
-            pattern: ( [a-zA-Z0-9_\-]*\.[a-zA-Z0-9_\-]* )
-            value: '*****'
-      fallback:
-        action: REMOVE_LINE
-        where:
-          - column: content
-            regex: '.*'
+arch_process_comment:
+    content:
+      actions:
+        - action: REGEX_REPLACE
+          pattern: '[a-zA-Z0-9._-]+(@[a-zA-Z0-9._-]+\.[a-zA-Z0-9_-]+)'
+          value: '***$1'
+        - action: REGEX_REPLACE
+          pattern: ( [a-zA-Z0-9_\-]*\.[a-zA-Z0-9_\-]* )
+          value: '*****'
+    fallback:
+      action: REMOVE_LINE
+      where:
+        - column: content
+          regex: '.*'
 ----
 
-
 == Available actions
 
 === HASH
@@ -51,11 +51,11 @@ Replace the values by a Hash, this allows to remove readable values but keeps th
 Sample configuration :
 [source,yaml]
 ----
-    arch_process_instance:
-      stringindex1:
-        actions:
-          - action: hash
-            value: SHA-256
+arch_process_instance:
+  stringindex1:
+    actions:
+      - action: hash
+        value: SHA-256
 ----
 
 === REPLACE
@@ -71,11 +71,11 @@ Replace the value with a specified value ( which can be empty )
 Sample configuration :
 [source,yaml]
 ----
-    arch_flownode_instance:
-      displayname:
-        actions:
-          - action: REPLACE
-            value: 'hidden'
+arch_flownode_instance:
+  displayname:
+    actions:
+      - action: REPLACE
+        value: 'hidden'
 ----
 
 === REPLACE_WITH_OTHER
@@ -91,12 +91,13 @@ Replace a value by the value of another column of the same table and line.
 Sample configuration :
 [source,yaml]
 ----
-    arch_flownode_instance:
-      displayname:
-        actions:
-          - action: REPLACE_WITH_OTHER
-            value: name
+arch_flownode_instance:
+  displayname:
+    actions:
+      - action: REPLACE_WITH_OTHER
+        value: name
 ----
+
 === REGEX_REPLACE
 
 Replace the value if the regexp match and use the matching group to create the new value.
@@ -122,20 +123,22 @@ The pattern is a regular expression using the {javase-javadoc-base-url}/java.bas
 Sample configuration :
 [source,yaml]
 ----
-    arch_process_comment:
-      content:
-        actions:
-          - action: REGEX_REPLACE
-            pattern: contract (\d+) is ready for user (\S+)\.(\S+)
-            value: contract XXXX is ready for $2
-          - action: REGEX_REPLACE
-            pattern: The task Allocate repair agent on car (\S+) (is now assigned to .*)
-            value: The task Allocate repair agent on car *** $2
-        fallback:
-          - action: REPLACE
-            value: hidden comment
+arch_process_comment:
+  content:
+    actions:
+      - action: REGEX_REPLACE
+        pattern: contract (\d+) is ready for user (\S+)\.(\S+)
+        value: contract XXXX is ready for $2
+      - action: REGEX_REPLACE
+        pattern: The task Allocate repair agent on car (\S+) (is now assigned to .*)
+        value: The task Allocate repair agent on car *** $2
+    fallback:
+      - action: REPLACE
+        value: hidden comment
 ----
 
+This action can be useful for comments, descriptions, or other free-text data. It allows you to mask things such as emails, usernames or logins that follow a specific pattern. Because you can define a list of regex actions in a specific order, you can chain regex replacements and break the anonymization into smaller successive replacements.
+
 === KEEP
 
 Keep the value, no anonymization done.
@@ -145,10 +148,10 @@ Keep the value, no anonymization done.
 Sample configuration :
 [source,yaml]
 ----
-    arch_flownode_instance:
-      displayname:
-        actions:
-          - action: KEEP
+arch_flownode_instance:
+  displayname:
+    actions:
+      - action: KEEP
 ----
 
 === REMOVE_LINE
@@ -160,14 +163,14 @@ In example below, we remove line of `arch_contract_data` where column `name` has
 Sample configuration :
 [source,yaml]
 ----
-      arch_contract_data:
-        val:
-          actions:
-          - action: REMOVE_LINE
-            where:
-            - column: name
-              regex: PurchasedLicenseInput\.bypassSysDate
-            - column: name
-              regex: PurchasedLicenseInput\.caseCounterStartDate
+arch_contract_data:
+  val:
+    actions:
+    - action: REMOVE_LINE
+      where:
+      - column: name
+        regex: PurchasedLicenseInput\.bypassSysDate
+      - column: name
+        regex: PurchasedLicenseInput\.caseCounterStartDate
 
 ----
diff --git a/modules/cli/pages/configuration-for-anonymization.adoc b/modules/cli/pages/configuration-for-anonymization.adoc
index bfe3219..acde711 100644
--- a/modules/cli/pages/configuration-for-anonymization.adoc
+++ b/modules/cli/pages/configuration-for-anonymization.adoc
@@ -1,20 +1,20 @@
-= How-to configure the anonymization in the CLI
-:description: Learn how-to fine-tune the anonymization in the CLI
+= How-to configure anonymization with the CLI
+:description: Learn how-to fine-tune the anonymization performed by the CLI
 
 == Default anonymization
 
-Using the command `export` of the export tool will activate automatically the anonymization.
+Using the command `export` of the export tool will automatically activate the anonymization.
 
 [NOTE]
 ====
-In case you don't want to activate the anonymization during your export, you can add the argument `-da=true` at the end of the `export` command.
+If you do not want the data to be anonymized during your export, and prefer to see the exported data as is, you can add the argument `-da` or `-da=true` at the end of the `export` command. This option stands for `disable anonymization`.
 ====
 
 Exported data will be anonymized by default to avoid sensitive data leak or with a specified configuration of your own. 
 
 === List of tables anonymized by default
 
-This section lists the tables being exported and identified fields that may contain sensitive information. These fields are handled with specific rules to ensure data security.
+This section lists the tables being exported and the identified fields that may contain sensitive information. These fields are anonymized with specific rules to ensure data security.
 
 ==== arch_process_instance
 
@@ -64,7 +64,6 @@ The `actor` table contains information related to the actors of a process. It de
 ** **Default anonymization**: KEEP
 ** **Details**: Normally, an actor shoudn't have sensitive data because it represent a department, team, job of a company. Those data are not sensitive and can be used for statistic purpose. 
 
-
 ==== arch_process_comment
 
 The `arch_process_comment` table contains information about users who have interact with specific flow nodes, along with other sensitive details. Those comments can include name of the users.
@@ -80,27 +79,31 @@ The `arch_contract_data` table contains information about contracts, including i
 ** **Default anonymization**: REMOVE_LINE
 ** **Details**: Without a specific configuration, the anonymization by default will never export the content of this table because of the sensible data in it. You need to have your own anonymization configuration to handle those data.
 
-
 == Advanced anonymization
 
+The default anonymization can be customized. A list of anonymization actions can be configured for each column of each table. All the actions will be applied in the order in which they are declared in the configuration file.
+
+There is one exception: the `REPLACE_WITH_OTHER` actions will be applied after all the other actions listed for a column, and after the other columns of the row, because we want to ensure that the replacement value (the value from another column of the same row) has been anonymized first.
+
 === Generate a sample configuration for data anonymization
 
-Before performing a full export, you can configure the anonymization of specific fields. To assist with this, a command is available in the tool to generate a sample configuration file based on a default setup, allowing you to choose which columns and tables to anonymize.
+Before performing a full export, you can customize the anonymization configuration of specific fields. To assist you with this, a command is available in the tool that generates a sample configuration file based on a default setup, allowing you to choose which columns and tables to anonymize.
 
 The command `gen_default_anon_conf` has been added to the export tool to streamline this process. If needed, you can use the `--output` argument to specify the location for the generated file.
 
 [NOTE]
 ====
-The generated file itself is only a sample of the configuration file, the anonymization section. You'll need to copy and paste that part into your own configuration file used by your export tool.
+The generated file is only a partial sample of a configuration file: it only contains the anonymization section. You'll need to copy and paste that part into your own configuration file used by your export tool (the `application.yaml` file).
 ====
 
-The generated configuration will also contains all your data contracts key to allow you a convenient way to anonymize them.
+The generated configuration will also list all discovered contract data "keys" to let you know what can be anonymized or not and how (see xref:_contract_data_anonymization[Contract data anonymization]).
 
 == Example of a generated configuration
 
 After executing the command `gen_default_anon_conf`, you will get a configuration file with the anonymization section filled with the default anonymization rules, like below .
 
 [source,yaml]
+----
 bpi:
   anonymizations:
     global:
@@ -140,29 +143,30 @@ bpi:
 
 === Anonymization Rules
 
-Anonymization rules are defined in the configuration file under the `bpi.anonymizations.rules` section. This section contains a list of tables and fields that require anonymization, along with the actions to be performed on them. Each table contains a columns needs to be anonymized sort by their name except if a column contains a `REPLACE_WITH_OTHER` action defined. In this case, the anonymization of the column with this action will be executed at the end of the list to get the anonymized value of the target column.
+Anonymization rules are defined in the configuration file under the `bpi.anonymizations.rules` section. This section contains a list of tables and fields that require anonymization, along with the actions to be performed on them. Each table contains the columns that need to be anonymized, sorted by their name.
+If a column has a `REPLACE_WITH_OTHER` action defined, the anonymization of the column with this action will be executed at the end of the list to get the anonymized value of the targeted replacement column.
 
 .Example
 [source, yaml]
 ----
-  anonymizations:
-    rules:
-     arch_flownode_instance:
-        displayname:
-          actions:
-            - action: REPLACE
-              value: ''
-        displaydescription:
-          actions:
-            - action: REPLACE_WITH_OTHER
-              value: description
-        description:
-          actions:
-            - action: REPLACE
-              value: 'New Value
+anonymizations:
+  rules:
+    arch_flownode_instance:
+      displayname:
+        actions:
+          - action: REPLACE
+            value: ''
+      displaydescription:
+        actions:
+          - action: REPLACE_WITH_OTHER
+            value: description
+      description:
+        actions:
+          - action: REPLACE
+            value: 'New Value'
 ----
 
-In this example, the `arch_flownode_instance` table contains three columns that require anonymization: `displayname`, `displaydescription`, and `description`. First the `description` column will have its value replaced with the string `New Value`. Then the `displayname` column will have its value replaced with an empty string. Finally the `displaydescription` column will be replaced with the value of the `description` column.
+In this example, the `arch_flownode_instance` table contains three columns that require anonymization: `displayname`, `displaydescription`, and `description`. First the `description` column will have its value replaced with the string `New Value`. Then the `displayname` column will have its value replaced with an empty string. Finally the `displaydescription` column will be replaced with the value of the `description` column ( => `New Value` ).
 
 
 === Content max size
diff --git a/modules/cli/pages/index.adoc b/modules/cli/pages/index.adoc
index f823fd5..03bd1c4 100644
--- a/modules/cli/pages/index.adoc
+++ b/modules/cli/pages/index.adoc
@@ -1,23 +1,28 @@
-= Command Line tool for Export
+= How to export data from Bonita for BPI
 :description: Explain how to use and configure the CLI to export data from a Bonita database
 
 == Overview
-The Command Line tool is used to provision data extracted from Bonita instances into a Bonita Process Insights environment for deeper process analysis.
+A Command Line Interface tool (CLI) is used to provision data extracted from Bonita instances into a Bonita Process Insights environment for deeper process analysis.
 
-The command line tool is delivered in a package, containing a basic documentation about commands and a sample configuration file.
+The command line tool is delivered as a package, containing a basic documentation about commands and a sample configuration file.
 
 == Export data
+
 === Configuration
+
 Before exporting, you need to configure the connection to the Bonita Instance from which you want to export the data.
 
 Export tool can support two type of databases Postgresql and Oracle. The jdbc url must be adapted according to the type of database you will use. 
 
 * **Oracle** :    
+
 [source,yaml]
 ----
 jdbc-url: jdbc:oracle:thin:@${bonita.database.host}:${bonita.database.port}/${bonita.database.name}?oracle.net.disableOob=true`jdbc-url`
 ----
+
 * **PostgresSql** : 
+
 [source,yaml]
 ----
 jdbc-url: jdbc:postgresql://${bonita.database.host}:${bonita.database.port}/${bonita.database.name}
@@ -29,12 +34,14 @@ After you finish to configure your file, place it next to the executable jar dir
 ====
 
 === Exporting data and Importing to BPI
+
 To export your data, use the following command line :
 `pi-cli bonita export`
 
 You can add some arguments like `-output` to specify the exact path of the exported zip file. 
 
 === Anonymize exported data
+
 By default, your exported data will be anonymized. It's possible to deactivate the anonymization or adding your own configuration. 
 
 For more details, see xref:configuration-for-anonymization.adoc[Configure the anonymization]
\ No newline at end of file
diff --git a/modules/cli/taxonomy.adoc b/modules/cli/taxonomy.adoc
index 0a28826..1e9b413 100644
--- a/modules/cli/taxonomy.adoc
+++ b/modules/cli/taxonomy.adoc
@@ -1,3 +1,3 @@
-* xref:index.adoc[Command line for Exporting from Bonita]
-** xref:configuration-for-anonymization.adoc[Configure the anonymization]
-*** xref:actions-anonymization.adoc[Actions types for anonymization]
+* xref:index.adoc[Exporting data from Bonita]
+** xref:configuration-for-anonymization.adoc[How to configure anonymization]
+*** xref:actions-anonymization.adoc[Anonymization actions]