From 7cab163db778f2f25ad34f0ff4b12a3f08566542 Mon Sep 17 00:00:00 2001
From: joke1196 <joke1196@users.noreply.github.com>
Date: Tue, 28 Jan 2025 08:51:57 +0000
Subject: [PATCH 1/2] Create rule S7187

---
 rules/S7187/metadata.json        |  2 ++
 rules/S7187/python/metadata.json | 25 ++++++++++++++++++
 rules/S7187/python/rule.adoc     | 44 ++++++++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)
 create mode 100644 rules/S7187/metadata.json
 create mode 100644 rules/S7187/python/metadata.json
 create mode 100644 rules/S7187/python/rule.adoc

diff --git a/rules/S7187/metadata.json b/rules/S7187/metadata.json
new file mode 100644
index 00000000000..2c63c085104
--- /dev/null
+++ b/rules/S7187/metadata.json
@@ -0,0 +1,2 @@
+{
+}
diff --git a/rules/S7187/python/metadata.json b/rules/S7187/python/metadata.json
new file mode 100644
index 00000000000..b1742f82691
--- /dev/null
+++ b/rules/S7187/python/metadata.json
@@ -0,0 +1,25 @@
+{
+  "title": "FIXME",
+  "type": "CODE_SMELL",
+  "status": "ready",
+  "remediation": {
+    "func": "Constant\/Issue",
+    "constantCost": "5min"
+  },
+  "tags": [
+  ],
+  "defaultSeverity": "Major",
+  "ruleSpecification": "RSPEC-7187",
+  "sqKey": "S7187",
+  "scope": "All",
+  "defaultQualityProfiles": ["Sonar way"],
+  "quickfix": "unknown",
+  "code": {
+    "impacts": {
+      "MAINTAINABILITY": "HIGH",
+      "RELIABILITY": "MEDIUM",
+      "SECURITY": "LOW"
+    },
+    "attribute": "CONVENTIONAL"
+  }
+}
diff --git a/rules/S7187/python/rule.adoc b/rules/S7187/python/rule.adoc
new file mode 100644
index 00000000000..caae0d69054
--- /dev/null
+++ b/rules/S7187/python/rule.adoc
@@ -0,0 +1,44 @@
+FIXME: add a description
+
+// If you want to factorize the description uncomment the following line and create the file.
+//include::../description.adoc[]
+
+== Why is this an issue?
+
+FIXME: remove the unused optional headers (that are commented out)
+
+//=== What is the potential impact?
+
+== How to fix it
+//== How to fix it in FRAMEWORK NAME
+
+=== Code examples
+
+==== Noncompliant code example
+
+[source,python,diff-id=1,diff-type=noncompliant]
+----
+FIXME
+----
+
+==== Compliant solution
+
+[source,python,diff-id=1,diff-type=compliant]
+----
+FIXME
+----
+
+//=== How does this work?
+
+//=== Pitfalls
+
+//=== Going the extra mile
+
+
+//== Resources
+//=== Documentation
+//=== Articles & blog posts
+//=== Conference presentations
+//=== Standards
+//=== External coding guidelines
+//=== Benchmarks

From 55266d91b69feca562bd3224365ecb97a24e887a Mon Sep 17 00:00:00 2001
From: David Kunzmann <david.kunzmann@sonarsource.com>
Date: Tue, 28 Jan 2025 15:31:50 +0100
Subject: [PATCH 2/2] Create rule S7187: PySpark Pandas DataFrame columns
 should not use a reserved name

---
 rules/S7187/python/metadata.json | 10 ++++-----
 rules/S7187/python/rule.adoc     | 38 +++++++++++++++-----------------
 2 files changed, 23 insertions(+), 25 deletions(-)

diff --git a/rules/S7187/python/metadata.json b/rules/S7187/python/metadata.json
index b1742f82691..08a4e812622 100644
--- a/rules/S7187/python/metadata.json
+++ b/rules/S7187/python/metadata.json
@@ -1,5 +1,5 @@
 {
-  "title": "FIXME",
+  "title": "PySpark Pandas DataFrame columns should not use a reserved name",
   "type": "CODE_SMELL",
   "status": "ready",
   "remediation": {
@@ -7,18 +7,18 @@
     "constantCost": "5min"
   },
   "tags": [
+    "data-science",
+    "pyspark"
   ],
   "defaultSeverity": "Major",
   "ruleSpecification": "RSPEC-7187",
   "sqKey": "S7187",
   "scope": "All",
   "defaultQualityProfiles": ["Sonar way"],
-  "quickfix": "unknown",
+  "quickfix": "infeasible",
   "code": {
     "impacts": {
-      "MAINTAINABILITY": "HIGH",
-      "RELIABILITY": "MEDIUM",
-      "SECURITY": "LOW"
+      "RELIABILITY": "MEDIUM"
     },
     "attribute": "CONVENTIONAL"
   }
diff --git a/rules/S7187/python/rule.adoc b/rules/S7187/python/rule.adoc
index caae0d69054..926bdbf50df 100644
--- a/rules/S7187/python/rule.adoc
+++ b/rules/S7187/python/rule.adoc
@@ -1,16 +1,18 @@
-FIXME: add a description
-
-// If you want to factorize the description uncomment the following line and create the file.
-//include::../description.adoc[]
+This rule raises an issue when a PySpark Pandas DataFrame column name is set to a reserved name.
 
 == Why is this an issue?
 
-FIXME: remove the unused optional headers (that are commented out)
+PySpark offers powerful APIs to work with Pandas DataFrames in a distributed environment. 
+While the integration between PySpark and Pandas is seamless, there are some caveats that should be taken into account.
 
-//=== What is the potential impact?
+The pandas API on Spark uses some special column names for internal purposes.
+These column names contain leading `++__++` and trailing `++__++`.
+Therefore, when using PySpark with Pandas and naming or renaming columns,
+it is discouraged to use such reserved column names as they are not guaranteed to yield the expected results.
 
 == How to fix it
-//== How to fix it in FRAMEWORK NAME
+
+To fix this issue, provide a column name without leading and trailing `++__++`.
 
 === Code examples
 
@@ -18,27 +20,23 @@ FIXME: remove the unused optional headers (that are commented out)
 
 [source,python,diff-id=1,diff-type=noncompliant]
 ----
-FIXME
+import pyspark.pandas as ps
+
+df = ps.DataFrame({'__value__': [1, 2, 3]}) # Noncompliant: __value__ is a reserved column name
 ----
 
 ==== Compliant solution
 
 [source,python,diff-id=1,diff-type=compliant]
 ----
-FIXME
-----
+import pyspark.pandas as ps
 
-//=== How does this work?
+df = ps.DataFrame({'value': [1, 2, 3]}) # Compliant
+----
 
-//=== Pitfalls
 
-//=== Going the extra mile
+== Resources
+=== Documentation
 
+* PySpark Documentation - https://spark.apache.org/docs/latest/api/python/user_guide/pandas_on_spark/best_practices.html#avoid-reserved-column-names[Best Practices]
 
-//== Resources
-//=== Documentation
-//=== Articles & blog posts
-//=== Conference presentations
-//=== Standards
-//=== External coding guidelines
-//=== Benchmarks