From 18cd68720e71619ca11da4be05e2362798d4b905 Mon Sep 17 00:00:00 2001
From: Slobodan Ilic <slobodan@crunch.io>
Date: Mon, 11 Sep 2023 17:20:51 +0200
Subject: [PATCH] Add examples for MapArray.from_arrays

---
 python/pyarrow/array.pxi | 73 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 73 insertions(+)

diff --git a/python/pyarrow/array.pxi b/python/pyarrow/array.pxi
index e26b1ad3291b5..1d7501f81e2b4 100644
--- a/python/pyarrow/array.pxi
+++ b/python/pyarrow/array.pxi
@@ -2358,11 +2358,84 @@ cdef class MapArray(ListArray):
         offsets : array-like or sequence (int32 type)
         keys : array-like or sequence (any type)
         items : array-like or sequence (any type)
+        mask : pyarrow.Array[bool] (optional)
+            Indicate which values are null (True) or not null (False).
         pool : MemoryPool
 
         Returns
         -------
         map_array : MapArray
+
+        Examples
+        --------
+        First construct a rectangular model of the data. A total of 5 respondents
+        answered the question "How much did you like the movie x?". The value -1
+        means that the value is missing.
+
+        >>> movies_rectangular = np.ma.masked_array([
+        ...     [10, -1, -1],
+        ...     [8, 4, 9],
+        ...     [-1, 10, 5],
+        ...     [-1, -1, -1],
+        ...     [-1, -1, -1]
+        ... ],
+        ... [
+        ...     [False, True, True],
+        ...     [False, False, False],
+        ...     [True, False, False],
+        ...     [True, True, True],
+        ...     [True, True, True],
+        ... ])
+
+        To represent the same data with the MapArray and from_arrays, the data is
+        formed like this:
+
+        >>> offsets = [
+        ...     0, #  -- row 1 start
+        ...     1, #  -- row 2 start
+        ...     4, #  -- row 3 start
+        ...     6, #  -- row 4 start
+        ...     6, #  -- row 5 start
+        ...     6, #  -- row 5 end
+        ... ]
+        >>> movies = [
+        ...     "Dark Knight", #  ---------------------------------- row 1
+        ...     "Dark Knight", "Meet the Parents", "Superman", #  -- row 2
+        ...     "Meet the Parents", "Superman", #  ----------------- row 3
+        ... ]
+        >>> likings = [
+        ...     10, #  -------- row 1
+        ...     8, 4, 9, #  --- row 2
+        ...     10, 5 #  ------ row 3
+        ... ]
+        >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas()
+        0                                  [(Dark Knight, 10)]
+        1    [(Dark Knight, 8), (Meet the Parents, 4), (Sup...
+        2              [(Meet the Parents, 10), (Superman, 5)]
+        3                                                   []
+        4                                                   []
+        dtype: object
+
+        If the empty rows need to be marked as missing (null) instead, modify
+        the offsets argument so that `None` is given as the starting position of
+        each row to be marked as missing. The final offset (the end of the last
+        row) must still be an actual position; here it is 6, the length of keys:
+
+        >>> offsets = [
+        ...     0, #  ----- row 1 start
+        ...     1, #  ----- row 2 start
+        ...     4, #  ----- row 3 start
+        ...     None, #  -- row 4 start
+        ...     None, #  -- row 5 start
+        ...     6, #  ----- row 5 end
+        ... ]
+        >>> pa.MapArray.from_arrays(offsets, movies, likings).to_pandas()
+        0                                  [(Dark Knight, 10)]
+        1    [(Dark Knight, 8), (Meet the Parents, 4), (Sup...
+        2              [(Meet the Parents, 10), (Superman, 5)]
+        3                                                 None
+        4                                                 None
+        dtype: object
         """
         cdef:
             Array _offsets, _keys, _items