Bravos-Power · staadecker · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024 · Nov 8, 2024
diff --git a/docs/quadratic_expressions.md b/docs/quadratic_expressions.md
@@ -0,0 +1,34 @@
+# Quadratic Expressions
+
+Quadratic expressions work as you'd expect. Simply multiply two linear expression together (or square an expression with `**2`) and you'll get a quadratic. The quadratic can then be used in constraints or the objective. 
+
+## Example
+
+### Maximize area of box
+Here's a short example that shows that a square maximizes the area of any box with a fixed perimeter.
+
+```python3
+import pyoframe as pf
+model = pf.Model("max")
+model.w = pf.Variable(lb=0)
+model.h = pf.Variable(lb=0)
+model.limit_perimter = 2 * (model.w + model.h) <= 20
+model.objective = model.w * model.h
+model.solve()
+print(f"It's a square: {model.w.solution==model.h.solution}")
+
+# Outputs: It's a square: True
+```
+### Facility Location Problem
+
+See [examples/facility_location](../tests/examples/facility_location/).
+
+## Note for Pyoframe developers: Internal Representation of Quadratics
+
+Internally, Pyoframe's `Expression` object is used for both linear and quadratic expressions. When the dataframe within an `Expression` object (i.e. `Expression.data`) contains an additional column (named `__quadratic_variable_id`) we know that the expression is a quadratic.
+
+This extra column stores the ID of the second variable in quadratic terms. For terms with only one variable, this column contains ID `0` (a reserved variable ID which can thought of as meaning 'no variable'). The variables in a quadratic are rearranged such that the ID in the `__variable_id` column is always greater or equal than the variable ID in the `__quadratic_variable_id` (recall: a*b=b*a). This rearranging not only ensures that a*b+b*a=2a*b but also generates a useful property: If the variable ID in the first column (`__variable_id`) is `0` we know the variable ID in the second must also be `0` and therefore the term must be a constant.
+
+The additional quadratic variable ID column is automatically dropped if through arithmetic the quadratic terms cancel out.
+
+One final detail: the `.to_str()` method on constraints and the objective has a special `quadratic_divider` parameter which is used when writing to a .lp file--a file format that requires quadratic terms to be enclosed by brackets and, in the case of the objective function, divided by 2. See [Gurobi Documentation](https://www.gurobi.com/documentation/current/refman/lp_format.html)
diff --git a/src/pyoframe/_arithmetic.py b/src/pyoframe/_arithmetic.py
@@ -3,6 +3,9 @@
 
 from pyoframe.constants import (
     COEF_KEY,
+    CONST_TERM,
+    QUAD_VAR_KEY,
+    VAR_TYPE,
     RESERVED_COL_KEYS,
     VAR_KEY,
     UnmatchedStrategy,
@@ -15,6 +18,41 @@
     from pyoframe.core import Expression
 
 
+def _multiply_expressions(self: "Expression", other: "Expression") -> "Expression":
+    """
+    Multiply two or more expressions together.
+
+    Examples:
+        >>> import pyoframe as pf
+        >>> m = pf.Model("min")
+        >>> m.x1 = pf.Variable()
+        >>> m.x2 = pf.Variable()
+        >>> m.x3 = pf.Variable()
+        >>> result = 5 * m.x1 * m.x2
+        >>> result
+        <Expression size=1 dimensions={} terms=1 degree=2>
+        5 x2 * x1
+        >>> result * m.x3
+        Traceback (most recent call last):
+        ...
+        pyoframe.constants.PyoframeError: Failed to multiply expressions:
+        <Expression size=1 dimensions={} terms=1 degree=2> * <Expression size=1 dimensions={} terms=1>
+        Due to error:
+        Cannot multiply a quadratic expression by a non-constant.
+    """
+    try:
+        return _multiply_expressions_core(self, other)
+    except PyoframeError as error:
+        raise PyoframeError(
+            "Failed to multiply expressions:\n"
+            + " * ".join(
+                e.to_str(include_header=True, include_data=False) for e in [self, other]
+            )
+            + "\nDue to error:\n"
+            + str(error)
+        ) from error
+
+
 def _add_expressions(*expressions: "Expression") -> "Expression":
     try:
         return _add_expressions_core(*expressions)
@@ -29,6 +67,98 @@ def _add_expressions(*expressions: "Expression") -> "Expression":
         ) from error
 
 
+def _multiply_expressions_core(self: "Expression", other: "Expression") -> "Expression":
+    self_degree, other_degree = self.degree(), other.degree()
+    if self_degree + other_degree > 2:
+        # We know one of the two must be a quadratic since 1 + 1 is not greater than 2.
+        raise PyoframeError("Cannot multiply a quadratic expression by a non-constant.")
+    if self_degree < other_degree:
+        self, other = other, self
+        self_degree, other_degree = other_degree, self_degree
+    if other_degree == 1:
+        assert (
+            self_degree == 1
+        ), "This should always be true since the sum of degrees must be <=2."
+        return _quadratic_multiplication(self, other)
+
+    assert (
+        other_degree == 0
+    ), "This should always be true since other cases have already been handled."
+    multiplier = other.data.drop(
+        VAR_KEY
+    )  # QUAD_VAR_KEY doesn't need to be dropped since we know it doesn't exist
+
+    dims = self.dimensions_unsafe
+    other_dims = other.dimensions_unsafe
+    dims_in_common = [dim for dim in dims if dim in other_dims]
+
+    data = (
+        self.data.join(
+            multiplier,
+            on=dims_in_common if len(dims_in_common) > 0 else None,
+            how="inner" if dims_in_common else "cross",
+        )
+        .with_columns(pl.col(COEF_KEY) * pl.col(COEF_KEY + "_right"))
+        .drop(COEF_KEY + "_right")
+    )
+
+    return self._new(data)
+
+
+def _quadratic_multiplication(self: "Expression", other: "Expression") -> "Expression":
+    """
+    Multiply two expressions of degree 1.
+
+    Examples:
+        >>> import polars as pl
+        >>> from pyoframe import Variable
+        >>> df = pl.DataFrame({"dim": [1, 2, 3], "value": [1, 2, 3]})
+        >>> x1 = Variable()
+        >>> x2 = Variable()
+        >>> expr1 = df * x1
+        >>> expr2 = df * x2 * 2 + 4
+        >>> expr1 * expr2
+        <Expression size=3 dimensions={'dim': 3} terms=6 degree=2>
+        [1]: 4 x1 +2 x2 * x1
+        [2]: 8 x1 +8 x2 * x1
+        [3]: 12 x1 +18 x2 * x1
+        >>> (expr1 * expr2) - df * x1 * df * x2 * 2
+        <Expression size=3 dimensions={'dim': 3} terms=3>
+        [1]: 4 x1
+        [2]: 8 x1
+        [3]: 12 x1
+    """
+    dims = self.dimensions_unsafe
+    other_dims = other.dimensions_unsafe
+    dims_in_common = [dim for dim in dims if dim in other_dims]
+
+    data = (
+        self.data.join(
+            other.data,
+            on=dims_in_common if len(dims_in_common) > 0 else None,
+            how="inner" if dims_in_common else "cross",
+        )
+        .with_columns(pl.col(COEF_KEY) * pl.col(COEF_KEY + "_right"))
+        .drop(COEF_KEY + "_right")
+        .rename({VAR_KEY + "_right": QUAD_VAR_KEY})
+        # Swap VAR_KEY and QUAD_VAR_KEY so that VAR_KEy is always the larger one
+        .with_columns(
+            pl.when(pl.col(VAR_KEY) < pl.col(QUAD_VAR_KEY))
+            .then(pl.col(QUAD_VAR_KEY))
+            .otherwise(pl.col(VAR_KEY))
+            .alias(VAR_KEY),
+            pl.when(pl.col(VAR_KEY) < pl.col(QUAD_VAR_KEY))
+            .then(pl.col(VAR_KEY))
+            .otherwise(pl.col(QUAD_VAR_KEY))
+            .alias(QUAD_VAR_KEY),
+        )
+    )
+
+    data = _sum_like_terms(data)
+
+    return self._new(data)
+
+
 def _add_expressions_core(*expressions: "Expression") -> "Expression":
     # Mapping of how a sum of two expressions should propogate the unmatched strategy
     propogatation_strategies = {
@@ -162,11 +292,24 @@ def get_indices(expr):
         propogate_strat = expressions[0].unmatched_strategy
         expr_data = [expr.data for expr in expressions]
 
+    # Add quadratic column if it is needed and doesn't already exist
+    if any(QUAD_VAR_KEY in df.columns for df in expr_data):
+        expr_data = [
+            (
+                df.with_columns(pl.lit(CONST_TERM).alias(QUAD_VAR_KEY).cast(VAR_TYPE))
+                if QUAD_VAR_KEY not in df.columns
+                else df
+            )
+            for df in expr_data
+        ]
+
     # Sort columns to allow for concat
-    expr_data = [e.select(sorted(e.columns)) for e in expr_data]
+    expr_data = [
+        e.select(dims + [c for c in e.columns if c not in dims]) for e in expr_data
+    ]
 
     data = pl.concat(expr_data, how="vertical_relaxed")
-    data = data.group_by(dims + [VAR_KEY], maintain_order=True).sum()
+    data = _sum_like_terms(data)
 
     new_expr = expressions[0]._new(data)
     new_expr.unmatched_strategy = propogate_strat
@@ -214,6 +357,20 @@ def _add_dimension(self: "Expression", target: "Expression") -> "Expression":
     return self._new(result)
 
 
+def _sum_like_terms(df: pl.DataFrame) -> pl.DataFrame:
+    """Combines terms with the same variables. Removes quadratic column if they all happen to cancel."""
+    dims = [c for c in df.columns if c not in RESERVED_COL_KEYS]
+    var_cols = [VAR_KEY] + ([QUAD_VAR_KEY] if QUAD_VAR_KEY in df.columns else [])
+    df = (
+        df.group_by(dims + var_cols, maintain_order=True)
+        .sum()
+        .filter(pl.col(COEF_KEY) != 0)
+    )
+    if QUAD_VAR_KEY in df.columns and (df.get_column(QUAD_VAR_KEY) == CONST_TERM).all():
+        df = df.drop(QUAD_VAR_KEY)
+    return df
+
+
 def _get_dimensions(df: pl.DataFrame) -> Optional[List[str]]:
     """
     Returns the dimensions of the DataFrame. Reserved columns do not count as dimensions.

diff --git a/src/pyoframe/constants.py b/src/pyoframe/constants.py
@@ -20,17 +20,31 @@
 
 COEF_KEY = "__coeff"
 VAR_KEY = "__variable_id"
+QUAD_VAR_KEY = "__quadratic_variable_id"
 CONSTRAINT_KEY = "__constraint_id"
 SOLUTION_KEY = "solution"
 DUAL_KEY = "dual"
 RC_COL = "RC"
 SLACK_COL = "slack"
 
-CONST_TERM = 0
+COL_DTYPES = {
+    COEF_KEY: pl.Float64,
+    VAR_KEY: pl.UInt32,
+    QUAD_VAR_KEY: pl.UInt32,
+    CONSTRAINT_KEY: pl.UInt32,
+    SOLUTION_KEY: pl.Float64,
+    DUAL_KEY: pl.Float64,
+    RC_COL: pl.Float64,
+    SLACK_COL: pl.Float64,
+}
+VAR_TYPE = COL_DTYPES[VAR_KEY]
+
+CONST_TERM = 0  # 0 is a reserved value which makes it easy to detect. It must be zero for e.g. ensuring consistency during quadratic multiplication.
 
 RESERVED_COL_KEYS = (
     COEF_KEY,
     VAR_KEY,
+    QUAD_VAR_KEY,
     CONSTRAINT_KEY,
     SOLUTION_KEY,
     DUAL_KEY,