Skip to content

Commit

Permalink
[ClangIR][CIRGen] Handle nested union in arrays of struct (#1007)
Browse files Browse the repository at this point in the history
Reproducer:

```
struct nested
{
  union {
    const char *single;
    const char *const *multi;
  } output;
};
static const char * const test[] = {
  "test",
};
const struct nested data[] = 
{
    {
        {
            .multi = test,
        },
    },
    {
        {
            .single = "hello",
        },
    },
};
```

ClangIR currently fails to recognize `data` as an array because it fails
to recognize the initializer for the union. This comes from a fundamental
difference between CIR and LLVM IR. In LLVM IR, a union is simply a
struct with the largest member, so it is fine to have only one init
element. But in CIR, the union type has the information for all members,
so passing only a single init element can cause trouble. We solve
the problem by adding a placeholder attribute for each uninitialized
field.
  • Loading branch information
ChuanqiXu9 authored Nov 4, 2024
1 parent d2df0f6 commit 6f15a2e
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 8 deletions.
15 changes: 15 additions & 0 deletions clang/include/clang/CIR/Dialect/IR/CIRAttrs.td
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,21 @@ def CIR_BoolAttr : CIR_Attr<"Bool", "bool", [TypedAttrInterface]> {
}];
}

//===----------------------------------------------------------------------===//
// InactiveUnionFieldAttr
//===----------------------------------------------------------------------===//

// Placeholder attribute for a union member that an initializer does not set.
// It carries nothing but the member's type, and with the empty assembly format
// below it prints as `#cir.inactive_field : <member type>`.
def InactiveUnionFieldAttr : CIR_Attr<"InactiveUnionField", "inactive_field", [TypedAttrInterface]> {
let summary = "Attribute to represent an uninitialized field for a union.";
let description = [{
The InactiveUnionFieldAttr is used to represent an uninitialized field
for a union.
}];

// The only parameter is the type of the union member this placeholder stands
// in for; it is taken from the attribute's own trailing type.
let parameters = (ins AttributeSelfTypeParameter<"">:$type);
// Nothing is printed between the mnemonic and the trailing type.
let assemblyFormat = [{}];
}

//===----------------------------------------------------------------------===//
// ZeroAttr
//===----------------------------------------------------------------------===//
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/CIR/CodeGen/CIRGenBuilder.h
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,9 @@ class CIRGenBuilderTy : public CIRBaseBuilderTy {
return true;
}

if (mlir::isa<mlir::cir::InactiveUnionFieldAttr>(attr))
return true;

llvm_unreachable("NYI");
}

Expand Down
29 changes: 27 additions & 2 deletions clang/lib/CIR/CodeGen/CIRGenExprConst.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -378,15 +378,40 @@ mlir::Attribute ConstantAggregateBuilder::buildFrom(
CharUnits AlignedSize = Size.alignTo(Align);

bool Packed = false;
ArrayRef<mlir::Attribute> UnpackedElems = Elems;
ArrayRef<mlir::Attribute> UnpackedElems;

// Fill the init elements for union. This comes from a fundamental
// difference between CIR and LLVM IR. In LLVM IR, the union is simply a
// struct with the largest member. So it is fine to have only one init
// element. But in CIR, the union has the information for all members. So if
// we only pass a single init element, we may be in trouble. We solve the
// problem by appending placeholder attribute for the uninitialized fields.
if (auto desired = dyn_cast<mlir::cir::StructType>(DesiredTy);
desired && desired.isUnion() &&
Elems.size() != desired.getNumElements()) {
llvm::SmallVector<mlir::Attribute, 32> UnionElemsStorage;

for (auto elemTy : desired.getMembers()) {
if (auto Ty = mlir::dyn_cast<mlir::TypedAttr>(Elems.back());
Ty && Ty.getType() == elemTy)
UnionElemsStorage.push_back(Elems.back());
else
UnionElemsStorage.push_back(mlir::cir::InactiveUnionFieldAttr::get(
CGM.getBuilder().getContext(), elemTy));
}

UnpackedElems = UnionElemsStorage;
} else
UnpackedElems = Elems;

llvm::SmallVector<mlir::Attribute, 32> UnpackedElemStorage;
if (DesiredSize < AlignedSize || DesiredSize.alignTo(Align) != DesiredSize) {
NaturalLayout = false;
Packed = true;
} else if (DesiredSize > AlignedSize) {
// The natural layout would be too small. Add padding to fix it. (This
// is ignored if we choose a packed layout.)
UnpackedElemStorage.assign(Elems.begin(), Elems.end());
UnpackedElemStorage.assign(UnpackedElems.begin(), UnpackedElems.end());
UnpackedElemStorage.push_back(Utils.getPadding(DesiredSize - Size));
UnpackedElems = UnpackedElemStorage;
}
Expand Down
9 changes: 9 additions & 0 deletions clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,15 @@ mlir::Value lowerCirAttrAsValue(mlir::Operation *parentOp,

// Iteratively lower each constant element of the struct.
for (auto [idx, elt] : llvm::enumerate(constStruct.getMembers())) {
if (auto constStructType =
dyn_cast<mlir::cir::StructType>(constStruct.getType());
constStructType && constStructType.isUnion()) {
if (isa<mlir::cir::InactiveUnionFieldAttr>(elt))
continue;

idx = 0;
}

mlir::Value init = lowerCirAttrAsValue(parentOp, elt, rewriter, converter);
result = rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
}
Expand Down
33 changes: 33 additions & 0 deletions clang/test/CIR/CodeGen/nested-union-array.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
// RUN: FileCheck --input-file=%t.cir %s --check-prefix=CIR
// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t.ll
// RUN: FileCheck --input-file=%t.ll %s --check-prefix=LLVM

// A struct whose only member is an anonymous union of two pointer types.
// The elements of `data` further down each initialize a different member.
struct nested
{
union {
const char *single; // initialized by the second element of `data`
const char *const *multi; // initialized by the first element of `data`
} output;
};
// Array of string pointers; the first element of `data` refers to it through
// the union's `multi` member.
static const char * const test[] = {
"test",
};
// Array of structs where each element activates a different union member, so
// the emitted constant needs a placeholder for whichever member is left unset.
const struct nested data[] =
{
{
{
.multi = test, // second union member is active; `single` is left unset
},
},
{
{
.single = "hello", // first union member is active; `multi` is left unset
},
},
};

// CIR: ![[ANON_TY:.+]] = !cir.struct<union "anon.0" {!cir.ptr<!s8i>, !cir.ptr<!cir.ptr<!s8i>>
// CIR: ![[NESTED_TY:.+]] = !cir.struct<struct "nested" {![[ANON_TY]]
// CIR: cir.global constant external @data = #cir.const_array<[#cir.const_struct<{#cir.const_struct<{#cir.inactive_field : !cir.ptr<!s8i>, #cir.global_view<@test> : !cir.ptr<!cir.ptr<!s8i>>}> : ![[ANON_TY]]}> : ![[NESTED_TY:.+]], #cir.const_struct<{#cir.const_struct<{#cir.global_view<@".str"> : !cir.ptr<!s8i>, #cir.inactive_field : !cir.ptr<!cir.ptr<!s8i>>}> : ![[ANON_TY]]}> : ![[NESTED_TY:.+]]]> : !cir.array<![[NESTED_TY:.+]] x 2>
// LLVM: @data = constant [2 x {{.*}}]
13 changes: 7 additions & 6 deletions clang/test/CIR/CodeGen/union-init.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@ void foo(int x) {
A a = {.x = x};
}

// CHECK-DAG: ![[anon0:.*]] = !cir.struct<struct {!u32i}>
// CHECK-DAG: ![[anon:.*]] = !cir.struct<struct {!s32i}>
// CHECK-DAG: ![[TY_U:.*]] = !cir.struct<union "U" {!s32i}>
// CHECK-DAG: ![[anon0:.*]] = !cir.struct<struct {{.*}}{!u32i}
// CHECK-DAG: ![[TY_u:.*]] = !cir.struct<union {{.*}}{!s32i, !cir.float}
// CHECK-DAG: #[[bfi_x:.*]] = #cir.bitfield_info<name = "x", storage_type = !u32i, size = 16, offset = 0, is_signed = true>
// CHECK-DAG: #[[bfi_y:.*]] = #cir.bitfield_info<name = "y", storage_type = !u32i, size = 16, offset = 16, is_signed = true>
// CHECK-DAG: ![[TY_A:.*]] = !cir.struct<union "A" {!s32i, ![[anon0]]}>
// CHECK-DAG: ![[anon1:.*]] = !cir.struct<union "{{.*}}" {!u32i, !cir.array<!u8i x 4>}

// CHECK-LABEL: cir.func @foo(
Expand All @@ -32,7 +34,7 @@ void foo(int x) {
// CHECK: cir.return

union { int i; float f; } u = { };
// CHECK: cir.global external @u = #cir.zero : ![[anon]]
// CHECK: cir.global external @u = #cir.zero : ![[TY_u]]

unsigned is_little(void) {
const union {
Expand All @@ -43,9 +45,8 @@ unsigned is_little(void) {
}

// CHECK: cir.func @is_little
// CHECK: %[[VAL_1:.*]] = cir.get_global @is_little.one : !cir.ptr<![[anon0]]>
// CHECK: %[[VAL_2:.*]] = cir.cast(bitcast, %[[VAL_1]] : !cir.ptr<![[anon0]]>), !cir.ptr<![[anon1]]>
// CHECK: %[[VAL_3:.*]] = cir.get_member %[[VAL_2]][1] {name = "c"} : !cir.ptr<![[anon1]]> -> !cir.ptr<!cir.array<!u8i x 4>>
// CHECK: %[[VAL_1:.*]] = cir.get_global @is_little.one : !cir.ptr<![[anon1]]>
// CHECK: %[[VAL_2:.*]] = cir.get_member %[[VAL_1]][1] {name = "c"} : !cir.ptr<![[anon1]]> -> !cir.ptr<!cir.array<!u8i x 4>>

typedef union {
int x;
Expand Down

0 comments on commit 6f15a2e

Please sign in to comment.