From 2aa605a1d99ee5f08bb145237bbb0bef19403657 Mon Sep 17 00:00:00 2001 From: Shubham Verma Date: Wed, 22 Jan 2025 11:03:04 -0500 Subject: [PATCH] z: NULL initialize dataAddr field for 0 size arrays Update array inline allocation sequence to initialize dataAddr field only for non-zero size arrays. Field should be left blank for zero size arrays. Signed-off-by: Shubham Verma --- .../compiler/z/codegen/J9TreeEvaluator.cpp | 302 ++++++++++-------- 1 file changed, 170 insertions(+), 132 deletions(-) diff --git a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp index 2af9369469f..780b761df70 100644 --- a/runtime/compiler/z/codegen/J9TreeEvaluator.cpp +++ b/runtime/compiler/z/codegen/J9TreeEvaluator.cpp @@ -4869,11 +4869,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, TR::LabelSymbol *cFlowRegionDone = generateLabelSymbol(cg); TR::LabelSymbol *oolFailLabel = generateLabelSymbol(cg); -#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) - bool isOffHeapAllocationEnabled = TR::Compiler->om.isOffHeapAllocationEnabled(); - TR::LabelSymbol *populateFirstDimDataAddrSlot = isOffHeapAllocationEnabled ? generateLabelSymbol(cg) : NULL; -#endif /* defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) */ - // oolJumpLabel is a common point that all branches will jump to. From this label, we branch to OOL code. // We do this instead of jumping directly to OOL code from mainline because the RA can only handle the case where there's // a single jump point to OOL code. @@ -4888,13 +4883,18 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, TR::Register *dimReg = cg->evaluate(secondChild); TR::Register *classReg = cg->evaluate(thirdChild); - // In the mainline, first load the first and second dimensions' lengths into registers. + /* In the mainline, first load the first and second dimensions' lengths into registers. + * + * LGF is used instead of L so that array size register can be used to NULL dataAddr + * field for 0-size arrays. dataAddr field is 64 bits whereas size field is 32 bits so + * we need LGF to clear upper 32 bits of the register. + */ TR::Register *firstDimLenReg = cg->allocateRegister(); cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, firstDimLenReg, generateS390MemoryReference(dimsPtrReg, 4, cg)); iComment("Load 1st dim length."); TR::Register *secondDimLenReg = cg->allocateRegister(); - cursor = generateRXInstruction(cg, TR::InstOpCode::L, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg)); + cursor = generateRXInstruction(cg, TR::InstOpCode::LGF, node, secondDimLenReg, generateS390MemoryReference(dimsPtrReg, 0, cg)); iComment("Load 2nd dim length."); // Check to see if second dimension is indeed 0. If yes, then proceed to handle the case here. Otherwise jump to OOL code. @@ -4948,33 +4948,26 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, iComment("Init 1st dim mustBeZero field."); cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg)); iComment("Init 1st dim size field."); + if (TR::Compiler->om.compressObjectReferences()) + { + // Clear padding in Discontiguous array header layout. Since size field is 4 bytes wide, adding 4 to size field gets us to the padding. + cursor = generateSILInstruction(cg, TR::InstOpCode::MVHI, node, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousArraySizeField() + 4, cg), 0); + iComment("Clear padding."); } #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) - if (isOffHeapAllocationEnabled) - { - TR_ASSERT_FATAL_WITH_NODE(node, - (TR::Compiler->om.compressObjectReferences() - && (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8) - || (!TR::Compiler->om.compressObjectReferences() - && fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField()), - "Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array if using compressed refs, " - "or same if using full refs. But was %d bytes for discontiguous and %d bytes for contiguous array.\n", - fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField()); - - // Load dataAddr slot offset difference since 0 size arrays are treated as discontiguous. - generateRIInstruction(cg, - TR::InstOpCode::LGHI, - node, - temp1Reg, - static_cast(fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField())); - cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot); - } - else + bool isOffHeapAllocationEnabled = TR::Compiler->om.isOffHeapAllocationEnabled(); + if (isOffHeapAllocationEnabled) + { + // Since 1st dimension is 0 length, we can use firstDimLenReg to clear + // dataAddr field. secondDimLenReg is expected to be NULL at this point. + cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg)); + iComment("Clear 1st dim dataAddr field."); + } #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */ - { - cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone); } + + cursor = generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone); iComment("Init class field and jump."); // We end up in this region of the ICF if the first dimension is non-zero and the second dimension is zero. @@ -5026,6 +5019,33 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, iComment("Init 1st dim class field."); cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, firstDimLenReg, generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField(), cg)); iComment("Init 1st dim size field."); + if (!TR::Compiler->om.compressObjectReferences()) + { + // Clear padding in contiguous array header layout. Since size field is 4 bytes wide, adding 4 to size field gets us to the padding. + cursor = generateSILInstruction(cg, TR::InstOpCode::MVHI, node, generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousArraySizeField() + 4, cg), 0); + iComment("Clear padding."); + } + + TR::Register *temp3Reg = NULL; + #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) + if (isOffHeapAllocationEnabled) + { + temp3Reg = cg->allocateRegister(); + // Populate dataAddr slot of 1st dimension array. We don't need to worry + // about zero length array since it has already been taken care of. + generateRXInstruction(cg, + TR::InstOpCode::LA, + node, + temp3Reg, + generateS390MemoryReference(targetReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg)); + cursor = generateRXInstruction(cg, + TR::InstOpCode::STG, + node, + temp3Reg, + generateS390MemoryReference(targetReg, fej9->getOffsetOfContiguousDataAddrField(), cg)); + iComment("populateFirstDimDataAddrSlot."); + } + #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */ // temp2 point to end of 1st dim array i.e. start of 2nd dim generateRRInstruction(cg, TR::InstOpCode::LGR, node, temp2Reg, targetReg); generateRRInstruction(cg, TR::InstOpCode::AGR, node, temp2Reg, temp1Reg); @@ -5056,26 +5076,27 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, iComment("Init 2st dim mustBeZero field."); cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg)); iComment("Init 2st dim size field."); + if (TR::Compiler->om.compressObjectReferences()) + { + // Clear padding in Discontiguous array header layout. Since size field is 4 bytes wide, adding 4 to size field gets us to the padding. + cursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousArraySizeField() + 4, cg)); + iComment("Clear padding."); } - TR::Register *temp3Reg = cg->allocateRegister(); - #if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) - if (isOffHeapAllocationEnabled) - { - // Populate dataAddr slot for 2nd dimension zero size array. - generateRXInstruction(cg, - TR::InstOpCode::LA, - node, - temp3Reg, - generateS390MemoryReference(temp2Reg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg)); - generateRXInstruction(cg, - TR::InstOpCode::STG, - node, - temp3Reg, - generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg)); - } + if (isOffHeapAllocationEnabled) + { + // Since 2nd dimension array is 0 length, we can use secondDimLenReg + // to clear dataAddr field. secondDimLenReg is expected to be NULL + // at this point. + cursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, secondDimLenReg, generateS390MemoryReference(temp2Reg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg)); + iComment("Clear 2nd dim dataAddr field."); + } #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */ + } + + if (NULL == temp3Reg) + temp3Reg = cg->allocateRegister(); // Store 2nd dim element into 1st dim array slot, compress temp2 if needed if (comp->target().is64Bit() && comp->useCompressedPointers()) @@ -5100,18 +5121,7 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, generateRILInstruction(cg, TR::InstOpCode::SLFI, node, firstDimLenReg, 1); generateS390CompareAndBranchInstruction(cg, TR::InstOpCode::CL, node, firstDimLenReg, 0, TR::InstOpCode::COND_BNE, loopLabel, false); -#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) - if (isOffHeapAllocationEnabled) - { - // No offset is needed since 1st dimension array is contiguous. - generateRRInstruction(cg, TR::InstOpCode::getXORRegOpCode(), node, temp1Reg, temp1Reg); - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, populateFirstDimDataAddrSlot); - } - else -#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */ - { - generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone); - } + generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, cFlowRegionDone); TR::RegisterDependencyConditions *dependencies = generateRegisterDependencyConditions(0,10,cg); dependencies->addPostCondition(dimReg, TR::RealRegister::AssignAny); @@ -5128,28 +5138,6 @@ static TR::Register * generateMultianewArrayWithInlineAllocators(TR::Node *node, generateS390LabelInstruction(cg, TR::InstOpCode::label, node, oolJumpLabel); generateS390BranchInstruction(cg, TR::InstOpCode::BRC, TR::InstOpCode::COND_BRC, node, oolFailLabel); -#if defined(J9VM_GC_SPARSE_HEAP_ALLOCATION) - if (isOffHeapAllocationEnabled) - { - /* Populate dataAddr slot of 1st dimension array. Arrays of non-zero size - * use contiguous header layout while zero size arrays use discontiguous header layout. - */ - cursor = generateS390LabelInstruction(cg, TR::InstOpCode::label, node, populateFirstDimDataAddrSlot); - iComment("populateFirstDimDataAddrSlot."); - - generateRXInstruction(cg, - TR::InstOpCode::LA, - node, - temp3Reg, - generateS390MemoryReference(targetReg, temp1Reg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg)); - generateRXInstruction(cg, - TR::InstOpCode::STG, - node, - temp3Reg, - generateS390MemoryReference(targetReg, temp1Reg, fej9->getOffsetOfContiguousDataAddrField(), cg)); - } -#endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */ - generateS390LabelInstruction(cg, TR::InstOpCode::label, node, cFlowRegionDone, dependencies); TR::Register *targetRegisterFinal = cg->allocateCollectedReferenceRegister(); @@ -10762,10 +10750,40 @@ genInitArrayHeader(TR::Node * node, TR::Instruction *& iCursor, bool isVariableL static char * allocZeroArrayWithVM = feGetEnv("TR_VMALLOCZEROARRAY"); - // Zero size arrays are discontiguous. If the array is not discontiguous, it writes the length in first element which should not cause any issue. if(node->canSkipZeroInitialization() && allocZeroArrayWithVM == NULL) + { + // Zero size arrays are discontiguous. If the array is not discontiguous, it writes the length in first element which should not cause any issues. iCursor = generateRXInstruction(cg, TR::InstOpCode::ST, node, eNumReg, generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousArraySizeField(), cg), iCursor); + + // Clear 4 byte padding after the size field. + int32_t paddingOffset = 0; + if (TR::Compiler->om.compressObjectReferences()) + { + /* Padding is present only in discontiguous array header shape. If the + * array is contiguous the offset would map to array element space + * (or dataAddr field if enabled), writing 0 to it won't cause any + * issues since it will get overwritten. + */ + paddingOffset = fej9->getOffsetOfDiscontiguousArraySizeField() + 4; + } + else + { + /* Padding is present only in contiguous array header shape. If the + * array is discontiguous the offset would map to array size field. + * Writing 0 to it won't cause any issues because we only inline + * allocate 0 size arrays which are expected to have 0 in the size + * field. + */ + paddingOffset = fej9->getOffsetOfContiguousArraySizeField() + 4; + } + iCursor = generateSS1Instruction(cg, + TR::InstOpCode::XC, + node, + 3, + generateS390MemoryReference(resReg, paddingOffset, cg), + generateS390MemoryReference(resReg, paddingOffset, cg)); + } } TR::Register * @@ -11126,73 +11144,93 @@ J9::Z::TreeEvaluator::VMnewEvaluator(TR::Node * node, TR::CodeGenerator * cg) * In both scenarios, arrays of non-zero size use contiguous header layout while zero size arrays use * discontiguous header layout. */ - TR::Register *offsetReg = NULL; - TR::MemoryReference *dataAddrMR = NULL; - TR::MemoryReference *dataAddrSlotMR = NULL; - - if (isVariableLen && TR::Compiler->om.compressObjectReferences()) + if (!TR::Compiler->om.compressObjectReferences()) { - /* We need to check enumReg (array size) at runtime to determine correct offset of dataAddr field. - * Here we deal only with compressed refs because dataAddr offset for discontiguous - * and contiguous arrays is the same in full refs. - */ - if (comp->getOption(TR_TraceCG)) - traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node); - TR_ASSERT_FATAL_WITH_NODE(node, - (fej9->getOffsetOfDiscontiguousDataAddrField() - fej9->getOffsetOfContiguousDataAddrField()) == 8, - "Offset of dataAddr field in discontiguous array is expected to be 8 bytes more than contiguous array. " - "But was %d bytes for discontiguous and %d bytes for contiguous array.\n", + fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(), + "dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. " + "But was %d bytes for discontiguous and %d bytes for contiguous array.\n", fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField()); - - offsetReg = cg->allocateRegister(); - // Invert enumReg sign. 0 and negative numbers remain unchanged. - iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGFR, node, offsetReg, enumReg, iCursor); - iCursor = generateRSInstruction(cg, TR::InstOpCode::SRLG, node, dataSizeReg, offsetReg, 63, iCursor); - iCursor = generateRSInstruction(cg, TR::InstOpCode::SLLG, node, offsetReg, dataSizeReg, 3, iCursor); - // Inverting the sign bit will leave us with either -8 (if enumCopyReg > 0) or 0 (if enumCopyReg == 0). - iCursor = generateRREInstruction(cg, TR::InstOpCode::LNGR, node, offsetReg, offsetReg, iCursor); - - dataAddrMR = generateS390MemoryReference(resReg, offsetReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg); - dataAddrSlotMR = generateS390MemoryReference(resReg, offsetReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg); } - else if (!isVariableLen && node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0) + + if (isVariableLen) { + /* We need to check enumReg (array size) at runtime to adjust what we write to dataAddr field. + * We write NULL if array is 0 size and first data element address otherwise. + * + * We don't adjust dataAddr field offset because if we are incorrect: + * - compressed refs: we overwrite size and must be zero field in array header, + * which is going to be 0 anyways + * - full refs: we write 0s to dataAddr field + */ if (comp->getOption(TR_TraceCG)) - traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length zero size array.\n", node); + traceMsg(comp, "Node (%p): Dealing with compressed refs variable length array.\n", node); - dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.discontiguousArrayHeaderSizeInBytes(), cg); - dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg); + TR::Register *offsetReg = cg->allocateRegister(); + iCursor = generateRRInstruction(cg, TR::InstOpCode::XGR, node, offsetReg, offsetReg, iCursor); + iCursor = generateRILInstruction(cg, TR::InstOpCode::CFI, node, enumReg, 0, iCursor); + + // Load address of first array element + iCursor = generateRXInstruction(cg, + TR::InstOpCode::LA, + node, + dataSizeReg, + generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), + iCursor); + // Use cc from CFI to load NULL into dataSizeReg for 0 size array + iCursor = generateRRFInstruction(cg, TR::InstOpCode::LOCGR, node, dataSizeReg, offsetReg, getMaskForBranchCondition(TR::InstOpCode::COND_BE), true, iCursor); + + // Write element address to dataAddr field + iCursor = generateRXInstruction(cg, + TR::InstOpCode::STG, + node, + dataSizeReg, + generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg), + iCursor); + + conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny); + cg->stopUsingRegister(offsetReg); } - else + else if (node->getFirstChild()->getOpCode().isLoadConst() && node->getFirstChild()->getInt() == 0) { if (comp->getOption(TR_TraceCG)) - { - traceMsg(comp, - "Node (%p): Dealing with either full/compressed refs fixed length non-zero size array or full refs variable length array.\n", - node); - } + traceMsg(comp, "Node (%p): Dealing with fixed length 0 size array.\n", node); - if (!TR::Compiler->om.compressObjectReferences()) + if (!comp->getOption(TR_DisableDualTLH) && node->canSkipZeroInitialization()) { - TR_ASSERT_FATAL_WITH_NODE(node, - fej9->getOffsetOfDiscontiguousDataAddrField() == fej9->getOffsetOfContiguousDataAddrField(), - "dataAddr field offset is expected to be same for both contiguous and discontiguous arrays in full refs. " - "But was %d bytes for discontiguous and %d bytes for contiguous array.\n", - fej9->getOffsetOfDiscontiguousDataAddrField(), fej9->getOffsetOfContiguousDataAddrField()); + if (comp->getOption(TR_TraceCG)) + traceMsg(comp, "Node (%p): Clean out dataAddr field.\n", node); + + uint16_t bytesToClear = static_cast(TR::Compiler->om.discontiguousArrayHeaderSizeInBytes() - fej9->getOffsetOfDiscontiguousDataAddrField()); + TR_ASSERT_FATAL_WITH_NODE(node, bytesToClear == 8, "dataAddr field is expected to be 8 bytes wide, but was %d bytes.\n", bytesToClear); + + iCursor = generateSS1Instruction(cg, TR::InstOpCode::XC, + node, + bytesToClear - 1, + generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg), + generateS390MemoryReference(resReg, fej9->getOffsetOfDiscontiguousDataAddrField(), cg), + iCursor); } - - dataAddrMR = generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg); - dataAddrSlotMR = generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg); } - - iCursor = generateRXInstruction(cg, TR::InstOpCode::LA, node, dataSizeReg, dataAddrMR, iCursor); - iCursor = generateRXInstruction(cg, TR::InstOpCode::STG, node, dataSizeReg, dataAddrSlotMR, iCursor); - - if (offsetReg) + else { - conditions->addPostCondition(offsetReg, TR::RealRegister::AssignAny); - cg->stopUsingRegister(offsetReg); + if (comp->getOption(TR_TraceCG)) + traceMsg(comp, "Node (%p): Dealing with full/compressed refs fixed length non-zero size array.\n", node); + + // Load address of first array element + iCursor = generateRXInstruction(cg, + TR::InstOpCode::LA, + node, + dataSizeReg, + generateS390MemoryReference(resReg, TR::Compiler->om.contiguousArrayHeaderSizeInBytes(), cg), + iCursor); + // Write first data element address to dataAddr field + iCursor = generateRXInstruction(cg, + TR::InstOpCode::STG, + node, + dataSizeReg, + generateS390MemoryReference(resReg, fej9->getOffsetOfContiguousDataAddrField(), cg), + iCursor); } } #endif /* J9VM_GC_SPARSE_HEAP_ALLOCATION */