//===----- HexagonShuffler.cpp - Instruction bundle shuffling -------------===// // // The LLVM Compiler Infrastructure // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // This implements the shuffling of insns inside a bundle according to the // packet formation rules of the Hexagon ISA. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "hexagon-shuffle" #include #include #include "Hexagon.h" #include "MCTargetDesc/HexagonBaseInfo.h" #include "MCTargetDesc/HexagonMCTargetDesc.h" #include "MCTargetDesc/HexagonMCInstrInfo.h" #include "HexagonShuffler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" using namespace llvm_ks; namespace { // Insn shuffling priority. class HexagonBid { // The priority is directly proportional to how restricted the insn is based // on its flexibility to run on the available slots. So, the fewer slots it // may run on, the higher its priority. enum { MAX = 360360 }; // LCD of 1/2, 1/3, 1/4,... 1/15. unsigned Bid; public: HexagonBid() : Bid(0){}; HexagonBid(unsigned B) { Bid = B ? MAX / countPopulation(B) : 0; }; // Check if the insn priority is overflowed. bool isSold() const { return (Bid >= MAX); }; HexagonBid &operator+=(const HexagonBid &B) { Bid += B.Bid; return *this; }; }; // Slot shuffling allocation. class HexagonUnitAuction { HexagonBid Scores[HEXAGON_PACKET_SIZE]; // Mask indicating which slot is unavailable. unsigned isSold : HEXAGON_PACKET_SIZE; public: HexagonUnitAuction() : isSold(0){}; // Allocate slots. bool bid(unsigned B) { // Exclude already auctioned slots from the bid. unsigned b = B & ~isSold; if (b) { for (unsigned i = 0; i < HEXAGON_PACKET_SIZE; ++i) if (b & (1 << i)) { // Request candidate slots. Scores[i] += HexagonBid(b); isSold |= Scores[i].isSold() << i; } return true; ; } else // Error if the desired slots are already full. return false; }; }; } // end anonymous namespace unsigned HexagonResource::setWeight(unsigned s) { const unsigned SlotWeight = 8; const unsigned MaskWeight = SlotWeight - 1; bool Key = (1 << s) & getUnits(); // TODO: Improve this API so that we can prevent misuse statically. assert(SlotWeight * s < 32 && "Argument to setWeight too large."); // Calculate relative weight of the insn for the given slot, weighing it the // heavier the more restrictive the insn is and the lowest the slots that the // insn may be executed in. Weight = (Key << (SlotWeight * s)) * ((MaskWeight - countPopulation(getUnits())) << countTrailingZeros(getUnits())); return (Weight); } void HexagonCVIResource::SetupTUL(TypeUnitsAndLanes *TUL, StringRef CPU) { (*TUL)[HexagonII::TypeCVI_VA] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); (*TUL)[HexagonII::TypeCVI_VM_LD] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_VM_ST] = UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); } HexagonCVIResource::HexagonCVIResource(TypeUnitsAndLanes *TUL, MCInstrInfo const &MCII, unsigned s, MCInst const *id) : HexagonResource(s), TUL(TUL) { unsigned T = HexagonMCInstrInfo::getType(MCII, *id); if (TUL->count(T)) { // For an HVX insn. Valid = true; setUnits((*TUL)[T].first); setLanes((*TUL)[T].second); setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); } else { // For core insns. Valid = false; setUnits(0); setLanes(0); setLoad(false); setStore(false); } } HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { reset(); HexagonCVIResource::SetupTUL(&TUL, STI.getCPU()); } void HexagonShuffler::reset() { Packet.clear(); BundleFlags = 0; Error = SHUFFLE_SUCCESS; } void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { HexagonInstr PI(&TUL, MCII, ID, Extender, S, X); Packet.push_back(PI); } /// Check that the packet is legal and enforce relative insn order. bool HexagonShuffler::check() { // Descriptive slot masks. const unsigned slotSingleLoad = 0x1, slotSingleStore = 0x1, slotOne = 0x2, slotThree = 0x8, slotFirstJump = 0x8, slotLastJump = 0x4, slotFirstLoadStore = 0x2, slotLastLoadStore = 0x1; // Highest slots for branches and stores used to keep their original order. unsigned slotJump = slotFirstJump; unsigned slotLoadStore = slotFirstLoadStore; // Number of branches, solo branches, indirect branches. unsigned jumps = 0, jump1 = 0, jumpr = 0; // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; // Number of HVX loads, HVX stores. unsigned CVIloads = 0, CVIstores = 0; // Number of duplex insns, solo insns. unsigned duplex = 0, solo = 0; // Number of insns restricting other insns in the packet to A and X types, // which is neither A or X types. unsigned onlyAX = 0, neitherAnorX = 0; // Number of insns restricting other insns in slot #1 to A type. unsigned onlyAin1 = 0; // Number of insns restricting any insn in slot #1, except A2_nop. unsigned onlyNo1 = 0; unsigned xtypeFloat = 0; unsigned pSlot3Cnt = 0; iterator slot3ISJ = end(); // Collect information from the insns in the packet. for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const *ID = ISJ->getDesc(); if (HexagonMCInstrInfo::isSolo(MCII, *ID)) solo += !ISJ->isSoloException(); else if (HexagonMCInstrInfo::isSoloAX(MCII, *ID)) onlyAX += !ISJ->isSoloException(); else if (HexagonMCInstrInfo::isSoloAin1(MCII, *ID)) onlyAin1 += !ISJ->isSoloException(); if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32 && HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeXTYPE) ++neitherAnorX; if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) { ++pSlot3Cnt; slot3ISJ = ISJ; } switch (HexagonMCInstrInfo::getType(MCII, *ID)) { case HexagonII::TypeXTYPE: if (HexagonMCInstrInfo::isFloat(MCII, *ID)) ++xtypeFloat; break; case HexagonII::TypeJR: ++jumpr; // Fall-through. case HexagonII::TypeJ: ++jumps; break; case HexagonII::TypeCVI_VM_VP_LDU: ++onlyNo1; case HexagonII::TypeCVI_VM_LD: case HexagonII::TypeCVI_VM_TMP_LD: case HexagonII::TypeCVI_VM_CUR_LD: ++CVIloads; case HexagonII::TypeLD: ++loads; ++memory; if (ISJ->Core.getUnits() == slotSingleLoad) ++load0; if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn()) ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. break; case HexagonII::TypeCVI_VM_STU: ++onlyNo1; case HexagonII::TypeCVI_VM_ST: case HexagonII::TypeCVI_VM_NEW_ST: ++CVIstores; case HexagonII::TypeST: ++stores; ++memory; if (ISJ->Core.getUnits() == slotSingleStore) ++store0; break; case HexagonII::TypeMEMOP: ++loads; ++stores; ++store1; ++memory; break; case HexagonII::TypeNV: ++memory; // NV insns are memory-like. if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch()) ++jumps, ++jump1; break; case HexagonII::TypeCR: // Legacy conditional branch predicated on a register. case HexagonII::TypeSYSTEM: if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) ++loads; break; } } // Check if the packet is legal. if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { Error = SHUFFLE_ERROR_INVALID; return false; } if (jump1 && jumps > 1) { // Error if single branch with another branch. Error = SHUFFLE_ERROR_BRANCHES; return false; } // Modify packet accordingly. // TODO: need to reserve slots #0 and #1 for duplex insns. bool bOnlySlot3 = false; for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const *ID = ISJ->getDesc(); if (!ISJ->Core.getUnits()) { // Error if insn may not be executed in any slot. Error = SHUFFLE_ERROR_UNKNOWN; return false; } // Exclude from slot #1 any insn but A2_nop. if (HexagonMCInstrInfo::getDesc(MCII, *ID).getOpcode() != Hexagon::A2_nop) if (onlyNo1) ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); // Exclude from slot #1 any insn but A-type. if (HexagonMCInstrInfo::getType(MCII, *ID) != HexagonII::TypeALU32) if (onlyAin1) ISJ->Core.setUnits(ISJ->Core.getUnits() & ~slotOne); // Branches must keep the original order. if (HexagonMCInstrInfo::getDesc(MCII, *ID).isBranch() || HexagonMCInstrInfo::getDesc(MCII, *ID).isCall()) if (jumps > 1) { if (jumpr || slotJump < slotLastJump) { // Error if indirect branch with another branch or // no more slots available for branches. Error = SHUFFLE_ERROR_BRANCHES; return false; } // Pin the branch to the highest slot available to it. ISJ->Core.setUnits(ISJ->Core.getUnits() & slotJump); // Update next highest slot available to branches. slotJump >>= 1; } // A single load must use slot #0. if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayLoad()) { if (loads == 1 && loads == memory) // Pin the load to slot #0. ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleLoad); } // A single store must use slot #0. if (HexagonMCInstrInfo::getDesc(MCII, *ID).mayStore()) { if (!store0) { if (stores == 1) ISJ->Core.setUnits(ISJ->Core.getUnits() & slotSingleStore); else if (stores > 1) { if (slotLoadStore < slotLastLoadStore) { // Error if no more slots available for stores. Error = SHUFFLE_ERROR_STORES; return false; } // Pin the store to the highest slot available to it. ISJ->Core.setUnits(ISJ->Core.getUnits() & slotLoadStore); // Update the next highest slot available to stores. slotLoadStore >>= 1; } } if (store1 && stores > 1) { // Error if a single store with another store. Error = SHUFFLE_ERROR_STORES; return false; } } // flag if an instruction can only be executed in slot 3 if (ISJ->Core.getUnits() == slotThree) bOnlySlot3 = true; if (!ISJ->Core.getUnits()) { // Error if insn may not be executed in any slot. Error = SHUFFLE_ERROR_NOSLOTS; return false; } } bool validateSlots = true; if (bOnlySlot3 == false && pSlot3Cnt == 1 && slot3ISJ != end()) { // save off slot mask of instruction marked with A_PREFER_SLOT3 // and then pin it to slot #3 unsigned saveUnits = slot3ISJ->Core.getUnits(); slot3ISJ->Core.setUnits(saveUnits & slotThree); HexagonUnitAuction AuctionCore; std::sort(begin(), end(), HexagonInstr::lessCore); // see if things ok with that instruction being pinned to slot #3 bool bFail = false; for (iterator I = begin(); I != end() && bFail != true; ++I) if (!AuctionCore.bid(I->Core.getUnits())) bFail = true; // if yes, great, if not then restore original slot mask if (!bFail) validateSlots = false; // all good, no need to re-do auction else for (iterator ISJ = begin(); ISJ != end(); ++ISJ) { MCInst const *ID = ISJ->getDesc(); if (HexagonMCInstrInfo::prefersSlot3(MCII, *ID)) ISJ->Core.setUnits(saveUnits); } } // Check if any slot, core, is over-subscribed. // Verify the core slot subscriptions. if (validateSlots) { HexagonUnitAuction AuctionCore; std::sort(begin(), end(), HexagonInstr::lessCore); for (iterator I = begin(); I != end(); ++I) if (!AuctionCore.bid(I->Core.getUnits())) { Error = SHUFFLE_ERROR_SLOTS; return false; } } // Verify the CVI slot subscriptions. { HexagonUnitAuction AuctionCVI; std::sort(begin(), end(), HexagonInstr::lessCVI); for (iterator I = begin(); I != end(); ++I) for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid? if (!AuctionCVI.bid(I->CVI.getUnits() << i)) { Error = SHUFFLE_ERROR_SLOTS; return false; } } Error = SHUFFLE_SUCCESS; return true; } bool HexagonShuffler::shuffle() { if (size() > HEXAGON_PACKET_SIZE) { // Ignore a packet with with more than what a packet can hold // or with compound or duplex insns for now. Error = SHUFFLE_ERROR_INVALID; return false; } // Check and prepare packet. if (size() > 1 && check()) // Reorder the handles for each slot. for (unsigned nSlot = 0, emptySlots = 0; nSlot < HEXAGON_PACKET_SIZE; ++nSlot) { iterator ISJ, ISK; unsigned slotSkip, slotWeight; // Prioritize the handles considering their restrictions. for (ISJ = ISK = Packet.begin(), slotSkip = slotWeight = 0; ISK != Packet.end(); ++ISK, ++slotSkip) if (slotSkip < nSlot - emptySlots) // Note which handle to begin at. ++ISJ; else // Calculate the weight of the slot. slotWeight += ISK->Core.setWeight(HEXAGON_PACKET_SIZE - nSlot - 1); if (slotWeight) // Sort the packet, favoring source order, // beginning after the previous slot. std::sort(ISJ, Packet.end()); else // Skip unused slot. ++emptySlots; } for (iterator ISJ = begin(); ISJ != end(); ++ISJ) DEBUG(dbgs().write_hex(ISJ->Core.getUnits()); dbgs() << ':' << HexagonMCInstrInfo::getDesc(MCII, *ISJ->getDesc()) .getOpcode(); dbgs() << '\n'); DEBUG(dbgs() << '\n'); return (!getError()); }