001/* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013package org.jikesrvm.compilers.opt.controlflow; 014 015import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode; 016import static org.jikesrvm.compilers.opt.ir.Operators.GOTO; 017import static org.jikesrvm.compilers.opt.ir.Operators.GOTO_opcode; 018import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD; 019import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode; 020import static org.jikesrvm.compilers.opt.ir.Operators.INT_AND; 021import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP; 022import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP_opcode; 023import static org.jikesrvm.compilers.opt.ir.Operators.INT_MOVE; 024import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB; 025 026import java.lang.reflect.Constructor; 027import java.util.Enumeration; 028import java.util.HashMap; 029import java.util.Map; 030 031import org.jikesrvm.VM; 032import org.jikesrvm.compilers.opt.DefUse; 033import org.jikesrvm.compilers.opt.OptOptions; 034import org.jikesrvm.compilers.opt.Simple; 035import org.jikesrvm.compilers.opt.driver.CompilerPhase; 036import org.jikesrvm.compilers.opt.ir.BasicBlock; 037import org.jikesrvm.compilers.opt.ir.Binary; 038import org.jikesrvm.compilers.opt.ir.ExceptionHandlerBasicBlock; 039import org.jikesrvm.compilers.opt.ir.Goto; 040import org.jikesrvm.compilers.opt.ir.GuardResultCarrier; 041import org.jikesrvm.compilers.opt.ir.GuardedUnary; 042import org.jikesrvm.compilers.opt.ir.IR; 043import org.jikesrvm.compilers.opt.ir.IfCmp; 044import org.jikesrvm.compilers.opt.ir.Instruction; 045import org.jikesrvm.compilers.opt.ir.Label; 046import org.jikesrvm.compilers.opt.ir.Move; 047import org.jikesrvm.compilers.opt.ir.Register; 048import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand; 049import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand; 050import org.jikesrvm.compilers.opt.ir.operand.ConstantOperand; 051import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand; 052import org.jikesrvm.compilers.opt.ir.operand.Operand; 053import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand; 054import org.jikesrvm.compilers.opt.util.GraphNode; 055import org.jikesrvm.util.BitVector; 056 057public class LoopUnrolling extends CompilerPhase { 058 059 static final boolean DEBUG = false; 060 static final int MAX_BLOCKS_FOR_NAIVE_UNROLLING = 20; 061 062 private final Map<BasicBlock, BasicBlock> copiedBlocks; 063 private int theVisit = 1; 064 private Map<Instruction, Integer> visitInts; 065 066 public LoopUnrolling() { 067 copiedBlocks = new HashMap<BasicBlock, BasicBlock>(); 068 } 069 070 /** 071 * Returns the name of the phase. 072 */ 073 @Override 074 public String getName() { 075 return "Loop Unrolling"; 076 } 077 078 /** 079 * Constructor for this compiler phase 080 */ 081 private static final Constructor<CompilerPhase> constructor = 082 getCompilerPhaseConstructor(LoopUnrolling.class); 083 084 /** 085 * Get a constructor object for this compiler phase 086 * @return compiler phase constructor 087 */ 088 @Override 089 public Constructor<CompilerPhase> getClassConstructor() { 090 return constructor; 091 } 092 093 /** 094 * This phase is disabled by default. 095 * <p> 096 * It will run only on O3 but O2 is the default maximum optimization level. 097 */ 098 @Override 099 public boolean shouldPerform(OptOptions options) { 100 return ((options.getOptLevel() >= 3) && (options.CONTROL_UNROLL_LOG >= 1) && (!options.SSA_LOOP_VERSIONING)); 101 } 102 103 @Override 104 public void perform(IR ir) { 105 unrollFactor = (1 << ir.options.CONTROL_UNROLL_LOG); 106 107 if (ir.hasReachableExceptionHandlers()) return; 108 DefUse.computeDU(ir); 109 new Simple(1, true, true, true, false).perform(ir); 110 new BranchOptimizations(-1, true, true).perform(ir, true); 111 112 //new CFGTransformations().perform(ir); 113 // Note: the following unfactors the CFG 114 new DominatorsPhase(true).perform(ir); 115 DefUse.computeDU(ir); 116 117 visitInts = new HashMap<Instruction, Integer>(); 118 Enumeration<Instruction> instEnum = ir.forwardInstrEnumerator(); 119 while (instEnum.hasMoreElements()) { 120 Instruction inst = instEnum.nextElement(); 121 visitInts.put(inst, Integer.valueOf(0)); 122 } 123 124 unrollLoops(ir); 125 126 CFGTransformations.splitCriticalEdges(ir); 127 ir.cfg.compactNodeNumbering(); 128 } 129 130 void unrollLoops(IR ir) { 131 LSTGraph lstg = ir.HIRInfo.loopStructureTree; 132 133 for (int i = 1; lstg != null && i <= 1; ++i) { 134 unrollLoopTree((LSTNode) lstg.firstNode(), ir, i); 135 (new BuildLST()).perform(ir); 136 } 137 } 138 139 int unrollLoopTree(LSTNode t, IR ir, int target) { 140 int height = 1; 141 Enumeration<GraphNode> e = t.outNodes(); 142 if (!e.hasMoreElements()) { 143 if (t.loop != null) { 144 report("Leaf loop in " + ir.method + ": " + t.loop + "\n"); 145 // check infrequency 146 if (t.header.getInfrequent()) { 147 report("no unrolling of infrequent loop\n"); 148 } else { 149 boolean doNaiveUnrolling = height == target && unrollLeaf(t, ir); 150 if (doNaiveUnrolling) naiveUnroller(t, ir); 151 } 152 } 153 } else { 154 while (e.hasMoreElements()) { 155 LSTNode n = (LSTNode) e.nextElement(); 156 int heightOfTree = unrollLoopTree(n, ir, target); 157 height = Math.max(height, heightOfTree) + 1; 158 } 159 } 160 return height; 161 } 162 163 static final int MaxInstructions = 100; 164 private int unrollFactor = 1; 165 166 boolean unrollLeaf(LSTNode t, IR ir) { 167 int instructionsInLoop = 0; 168 BasicBlock exitBlock = null, backEdgeBlock = null, succBlock = null, predBlock = null; 169 BitVector nloop = t.loop; 170 BasicBlock header = t.header; 171 Instruction tmp; 172 173 if (ir.hasReachableExceptionHandlers()) { 174 report("0 IR may have exception handlers\n"); 175 return false; 176 } 177 178 // determine loop structure by looking at its blocks 179 Enumeration<BasicBlock> loopBlocks = ir.getBasicBlocks(nloop); 180 int blocks = 0; 181 while (loopBlocks.hasMoreElements()) { 182 BasicBlock b = loopBlocks.nextElement(); 183 blocks++; 184 185 // check for size 186 instructionsInLoop += b.getNumberOfRealInstructions(); 187 if (instructionsInLoop > MaxInstructions) { 188 report("1 is too big\n"); 189 return false; 190 } 191 192 // look at the in edges. We want the header to be the only 193 // block with out of loop incoming edges. 194 Enumeration<BasicBlock> e = b.getIn(); 195 if (b != header) { 196 while (e.hasMoreElements()) { 197 BasicBlock o = e.nextElement(); 198 if (!CFGTransformations.inLoop(o, nloop)) { 199 report("2 interior pointers.\n"); 200 return true; 201 } 202 } 203 } else { 204 // check the headers predecessors: there should be 205 // one out of loop input and one backedge. 206 // We can extend this for loops with several backedges, 207 // if they all have the same conditions. 208 int inEdges = 0; 209 while (e.hasMoreElements()) { 210 inEdges++; 211 BasicBlock o = e.nextElement(); 212 if (!CFGTransformations.inLoop(o, nloop)) { 213 if (predBlock == null) { 214 predBlock = o; 215 } else { 216 report("3 multi entry header.\n"); 217 return true; 218 } 219 } else { 220 if (backEdgeBlock == null) { 221 backEdgeBlock = o; 222 } else { 223 report("4 multiple back edges.\n"); 224 return true; 225 } 226 } 227 } 228 } 229 230 // look at the out edges to find loop exits 231 e = b.getOut(); 232 while (e.hasMoreElements()) { 233 BasicBlock out = e.nextElement(); 234 if (!CFGTransformations.inLoop(out, nloop)) { 235 if (exitBlock == null) { 236 exitBlock = b; 237 } else { 238 report("5 multiple exit blocks.\n"); 239 return true; 240 } 241 } 242 } 243 } 244 245 // exitBlock must equal backEdgeBlock 246 if (exitBlock == null) { 247 report("6 no exit block found...infinite loop?"); 248 return true; 249 } 250 if (exitBlock != backEdgeBlock) { 251 report("7 exit block is not immediate predecessor of loop head"); 252 return true; 253 } 254 255 // exitBlock must exit (skip over pads in critical edges) 256 while (exitBlock.getNumberOfOut() == 1 && exitBlock.getNumberOfIn() == 1) { 257 exitBlock = exitBlock.getIn().nextElement(); 258 } 259 260 if (exitBlock == header && blocks > 1) { 261 report("6 while loop? (" + blocks + ")\n"); 262 return true; 263 } 264 265 // So far, so good. Examine the exit test. 266 Instruction origBranch = exitBlock.firstBranchInstruction(); 267 if (origBranch != exitBlock.lastRealInstruction()) { 268 Instruction aGoto = origBranch.nextInstructionInCodeOrder(); 269 if (aGoto.getOpcode() != GOTO_opcode) { 270 report("7 too complex exit\n"); 271 return true; 272 } 273 succBlock = Label.getBlock(Goto.getTarget(aGoto).target).block; 274 if (VM.VerifyAssertions) { 275 VM._assert(aGoto == exitBlock.lastRealInstruction()); 276 } 277 } else { 278 succBlock = exitBlock.getFallThroughBlock(); 279 } 280 281 if (origBranch.getOpcode() != INT_IFCMP_opcode) { 282 report("8 branch isn't int_ifcmp: " + origBranch.operator() + ".\n"); 283 return true; 284 } 285 286 // examine operands: 287 Operand op1 = follow(IfCmp.getVal1(origBranch)); 288 Operand op2 = follow(IfCmp.getVal2(origBranch)); 289 ConditionOperand cond = (ConditionOperand) IfCmp.getCond(origBranch).copy(); 290 RegisterOperand ifcmpGuard = IfCmp.getGuardResult(origBranch); 291 float backBranchProbability = IfCmp.getBranchProfile(origBranch).takenProbability; 292 if (!loopInvariant(op2, nloop, 4)) { 293 if (loopInvariant(op1, nloop, 4)) { 294 Operand op = op1; 295 op1 = op2; 296 op2 = op; 297 cond.flipOperands(); 298 } else { 299 if (DEBUG) { 300 printDefs(op1, nloop, 4); 301 printDefs(op2, nloop, 4); 302 VM.sysWrite("" + origBranch + "\n"); 303 } 304 report("8a op1 and op2 may not be loop invariant\n"); 305 return true; 306 } 307 } 308 BasicBlock target = Label.getBlock(IfCmp.getTarget(origBranch).target).block; 309 310 if (!(op1 instanceof RegisterOperand)) { 311 report("9 op1 of ifcmp isn't a register\n"); 312 return true; 313 } 314 315 RegisterOperand rop1 = (RegisterOperand) op1; 316 317 Register reg = rop1.getRegister(); 318 if (reg.isPhysical()) { 319 report("10 loops over physical register\n"); 320 return false; 321 } 322 if (succBlock == header && !CFGTransformations.inLoop(target, nloop)) { 323 succBlock = target; 324 target = header; 325 cond.flipCode(); 326 } 327 if (target != header) { 328 report("11 ifcmp doesn't jump to header\n"); 329 return true; 330 } 331 332 Instruction iterator = null; 333 Enumeration<Operand> defs = new RealDefs(rop1); 334 while (defs.hasMoreElements()) { 335 Operand def = defs.nextElement(); 336 Instruction inst = def.instruction; 337 BasicBlock block = inst.getBasicBlock(); 338 //VM.sysWrite (""+block+": "+inst+"\n"); 339 if (CFGTransformations.inLoop(block, nloop)) { 340 if (iterator == null) { 341 iterator = inst; 342 } else { 343 report("12 iterator not unique.\n"); 344 return true; 345 } 346 } 347 } 348 349 if (iterator == null) { 350 report("15 iterator not found.\n"); 351 return true; 352 } 353 354 if (iterator.getOpcode() != INT_ADD_opcode) { 355 //dumpIR (ir, "malformed"); 356 report("16 iterator is no addition: " + iterator.operator() + "\n"); 357 return true; 358 } 359 360 if (!rop1.similar(follow(Binary.getVal1(iterator)))) { 361 //dumpIR (ir, "malformed"); 362 report("17 malformed iterator.\n" + iterator + "\n"); 363 return true; 364 } 365 366 Operand strideOp = follow(Binary.getVal2(iterator)); 367 if (!(strideOp instanceof IntConstantOperand)) { 368 report("18 stride not constant\n"); 369 return true; 370 } 371 372 int stride = ((IntConstantOperand) strideOp).value; 373 if (stride != 1 && stride != -1) { 374 report("18b stride != +/-1 (" + stride + ")\n"); 375 return true; 376 } 377 378 if ((stride == 1 && 379 ((cond.value != ConditionOperand.LESS) && 380 cond.value != ConditionOperand.LESS_EQUAL && 381 cond.value != ConditionOperand.NOT_EQUAL)) || 382 (stride == -1 && 383 ((cond.value != ConditionOperand.GREATER) && 384 cond.value != ConditionOperand.GREATER_EQUAL && 385 cond.value != ConditionOperand.NOT_EQUAL))) { 386 report("19 unexpected condition: " + cond + "\n" + iterator + "\n" + origBranch + "\n\n"); 387 return true; 388 } 389 390 RegisterOperand outerGuard; 391 BasicBlock outer = predBlock; 392 while (outer.getNumberOfOut() == 1 && outer.getNumberOfIn() == 1) { 393 outer = outer.getIn().nextElement(); 394 } 395 if (outer.getNumberOfIn() > 0 && outer.getNumberOfOut() < 2) { 396 report("23 no suitable outer guard found.\n"); 397 return true; 398 } 399 400 tmp = outer.firstBranchInstruction(); 401 if (tmp != null && GuardResultCarrier.conforms(tmp)) { 402 outerGuard = GuardResultCarrier.getGuardResult(tmp); 403 } else { 404 outerGuard = ir.regpool.makeTempValidation(); 405 } 406 407 //////////// 408 // transfom 409 410 // transform this: 411 // 412 // Orig: 413 // B 414 // if i CC b goto Orig 415 // else goto exit 416 // 417 // exit: 418 // 419 // into this: 420 // 421// 422// stride == 1: common: stride == -1: 423//-------------------------------------------------------------------------- 424// guard0: 425// limit = b; 426// if a > b goto Orig if b > a goto Orig 427// else guard1 428// 429// 430// guard 1: 431// remainder = b - a; remainder = a - b; 432// if cond == '<=' if cond == '>=' 433// remainder++; remainder++; 434// remainder = remainder & 3 435// limit = a + remainder limit = a - remainder 436// if cond == '<=' if cond == '>=' 437// limit--; limit++; 438// if remainder == 0 goto mllp 439// goto Orig 440// 441// Orig: 442// LOOP; 443// if i CC limit goto Orig 444// else guard2 445// 446// guard2: if i CC b goto mllp 447// else exit 448// 449// mllp: // landing pad 450// goto ml 451// 452// ml: 453// LOOP;LOOP;LOOP;LOOP; 454// if i CC b goto ml 455// else exit 456// 457// exit: 458//-------------------------------------------------------------------------- 459 report("...transforming.\n"); 460 if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) { 461 dumpIR(ir, "before unroll"); 462 } 463 464 CFGTransformations.killFallThroughs(ir, nloop); 465 BasicBlock[] handles = makeSomeCopies(unrollFactor, ir, nloop, blocks, header, exitBlock, exitBlock); 466 BasicBlock mainHeader = handles[0]; 467 BasicBlock mainExit = handles[1]; 468 469 // test block for well formed bounds 470 BasicBlock guardBlock0 = header.createSubBlock(header.firstInstruction().bcIndex, ir); 471 predBlock.redirectOuts(header, guardBlock0, ir); 472 473 // test block for iteration alignemnt 474 BasicBlock guardBlock1 = header.createSubBlock(header.firstInstruction().bcIndex, ir); 475 476 // landing pad for orig loop 477 BasicBlock olp = header.createSubBlock(header.firstInstruction().bcIndex, ir); 478 olp.setLandingPad(); 479 480 BasicBlock predSucc = predBlock.nextBasicBlockInCodeOrder(); 481 if (predSucc != null) { 482 ir.cfg.breakCodeOrder(predBlock, predSucc); 483 ir.cfg.linkInCodeOrder(olp, predSucc); 484 } 485 ir.cfg.linkInCodeOrder(predBlock, guardBlock0); 486 ir.cfg.linkInCodeOrder(guardBlock0, guardBlock1); 487 ir.cfg.linkInCodeOrder(guardBlock1, olp); 488 489 // guard block for main loop 490 BasicBlock guardBlock2 = header.createSubBlock(header.firstInstruction().bcIndex, ir); 491 492 // landing pad for main loop 493 BasicBlock landingPad = header.createSubBlock(header.firstInstruction().bcIndex, ir); 494 landingPad.setLandingPad(); 495 496 BasicBlock mainLoop = exitBlock.nextBasicBlockInCodeOrder(); 497 ir.cfg.breakCodeOrder(exitBlock, mainLoop); 498 ir.cfg.linkInCodeOrder(exitBlock, guardBlock2); 499 ir.cfg.linkInCodeOrder(guardBlock2, landingPad); 500 ir.cfg.linkInCodeOrder(landingPad, mainLoop); 501 502 RegisterOperand remainder = ir.regpool.makeTemp(rop1.getType()); 503 RegisterOperand limit = ir.regpool.makeTemp(rop1.getType()); 504 505 // test whether a <= b for stride == 1 and a >= b for stride == -1 506 tmp = guardBlock0.lastInstruction(); 507 tmp.insertBefore(Move.create(INT_MOVE, limit, op2.copy())); 508 509 ConditionOperand g0cond = ConditionOperand.GREATER_EQUAL(); 510 if (stride == -1) g0cond = ConditionOperand.LESS_EQUAL(); 511 512 tmp.insertBefore(IfCmp.create(INT_IFCMP, 513 outerGuard.copyD2D(), 514 rop1.copyD2U(), 515 op2.copy(), 516 g0cond, 517 olp.makeJumpTarget(), 518 BranchProfileOperand.unlikely())); 519 tmp.insertBefore(Goto.create(GOTO, guardBlock1.makeJumpTarget())); 520 521 // align the loop iterations 522 tmp = guardBlock1.lastInstruction(); 523 if (stride == 1) { 524 tmp.insertBefore(Binary.create(INT_SUB, remainder, op2.copy(), rop1.copyD2U())); 525 } else { 526 tmp.insertBefore(Binary.create(INT_SUB, remainder, rop1.copyD2U(), op2.copy())); 527 } 528 529 if (cond.isGREATER_EQUAL() || cond.isLESS_EQUAL()) { 530 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1))); 531 } 532 533 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(-1))); 534 535 tmp.insertBefore(Binary.create(INT_AND, 536 remainder.copyD2D(), 537 remainder.copyD2U(), 538 new IntConstantOperand(unrollFactor - 1))); 539 540 tmp.insertBefore(Binary.create(INT_ADD, remainder.copyD2D(), remainder.copyD2U(), new IntConstantOperand(1))); 541 542 if (stride == 1) { 543 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2U(), op1.copy(), remainder.copyD2U())); 544 } else { 545 tmp.insertBefore(Binary.create(INT_SUB, limit.copyD2U(), op1.copy(), remainder.copyD2U())); 546 } 547 548 if (cond.isLESS_EQUAL()) { 549 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(-1))); 550 } 551 552 if (cond.isGREATER_EQUAL()) { 553 tmp.insertBefore(Binary.create(INT_ADD, limit.copyD2D(), limit.copyD2U(), new IntConstantOperand(1))); 554 } 555 556 tmp.insertBefore(Goto.create(GOTO, olp.makeJumpTarget())); 557 558 // build landing pad for original loop 559 tmp = olp.lastInstruction(); 560 tmp.insertBefore(Goto.create(GOTO, header.makeJumpTarget())); 561 562 // change the back branch in the original loop 563 deleteBranches(exitBlock); 564 tmp = exitBlock.lastInstruction(); 565 tmp.insertBefore(IfCmp.create(INT_IFCMP, 566 outerGuard.copyD2D(), 567 rop1.copyU2U(), 568 limit.copyD2U(), 569 (ConditionOperand) cond.copy(), 570 header.makeJumpTarget(), 571 new BranchProfileOperand(1.0f - 1.0f / (unrollFactor / 2)))); 572 tmp.insertBefore(Goto.create(GOTO, guardBlock2.makeJumpTarget())); 573 574 // only enter main loop if iterations left 575 tmp = guardBlock2.lastInstruction(); 576 tmp.insertBefore(IfCmp.create(INT_IFCMP, 577 outerGuard.copyD2D(), 578 rop1.copyU2U(), 579 op2.copy(), 580 (ConditionOperand) cond.copy(), 581 landingPad.makeJumpTarget(), 582 new BranchProfileOperand(backBranchProbability))); 583 tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget())); 584 585 // landing pad jumps to mainHeader 586 tmp = landingPad.lastInstruction(); 587 tmp.insertBefore(Goto.create(GOTO, mainHeader.makeJumpTarget())); 588 589 // repair back edge in mainExit 590 if (VM.VerifyAssertions) VM._assert(mainExit != null); 591 tmp = mainExit.lastInstruction(); 592 if (VM.VerifyAssertions) { 593 VM._assert((mainExit.lastRealInstruction() == null) || !mainExit.lastRealInstruction().isBranch()); 594 } 595 tmp.insertBefore(IfCmp.create(INT_IFCMP, 596 ifcmpGuard.copyU2U(), 597 rop1.copyU2U(), 598 op2.copy(), 599 (ConditionOperand) cond.copy(), 600 mainHeader.makeJumpTarget(), 601 new BranchProfileOperand(1.0f - (1.0f - backBranchProbability) * unrollFactor))); 602 tmp.insertBefore(Goto.create(GOTO, succBlock.makeJumpTarget())); 603 604 // recompute normal outs 605 guardBlock0.recomputeNormalOut(ir); 606 guardBlock1.recomputeNormalOut(ir); 607 olp.recomputeNormalOut(ir); 608 guardBlock2.recomputeNormalOut(ir); 609 exitBlock.recomputeNormalOut(ir); 610 landingPad.recomputeNormalOut(ir); 611 mainExit.recomputeNormalOut(ir); 612 if (DEBUG && ir.options.hasMETHOD_TO_PRINT() && ir.options.fuzzyMatchMETHOD_TO_PRINT(ir.method.toString())) { 613 dumpIR(ir, "after unroll"); 614 } 615 return false; 616 } 617 618 private void naiveUnroller(LSTNode t, IR ir) { 619 BitVector nloop = t.loop; 620 BasicBlock seqStart = null; 621 Enumeration<BasicBlock> bs; 622 623 if (t.loop.populationCount() > MAX_BLOCKS_FOR_NAIVE_UNROLLING) { 624 report("1 is too big\n"); 625 return; 626 } 627 report("Naively unrolling\n"); 628 629 CFGTransformations.killFallThroughs(ir, nloop); 630 631 // first, capture the blocks in the loop body. 632 int bodyBlocks = nloop.populationCount(); 633 BasicBlock[] body = new BasicBlock[bodyBlocks]; 634 { 635 int i = 0; 636 bs = ir.getBasicBlocks(nloop); 637 while (bs.hasMoreElements()) { 638 BasicBlock b = bs.nextElement(); 639 if (VM.VerifyAssertions) { 640 VM._assert(!(b instanceof ExceptionHandlerBasicBlock)); 641 } 642 body[i++] = b; 643 BasicBlock next = b.nextBasicBlockInCodeOrder(); 644 if (next == null || !CFGTransformations.inLoop(next, nloop)) { 645 seqStart = b; // end of loop in code order 646 } 647 } 648 } 649 650 BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder(); 651 if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd); 652 BasicBlock seqLast = seqStart; 653 BasicBlock firstHeaderCopy = null; 654 BasicBlock currentBlock = seqLast; 655 656 for (int i = 1; i <= unrollFactor; ++i) { 657 658 // copy body 659 for (BasicBlock bb : body) { 660 seqLast = copyAndLinkBlock(ir, seqLast, bb); 661 if (bb == t.header) { 662 if (firstHeaderCopy == null) { 663 firstHeaderCopy = seqLast; 664 } 665 } 666 } 667 668 // redirect internal branches 669 currentBlock = seqLast; 670 for (int j = 0; j < bodyBlocks; ++j) { 671 currentBlock.recomputeNormalOut(ir); 672 Enumeration<BasicBlock> be = currentBlock.getOut(); 673 while (be.hasMoreElements()) { 674 BasicBlock out = be.nextElement(); 675 if (out != t.header && CFGTransformations.inLoop(out, nloop)) { 676 BasicBlock outCopy = copiedBlocks.get(out); 677 currentBlock.redirectOuts(out, outCopy, ir); 678 } 679 } 680 currentBlock.recomputeNormalOut(ir); 681 currentBlock = currentBlock.prevBasicBlockInCodeOrder(); 682 } 683 684 if (i != 1) { 685 // redirect the branches to the header in the (i-1)th copy 686 for (int j = 0; j < bodyBlocks; ++j) { 687 Enumeration<BasicBlock> be = currentBlock.getOut(); 688 while (be.hasMoreElements()) { 689 BasicBlock out = be.nextElement(); 690 if (out == t.header) { 691 BasicBlock headerCopy; 692 headerCopy = copiedBlocks.get(t.header); 693 currentBlock.redirectOuts(t.header, headerCopy, ir); 694 } 695 } 696 currentBlock.recomputeNormalOut(ir); 697 currentBlock = currentBlock.prevBasicBlockInCodeOrder(); 698 } 699 } 700 } 701 if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd); 702 703 // in the original loop, redirect branches that go to the header 704 // and make them point to the first header copy 705 706 for (int j = 0; j < bodyBlocks; ++j) { 707 Enumeration<BasicBlock> be = body[j].getOut(); 708 while (be.hasMoreElements()) { 709 BasicBlock out = be.nextElement(); 710 if (out == t.header) { 711 body[j].redirectOuts(t.header, firstHeaderCopy, ir); 712 } 713 } 714 body[j].recomputeNormalOut(ir); 715 } 716 717 // the following loop redirects backedges that start in the last 718 // copy to point to the first copy instead and not to the original 719 // header. 720 // | | 721 // Thus we get [ ] instead of [ ]<-. 722 // | | | 723 // [ ]<-. [ ] | 724 // | | | | 725 // [ ] | [ ] | 726 // | | | | 727 // [ ] | [ ] | 728 // |\_/ |\_/ 729 // 730 // Instead of 2^(unroll_log) we only have 2^(unroll_log-1) bodies 731 // in the unrolled loop, but there is one copy of the loop's body 732 // that dominates the unrolled version. Peeling of this first 733 // version should have benefits for global code placement. 734 currentBlock = seqLast; 735 for (int j = 0; j < bodyBlocks; ++j) { 736 Enumeration<BasicBlock> be = currentBlock.getOut(); 737 while (be.hasMoreElements()) { 738 BasicBlock out = be.nextElement(); 739 if (out == t.header) { 740 currentBlock.redirectOuts(t.header, firstHeaderCopy, ir); 741 } 742 } 743 currentBlock.recomputeNormalOut(ir); 744 currentBlock = currentBlock.prevBasicBlockInCodeOrder(); 745 } 746 } 747 748 static void report(String s) { 749 if (DEBUG) VM.sysWrite("] " + s); 750 } 751 752 private Operand follow(Operand use) { 753 theVisit++; 754 return _follow(use); 755 } 756 757 private Operand _follow(Operand use) { 758 while (true) { 759 if (!(use instanceof RegisterOperand)) return use; 760 RegisterOperand rop = (RegisterOperand) use; 761 Enumeration<RegisterOperand> defs = DefUse.defs(rop.getRegister()); 762 if (!defs.hasMoreElements()) { 763 return use; 764 } 765 Instruction def = defs.nextElement().instruction; 766 if (!Move.conforms(def)) return use; 767 if (defs.hasMoreElements()) { 768 return use; 769 } 770 771 Integer defInt = visitInts.get(def); 772 if (defInt.intValue() == theVisit) { 773 return use; 774 } 775 visitInts.put(def, Integer.valueOf(theVisit)); 776 777 use = Move.getVal(def); 778 } 779 } 780 781 private static Instruction definingInstruction(Operand op) { 782 if (!(op instanceof RegisterOperand)) return op.instruction; 783 Enumeration<RegisterOperand> defs = DefUse.defs(((RegisterOperand) op).getRegister()); 784 if (!defs.hasMoreElements()) { 785 return op.instruction; 786 } 787 Instruction def = defs.nextElement().instruction; 788 if (defs.hasMoreElements()) { 789 return op.instruction; 790 } 791 return def; 792 } 793 794 private static boolean loopInvariant(Operand op, BitVector nloop, int depth) { 795 if (depth <= 0) { 796 return false; 797 } else if (op instanceof ConstantOperand) { 798 return true; 799 } else if (op instanceof RegisterOperand) { 800 Register reg = ((RegisterOperand) op).getRegister(); 801 Enumeration<RegisterOperand> defs = DefUse.defs(reg); 802 // if no definitions of this register (very strange) give up 803 if (!defs.hasMoreElements()) return false; 804 Instruction inst = defs.nextElement().instruction; 805 // if multiple definitions of a register give up as follow may 806 // fail to give the correct invariant 807 return !defs.hasMoreElements() && !CFGTransformations.inLoop(inst.getBasicBlock(), nloop); 808 } else { 809 return false; 810 } 811 } 812 813 private static boolean printDefs(Operand op, BitVector nloop, int depth) { 814 if (depth <= 0) return false; 815 if (op instanceof ConstantOperand) { 816 VM.sysWrite(">> " + op + "\n"); 817 return true; 818 } 819 if (op instanceof RegisterOperand) { 820 boolean invariant = true; 821 Register reg = ((RegisterOperand) op).getRegister(); 822 Enumeration<RegisterOperand> defs = DefUse.defs(reg); 823 while (defs.hasMoreElements()) { 824 Instruction inst = defs.nextElement().instruction; 825 VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n"); 826 if (CFGTransformations.inLoop(inst.getBasicBlock(), nloop)) { 827 if (Move.conforms(inst)) { 828 invariant &= printDefs(Move.getVal(inst), nloop, depth - 1); 829 } else if (inst.getOpcode() == ARRAYLENGTH_opcode) { 830 invariant &= printDefs(GuardedUnary.getVal(inst), nloop, depth); 831 } else { 832 invariant = false; 833 } 834 } 835 if (!invariant) break; 836 } 837 return invariant; 838 } 839 return false; 840 } 841 842 @SuppressWarnings("unused") 843 // For debugging 844 private void _printDefs(Operand op) { 845 if (op instanceof RegisterOperand) { 846 Register reg = ((RegisterOperand) op).getRegister(); 847 Enumeration<RegisterOperand> defs = DefUse.defs(reg); 848 defs = DefUse.defs(reg); 849 while (defs.hasMoreElements()) { 850 Instruction inst = defs.nextElement().instruction; 851 if (Move.conforms(inst)) { 852 inst = definingInstruction(follow(Move.getVal(inst))); 853 } 854 VM.sysWrite(">> " + inst.getBasicBlock() + ": " + inst + "\n"); 855 } 856 } else { 857 VM.sysWrite(">> " + op + "\n"); 858 } 859 } 860 861 // inserts unrollFactor copies of the loop after seqStart 862 BasicBlock[] makeSomeCopies(int unrollFactor, IR ir, BitVector nloop, int blocks, 863 BasicBlock header, BasicBlock exitBlock, BasicBlock seqStart) { 864 // make some copies of the original loop 865 866 // first, capture the blocks in the loop body. 867 BitVector loop = new BitVector(nloop); 868 loop.clear(header.getNumber()); 869 loop.clear(exitBlock.getNumber()); 870 int bodyBlocks = 0; 871 Enumeration<BasicBlock> bs = ir.getBasicBlocks(loop); 872 while (bs.hasMoreElements()) { 873 bodyBlocks++; 874 bs.nextElement(); 875 } 876 BasicBlock[] body = new BasicBlock[bodyBlocks]; 877 { 878 int i = 0; 879 bs = ir.getBasicBlocks(loop); 880 while (bs.hasMoreElements()) { 881 body[i++] = bs.nextElement(); 882 } 883 } 884 885 BasicBlock seqEnd = seqStart.nextBasicBlockInCodeOrder(); 886 if (seqEnd != null) ir.cfg.breakCodeOrder(seqStart, seqEnd); 887 BasicBlock seqLast = seqStart; 888 889 BasicBlock firstHeader = null; 890 BasicBlock lastHeader = null; 891 BasicBlock lastExit = null; 892 BasicBlock[] handles = new BasicBlock[2]; 893 894 for (int i = 0; i < unrollFactor; ++i) { 895 896 // copy header 897 seqLast = copyAndLinkBlock(ir, seqLast, header); 898 lastHeader = seqLast; 899 900 if (i == 0) { 901 firstHeader = seqLast; 902 } else { 903 // link copies by jumps 904 lastExit.appendInstruction(Goto.create(GOTO, seqLast.makeJumpTarget())); 905 lastExit.recomputeNormalOut(ir); 906 } 907 908 // copy body 909 for (BasicBlock bb : body) { 910 seqLast = copyAndLinkBlock(ir, seqLast, bb); 911 } 912 913 // copy exit block 914 if (exitBlock != header) { 915 seqLast = copyAndLinkBlock(ir, seqLast, exitBlock); 916 lastExit = seqLast; 917 } else { 918 lastExit = lastHeader; 919 } 920 921 // delete all branches in the copies of the exit block 922 deleteBranches(lastExit); 923 924 // redirect internal branches 925 BasicBlock cb = seqLast; 926 for (int j = 0; j < blocks; ++j) { 927 cb.recomputeNormalOut(ir); 928 Enumeration<BasicBlock> be = cb.getOut(); 929 while (be.hasMoreElements()) { 930 BasicBlock out = be.nextElement(); 931 if (CFGTransformations.inLoop(out, nloop)) { 932 cb.redirectOuts(out, copiedBlocks.get(out), ir); 933 } 934 } 935 cb.recomputeNormalOut(ir); 936 cb = cb.prevBasicBlockInCodeOrder(); 937 } 938 } 939 if (seqEnd != null) ir.cfg.linkInCodeOrder(seqLast, seqEnd); 940 handles[0] = firstHeader; 941 handles[1] = lastExit; 942 return handles; 943 } 944 945 BasicBlock copyAndLinkBlock(IR ir, BasicBlock seqLast, BasicBlock block) { 946 BasicBlock copy = block.copyWithoutLinks(ir); 947 ir.cfg.linkInCodeOrder(seqLast, copy); 948 copiedBlocks.put(block, copy); 949 return copy; 950 } 951 952 static void deleteBranches(BasicBlock b) { 953 Instruction branch = b.lastRealInstruction(); 954 while (branch.isBranch()) { 955 Instruction nextBranch = branch.prevInstructionInCodeOrder(); 956 branch.remove(); 957 branch = nextBranch; 958 } 959 } 960 961 final class RealDefs implements Enumeration<Operand> { 962 private Enumeration<RegisterOperand> defs = null; 963 private Operand use; 964 private RealDefs others = null; 965 966 private void init(Operand use) { 967 this.use = use; 968 if (use instanceof RegisterOperand) { 969 RegisterOperand rop = (RegisterOperand) use; 970 defs = DefUse.defs(rop.getRegister()); 971 this.use = null; 972 if (!defs.hasMoreElements()) defs = null; 973 } 974 } 975 976 RealDefs(Operand use) { 977 this.init(use); 978 theVisit++; 979 } 980 981 RealDefs(Operand use, int visit) { 982 this.init(use); 983 theVisit = visit; 984 } 985 986 @Override 987 public boolean hasMoreElements() { 988 return use != null || (others != null && others.hasMoreElements()) || (defs != null && defs.hasMoreElements()); 989 } 990 991 @Override 992 public Operand nextElement() { 993 Operand res = use; 994 if (res != null) { 995 use = null; 996 return res; 997 } 998 if (others != null && others.hasMoreElements()) { 999 return others.nextElement(); 1000 } 1001 1002 res = defs.nextElement(); 1003 Instruction inst = res.instruction; 1004 if (!(Move.conforms(inst)) || visitInts.get(inst).intValue() == theVisit) { 1005 return res; 1006 } 1007 visitInts.put(inst, Integer.valueOf(theVisit)); 1008 1009 others = new RealDefs(Move.getVal(inst), theVisit); 1010 if (!(others.hasMoreElements())) return res; 1011 return others.nextElement(); 1012 } 1013 1014 public Operand nextClear() { 1015 Operand res = nextElement(); 1016 res.instruction = null; 1017 return res; 1018 } 1019 } 1020}