001/* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013package org.jikesrvm.compilers.opt.ssa; 014 015import static org.jikesrvm.compilers.opt.driver.OptConstants.SYNTH_LOOP_VERSIONING_BCI; 016import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH; 017import static org.jikesrvm.compilers.opt.ir.Operators.ARRAYLENGTH_opcode; 018import static org.jikesrvm.compilers.opt.ir.Operators.GOTO; 019import static org.jikesrvm.compilers.opt.ir.Operators.GUARD_MOVE; 020import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD; 021import static org.jikesrvm.compilers.opt.ir.Operators.INT_ADD_opcode; 022import static org.jikesrvm.compilers.opt.ir.Operators.INT_IFCMP; 023import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB; 024import static org.jikesrvm.compilers.opt.ir.Operators.INT_SUB_opcode; 025import static org.jikesrvm.compilers.opt.ir.Operators.PHI; 026import static org.jikesrvm.compilers.opt.ir.Operators.REF_IFCMP; 027 028import java.lang.reflect.Constructor; 029import java.util.ArrayList; 030import java.util.Enumeration; 031import java.util.HashMap; 032import java.util.HashSet; 033import java.util.Iterator; 034 035import org.jikesrvm.VM; 036import org.jikesrvm.classloader.TypeReference; 037import org.jikesrvm.compilers.opt.DefUse; 038import org.jikesrvm.compilers.opt.OptOptions; 039import org.jikesrvm.compilers.opt.OptimizingCompilerException; 040import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTGraph; 041import org.jikesrvm.compilers.opt.controlflow.AnnotatedLSTNode; 042import org.jikesrvm.compilers.opt.controlflow.DominatorTree; 043import org.jikesrvm.compilers.opt.controlflow.DominatorsPhase; 044import org.jikesrvm.compilers.opt.controlflow.LSTGraph; 045import org.jikesrvm.compilers.opt.controlflow.LTDominators; 046import org.jikesrvm.compilers.opt.driver.CompilerPhase; 047import org.jikesrvm.compilers.opt.ir.BBend; 048import org.jikesrvm.compilers.opt.ir.BasicBlock; 049import org.jikesrvm.compilers.opt.ir.Binary; 050import org.jikesrvm.compilers.opt.ir.BoundsCheck; 051import org.jikesrvm.compilers.opt.ir.Goto; 052import org.jikesrvm.compilers.opt.ir.GuardedUnary; 053import org.jikesrvm.compilers.opt.ir.IR; 054import org.jikesrvm.compilers.opt.ir.IREnumeration; 055import org.jikesrvm.compilers.opt.ir.IfCmp; 056import org.jikesrvm.compilers.opt.ir.Instruction; 057import org.jikesrvm.compilers.opt.ir.Label; 058import org.jikesrvm.compilers.opt.ir.Move; 059import org.jikesrvm.compilers.opt.ir.NullCheck; 060import org.jikesrvm.compilers.opt.ir.Phi; 061import org.jikesrvm.compilers.opt.ir.Register; 062import org.jikesrvm.compilers.opt.ir.operand.BasicBlockOperand; 063import org.jikesrvm.compilers.opt.ir.operand.BranchProfileOperand; 064import org.jikesrvm.compilers.opt.ir.operand.ConditionOperand; 065import org.jikesrvm.compilers.opt.ir.operand.HeapOperand; 066import org.jikesrvm.compilers.opt.ir.operand.IntConstantOperand; 067import org.jikesrvm.compilers.opt.ir.operand.NullConstantOperand; 068import org.jikesrvm.compilers.opt.ir.operand.Operand; 069import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand; 070import org.jikesrvm.compilers.opt.util.GraphNode; 071 072/** 073 * This optimisation works from the outer most loop inward, optimising 074 * loops that conform to being regular {@link 075 * AnnotatedLSTNode}s. The optimisations performs the following 076 * operations: 077 * <ul> 078 * <li> 079 * 1) Determine the bound and null checks to be eliminated. These are 080 * the ones that operate on the loop iterator. If no bound checks can 081 * be eliminated, stop optimising this loop. 082 * <li> 083 * 2) Determine the registers defined in the loop. 084 * <li> 085 * 3) Generate phi nodes that define the original register defined by 086 * the loop and use two newly created registers. 087 * <li> 088 * 4) Create a version of the original loop that uses the first of the 089 * newly created registers instead of the original registers. 090 * <li> 091 * 5) Create a second version, this time with the result of the 092 * eliminated checks set to true. 093 * <li> 094 * 6) Work out what the maximum value for all the bounds checks are 095 * and create branches to optimal or suboptimal loops 096 * <li> 097 * 7) Fix up the phi node predecessors 098 * <li> 099 * 8) Remove the unoptimized loop if its redundant 100 * <li> 101 * 9) Replace register definitions in the original loop with phi 102 * instructions 103 * <li> 104 * 10) Compact node numbering so that CFG number of nodes reflects 105 * that some basic blocks may have been deleted 106 * </ul> 107 * <p> 108 * Example: 109 * <pre> 110 * for (int t1=0; t1 < 100; t1++) { 111 * g1 = null_check l0 112 * g2 = bounds_check l0, t1 113 * g3 = guard_combine g1,g2 114 * t2 = aload l0, t1, g3 115 * g4 = null_check l1 116 * g5 = bounds_check l1, t1 117 * g6 = guard_combine g4,g5 118 * astore t2, l1, t1, g6 119 * } 120 * </pre> 121 * 122 * becomes: 123 * 124 * <pre> 125 * goto explicit_test_block 126 * successor_to_loops: 127 * g1 = phi g1_1, g1_2 128 * g2 = phi g2_1, g2_2 129 * g3 = phi g3_1, g3_2 130 * t2 = phi t2_1, t2_2 131 * g4 = phi g4_1, g4_2 132 * g5 = phi g5_1, g5_2 133 * g6 = phi g6_1, g6_2 134 * goto after_loop 135 * explicit_test_block: 136 * if l0 == null (unlikely) goto sub_optimal_loop 137 * if 100 >= l0.length (unlikely) goto sub_optimal_loop 138 * if l1 == null (unlikely) goto sub_optimal_loop 139 * if 100 >= l1.length (unlikely) goto sub_optimal_loop 140 * goto optimal_loop 141 * sub_optimal_loop: 142 * for (int t1_1=0; t1_1 < 100; t1_1++) { 143 * g1_1 = null_check l0 144 * g2_1 = bounds_check l0, t1_1 145 * g3_1 = guard_combine g1_1,g2_1 146 * t2_1 = aload l0, t1_1, g3_1 147 * g4_1 = null_check l1 148 * g5_1 = bounds_check l1, t1_1 149 * g6_1 = guard_combine g4_1,g5_1 150 * astore t2_1, l1, t1_1, g6_1 151 * } 152 * goto successor_to_loops 153 * optimal_loop: 154 * for (int t1_2=0; t1_2 < 100; t1_2++) { 155 * g1_2 = true_guard 156 * g2_2 = true_guard 157 * g3_2 = guard_combine g1_2,g2_2 158 * t2_2 = aload l0, t1_2, g3_2 159 * g4_2 = null_check l1 160 * g5_2 = bounds_check l1, t1_2 161 * g6_2 = guard_combine g4_2,g5_2 162 * astore t2_2, l1, t1_2, g6_2 163 * } 164 * goto successor_to_loops 165 * after_loop: 166 * </pre> 167 * 168 * The optimisation works on the Heap SSA form. A more accurate 169 * example of the transformation would be: 170 * 171 * <pre> 172 * heap1 = ...; // previous heap state 173 * t1_1 = 0; 174 * if t1_1 ≥ 100 goto label2 175 * label1: 176 * t1_2 = phi t1_1, t1_3 177 * heap2 = phi heap1, heap3 178 * g1 = null_check l0 179 * g2 = bounds_check l0, t1_2 180 * g3 = guard_combine g1,g2 181 * t2 = aload l0, t1_2, g3 182 * g4 = null_check l1 183 * g5 = bounds_check l1, t1_2 184 * g6 = guard_combine g4,g5 185 * heap3 = astore t2, l1, t1_2, g6 186 * t1_3 = t1_2 + 1 187 * if t1_3 < 100 label1 * label2: 188 * </pre> 189 * 190 * becomes: 191 * 192 * <pre> 193 * heap1 = ...; // previous heap state 194 * t1_1 = 0; 195 * if t1_1 ≥ 100 goto label2 196 * goto explicit_test_block 197 * successor_to_loops: 198 * t1_2 = phi t1_2_1, t1_2_2 199 * heap2 = phi heap2_1, heap2_2 200 * g1 = phi g1_1, g1_2 201 * g2 = phi g2_1, g2_2 202 * g3 = phi g3_1, g3_2 203 * t2 = phi t2_1, t2_2 204 * g4 = phi g4_1, g4_2 205 * g5 = phi g5_1, g5_2 206 * g6 = phi g6_1, g6_2 207 * heap3 = phi heap3_1, heap3_2 208 * t1_3 = phi t1_3_1, t1_3_2 209 * goto after_loop 210 * explicit_test_block: 211 * g1_2 = if l0 == null (unlikely) goto sub_optimal_loop 212 * g2_2 = if 100 >= l0.length (unlikely) goto sub_optimal_loop 213 * g4_2 = if l1 == null (unlikely) goto sub_optimal_loop 214 * g5_2 = if 100 >= l1.length (unlikely) goto sub_optimal_loop 215 * goto optimal_loop 216 * sub_optimal_loop: 217 * label1_1: 218 * t1_2_1 = phi t1_1, t1_3_1 219 * heap2_1 = phi heap1, heap3_1 220 * g1_1 = null_check l0 221 * g2_1 = bounds_check l0, t1_2_1 222 * g3_1 = guard_combine g1_1,g2_1 223 * t2_1 = aload l0, t1_2_1, g3_1 224 * g4_1 = null_check l1 225 * g5_1 = bounds_check l1, t1_2_1 226 * g6_1 = guard_combine g4_1,g5_1 227 * heap3_1 = astore t2_1, l1, t1_2_1, g6_1 228 * t1_3_1 = t1_2_1 + 1 229 * if t1_3_1 < 100 label1_1 230 * goto successor_to_loops 231 * optimal_loop: 232 * label1_2: 233 * t1_2_2 = phi t1_1, t1_3_2 234 * heap2_2 = phi heap1, heap3_2 235 * g3_2 = guard_combine g1_2,g2_2 236 * t2_2 = aload l0, t1_2_2, g3_2 237 * g6_2 = guard_combine g4_2,g5_2 238 * heap3_2 = astore t2_2, l1, t1_2_2, g6_2 239 * t1_3_2 = t1_2_2 + 1 240 * if t1_3_2 < 100 label1_2 241 * goto successor_to_loops 242 * after_loop: 243 * label2: 244 * </pre> 245 */ 246public final class LoopVersioning extends CompilerPhase { 247 // -oO Debug variables Oo- 248 /** 249 * Flag to optionally print verbose debugging messages 250 */ 251 private static final boolean DEBUG = false; 252 /** 253 * Flag to verify computed IR 254 */ 255 private static final boolean VERIFY = false; 256 257 // -oO Debug routines Oo- 258 /** 259 * Human readable report of what goes on 260 * 261 * @param s String to print 262 **/ 263 private static void report(String s) { 264 if (DEBUG) { 265 VM.sysWriteln(s); 266 } 267 } 268 269 /** 270 * Return a string name for this phase. 271 * @return "Loop Versioning" 272 */ 273 @Override 274 public String getName() { 275 return "Loop Versioning"; 276 } 277 278 // -oO Variables used throughout the optimisation phase Oo- 279 /** 280 * The phi instruction operand holding the optimized loop variable 281 */ 282 private static final int OPTIMIZED_LOOP_OPERAND = 0; 283 /** 284 * The phi instruction operand holding the unoptimized loop variable 285 */ 286 private static final int UNOPTIMIZED_LOOP_OPERAND = 1; 287 288 /** 289 * IR for optimisation 290 */ 291 private IR ir; 292 293 /** 294 * Set used to store the loop related register 295 */ 296 private HashSet<Register> loopRegisterSet; 297 298 /** 299 * SSA options 300 */ 301 private final SSAOptions desiredSSAOptions; 302 /** 303 * Compiler phases called from this one 304 */ 305 private CompilerPhase enterSSA, leaveSSA; 306 private final CompilerPhase domPhase; 307 /** 308 * Run inside SSA sub-phase 309 */ 310 private static final boolean inSSAphase = true; 311 312 // -oO Interface to the rest of the compiler Oo- 313 314 /** 315 * Constructor for this compiler phase 316 */ 317 private static final Constructor<CompilerPhase> constructor = 318 getCompilerPhaseConstructor(LoopVersioning.class); 319 320 /** 321 * Get a constructor object for this compiler phase 322 * @return compiler phase constructor 323 */ 324 @Override 325 public Constructor<CompilerPhase> getClassConstructor() { 326 return constructor; 327 } 328 329 /** 330 * Constructor 331 */ 332 public LoopVersioning() { 333 desiredSSAOptions = new SSAOptions(); 334 desiredSSAOptions.setScalarsOnly(true); 335 if (!inSSAphase) { 336 enterSSA = new EnterSSA(); 337 leaveSSA = new LeaveSSA(); 338 } 339 domPhase = new DominatorsPhase(false); 340 } 341 342 /** 343 * Should loop versioning be performed? 344 */ 345 @Override 346 public boolean shouldPerform(OptOptions options) { 347 return options.SSA_LOOP_VERSIONING; 348 } 349 350 /** 351 * @param _ir the IR to process 352 */ 353 @Override 354 public void perform(IR _ir) { 355 ir = _ir; 356 357 // Create SSA 358 ir.desiredSSAOptions = desiredSSAOptions; 359 if (!inSSAphase) { 360 enterSSA.perform(ir); 361 } 362 363 if (DEBUG) { 364 SSA.printInstructions(ir); 365 } 366 367 // Perform loop annotation 368 if (!ir.hasReachableExceptionHandlers()) { 369 // Build LST tree and dominator info 370 domPhase.perform(ir); 371 DefUse.computeDU(ir); 372 // Build annotated version 373 ir.HIRInfo.loopStructureTree = new AnnotatedLSTGraph(ir, ir.HIRInfo.loopStructureTree); 374 } 375 if (VERIFY) { 376 ir.verify(getName(), true); 377 } 378 379 // Check loop annotation has been performed 380 if (!(ir.HIRInfo.loopStructureTree instanceof AnnotatedLSTGraph)) { 381 report("Optimisation of " + ir.getMethod() + " failed as LST wasn't annotated\n"); 382 } else { 383 loopRegisterSet = new HashSet<Register>(); 384 385 if (DEBUG) { 386 VM.sysWriteln(ir.getMethod().toString()); 387 VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString()); 388 SSA.printInstructions(ir); 389 } 390 391 while (findLoopToOptimise((AnnotatedLSTNode) ir.HIRInfo.loopStructureTree.getRoot())) { 392 if (DEBUG) { 393 VM.sysWriteln("Successful optimisation of " + ir.getMethod()); 394 SSA.printInstructions(ir); 395 VM.sysWriteln(ir.cfg.toString()); 396 } 397 // Get IR into shape for next pass 398 DefUse.computeDU(ir); 399 LTDominators.perform(ir, true, true); 400 ir.HIRInfo.dominatorTree = new DominatorTree(ir, true); 401 LSTGraph.perform(ir); 402 AnnotatedLSTGraph.perform(ir); 403 404 if (VERIFY) { 405 ir.verify(getName(), true); 406 } 407 408 if (DEBUG) { 409 VM.sysWriteln("after an optimization pass"); 410 VM.sysWriteln(ir.HIRInfo.loopStructureTree.toString()); 411 SSA.printInstructions(ir); 412 VM.sysWriteln("Finish optimize: " + ir.getMethod().toString()); 413 } 414 } 415 // No longer in use 416 loopRegisterSet = null; 417 } 418 419 if (!inSSAphase) { 420 // Leave SSA 421 leaveSSA.perform(ir); 422 } 423 424 if (VERIFY) { 425 ir.verify(getName(), true); 426 } 427 } 428 429 // -oO Optimisation routines Oo- 430 431 /** 432 * Find an outermost loop to optimise and optimise it. Focus on 433 * annotated regular loops, LICM should handle possible 434 * optimisation for the non-regular loops 435 * 436 * @param loop Loop to search 437 * @return was optimisation performed 438 */ 439 private boolean findLoopToOptimise(AnnotatedLSTNode loop) { 440 // Has this loop already been optimised? 441 Operand carriedLoopIterator = loop.getCarriedLoopIterator(); 442 if ((carriedLoopIterator instanceof RegisterOperand) && 443 (isOptimizedLoop(carriedLoopIterator.asRegister().getRegister()))) { 444 return false; 445 } 446 447 // Process inner loops first 448 Enumeration<GraphNode> innerLoops = loop.outNodes(); 449 // Iterate over loops 450 while (innerLoops.hasMoreElements()) { 451 AnnotatedLSTNode nestedLoop = (AnnotatedLSTNode) innerLoops.nextElement(); 452 // Try to optimise inner loops first 453 if (findLoopToOptimise(nestedLoop)) { 454 // Exit early if inner loop optimisation succeeded 455 return true; 456 } 457 } 458 // Don't try to optimise irregular loops 459 if (loop.isNonRegularLoop()) { 460 return false; 461 } 462 if (DEBUG) { 463 report("LoopFissionOfArrayGuards: found loop in " + ir.getMethod()); 464 VM.sysWriteln("dominator tree:"); 465 VM.sysWriteln(ir.HIRInfo.dominatorTree.toString()); 466 } 467 468 // 1) Determine the bound and null checks to be eliminated. The 469 // bound checks are the ones that operate on the loop iterator. If 470 // no checks can be eliminated, stop optimising this loop. 471 ArrayList<Instruction> checksToEliminate = new ArrayList<Instruction>(); 472 getListOfChecksToEliminate(loop, checksToEliminate); 473 if (checksToEliminate.isEmpty()) { 474 return false; 475 } else { 476 // We found instructions to eliminate 477 if (DEBUG) { 478 VM.sysWriteln("Loop being optimised:"); 479 VM.sysWriteln(loop.toString()); 480 VM.sysWriteln("Checks to eliminate:"); 481 for (Instruction instruction : checksToEliminate) { 482 VM.sysWriteln(instruction.toString()); 483 } 484 } 485 // 2) Determine the registers defined in the loop. 486 ArrayList<Register> registersDefinedInOriginalLoop = new ArrayList<Register>(); 487 ArrayList<TypeReference> typesOfRegistersDefinedInOriginalLoop = new ArrayList<TypeReference>(); 488 ArrayList<Instruction> definingInstructionsInOriginalLoop = new ArrayList<Instruction>(); 489 getRegistersDefinedInLoop(loop, 490 registersDefinedInOriginalLoop, 491 typesOfRegistersDefinedInOriginalLoop, 492 definingInstructionsInOriginalLoop); 493 if (DEBUG) { 494 VM.sysWrite("Registers in original loop:\n{"); 495 for (int i = 0; i < registersDefinedInOriginalLoop.size(); i++) { 496 VM.sysWrite(registersDefinedInOriginalLoop.get(i).toString()); 497 if (definingInstructionsInOriginalLoop.get(i) != null) { 498 VM.sysWrite("(escapes),"); 499 } else { 500 VM.sysWrite(","); 501 } 502 } 503 VM.sysWriteln("}"); 504 } 505 // 3) Generate phi nodes that define the original register 506 // defined by the loop and use two newly created registers. 507 ArrayList<Instruction> phiInstructions = new ArrayList<Instruction>(); 508 HashMap<Register, Register> subOptimalRegMap = new HashMap<Register, Register>(); 509 HashMap<Register, Register> optimalRegMap = new HashMap<Register, Register>(); 510 generatePhiNodes(loop, 511 registersDefinedInOriginalLoop, 512 typesOfRegistersDefinedInOriginalLoop, 513 phiInstructions, 514 subOptimalRegMap, 515 optimalRegMap); 516 if (DEBUG) { 517 VM.sysWriteln("subOptimalRegMap"); 518 VM.sysWriteln(subOptimalRegMap.toString()); 519 VM.sysWriteln("optimalRegMap"); 520 VM.sysWriteln(optimalRegMap.toString()); 521 } 522 523 // 4) Create a version of the original loop that uses the first of 524 // the newly created registers instead of the original 525 // registers. 526 HashMap<Register, BasicBlock> regToUnoptimizedBlockMap = new HashMap<Register, BasicBlock>(); 527 HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap = 528 createCloneLoop(loop, subOptimalRegMap, regToUnoptimizedBlockMap); 529 if (DEBUG) { 530 VM.sysWriteln("subOptimalLoopMap"); 531 VM.sysWriteln(unoptimizedLoopMap.toString()); 532 } 533 // 5) Create a second version, this time with the result of the 534 // eliminated checks set to explicit test guards. 535 HashMap<Register, BasicBlock> regToOptimizedBlockMap = new HashMap<Register, BasicBlock>(); 536 HashMap<BasicBlock, BasicBlock> optimizedLoopMap = 537 createOptimizedLoop(loop, optimalRegMap, checksToEliminate, regToOptimizedBlockMap); 538 if (DEBUG) { 539 VM.sysWriteln("optimalLoopMap"); 540 VM.sysWriteln(optimizedLoopMap.toString()); 541 } 542 // 6) Work out what the maximum value for all the bounds checks 543 // are and create branches to optimal or suboptimal loops - with 544 // the unoptimized loop possibly being unreachable 545 BasicBlock firstBranchBlock = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 546 BasicBlock temp = (BasicBlock) loop.header.prev; 547 ir.cfg.breakCodeOrder(temp, loop.header); 548 ir.cfg.linkInCodeOrder(temp, firstBranchBlock); 549 ir.cfg.linkInCodeOrder(firstBranchBlock, loop.header); 550 temp.redirectOuts(loop.header, firstBranchBlock, ir); 551 boolean isUnoptimizedLoopReachable = 552 createBranchBlocks(loop, 553 firstBranchBlock, 554 checksToEliminate, 555 unoptimizedLoopMap.get(loop.predecessor), 556 optimizedLoopMap.get(loop.predecessor), 557 optimalRegMap); 558 // 7) Fix up the phi node predecessors 559 fixUpPhiPredecessors(phiInstructions, 560 isUnoptimizedLoopReachable ? unoptimizedLoopMap.get(loop.exit) : null, 561 optimizedLoopMap.get(loop.exit)); 562 // 8) Remove the unoptimized loop if its redundant 563 if (!isUnoptimizedLoopReachable) { 564 removeUnoptimizedLoop(loop, unoptimizedLoopMap); 565 } 566 567 // 9) Replace register definitions in the original 568 // loop with phi instructions 569 modifyOriginalLoop(loop, phiInstructions, definingInstructionsInOriginalLoop, subOptimalRegMap, optimalRegMap); 570 // 10) Compact node numbering so that CFG number of nodes 571 // reflects that some basic blocks may have been deleted 572 ir.cfg.compactNodeNumbering(); 573 return true; 574 } 575 } 576 577 /** 578 * Create a list of instructions to be eliminated 579 * @param loop the loop to examine 580 * @param instrToEliminate the instructions to remove 581 */ 582 private void getListOfChecksToEliminate(AnnotatedLSTNode loop, ArrayList<Instruction> instrToEliminate) { 583 ArrayList<Instruction> nullChecks = new ArrayList<Instruction>(); 584 ArrayList<Instruction> oddBoundChecks = new ArrayList<Instruction>(); 585 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 586 while (blocks.hasMoreElements()) { 587 BasicBlock block = blocks.nextElement(); 588 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block); 589 while (instructions.hasMoreElements()) { 590 Instruction instruction = instructions.nextElement(); 591 if (NullCheck.conforms(instruction)) { 592 if (loop.isInvariant(NullCheck.getRef(instruction))) { 593 instrToEliminate.add(instruction); 594 nullChecks.add(instruction); 595 } 596 } else if (loop.isMonotonic() && BoundsCheck.conforms(instruction)) { 597 if (loop.isInvariant(BoundsCheck.getRef(instruction))) { 598 if (loop.isRelatedToIterator(BoundsCheck.getIndex(instruction))) { 599 if (loop.isInvariant(BoundsCheck.getGuard(instruction))) { 600 instrToEliminate.add(instruction); 601 } else { 602 // Null check isn't invariant but reference was, check 603 // null check will be eliminated at end of loop 604 oddBoundChecks.add(instruction); 605 } 606 } 607 } 608 } 609 } 610 } 611 // Check cases where the null check isn't loop invariant, however, 612 // it will be in the optimized loop as we'll have eliminated it 613 for (Instruction oddBoundCheck : oddBoundChecks) { 614 Operand guard = BoundsCheck.getGuard(oddBoundCheck); 615 for (Instruction nullCheck : nullChecks) { 616 if (guard.similar(NullCheck.getGuardResult(nullCheck))) { 617 instrToEliminate.add(oddBoundCheck); 618 break; 619 } 620 } 621 } 622 } 623 624 /** 625 * Get registers defined in the given loop. As we're in SSA form 626 * all register definitions must be unique. 627 * @param loop - the loop to examine 628 * @param registers - vector to which defined registers are added 629 * @param types list to which the register's types are added 630 * @param definingInstructions list to which the defining instructions for 631 * the registers are added 632 */ 633 private void getRegistersDefinedInLoop(AnnotatedLSTNode loop, ArrayList<Register> registers, 634 ArrayList<TypeReference> types, 635 ArrayList<Instruction> definingInstructions) { 636 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 637 while (blocks.hasMoreElements()) { 638 BasicBlock block = blocks.nextElement(); 639 // can value escape 640 final boolean escapes = (block == loop.exit) || (ir.HIRInfo.dominatorTree.dominates(block, loop.exit)); 641 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block); 642 while (instructions.hasMoreElements()) { 643 Instruction instruction = instructions.nextElement(); 644 Enumeration<Operand> operands = instruction.getDefs(); 645 while (operands.hasMoreElements()) { 646 Operand operand = operands.nextElement(); 647 if (operand.isRegister()) { 648 registers.add(operand.asRegister().getRegister()); 649 types.add(operand.asRegister().getType()); 650 if (escapes) { 651 definingInstructions.add(instruction); 652 } else { 653 definingInstructions.add(null); 654 } 655 } 656 } 657 } 658 } 659 } 660 661 /** 662 * Generate into a new block phi nodes that define the original 663 * register defined by the loop and use two newly created 664 * registers. 665 * @param loop the loop to process 666 * @param registers - vector to which defined registers need to be 667 * created registers.x used in creating phi nodes 668 * @param types - vector of corresponding types of registers. 669 * @param phiInstructions - created phi instructions 670 * @param subOptimalRegMap - mapping of orignal destination to the 671 * newly created destination for the unoptimized loop 672 * @param optimalRegMap - mapping of orignal destination to the 673 * newly created destination for the optimized loop 674 */ 675 private void generatePhiNodes(AnnotatedLSTNode loop, ArrayList<Register> registers, 676 ArrayList<TypeReference> types, ArrayList<Instruction> phiInstructions, 677 HashMap<Register, Register> subOptimalRegMap, 678 HashMap<Register, Register> optimalRegMap) { 679 // Get the carried loop iterator's register 680 Register carriedLoopIteratorRegister = ((RegisterOperand) loop.getCarriedLoopIterator()).getRegister(); 681 for (int i = 0; i < registers.size(); i++) { 682 Register register = registers.get(i); 683 TypeReference type = types.get(i); 684 Instruction phi = Phi.create(PHI, new RegisterOperand(register, type), 2); 685 phi.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 686 687 // new operand for optimized loop 688 Operand op0 = ir.regpool.makeTemp(type); 689 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, op0); 690 optimalRegMap.put(register, op0.asRegister().getRegister()); 691 692 // new operand for unoptimized loop 693 Operand op1 = ir.regpool.makeTemp(type); 694 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, op1); 695 subOptimalRegMap.put(register, op1.asRegister().getRegister()); 696 697 // Add the new created carried loop iterator registers to 698 // internal set to mark the optimized loops 699 if (register == carriedLoopIteratorRegister) { 700 setOptimizedLoop(op0.asRegister().getRegister()); 701 setOptimizedLoop(op1.asRegister().getRegister()); 702 } 703 704 phiInstructions.add(phi); 705 } 706 // rename any optimized inner loops registers 707 renameOptimizedLoops(subOptimalRegMap, optimalRegMap); 708 } 709 710 /** 711 * Create a clone of the loop replacing definitions in the cloned 712 * loop with those found in the register map 713 * @param loop - loop to clone 714 * @param regMap - mapping of original definition to new 715 * definition 716 * @param regToBlockMap mapping of registers to new, unoptimized blocks. This starts 717 * empty and will be filled during execution of the method. 718 * @return a mapping from original BBs to created BBs 719 */ 720 private HashMap<BasicBlock, BasicBlock> createCloneLoop(AnnotatedLSTNode loop, 721 HashMap<Register, Register> regMap, 722 HashMap<Register, BasicBlock> regToBlockMap) { 723 HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>(); 724 // After the newly created loop goto the old loop header 725 originalToCloneBBMap.put(loop.successor, loop.header); 726 // Create an empty block to be the loop predecessor 727 BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 728 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred); 729 originalToCloneBBMap.put(loop.predecessor, new_pred); 730 // Create copy blocks 731 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 732 while (blocks.hasMoreElements()) { 733 BasicBlock block = blocks.nextElement(); 734 block.killFallThrough(); // get rid of fall through edges to aid recomputeNormalOuts 735 // Create copy and register mapping 736 BasicBlock copy = block.copyWithoutLinks(ir); 737 originalToCloneBBMap.put(block, copy); 738 // Link into code order 739 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy); 740 // Alter register definitions and uses in copy 741 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 742 while (instructions.hasMoreElements()) { 743 Instruction instruction = instructions.nextElement(); 744 Enumeration<Operand> operands = instruction.getDefs(); 745 while (operands.hasMoreElements()) { 746 Operand operand = operands.nextElement(); 747 if (operand.isRegister()) { 748 Register register = operand.asRegister().getRegister(); 749 if (regMap.containsKey(register)) { 750 instruction.replaceRegister(register, regMap.get(register)); 751 regToBlockMap.put(regMap.get(register), copy); 752 } 753 } 754 } 755 operands = instruction.getUses(); 756 while (operands.hasMoreElements()) { 757 Operand operand = operands.nextElement(); 758 if (operand instanceof RegisterOperand) { 759 Register register = operand.asRegister().getRegister(); 760 if (regMap.containsKey(register)) { 761 instruction.replaceRegister(register, regMap.get(register)); 762 } 763 } 764 } 765 } 766 } 767 // Fix up outs 768 // loop predecessor 769 new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir); 770 // loop blocks 771 blocks = loop.getBasicBlocks(); 772 while (blocks.hasMoreElements()) { 773 BasicBlock block = blocks.nextElement(); 774 BasicBlock copy = originalToCloneBBMap.get(block); 775 Enumeration<BasicBlock> outs = block.getOutNodes(); 776 while (outs.hasMoreElements()) { 777 BasicBlock out = outs.nextElement(); 778 if (originalToCloneBBMap.containsKey(out)) { 779 copy.redirectOuts(out, originalToCloneBBMap.get(out), ir); 780 } 781 } 782 } 783 // Fix up phis 784 blocks = loop.getBasicBlocks(); 785 while (blocks.hasMoreElements()) { 786 BasicBlock block = blocks.nextElement(); 787 BasicBlock copy = originalToCloneBBMap.get(block); 788 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 789 while (instructions.hasMoreElements()) { 790 Instruction instruction = instructions.nextElement(); 791 if (Phi.conforms(instruction)) { 792 for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) { 793 BasicBlock phi_predecessor = Phi.getPred(instruction, i).block; 794 if (originalToCloneBBMap.containsKey(phi_predecessor)) { 795 Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor))); 796 } else { 797 dumpIR(ir, "Error when optimising" + ir.getMethod()); 798 throw new Error("There's > 1 route to this phi node " + 799 instruction + 800 " from outside the loop: " + 801 phi_predecessor); 802 } 803 } 804 } 805 } 806 } 807 return originalToCloneBBMap; 808 } 809 810 /** 811 * Create a clone of the loop replacing definitions in the cloned 812 * loop with those found in the register map and eliminate 813 * unnecessary bound checks 814 * @param loop - loop to clone 815 * @param regMap - mapping of original definition to new 816 * definition 817 * @param instrToEliminate - instructions to eliminate 818 * @param regToBlockMap - mapping of a register to its defining BB 819 * @return a mapping from original BBs to created BBs 820 */ 821 private HashMap<BasicBlock, BasicBlock> createOptimizedLoop(AnnotatedLSTNode loop, 822 HashMap<Register, Register> regMap, 823 ArrayList<Instruction> instrToEliminate, 824 HashMap<Register, BasicBlock> regToBlockMap) { 825 HashMap<BasicBlock, BasicBlock> originalToCloneBBMap = new HashMap<BasicBlock, BasicBlock>(); 826 // After the newly created loop goto the old loop header 827 originalToCloneBBMap.put(loop.successor, loop.header); 828 // Create an empty block to be the loop predecessor 829 BasicBlock new_pred = loop.header.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 830 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), new_pred); 831 originalToCloneBBMap.put(loop.predecessor, new_pred); 832 833 // Create copy blocks 834 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 835 while (blocks.hasMoreElements()) { 836 BasicBlock block = blocks.nextElement(); 837 // N.B. fall through will have been killed by unoptimized loop 838 // Create copy and register mapping 839 BasicBlock copy = block.copyWithoutLinks(ir); 840 originalToCloneBBMap.put(block, copy); 841 // Link into code order 842 ir.cfg.linkInCodeOrder(ir.cfg.lastInCodeOrder(), copy); 843 844 // Alter register definitions in copy 845 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 846 loop_over_created_instructions: 847 while (instructions.hasMoreElements()) { 848 Instruction instruction = instructions.nextElement(); 849 if (BoundsCheck.conforms(instruction)) { 850 for (Instruction anInstrToEliminate : instrToEliminate) { 851 if (instruction.similar(anInstrToEliminate)) { 852 instruction.remove(); 853 continue loop_over_created_instructions; 854 } 855 } 856 } else if (NullCheck.conforms(instruction)) { 857 for (Instruction anInstrToEliminate : instrToEliminate) { 858 if (instruction.similar(anInstrToEliminate)) { 859 instruction.remove(); 860 continue loop_over_created_instructions; 861 } 862 } 863 } 864 Enumeration<Operand> operands = instruction.getDefs(); 865 while (operands.hasMoreElements()) { 866 Operand operand = operands.nextElement(); 867 if (operand instanceof RegisterOperand) { 868 Register register = operand.asRegister().getRegister(); 869 if (regMap.containsKey(register)) { 870 instruction.replaceRegister(register, regMap.get(register)); 871 regToBlockMap.put(regMap.get(register), copy); 872 } 873 } 874 } 875 operands = instruction.getUses(); 876 while (operands.hasMoreElements()) { 877 Operand operand = operands.nextElement(); 878 if (operand.isRegister()) { 879 Register register = operand.asRegister().getRegister(); 880 if (regMap.containsKey(register)) { 881 instruction.replaceRegister(register, regMap.get(register)); 882 } 883 } 884 } 885 } 886 } 887 // Fix up outs 888 // loop predecessor 889 new_pred.redirectOuts(loop.header, originalToCloneBBMap.get(loop.header), ir); 890 blocks = loop.getBasicBlocks(); 891 while (blocks.hasMoreElements()) { 892 BasicBlock block = blocks.nextElement(); 893 BasicBlock copy = originalToCloneBBMap.get(block); 894 Enumeration<BasicBlock> outs = block.getOutNodes(); 895 while (outs.hasMoreElements()) { 896 BasicBlock out = outs.nextElement(); 897 if (originalToCloneBBMap.containsKey(out)) { 898 copy.redirectOuts(out, originalToCloneBBMap.get(out), ir); 899 } 900 } 901 } 902 // Fix up phis 903 blocks = loop.getBasicBlocks(); 904 while (blocks.hasMoreElements()) { 905 BasicBlock block = blocks.nextElement(); 906 BasicBlock copy = originalToCloneBBMap.get(block); 907 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, copy); 908 while (instructions.hasMoreElements()) { 909 Instruction instruction = instructions.nextElement(); 910 if (Phi.conforms(instruction)) { 911 for (int i = 0; i < Phi.getNumberOfValues(instruction); i++) { 912 BasicBlock phi_predecessor = Phi.getPred(instruction, i).block; 913 if (originalToCloneBBMap.containsKey(phi_predecessor)) { 914 Phi.setPred(instruction, i, new BasicBlockOperand(originalToCloneBBMap.get(phi_predecessor))); 915 } else { 916 throw new Error("There's > 1 route to this phi node from outside the loop: " + phi_predecessor); 917 } 918 } 919 } 920 } 921 } 922 return originalToCloneBBMap; 923 } 924 925 /** 926 * When phi nodes were generated the basic blocks weren't known for 927 * the predecessors, fix this up now. (It may also not be possible 928 * to reach the unoptimized loop any more) 929 * 930 * @param phiInstructions a list of phi nodes 931 * @param unoptimizedLoopExit the exit block of the unoptimized loop. 932 * This may be {@code null} if the unoptimized loop is no longer reachable. 933 * @param optimizedLoopExit the exit block of the optimized loop 934 */ 935 private void fixUpPhiPredecessors(ArrayList<Instruction> phiInstructions, BasicBlock unoptimizedLoopExit, 936 BasicBlock optimizedLoopExit) { 937 if (unoptimizedLoopExit != null) { 938 for (Instruction instruction : phiInstructions) { 939 Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit)); 940 Phi.setPred(instruction, UNOPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(unoptimizedLoopExit)); 941 } 942 } else { 943 for (Instruction instruction : phiInstructions) { 944 Operand operand = Phi.getValue(instruction, OPTIMIZED_LOOP_OPERAND); 945 Phi.resizeNumberOfPreds(instruction, 1); 946 Phi.resizeNumberOfValues(instruction, 1); 947 Phi.setValue(instruction, OPTIMIZED_LOOP_OPERAND, operand); 948 Phi.setPred(instruction, OPTIMIZED_LOOP_OPERAND, new BasicBlockOperand(optimizedLoopExit)); 949 } 950 } 951 } 952 953 /* 954 * TODO better JavaDoc comment. 955 * <p> 956 * Create the block containing explict branches to either the 957 * optimized or unoptimized loops 958 * @param optimalRegMap - mapping used to map eliminated bound and 959 * null check guards to 960 */ 961 private boolean createBranchBlocks(AnnotatedLSTNode loop, BasicBlock block, 962 ArrayList<Instruction> checksToEliminate, BasicBlock unoptimizedLoopEntry, 963 BasicBlock optimizedLoopEntry, 964 HashMap<Register, Register> optimalRegMap) { 965 BasicBlock blockOnEntry = block; 966 // 1) generate null check guards 967 block = generateNullCheckBranchBlocks(loop, checksToEliminate, optimalRegMap, block, unoptimizedLoopEntry); 968 969 // 2) generate bound check guards 970 if (loop.isMonotonic()) { 971 // create new operands for values beyond initial and terminal iterator values 972 Operand terminal; 973 Operand terminalLessStrideOnce; 974 Operand terminalPlusStrideOnce; 975 976 // NB. precomputing these makes life easier and the code easier to read, 977 // it does create dead code though 978 if (loop.terminalIteratorValue.isIntConstant()) { 979 terminal = loop.terminalIteratorValue; 980 int terminalAsInt = terminal.asIntConstant().value; 981 int stride = loop.strideValue.asIntConstant().value; 982 terminalLessStrideOnce = new IntConstantOperand(terminalAsInt - stride); 983 terminalPlusStrideOnce = new IntConstantOperand(terminalAsInt + stride); 984 } else { 985 Instruction tempInstr; 986 terminal = loop.generateLoopInvariantOperand(block, loop.terminalIteratorValue); 987 terminalLessStrideOnce = ir.regpool.makeTempInt(); 988 terminalPlusStrideOnce = ir.regpool.makeTempInt(); 989 tempInstr = 990 Binary.create(INT_SUB, terminalLessStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy()); 991 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 992 block.appendInstruction(tempInstr); 993 DefUse.updateDUForNewInstruction(tempInstr); 994 995 tempInstr = 996 Binary.create(INT_ADD, terminalPlusStrideOnce.asRegister(), terminal.copy(), loop.strideValue.copy()); 997 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 998 block.appendInstruction(tempInstr); 999 DefUse.updateDUForNewInstruction(tempInstr); 1000 } 1001 1002 // Determine maximum and minimum index values for different loop types 1003 Operand phiMinIndexValue; 1004 Operand phiMaxIndexValue; 1005 1006 if (loop.isMonotonicIncreasing()) { 1007 phiMinIndexValue = loop.initialIteratorValue; 1008 if ((loop.condition.isLESS() || 1009 loop.condition.isLOWER() || 1010 loop.condition.isNOT_EQUAL())) { 1011 phiMaxIndexValue = terminal; 1012 } else if ((loop.condition.isLESS_EQUAL() || 1013 loop.condition.isLOWER_EQUAL() || 1014 loop.condition.isEQUAL())) { 1015 phiMaxIndexValue = terminalPlusStrideOnce; 1016 } else { 1017 throw new Error("Unrecognised loop for fission " + loop); 1018 } 1019 } else if (loop.isMonotonicDecreasing()) { 1020 phiMaxIndexValue = loop.initialIteratorValue; 1021 if ((loop.condition.isGREATER() || 1022 loop.condition.isHIGHER() || 1023 loop.condition.isNOT_EQUAL())) { 1024 phiMinIndexValue = terminalPlusStrideOnce; 1025 } else if ((loop.condition.isGREATER_EQUAL() || 1026 loop.condition.isHIGHER_EQUAL() || 1027 loop.condition.isEQUAL())) { 1028 phiMinIndexValue = terminalLessStrideOnce; 1029 } else { 1030 throw new Error("Unrecognised loop for fission " + loop); 1031 } 1032 } else { 1033 throw new Error("Unrecognised loop for fission " + loop); 1034 } 1035 // Generate tests 1036 for (int i = 0; i < checksToEliminate.size(); i++) { 1037 Instruction instr = checksToEliminate.get(i); 1038 if (BoundsCheck.conforms(instr)) { 1039 // Have we already generated these tests? 1040 boolean alreadyChecked = false; 1041 for (int j = 0; j < i; j++) { 1042 Instruction old_instr = checksToEliminate.get(j); 1043 if (BoundsCheck.conforms(old_instr) && 1044 (BoundsCheck.getRef(old_instr).similar(BoundsCheck.getRef(instr))) && 1045 (BoundsCheck.getIndex(old_instr).similar(BoundsCheck.getIndex(instr)))) { 1046 // yes - just create a guard move 1047 alreadyChecked = true; 1048 RegisterOperand guardResult = BoundsCheck.getGuardResult(instr).copyRO(); 1049 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister())); 1050 RegisterOperand guardSource = BoundsCheck.getGuardResult(old_instr).copyRO(); 1051 guardSource.setRegister(optimalRegMap.get(guardSource.getRegister())); 1052 Instruction tempInstr = Move.create(GUARD_MOVE, guardResult, guardSource); 1053 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1054 block.appendInstruction(tempInstr); 1055 break; 1056 } 1057 } 1058 if (!alreadyChecked) { 1059 // no - generate tests 1060 Operand index = BoundsCheck.getIndex(instr); 1061 int distance = loop.getFixedDistanceFromPhiIterator(index); 1062 if (distance == 0) { 1063 block = 1064 generateExplicitBoundCheck(instr, 1065 phiMinIndexValue, 1066 phiMaxIndexValue, 1067 optimalRegMap, 1068 block, 1069 unoptimizedLoopEntry); 1070 } else { 1071 Instruction tempInstr; 1072 RegisterOperand minIndex = ir.regpool.makeTempInt(); 1073 RegisterOperand maxIndex = ir.regpool.makeTempInt(); 1074 1075 tempInstr = 1076 Binary.create(INT_ADD, minIndex, phiMinIndexValue.copy(), new IntConstantOperand(distance)); 1077 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1078 block.appendInstruction(tempInstr); 1079 DefUse.updateDUForNewInstruction(tempInstr); 1080 1081 tempInstr = 1082 Binary.create(INT_ADD, maxIndex, phiMaxIndexValue.copy(), new IntConstantOperand(distance)); 1083 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1084 block.appendInstruction(tempInstr); 1085 DefUse.updateDUForNewInstruction(tempInstr); 1086 1087 block = generateExplicitBoundCheck(instr, minIndex, maxIndex, optimalRegMap, block, unoptimizedLoopEntry); 1088 } 1089 } 1090 } 1091 } 1092 } 1093 // Have we had to create a new basic block since entry => we 1094 // generated a branch to the unoptimized loop 1095 boolean isUnoptimizedLoopReachable = (blockOnEntry != block); 1096 // 3) Finish up with goto and generate true guard value 1097 { 1098 Instruction branch; // the generated branch instruction 1099 branch = Goto.create(GOTO, optimizedLoopEntry.makeJumpTarget()); 1100 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1101 block.appendInstruction(branch); 1102 block.deleteNormalOut(); 1103 block.insertOut(optimizedLoopEntry); 1104 } 1105 return isUnoptimizedLoopReachable; 1106 } 1107 1108 /** 1109 * Generate null check branch blocks 1110 * 1111 * @param loop the current loop where checks are being eliminated 1112 * @param checksToEliminate all of the checks that are being eliminated in the pass 1113 * @param optimalRegMap a map from original register to the register used in the optimal loop 1114 * @param block the block to generate code into 1115 * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails 1116 * @return the new block to generate code into 1117 */ 1118 private BasicBlock generateNullCheckBranchBlocks(AnnotatedLSTNode loop, 1119 ArrayList<Instruction> checksToEliminate, 1120 HashMap<Register, Register> optimalRegMap, 1121 BasicBlock block, BasicBlock unoptimizedLoopEntry) { 1122 // Map of already generated null check references to their 1123 // corresponding guard result 1124 HashMap<Register, Operand> refToGuardMap = new HashMap<Register, Operand>(); 1125 // Iterate over checks 1126 for (Instruction instr : checksToEliminate) { 1127 // Is this a null check 1128 if (NullCheck.conforms(instr)) { 1129 // the generated branch instruction 1130 Instruction branch; 1131 // the reference to compare 1132 Operand ref = AnnotatedLSTNode.follow(NullCheck.getRef(instr)); 1133 // the guard result to define 1134 RegisterOperand guardResult = NullCheck.getGuardResult(instr).copyRO(); 1135 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister())); 1136 // check if we've generated this test already 1137 if (ref.isRegister() && refToGuardMap.containsKey(ref.asRegister().getRegister())) { 1138 // yes - generate just a guard move 1139 branch = Move.create(GUARD_MOVE, guardResult, refToGuardMap.get(ref.asRegister().getRegister()).copy()); 1140 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1141 block.appendInstruction(branch); 1142 } else { 1143 // check if we can just move a guard from the loop predecessors 1144 RegisterOperand guard = nullCheckPerformedInLoopPredecessors(loop.header, instr); 1145 if (guard != null) { 1146 // yes - generate just a guard move 1147 branch = Move.create(GUARD_MOVE, guardResult, guard.copyRO()); 1148 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1149 block.appendInstruction(branch); 1150 } else { 1151 // generate explicit null test 1152 branch = 1153 IfCmp.create(REF_IFCMP, 1154 guardResult, 1155 ref.copy(), 1156 new NullConstantOperand(), 1157 ConditionOperand.EQUAL(), 1158 unoptimizedLoopEntry.makeJumpTarget(), 1159 BranchProfileOperand.unlikely()); 1160 if (ref.isRegister()) { 1161 refToGuardMap.put(ref.asRegister().getRegister(), guardResult); 1162 } 1163 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1164 block.appendInstruction(branch); 1165 // Adjust block 1166 block.insertOut(unoptimizedLoopEntry); 1167 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 1168 BasicBlock temp = (BasicBlock) block.next; 1169 ir.cfg.breakCodeOrder(block, temp); 1170 ir.cfg.linkInCodeOrder(block, new_block); 1171 ir.cfg.linkInCodeOrder(new_block, temp); 1172 block.insertOut(new_block); 1173 block = new_block; 1174 } 1175 } 1176 } 1177 } 1178 return block; 1179 } 1180 1181 /** 1182 * Generate bound check branch blocks 1183 * 1184 * @param boundCheckInstr the bound check instruction in question 1185 * @param minIndexValue the min value for an iterator a loop will generate 1186 * @param maxIndexValue the max value for an iterator a loop will generate 1187 * @param optimalRegMap a map from original register to the register used in the optimal loop 1188 * @param block the block to generate code into 1189 * @param unoptimizedLoopEntry entry to the unoptimized loop for if the check fails 1190 * @return the new block to generate code into 1191 */ 1192 private BasicBlock generateExplicitBoundCheck(Instruction boundCheckInstr, Operand minIndexValue, 1193 Operand maxIndexValue, 1194 HashMap<Register, Register> optimalRegMap, 1195 BasicBlock block, BasicBlock unoptimizedLoopEntry) { 1196 // 1) Work out what tests are necessary. NB we don't optimise for 1197 // the case when exceptions will always be generated 1198 boolean lowerBoundTestRedundant; 1199 boolean upperBoundTestRedundant; 1200 { 1201 // as array lengths must be >= 0 the lower bound test is not 1202 // necessary if: 1203 // (minIndexValue >= 0) or ((arraylength A) + zeroOrPositiveConstant) 1204 lowerBoundTestRedundant = 1205 ((minIndexValue.isIntConstant() && (minIndexValue.asIntConstant().value >= 0)) || 1206 ((getConstantAdjustedArrayLengthRef(minIndexValue) != null) && 1207 (getConstantAdjustedArrayLengthDistance(minIndexValue) >= 0))); 1208 // as the upper bound must be <= arraylength the test is not 1209 // necessary if: 1210 // maxIndexValue = (arraylength A) - zeroOrPositiveConstant 1211 Operand maxIndexArrayLengthRef = getConstantAdjustedArrayLengthRef(maxIndexValue); 1212 upperBoundTestRedundant = 1213 ((maxIndexArrayLengthRef != null) && 1214 maxIndexArrayLengthRef.similar(BoundsCheck.getRef(boundCheckInstr)) && 1215 (getConstantAdjustedArrayLengthDistance(maxIndexValue) <= 0)); 1216 } 1217 1218 // 2) Create explicit bound check 1219 1220 // register to hold result (NB it's a guard for the optimal loop) 1221 RegisterOperand guardResult = BoundsCheck.getGuardResult(boundCheckInstr).copyRO(); 1222 guardResult.setRegister(optimalRegMap.get(guardResult.getRegister())); 1223 1224 // the guard on the bound check (mapped from the optimal loop as 1225 // it should already have been generated or may already be out of 1226 // the loop) 1227 Operand origGuard = BoundsCheck.getGuard(boundCheckInstr); 1228 Operand guard = origGuard.copy(); 1229 if (origGuard.isRegister() && optimalRegMap.containsKey(origGuard.asRegister().getRegister())) { 1230 guard.asRegister().setRegister(optimalRegMap.get(origGuard.asRegister().getRegister())); 1231 } 1232 1233 if (lowerBoundTestRedundant && upperBoundTestRedundant) { 1234 // both tests redundant so just generate a guard move of the 1235 // bound check guard 1236 Instruction move = Move.create(GUARD_MOVE, guardResult, guard); 1237 move.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1238 block.appendInstruction(move); 1239 } else { 1240 // 2.1) Create array length 1241 RegisterOperand array_length = ir.regpool.makeTempInt(); 1242 Instruction array_length_instr = 1243 GuardedUnary.create(ARRAYLENGTH, 1244 array_length, 1245 AnnotatedLSTNode.follow(BoundsCheck.getRef(boundCheckInstr)).copy(), 1246 guard); 1247 array_length_instr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1248 block.appendInstruction(array_length_instr); 1249 1250 // 2.2) Create minimum index test unless test is redundant 1251 if (!lowerBoundTestRedundant) { 1252 RegisterOperand lowerBoundGuard = upperBoundTestRedundant ? guardResult : ir.regpool.makeTempValidation(); 1253 // Generate bound check 1254 Instruction branch = 1255 IfCmp.create(INT_IFCMP, 1256 lowerBoundGuard, 1257 minIndexValue.copy(), 1258 array_length.copyRO(), 1259 ConditionOperand.LESS(), 1260 unoptimizedLoopEntry.makeJumpTarget(), 1261 BranchProfileOperand.unlikely()); 1262 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1263 block.appendInstruction(branch); 1264 // Adjust block 1265 block.insertOut(unoptimizedLoopEntry); 1266 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 1267 BasicBlock temp = (BasicBlock) block.next; 1268 ir.cfg.breakCodeOrder(block, temp); 1269 ir.cfg.linkInCodeOrder(block, new_block); 1270 ir.cfg.linkInCodeOrder(new_block, temp); 1271 block.insertOut(new_block); 1272 block = new_block; 1273 } 1274 // 2.3) Create maximum index test 1275 if (!upperBoundTestRedundant) { 1276 Instruction branch = 1277 IfCmp.create(INT_IFCMP, 1278 guardResult, 1279 maxIndexValue.copy(), 1280 array_length.copyRO(), 1281 ConditionOperand.GREATER(), 1282 unoptimizedLoopEntry.makeJumpTarget(), 1283 BranchProfileOperand.unlikely()); 1284 branch.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1285 block.appendInstruction(branch); 1286 // Adjust block 1287 block.insertOut(unoptimizedLoopEntry); 1288 BasicBlock new_block = block.createSubBlock(SYNTH_LOOP_VERSIONING_BCI, ir); 1289 BasicBlock temp = (BasicBlock) block.next; 1290 ir.cfg.breakCodeOrder(block, temp); 1291 ir.cfg.linkInCodeOrder(block, new_block); 1292 ir.cfg.linkInCodeOrder(new_block, temp); 1293 block.insertOut(new_block); 1294 block = new_block; 1295 } 1296 } 1297 return block; 1298 } 1299 1300 /** 1301 * Can we eliminate a null check as it has lready been performed? 1302 * NB SSA guarantees that if a value is null it must always be null 1303 * 1304 * @param header loop header basic block 1305 * @param instr null check instruction 1306 * @return the guard for the null check if it has already been performed, 1307 * {@code null} if the check hasn't been performed yet 1308 */ 1309 private RegisterOperand nullCheckPerformedInLoopPredecessors(BasicBlock header, Instruction instr) { 1310 if (VM.VerifyAssertions) VM._assert(NullCheck.conforms(instr)); 1311 BasicBlock block = header; 1312 do { 1313 block = ir.HIRInfo.dominatorTree.getParent(block); 1314 Instruction lastInst = block.lastInstruction(); 1315 for (Instruction itrInst = block.firstInstruction(); itrInst != lastInst; itrInst = 1316 itrInst.nextInstructionInCodeOrder()) { 1317 if (NullCheck.conforms(itrInst) && NullCheck.getRef(itrInst).similar(NullCheck.getRef(instr))) { 1318 return NullCheck.getGuardResult(itrInst); 1319 } 1320 } 1321 } while (block != ir.cfg.entry()); 1322 return null; 1323 } 1324 1325 /** 1326 * Gets the array length reference ignoring instructions that adjust 1327 * its result by a fixed amount. 1328 * 1329 * @param op operand to chase arraylength opcode to 1330 * constant value from an array length 1331 * @return the array length as defined above 1332 */ 1333 private Operand getConstantAdjustedArrayLengthRef(Operand op) { 1334 Operand result = null; 1335 if (op.isRegister()) { 1336 Instruction opInstr = AnnotatedLSTNode.definingInstruction(op); 1337 if (opInstr.getOpcode() == ARRAYLENGTH_opcode) { 1338 result = GuardedUnary.getVal(opInstr); 1339 } else if ((opInstr.getOpcode() == INT_ADD_opcode) || (opInstr.getOpcode() == INT_SUB_opcode)) { 1340 Operand val1 = Binary.getVal1(opInstr); 1341 Operand val2 = Binary.getVal2(opInstr); 1342 if (val1.isConstant()) { 1343 result = getConstantAdjustedArrayLengthRef(val2); 1344 } else if (val2.isConstant()) { 1345 result = getConstantAdjustedArrayLengthRef(val1); 1346 } 1347 } 1348 } 1349 return result; 1350 } 1351 1352 /** 1353 * Get the distance from an array length by addding up instructions 1354 * that adjust the array length result by a constant amount 1355 * 1356 * @param op operand to chase arraylength opcode to 1357 * @return the array length as defined above 1358 */ 1359 private int getConstantAdjustedArrayLengthDistance(Operand op) { 1360 Instruction opInstr = AnnotatedLSTNode.definingInstruction(op); 1361 if (opInstr.getOpcode() == ARRAYLENGTH_opcode) { 1362 return 0; 1363 } else if (opInstr.getOpcode() == INT_ADD_opcode) { 1364 Operand val1 = Binary.getVal1(opInstr); 1365 Operand val2 = Binary.getVal2(opInstr); 1366 if (val1.isConstant()) { 1367 return val1.asIntConstant().value + getConstantAdjustedArrayLengthDistance(val2); 1368 } else { 1369 if (VM.VerifyAssertions) VM._assert(val2.isConstant()); 1370 return getConstantAdjustedArrayLengthDistance(val1) + val2.asIntConstant().value; 1371 } 1372 } else if (opInstr.getOpcode() == INT_SUB_opcode) { 1373 Operand val1 = Binary.getVal1(opInstr); 1374 Operand val2 = Binary.getVal2(opInstr); 1375 if (val1.isConstant()) { 1376 return val1.asIntConstant().value - getConstantAdjustedArrayLengthDistance(val2); 1377 } else { 1378 if (VM.VerifyAssertions) VM._assert(val2.isConstant()); 1379 return getConstantAdjustedArrayLengthDistance(val1) - val2.asIntConstant().value; 1380 } 1381 } else { 1382 throw new Error("Unexpected opcode when computing distance " + op); 1383 } 1384 } 1385 1386 /* 1387 * TODO Convert to JavaDoc and add missing tags. 1388 * <p> 1389 * Remove loop and replace register definitions in the original loop 1390 * with phi instructions 1391 */ 1392 private void modifyOriginalLoop(AnnotatedLSTNode loop, ArrayList<Instruction> phiInstructions, 1393 ArrayList<Instruction> definingInstrInOriginalLoop, 1394 HashMap<Register, Register> subOptimalRegMap, 1395 HashMap<Register, Register> optimalRegMap) { 1396 // Remove instructions from loop header and exit, remove other 1397 // loop body blocks 1398 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 1399 while (blocks.hasMoreElements()) { 1400 BasicBlock block = blocks.nextElement(); 1401 if ((block == loop.header) || (block == loop.exit)) { 1402 IREnumeration.AllInstructionsEnum instructions = new IREnumeration.AllInstructionsEnum(ir, block); 1403 while (instructions.hasMoreElements()) { 1404 Instruction instruction = instructions.nextElement(); 1405 if (!BBend.conforms(instruction) && !Label.conforms(instruction)) { 1406 instruction.remove(); 1407 } 1408 } 1409 } else { 1410 ir.cfg.removeFromCFGAndCodeOrder(block); 1411 } 1412 } 1413 1414 // Place phi instructions in loop header 1415 for (int i = 0; i < phiInstructions.size(); i++) { 1416 Instruction origInstr = definingInstrInOriginalLoop.get(i); 1417 // Did the original instructions value escape the loop? 1418 if (origInstr != null) { 1419 // Was this a phi of a phi? 1420 if (Phi.conforms(origInstr)) { 1421 Instruction phi = phiInstructions.get(i); 1422 boolean phiHasUnoptimizedArg = Phi.getNumberOfValues(phi) == 2; 1423 // Phi of a phi - so make sure that we get the value to escape the loop, not the value at the loop header 1424 boolean fixed = false; 1425 for (int index = 0; index < Phi.getNumberOfPreds(origInstr); index++) { 1426 BasicBlockOperand predOp = Phi.getPred(origInstr, index); 1427 // Only worry about values who are on the backward branch 1428 if (predOp.block == loop.exit) { 1429 if (fixed) { // We've tried to do 2 replaces => something wrong 1430 SSA.printInstructions(ir); 1431 OptimizingCompilerException.UNREACHABLE("LoopVersioning", 1432 "Phi node in loop header with multiple in loop predecessors"); 1433 } 1434 Operand rval = Phi.getValue(origInstr, index); 1435 if (rval.isRegister()) { 1436 // Sort out registers 1437 Register origRegPhiRval = rval.asRegister().getRegister(); 1438 Register subOptRegPhiRval; 1439 Register optRegPhiRval; 1440 if (!subOptimalRegMap.containsKey(origRegPhiRval)) { 1441 // Register comes from loop exit but it wasn't defined in the loop 1442 subOptRegPhiRval = origRegPhiRval; 1443 optRegPhiRval = origRegPhiRval; 1444 } else { 1445 subOptRegPhiRval = subOptimalRegMap.get(origRegPhiRval); 1446 optRegPhiRval = optimalRegMap.get(origRegPhiRval); 1447 } 1448 if (phiHasUnoptimizedArg) { 1449 Phi.getValue(phi, UNOPTIMIZED_LOOP_OPERAND).asRegister().setRegister(subOptRegPhiRval); 1450 } 1451 Phi.getValue(phi, OPTIMIZED_LOOP_OPERAND).asRegister().setRegister(optRegPhiRval); 1452 } else if (rval.isConstant()) { 1453 // Sort out constants 1454 if (phiHasUnoptimizedArg) { 1455 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, rval.copy()); 1456 } 1457 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, rval.copy()); 1458 } else if (rval instanceof HeapOperand) { 1459 // Sort out heap variables 1460 @SuppressWarnings("unchecked") // Cast to generic type 1461 HeapVariable<Object> origPhiRval = ((HeapOperand) rval).value; 1462 HeapVariable<Object> subOptPhiRval; 1463 HeapVariable<Object> optPhiRval; 1464 if (true /*subOptimalRegMap.containsKey(origPhiRval) == false*/) { 1465 // currently we only expect to optimise scalar SSA 1466 // form 1467 subOptPhiRval = origPhiRval; 1468 optPhiRval = origPhiRval; 1469 } else { 1470 /* 1471 subOptPhiRval = (HeapVariable)subOptimalRegMap.get(origPhiRval); 1472 optPhiRval = (HeapVariable)optimalRegMap.get(origPhiRval); 1473 */ 1474 } 1475 if (phiHasUnoptimizedArg) { 1476 Phi.setValue(phi, UNOPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(subOptPhiRval)); 1477 } 1478 Phi.setValue(phi, OPTIMIZED_LOOP_OPERAND, new HeapOperand<Object>(optPhiRval)); 1479 } else { 1480 OptimizingCompilerException.UNREACHABLE("LoopVersioning", 1481 "Unknown operand type", 1482 rval.toString()); 1483 } 1484 fixed = true; 1485 } 1486 } 1487 } 1488 // Add back to loop 1489 loop.header.appendInstruction(phiInstructions.get(i)); 1490 } 1491 } 1492 // Remove original loop and branch to loop successor 1493 Instruction tempInstr; 1494 if (loop.header != loop.exit) { 1495 tempInstr = Goto.create(GOTO, loop.exit.makeJumpTarget()); 1496 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1497 loop.header.appendInstruction(tempInstr); 1498 loop.header.deleteNormalOut(); 1499 loop.header.insertOut(loop.exit); 1500 1501 } 1502 tempInstr = Goto.create(GOTO, loop.successor.makeJumpTarget()); 1503 tempInstr.setBytecodeIndex(SYNTH_LOOP_VERSIONING_BCI); 1504 loop.exit.appendInstruction(tempInstr); 1505 loop.exit.deleteNormalOut(); 1506 loop.exit.insertOut(loop.successor); 1507 } 1508 1509 private void removeUnoptimizedLoop(AnnotatedLSTNode loop, 1510 HashMap<BasicBlock, BasicBlock> unoptimizedLoopMap) { 1511 Enumeration<BasicBlock> blocks = loop.getBasicBlocks(); 1512 report("removing unoptimized loop"); 1513 BasicBlock block = unoptimizedLoopMap.get(loop.predecessor); 1514 report("removing block " + block); 1515 ir.cfg.removeFromCFGAndCodeOrder(block); 1516 while (blocks.hasMoreElements()) { 1517 block = unoptimizedLoopMap.get(blocks.nextElement()); 1518 if (!loop.contains(block)) { 1519 report("removing block " + block); 1520 ir.cfg.removeFromCFGAndCodeOrder(block); 1521 } else { 1522 report("not removing block that's in the original loop" + block); 1523 } 1524 } 1525 } 1526 1527 /** 1528 * Put the optimized loop's iterator register into the hash set 1529 * 1530 * @param reg register 1531 */ 1532 private void setOptimizedLoop(Register reg) { 1533 loopRegisterSet.add(reg); 1534 } 1535 1536 /** 1537 * Check whether the loop that contain such iterator register had 1538 * been optimized 1539 * 1540 * @param reg register 1541 * @return the test result 1542 */ 1543 private boolean isOptimizedLoop(Register reg) { 1544 return loopRegisterSet.contains(reg); 1545 } 1546 1547 /* 1548 * TODO Convert to JavaDoc and add missing tags. 1549 * Rename the iterators for optimized loops so we can tell they are still optimized 1550 */ 1551 private void renameOptimizedLoops(HashMap<Register, Register> subOptimalRegMap, 1552 HashMap<Register, Register> optimalRegMap) { 1553 Iterator<Register> itr = loopRegisterSet.iterator(); 1554 while (itr.hasNext()) { 1555 Register reg = itr.next(); 1556 if (subOptimalRegMap.containsKey(reg)) { 1557 loopRegisterSet.remove(reg); 1558 loopRegisterSet.add(subOptimalRegMap.get(reg)); 1559 loopRegisterSet.add(optimalRegMap.get(reg)); 1560 itr = loopRegisterSet.iterator(); 1561 } 1562 } 1563 } 1564}