001/* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013package org.jikesrvm.compilers.opt.regalloc.ia32; 014 015import static org.jikesrvm.compilers.opt.driver.OptConstants.PRIMITIVE_TYPE_FOR_WORD; 016import static org.jikesrvm.compilers.opt.ir.Operators.BBEND; 017import static org.jikesrvm.compilers.opt.ir.Operators.NOP; 018import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_BACKEDGE; 019import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_EPILOGUE; 020import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_OSR; 021import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_PROLOGUE; 022import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.ADVISE_ESP; 023import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_ADD; 024import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FCLEAR; 025import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FMOV; 026import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FMOV_opcode; 027import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FNINIT; 028import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FNSAVE; 029import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FRSTOR; 030import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_LEA; 031import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOV; 032import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVQ; 033import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSD; 034import static 
org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSD_opcode; 035import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSS; 036import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSS_opcode; 037import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOV_opcode; 038import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_POP; 039import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_PUSH; 040import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_RET_opcode; 041import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_SYSCALL; 042import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_TRAPIF; 043import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.REQUIRE_ESP; 044import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.DOUBLE_REG; 045import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.INT_REG; 046import static org.jikesrvm.ia32.ArchConstants.SSE2_FULL; 047import static org.jikesrvm.ia32.StackframeLayoutConstants.STACKFRAME_ALIGNMENT; 048import static org.jikesrvm.runtime.JavaSizeConstants.BYTES_IN_DOUBLE; 049import static org.jikesrvm.runtime.JavaSizeConstants.BYTES_IN_FLOAT; 050 051import java.util.Enumeration; 052import java.util.Iterator; 053 054import org.jikesrvm.VM; 055import org.jikesrvm.classloader.TypeReference; 056import org.jikesrvm.compilers.opt.OptimizingCompilerException; 057import org.jikesrvm.compilers.opt.ir.Empty; 058import org.jikesrvm.compilers.opt.ir.GenericPhysicalRegisterSet; 059import org.jikesrvm.compilers.opt.ir.IR; 060import org.jikesrvm.compilers.opt.ir.Instruction; 061import org.jikesrvm.compilers.opt.ir.Operator; 062import org.jikesrvm.compilers.opt.ir.Register; 063import org.jikesrvm.compilers.opt.ir.ia32.MIR_BinaryAcc; 064import org.jikesrvm.compilers.opt.ir.ia32.MIR_FSave; 065import org.jikesrvm.compilers.opt.ir.ia32.MIR_Lea; 066import 
org.jikesrvm.compilers.opt.ir.ia32.MIR_Move;
import org.jikesrvm.compilers.opt.ir.ia32.MIR_Nullary;
import org.jikesrvm.compilers.opt.ir.ia32.MIR_TrapIf;
import org.jikesrvm.compilers.opt.ir.ia32.MIR_UnaryNoRes;
import org.jikesrvm.compilers.opt.ir.ia32.PhysicalDefUse;
import org.jikesrvm.compilers.opt.ir.ia32.PhysicalRegisterSet;
import org.jikesrvm.compilers.opt.ir.operand.MemoryOperand;
import org.jikesrvm.compilers.opt.ir.operand.Operand;
import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
import org.jikesrvm.compilers.opt.ir.operand.StackLocationOperand;
import org.jikesrvm.compilers.opt.ir.operand.TrapCodeOperand;
import org.jikesrvm.compilers.opt.ir.operand.ia32.IA32ConditionOperand;
import org.jikesrvm.compilers.opt.regalloc.GenericStackManager;
import org.jikesrvm.runtime.ArchEntrypoints;
import org.jikesrvm.runtime.Entrypoints;
import org.vmmagic.unboxed.Offset;

/**
 * Class to manage the allocation of the "compiler-specific" portion of
 * the stackframe.  This class holds only the architecture-specific
 * functions.
 */
public final class StackManager extends GenericStackManager {

  /**
   * A frame offset for 108 bytes of stack space to store the
   * floating point state in the SaveVolatile protocol.
   */
  private int fsaveLocation;

  /**
   * We allow the stack pointer to float from its normal position at the
   * bottom of the frame.  This field holds the 'current' offset of the
   * SP.
   */
  private int ESPOffset = 0;

  /**
   * Should we allow the stack pointer to float in order to avoid scratch
   * registers in move instructions.  Note: as of Feb. 02, we think this
   * is a bad idea.
   */
  private static final boolean FLOAT_ESP = false;

  @Override
  public int getFrameFixedSize() {
    return frameSize - WORDSIZE;
  }

  /**
   * @param type one of INT_VALUE, FLOAT_VALUE, or DOUBLE_VALUE
   * @return the size of a type of value, in bytes.
   * NOTE: For the purpose of register allocation, an x87 FLOAT_VALUE is 64 bits!
   */
  private static byte getSizeOfType(byte type) {
    switch (type) {
      case INT_VALUE:
        return (byte) (WORDSIZE);
      case FLOAT_VALUE:
        if (SSE2_FULL) return (byte) BYTES_IN_FLOAT;
        // x87 case: deliberate fall-through — a float spill occupies a
        // double-sized slot (see NOTE above).
      case DOUBLE_VALUE:
        return (byte) BYTES_IN_DOUBLE;
      default:
        OptimizingCompilerException.TODO("getSizeOfValue: unsupported");
        return 0;
    }
  }

  /**
   * @param type one of INT_VALUE, FLOAT_VALUE, or DOUBLE_VALUE
   * @return the move operator for a type of value.
   */
  private static Operator getMoveOperator(byte type) {
    switch (type) {
      case INT_VALUE:
        return IA32_MOV;
      case DOUBLE_VALUE:
        if (SSE2_FULL) return IA32_MOVSD;
        // x87 case: deliberate fall-through — doubles and floats both
        // move with FMOV when SSE2 is not in full use.
      case FLOAT_VALUE:
        if (SSE2_FULL) return IA32_MOVSS;
        return IA32_FMOV;
      default:
        OptimizingCompilerException.TODO("getMoveOperator: unsupported");
        return null;
    }
  }

  @Override
  public int allocateNewSpillLocation(int type) {
    // increment by the spill size
    spillPointer += PhysicalRegisterSet.getSpillSize(type);

    // grow the frame if the new slot (plus a word of headroom) does not fit
    if (spillPointer + WORDSIZE > frameSize) {
      frameSize = spillPointer + WORDSIZE;
    }
    return spillPointer;
  }

  @Override
  public void insertSpillBefore(Instruction s, Register r, byte type, int location) {
    Operator move = getMoveOperator(type);
    byte size = getSizeOfType(type);
    RegisterOperand rOp;
    switch (type) {
      case FLOAT_VALUE:
        rOp = F(r);
        break;
      case DOUBLE_VALUE:
        rOp = D(r);
        break;
      default:
        rOp = new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD);
        break;
    }
    StackLocationOperand spill = new
StackLocationOperand(true, -location, size);
    s.insertBefore(MIR_Move.create(move, spill, rOp));
  }

  @Override
  public void insertUnspillBefore(Instruction s, Register r, byte type, int location) {
    Operator move = getMoveOperator(type);
    byte size = getSizeOfType(type);
    RegisterOperand rOp;
    switch (type) {
      case FLOAT_VALUE:
        rOp = F(r);
        break;
      case DOUBLE_VALUE:
        rOp = D(r);
        break;
      default:
        rOp = new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD);
        break;
    }
    // mirror image of insertSpillBefore: load from the slot into the register
    StackLocationOperand spill = new StackLocationOperand(true, -location, size);
    s.insertBefore(MIR_Move.create(move, rOp, spill));
  }

  @Override
  public void computeNonVolatileArea() {
    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();

    if (ir.compiledMethod.isSaveVolatile()) {
      // Record that we use every nonvolatile GPR
      int numGprNv = PhysicalRegisterSet.getNumberOfNonvolatileGPRs();
      ir.compiledMethod.setNumberOfNonvolatileGPRs((short) numGprNv);

      // set the frame size
      frameSize += numGprNv * WORDSIZE;
      frameSize = align(frameSize, STACKFRAME_ALIGNMENT);

      // TODO!!
      ir.compiledMethod.setNumberOfNonvolatileFPRs((short) 0);

      // Record that we need a stack frame.
      setFrameRequired();

      if (SSE2_FULL) {
        // Reserve a double-sized slot per (8) FP register; fsaveLocation is
        // left holding the last slot allocated by the loop.
        for (int i = 0; i < 8; i++) {
          fsaveLocation = allocateNewSpillLocation(DOUBLE_REG);
        }
      } else {
        // Grab 108 bytes (same as 27 4-byte spills) in the stack
        // frame, as a place to store the floating-point state with FSAVE
        for (int i = 0; i < 27; i++) {
          fsaveLocation = allocateNewSpillLocation(INT_REG);
        }
      }

      // Map each volatile register to a spill location.
      int i = 0;
      for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
        e.nextElement();
        // Note that as a side effect, the following call bumps up the
        // frame size.
        saveVolatileGPRLocation[i] = allocateNewSpillLocation(INT_REG);
      }

      // Map each non-volatile register to a spill location.
      i = 0;
      for (Enumeration<Register> e = phys.enumerateNonvolatileGPRs(); e.hasMoreElements(); i++) {
        e.nextElement();
        // Note that as a side effect, the following call bumps up the
        // frame size.
        nonVolatileGPRLocation[i] = allocateNewSpillLocation(INT_REG);
      }

      // Set the offset to find non-volatiles.
      int gprOffset = getNonvolatileGPROffset(0);
      ir.compiledMethod.setUnsignedNonVolatileOffset(gprOffset);
    } else {
      // Count the number of nonvolatiles used.
      int numGprNv = 0;
      int i = 0;
      for (Enumeration<Register> e = phys.enumerateNonvolatileGPRs(); e.hasMoreElements();) {
        Register r = e.nextElement();
        if (r.isTouched()) {
          // Note that as a side effect, the following call bumps up the
          // frame size.
          nonVolatileGPRLocation[i++] = allocateNewSpillLocation(INT_REG);
          numGprNv++;
        }
      }
      // Update the OptCompiledMethod object.
      ir.compiledMethod.setNumberOfNonvolatileGPRs((short) numGprNv);
      if (numGprNv > 0) {
        int gprOffset = getNonvolatileGPROffset(0);
        ir.compiledMethod.setUnsignedNonVolatileOffset(gprOffset);
        // record that we need a stack frame
        setFrameRequired();
      } else {
        ir.compiledMethod.setUnsignedNonVolatileOffset(0);
      }

      ir.compiledMethod.setNumberOfNonvolatileFPRs((short) 0);
    }
  }

  @Override
  public void cleanUpAndInsertEpilogue() {
    Instruction inst = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder();
    for (; inst != null; inst = inst.nextInstructionInCodeOrder()) {
      switch (inst.getOpcode()) {
        case IA32_MOV_opcode:
          // remove frivolous moves
          Operand result = MIR_Move.getResult(inst);
          Operand val = MIR_Move.getValue(inst);
          if (result.similar(val)) {
            inst = inst.remove();
          }
          break;
        case IA32_FMOV_opcode:
        case IA32_MOVSS_opcode:
        case IA32_MOVSD_opcode:
          // remove frivolous moves
          result = MIR_Move.getResult(inst);
          val = MIR_Move.getValue(inst);
          if (result.similar(val)) {
            inst = inst.remove();
          }
          break;
        case IA32_RET_opcode:
          if (frameIsRequired()) {
            insertEpilogue(inst);
          }
          // falls through to default (no further action for RET)
        default:
          break;
      }
    }
    // now that the frame size is fixed, fix up the spill location code
    rewriteStackLocations();
  }

  /**
   * Insert an explicit stack overflow check in the prologue <em>after</em>
   * buying the stack frame.<p>
   *
   * SIDE EFFECT: mutates the plg into a trap instruction.  We need to
   * mutate so that the trap instruction is in the GC map data structures.
   *
   * @param plg the prologue instruction
   */
  private void insertNormalStackOverflowCheck(Instruction plg) {
    // Uninterruptible methods get no overflow check; drop the prologue marker.
    if (!ir.method.isInterruptible()) {
      plg.remove();
      return;
    }

    if (ir.compiledMethod.isSaveVolatile()) {
      return;
    }

    PhysicalRegisterSet phys = (PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet();
    Register ESP = phys.getESP();
    MemoryOperand M =
        MemoryOperand.BD(ir.regpool.makeTROp(),
                         Entrypoints.stackLimitField.getOffset(),
                         (byte) WORDSIZE,
                         null,
                         null);

    // Trap if ESP <= active Thread Stack Limit
    MIR_TrapIf.mutate(plg,
                      IA32_TRAPIF,
                      null,
                      new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
                      M,
                      IA32ConditionOperand.LE(),
                      TrapCodeOperand.StackOverflow());
  }

  /**
   * Insert an explicit stack overflow check in the prologue <em>before</em>
   * buying the stack frame.
   * SIDE EFFECT: mutates the plg into a trap instruction.  We need to
   * mutate so that the trap instruction is in the GC map data structures.
   *
   * @param plg the prologue instruction
   */
  private void insertBigFrameStackOverflowCheck(Instruction plg) {
    // Uninterruptible methods get no overflow check; drop the prologue marker.
    if (!ir.method.isInterruptible()) {
      plg.remove();
      return;
    }

    if (ir.compiledMethod.isSaveVolatile()) {
      return;
    }

    PhysicalRegisterSet phys = (PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet();
    Register ESP = phys.getESP();
    Register ECX = phys.getECX();

    // ECX := active Thread Stack Limit
    MemoryOperand M =
        MemoryOperand.BD(ir.regpool.makeTROp(),
                         Entrypoints.stackLimitField.getOffset(),
                         (byte) WORDSIZE,
                         null,
                         null);
    plg.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand((ECX), PRIMITIVE_TYPE_FOR_WORD), M));

    // ECX += frame Size
    int frameSize = getFrameFixedSize();
    plg.insertBefore(MIR_BinaryAcc.create(IA32_ADD, new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD),
        VM.BuildFor32Addr ? IC(frameSize) : LC(frameSize)));

    // Trap if ESP <= ECX
    MIR_TrapIf.mutate(plg,
                      IA32_TRAPIF,
                      null,
                      new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
                      new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD),
                      IA32ConditionOperand.LE(),
                      TrapCodeOperand.StackOverflow());
  }

  /**
   * Insert the prologue for a normal method.
   *
   * Assume we are inserting the prologue for method B called from method
   * A.
   * <ul>
   * <li> Perform a stack overflow check.
   * <li> Store a back pointer to A's frame
   * <li> Store B's compiled method id
   * <li> Adjust frame pointer to point to B's frame
   * <li> Save any used non-volatile registers
   * </ul>
   */
  @Override
  public void insertNormalPrologue() {
    PhysicalRegisterSet phys = (PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet();
    Register ESP = phys.getESP();
    MemoryOperand fpHome =
        MemoryOperand.BD(ir.regpool.makeTROp(),
                         ArchEntrypoints.framePointerField.getOffset(),
                         (byte) WORDSIZE,
                         null,
                         null);

    // the prologue instruction
    Instruction plg = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder();
    // inst is the instruction immediately after the IR_PROLOGUE
    // instruction
    Instruction inst = plg.nextInstructionInCodeOrder();

    int frameFixedSize = getFrameFixedSize();
    ir.compiledMethod.setFrameFixedSize(frameFixedSize);

    // I. Buy a stackframe (including overflow check)
    // NOTE: We play a little game here.  If the frame we are buying is
    // very small (less than 256) then we can be sloppy with the
    // stackoverflow check and actually allocate the frame in the guard
    // region.  We'll notice when this frame calls someone and take the
    // stackoverflow in the callee.  We can't do this if the frame is too big,
    // because growing the stack in the callee and/or handling a hardware trap
    // in this frame will require most of the guard region to complete.
    // See libvm.C.
    if (frameFixedSize >= 256) {
      // 1. Insert Stack overflow check (BEFORE buying the frame).
      insertBigFrameStackOverflowCheck(plg);

      // 2. Save caller's frame pointer
      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome));

      // 3. Set my frame pointer to current value of stackpointer
      inst.insertBefore(MIR_Move.create(IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD)));

      // 4. Store my compiled method id
      int cmid = ir.compiledMethod.getId();
      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, VM.BuildFor32Addr ? IC(cmid) : LC(cmid)));
    } else {
      // 1. Save caller's frame pointer
      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome));

      // 2. Set my frame pointer to current value of stackpointer
      inst.insertBefore(MIR_Move.create(IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD)));

      // 3. Store my compiled method id
      int cmid = ir.compiledMethod.getId();
      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, VM.BuildFor32Addr ? IC(cmid) : LC(cmid)));

      // 4. Insert Stack overflow check (AFTER buying the frame).
      insertNormalStackOverflowCheck(plg);
    }

    // II. Save any used volatile and non-volatile registers
    if (ir.compiledMethod.isSaveVolatile()) {
      saveVolatiles(inst);
      saveFloatingPointState(inst);
    }
    saveNonVolatiles(inst);
  }

  /**
   * Insert code into the prologue to save any used non-volatile
   * registers.
   *
   * @param inst the first instruction after the prologue.
   */
  private void saveNonVolatiles(Instruction inst) {
    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
    int nNonvolatileGPRS = ir.compiledMethod.getNumberOfNonvolatileGPRs();

    // Save each non-volatile GPR used by this method.
    int n = nNonvolatileGPRS - 1;
    for (Enumeration<Register> e = phys.enumerateNonvolatileGPRsBackwards(); e.hasMoreElements() && n >= 0; n--) {
      Register nv = e.nextElement();
      int offset = getNonvolatileGPROffset(n);
      Operand M = new StackLocationOperand(true, -offset, WORDSIZE);
      inst.insertBefore(MIR_Move.create(IA32_MOV, M, new RegisterOperand(nv, PRIMITIVE_TYPE_FOR_WORD)));
    }
  }

  /**
   * Insert code before a return instruction to restore the nonvolatile
   * registers.
   *
   * @param inst the return instruction
   */
  private void restoreNonVolatiles(Instruction inst) {
    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
    int nNonvolatileGPRS = ir.compiledMethod.getNumberOfNonvolatileGPRs();

    // Walk the same (backwards) enumeration as saveNonVolatiles so each
    // register reloads from the slot it was saved to.
    int n = nNonvolatileGPRS - 1;
    for (Enumeration<Register> e = phys.enumerateNonvolatileGPRsBackwards(); e.hasMoreElements() && n >= 0; n--) {
      Register nv = e.nextElement();
      int offset = getNonvolatileGPROffset(n);
      Operand M = new StackLocationOperand(true, -offset, WORDSIZE);
      inst.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(nv, PRIMITIVE_TYPE_FOR_WORD), M));
    }
  }

  /**
   * Insert code into the prologue to save the floating point state.
   *
   * @param inst the first instruction after the prologue.
   */
  private void saveFloatingPointState(Instruction inst) {
    if (SSE2_FULL) {
      // Store each of the 8 FP registers into its double-sized slot.
      GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
      for (int i = 0; i < 8; i++) {
        inst.insertBefore(MIR_Move.create(IA32_MOVQ,
            new StackLocationOperand(true, -fsaveLocation + (i * BYTES_IN_DOUBLE), BYTES_IN_DOUBLE),
            new RegisterOperand(phys.getFPR(i), TypeReference.Double)));
      }
    } else {
      // x87: dump the whole FP state with FNSAVE into the 108-byte area.
      Operand M = new StackLocationOperand(true, -fsaveLocation, 4);
      inst.insertBefore(MIR_FSave.create(IA32_FNSAVE, M));
    }
  }

  /**
   * Insert code into the epilogue to restore the floating point state.
   *
   * @param inst the return instruction after the epilogue.
   */
  private void restoreFloatingPointState(Instruction inst) {
    if (SSE2_FULL) {
      // Reload each of the 8 FP registers from its double-sized slot.
      GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
      for (int i = 0; i < 8; i++) {
        inst.insertBefore(MIR_Move.create(IA32_MOVQ,
            new RegisterOperand(phys.getFPR(i), TypeReference.Double),
            new StackLocationOperand(true, -fsaveLocation + (i * BYTES_IN_DOUBLE), BYTES_IN_DOUBLE)));
      }
    } else {
      // x87: restore the FP state saved by FNSAVE.
      Operand M = new StackLocationOperand(true, -fsaveLocation, 4);
      inst.insertBefore(MIR_FSave.create(IA32_FRSTOR, M));
    }
  }

  /**
   * Insert code into the prologue to save all volatile
   * registers.
   *
   * @param inst the first instruction after the prologue.
   */
  private void saveVolatiles(Instruction inst) {
    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();

    // Save each GPR.
    int i = 0;
    for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
      Register r = e.nextElement();
      int location = saveVolatileGPRLocation[i];
      Operand M = new StackLocationOperand(true, -location, WORDSIZE);
      inst.insertBefore(MIR_Move.create(IA32_MOV, M, new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD)));
    }
  }

  /**
   * Insert code before a return instruction to restore the volatile
   * registers.
   *
   * @param inst the return instruction
   */
  private void restoreVolatileRegisters(Instruction inst) {
    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();

    // Restore every GPR
    int i = 0;
    for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
      Register r = e.nextElement();
      int location = saveVolatileGPRLocation[i];
      Operand M = new StackLocationOperand(true, -location, WORDSIZE);
      inst.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD), M));
    }
  }

  /**
   * Insert the epilogue before a particular return instruction.
   *
   * @param ret the return instruction.
   */
  private void insertEpilogue(Instruction ret) {
    // 1. Restore any saved registers
    if (ir.compiledMethod.isSaveVolatile()) {
      restoreVolatileRegisters(ret);
      restoreFloatingPointState(ret);
    }
    restoreNonVolatiles(ret);

    // 2. Restore caller's stackpointer and framepointer
    int frameSize = getFrameFixedSize();
    ret.insertBefore(MIR_UnaryNoRes.create(REQUIRE_ESP, IC(frameSize)));
    MemoryOperand fpHome =
        MemoryOperand.BD(ir.regpool.makeTROp(),
                         ArchEntrypoints.framePointerField.getOffset(),
                         (byte) WORDSIZE,
                         null,
                         null);
    ret.insertBefore(MIR_Nullary.create(IA32_POP, fpHome));
  }

  @Override
  public void replaceOperandWithSpillLocation(Instruction s, RegisterOperand symb) {
    // Get the spill location previously assigned to the symbolic
    // register.
    int location = regAllocState.getSpill(symb.getRegister());

    // Create a memory operand M representing the spill location.
633 int size; 634 if (VM.BuildFor32Addr) { 635 if (SSE2_FULL) { 636 size = symb.getType().getMemoryBytes(); 637 if (size < WORDSIZE) 638 size = WORDSIZE; 639 } else { 640 int type = PhysicalRegisterSet.getPhysicalRegisterType(symb.getRegister()); 641 size = PhysicalRegisterSet.getSpillSize(type); 642 } 643 } else { 644 size = WORDSIZE; 645 } 646 StackLocationOperand M = new StackLocationOperand(true, -location, (byte) size); 647 648 M = new StackLocationOperand(true, -location, (byte) size); 649 650 // replace the register operand with the memory operand 651 s.replaceOperand(symb, M); 652 } 653 654 private boolean hasSymbolicRegister(MemoryOperand M) { 655 if (M.base != null && !M.base.getRegister().isPhysical()) return true; 656 if (M.index != null && !M.index.getRegister().isPhysical()) return true; 657 return false; 658 } 659 660 /** 661 * @param s the instruction to check 662 * @return {@code true} if and only if the instruction is a MOVE instruction 663 * that can be generated without resorting to scratch registers 664 */ 665 private boolean isScratchFreeMove(Instruction s) { 666 if (s.operator() != IA32_MOV) return false; 667 668 // if we don't allow ESP to float, we will always use scratch 669 // registers in these move instructions. 670 if (!FLOAT_ESP) return false; 671 672 Operand result = MIR_Move.getResult(s); 673 Operand value = MIR_Move.getValue(s); 674 675 // TODO Remove duplication and check if code and documentation 676 // are correct. 677 678 // We need scratch registers for spilled registers that appear in 679 // memory operands. 680 if (result.isMemory()) { 681 MemoryOperand M = result.asMemory(); 682 if (hasSymbolicRegister(M)) return false; 683 // We will perform this transformation by changing the MOV to a PUSH 684 // or POP. Note that IA32 cannot PUSH/POP >WORDSIZE quantities, so 685 // disable the transformation for that case. Also, (TODO), our 686 // assembler does not emit the prefix to allow 16-bit push/pops, so 687 // disable these too. 
      // What's left? WORDSIZE only.
      if (M.size != WORDSIZE) return false;
    }
    if (value.isMemory()) {
      MemoryOperand M = value.asMemory();
      if (hasSymbolicRegister(M)) return false;
      // We will perform this transformation by changing the MOV to a PUSH
      // or POP.  Note that IA32 cannot PUSH/POP >WORDSIZE quantities, so
      // disable the transformation for that case.  Also, (TODO), our
      // assembler does not emit the prefix to allow 16-bit push/pops, so
      // disable these too.  What's left? WORDSIZE only.
      if (M.size != WORDSIZE) return false;
    }
    // If we get here, all is kosher.
    return true;
  }

  @Override
  public boolean needScratch(Register r, Instruction s) {
    // We never need a scratch register for a floating point value in an
    // FMOV instruction.
    if (r.isFloatingPoint() && s.operator() == IA32_FMOV) return false;

    // never need a scratch register for a YIELDPOINT_OSR
    if (s.operator() == YIELDPOINT_OSR) return false;

    // Some MOVEs never need scratch registers
    if (isScratchFreeMove(s)) return false;

    // If s already has a memory operand, it is illegal to introduce
    // another.
    if (s.hasMemoryOperand()) return true;

    // If r appears more than once in the instruction, we can't
    // use a memory operand for all occurrences, so we will need a scratch
    int count = 0;
    for (Enumeration<Operand> ops = s.getOperands(); ops.hasMoreElements();) {
      Operand op = ops.nextElement();
      if (op.isRegister()) {
        RegisterOperand rop = op.asRegister();
        if (rop.getRegister() == r) {
          count++;
        }
        // If the register in question is an int register (i.e. 32 bit)
        // and the VM is build for x64, we mustn't introduce a memory
        // operand for the register.  All spill locations are 64-bit,
        // so the 32-bit operation would be converted to a quad operation
        // because of the memory operand.  This would change the semantics
        // of the operation (e.g. for CMP or ADD).
        // Moreover, we can't use a 32-bit memory operation because it is
        // not guaranteed that a x64 memory address would fit into 32 bits.
        // FIXME need to review this decision before finishing x64 opt work
        if (VM.BuildFor64Addr && rop.isInt() && rop.getRegister() == r) return true;
      }
    }
    if (count > 1) return true;

    // Check the architecture restrictions.
    if (RegisterRestrictions.mustBeInRegister(r, s)) return true;

    // Otherwise, everything is OK.
    return false;
  }

  /**
   * Before instruction s, insert code to adjust ESP so that it lies at a
   * particular offset from its usual location.
   *
   * @param s the instruction before which ESP must have the desired offset
   * @param desiredOffset the desired offset
   */
  private void moveESPBefore(Instruction s, int desiredOffset) {
    PhysicalRegisterSet phys = (PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet();
    Register ESP = phys.getESP();
    int delta = desiredOffset - ESPOffset;
    if (delta != 0) {
      if (canModifyEFLAGS(s)) {
        // ADD is fine here because nothing downstream reads the condition
        // codes before they are redefined (see canModifyEFLAGS).
        s.insertBefore(MIR_BinaryAcc.create(IA32_ADD, new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
            VM.BuildFor32Addr ? IC(delta) : LC(delta)));
      } else {
        // Use LEA to adjust ESP without touching EFLAGS.
        MemoryOperand M =
            MemoryOperand.BD(new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
                             Offset.fromIntSignExtend(delta),
                             (byte) WORDSIZE,
                             null,
                             null);
        s.insertBefore(MIR_Lea.create(IA32_LEA, new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD), M));
      }
      ESPOffset = desiredOffset;
    }
  }

  /**
   * @param s the instruction to check
   * @return whether EFLAGS may be clobbered immediately before s, i.e.
   *         scanning forward in code order, EFLAGS is redefined (or the
   *         basic block ends) before any instruction uses it
   */
  private boolean canModifyEFLAGS(Instruction s) {
    if (PhysicalDefUse.usesEFLAGS(s.operator())) {
      return false;
    }
    if (PhysicalDefUse.definesEFLAGS(s.operator())) {
      return true;
    }
    if (s.operator() == BBEND) return true;
    // recurse down the code order until a use, def, or block end decides it
    return canModifyEFLAGS(s.nextInstructionInCodeOrder());
  }

  /**
   * Attempt to rewrite a move instruction to a NOP.
   *
   * @param s the instruction to rewrite
   * @return {@code true} if and only if the transformation applies
   */
  private boolean mutateMoveToNop(Instruction s) {
    Operand result = MIR_Move.getResult(s);
    Operand val = MIR_Move.getValue(s);
    if (result.isStackLocation() && val.isStackLocation()) {
      if (result.similar(val)) {
        // stack-slot-to-same-stack-slot move: no effect, kill it
        Empty.mutate(s, NOP);
        return true;
      }
    }
    return false;
  }

  /**
   * Rewrites a move instruction if it has 2 memory operands.
   * One of the 2 memory operands must be a stack location operand.  Move
   * the SP to the appropriate location and use a push or pop instruction.
   *
   * @param s the instruction to rewrite
   */
  private void rewriteMoveInstruction(Instruction s) {
    // first attempt to mutate the move into a noop
    if (mutateMoveToNop(s)) return;

    Operand result = MIR_Move.getResult(s);
    Operand val = MIR_Move.getValue(s);
    if (result instanceof StackLocationOperand) {
      if (val instanceof MemoryOperand || val instanceof StackLocationOperand) {
        // stack-slot destination: place ESP just below the slot and PUSH
        int offset = ((StackLocationOperand) result).getOffset();
        byte size = ((StackLocationOperand) result).getSize();
        offset = FPOffset2SPOffset(offset) + size;
        moveESPBefore(s, offset);
        MIR_UnaryNoRes.mutate(s, IA32_PUSH, val);
      }
    } else {
      if (result instanceof MemoryOperand) {
        if (val instanceof StackLocationOperand) {
          // stack-slot source: place ESP at the slot and POP into result
          int offset = ((StackLocationOperand) val).getOffset();
          offset = FPOffset2SPOffset(offset);
          moveESPBefore(s, offset);
          MIR_Nullary.mutate(s, IA32_POP, result);
        }
      }
    }
  }

  /**
   * Walks through the IR.  For each StackLocationOperand, replace the
   * operand with the appropriate MemoryOperand.
   */
  private void rewriteStackLocations() {
    // ESP is initially WORDSIZE above where the framepointer is going to be.
    ESPOffset = getFrameFixedSize() + WORDSIZE;
    Register ESP = ((PhysicalRegisterSet) ir.regpool.getPhysicalRegisterSet()).getESP();

    boolean seenReturn = false;
    for (Enumeration<Instruction> e = ir.forwardInstrEnumerator(); e.hasMoreElements();) {
      Instruction s = e.nextElement();

      if (s.isReturn()) {
        seenReturn = true;
        continue;
      }

      if (s.isBranch()) {
        // restore ESP to home location at end of basic block.
        moveESPBefore(s, 0);
        continue;
      }

      if (s.operator() == BBEND) {
        if (seenReturn) {
          // at a return ESP will be at FrameFixedSize,
          // so just reset the bookkeeping without emitting an adjustment
          seenReturn = false;
          ESPOffset = 0;
        } else {
          moveESPBefore(s, 0);
        }
        continue;
      }

      if (s.operator() == ADVISE_ESP) {
        // pseudo-op telling us where ESP already is; update bookkeeping only
        ESPOffset = MIR_UnaryNoRes.getVal(s).asIntConstant().value;
        continue;
      }

      if (s.operator() == REQUIRE_ESP) {
        // ESP is required to be at the given offset from the bottom of the frame
        moveESPBefore(s, MIR_UnaryNoRes.getVal(s).asIntConstant().value);
        continue;
      }

      if (s.operator() == YIELDPOINT_PROLOGUE ||
          s.operator() == YIELDPOINT_BACKEDGE ||
          s.operator() == YIELDPOINT_EPILOGUE) {
        // yieldpoints expect ESP at its home position
        moveESPBefore(s, 0);
        continue;
      }

      if (s.operator() == IA32_MOV) {
        rewriteMoveInstruction(s);
      }

      // pop computes the effective address of its operand after ESP
      // is incremented.  Therefore update ESPOffset before rewriting
      // stacklocation and memory operands.
      if (s.operator() == IA32_POP) {
        ESPOffset += WORDSIZE;
      }

      for (Enumeration<Operand> ops = s.getOperands(); ops.hasMoreElements();) {
        Operand op = ops.nextElement();
        if (op instanceof StackLocationOperand) {
          // convert the frame-relative slot into an ESP-relative address
          StackLocationOperand sop = (StackLocationOperand) op;
          int offset = sop.getOffset();
          if (sop.isFromTop()) {
            offset = FPOffset2SPOffset(offset);
          }
          offset -= ESPOffset;
          byte size = sop.getSize();
          MemoryOperand M =
              MemoryOperand.BD(new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
                               Offset.fromIntSignExtend(offset),
                               size,
                               null,
                               null);
          s.replaceOperand(op, M);
        } else if (op instanceof MemoryOperand) {
          MemoryOperand M = op.asMemory();
          // existing ESP-relative operands must be adjusted for the
          // current float of ESP
          if ((M.base != null && M.base.getRegister() == ESP) || (M.index != null && M.index.getRegister() == ESP)) {
            M.disp = M.disp.minus(ESPOffset);
          }
        }
      }

      // push computes the effective address of its operand after ESP
      // is decremented.  Therefore update ESPOffset after rewriting
      // stacklocation and memory operands.
      if (s.operator() == IA32_PUSH) {
        ESPOffset -= WORDSIZE;
      }
    }
  }

  /**
   * PRECONDITION: The final frameSize is calculated before calling this
   * routine.
   *
   * @param fpOffset offset in bytes from the top of the stack frame
   * @return offset in bytes from the stack pointer.
   */
  private int FPOffset2SPOffset(int fpOffset) {
    // Note that SP = FP - frameSize + WORDSIZE;
    // So, FP + fpOffset = SP + frameSize - WORDSIZE
    //                        + fpOffset
    return frameSize + fpOffset - WORDSIZE;
  }

  /**
   * Walks the set of in-use scratch registers and, before instruction
   * {@code s} executes, unloads any scratch register that {@code s} defines
   * (or that a call/FNINIT/FCLEAR would clobber) and reloads any scratch
   * register whose symbolic contents {@code s} uses or whose assignment is
   * no longer legal at {@code s}. Ended assignments are removed from
   * {@code scratchInUse} via the iterator and their intervals are closed in
   * {@code scratchMap}.
   *
   * @param s the instruction about to be processed
   */
  @Override
  public void restoreScratchRegistersBefore(Instruction s) {
    for (Iterator<ScratchRegister> i = scratchInUse.iterator(); i.hasNext();) {
      ScratchRegister scratch = i.next();

      // a scratch register with no current contents needs no save/restore
      if (scratch.currentContents == null) continue;
      if (VERBOSE_DEBUG) {
        System.out.println("RESTORE: consider " + scratch);
      }
      // removed: this mapping was already dropped from scratchInUse
      // unloaded: this scratch register was already spilled back to memory
      boolean removed = false;
      boolean unloaded = false;
      if (definedIn(scratch.scratch, s) ||
          (s.isCall() && !s.operator().isCallSaveVolatile() && scratch.scratch.isVolatile()) ||
          (s.operator() == IA32_FNINIT && scratch.scratch.isFloatingPoint()) ||
          (s.operator() == IA32_FCLEAR && scratch.scratch.isFloatingPoint())) {
        // s defines the scratch register, so save its contents before they
        // are killed.
        if (VERBOSE_DEBUG) {
          System.out.println("RESTORE : unload because defined " + scratch);
        }
        unloadScratchRegisterBefore(s, scratch);

        // update mapping information
        if (VERBOSE_DEBUG) {
          System.out.println("RSRB: End scratch interval " + scratch.scratch + " " + s);
        }
        scratchMap.endScratchInterval(scratch.scratch, s);
        Register scratchContents = scratch.currentContents;
        if (scratchContents != null) {
          if (VERBOSE_DEBUG) {
            System.out.println("RSRB: End symbolic interval " + scratch.currentContents + " " + s);
          }
          scratchMap.endSymbolicInterval(scratch.currentContents, s);
        }

        // safe removal during traversal: use the iterator, not the collection
        i.remove();
        removed = true;
        unloaded = true;
      }

      if (usedIn(scratch.scratch, s) ||
          !isLegal(scratch.currentContents, scratch.scratch, s) ||
          (s.operator() == IA32_FCLEAR && scratch.scratch.isFloatingPoint())) {
        // first spill the current contents of the scratch register to
        // memory, unless the branch above already did so
        if (!unloaded) {
          if (VERBOSE_DEBUG) {
            System.out.println("RESTORE : unload because used " + scratch);
          }
          unloadScratchRegisterBefore(s, scratch);

          // update mapping information
          if (VERBOSE_DEBUG) {
            System.out.println("RSRB2: End scratch interval " + scratch.scratch + " " + s);
          }
          scratchMap.endScratchInterval(scratch.scratch, s);
          Register scratchContents = scratch.currentContents;
          if (scratchContents != null) {
            if (VERBOSE_DEBUG) {
              System.out.println("RSRB2: End symbolic interval " + scratch.currentContents + " " + s);
            }
            scratchMap.endSymbolicInterval(scratch.currentContents, s);
          }

        }
        // s or some future instruction uses the scratch register,
        // so restore the correct contents.
        if (VERBOSE_DEBUG) {
          System.out.println("RESTORE : reload because used " + scratch);
        }
        reloadScratchRegisterBefore(s, scratch);

        if (!removed) {
          i.remove();
          removed = true;
        }
      }
    }
  }

  /**
   * Initialize some architecture-specific state needed for register
   * allocation.
   *
   * @param ir the IR that's being processed
   */
  @Override
  public void initForArch(IR ir) {
    PhysicalRegisterSet phys = (PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet();

    // We reserve the last (bottom) slot in the FPR stack as a scratch register.
    // This allows us to do one push/pop sequence in order to use the
    // top of the stack as a scratch location
    phys.getFPR(7).reserveRegister();
  }

  /**
   * @param s the instruction to test
   * @return {@code true} if and only if {@code s} is an IA32 SYSCALL
   */
  @Override
  public boolean isSysCall(Instruction s) {
    return s.operator() == IA32_SYSCALL;
  }
}