001/*
002 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 *  This file is licensed to You under the Eclipse Public License (EPL);
005 *  You may not use this file except in compliance with the License. You
006 *  may obtain a copy of the License at
007 *
008 *      http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 *  See the COPYRIGHT.txt file distributed with this work for information
011 *  regarding copyright ownership.
012 */
013package org.jikesrvm.compilers.opt.regalloc.ia32;
014
015import static org.jikesrvm.compilers.opt.driver.OptConstants.PRIMITIVE_TYPE_FOR_WORD;
016import static org.jikesrvm.compilers.opt.ir.Operators.BBEND;
017import static org.jikesrvm.compilers.opt.ir.Operators.NOP;
018import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_BACKEDGE;
019import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_EPILOGUE;
020import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_OSR;
021import static org.jikesrvm.compilers.opt.ir.Operators.YIELDPOINT_PROLOGUE;
022import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.ADVISE_ESP;
023import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_ADD;
024import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FCLEAR;
025import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FMOV;
026import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FMOV_opcode;
027import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FNINIT;
028import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FNSAVE;
029import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_FRSTOR;
030import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_LEA;
031import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOV;
032import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVQ;
033import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSD;
034import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSD_opcode;
035import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSS;
036import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOVSS_opcode;
037import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_MOV_opcode;
038import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_POP;
039import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_PUSH;
040import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_RET_opcode;
041import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_SYSCALL;
042import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.IA32_TRAPIF;
043import static org.jikesrvm.compilers.opt.ir.ia32.ArchOperators.REQUIRE_ESP;
044import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.DOUBLE_REG;
045import static org.jikesrvm.compilers.opt.regalloc.ia32.PhysicalRegisterConstants.INT_REG;
046import static org.jikesrvm.ia32.ArchConstants.SSE2_FULL;
047import static org.jikesrvm.ia32.StackframeLayoutConstants.STACKFRAME_ALIGNMENT;
048import static org.jikesrvm.runtime.JavaSizeConstants.BYTES_IN_DOUBLE;
049import static org.jikesrvm.runtime.JavaSizeConstants.BYTES_IN_FLOAT;
050
051import java.util.Enumeration;
052import java.util.Iterator;
053
054import org.jikesrvm.VM;
055import org.jikesrvm.classloader.TypeReference;
056import org.jikesrvm.compilers.opt.OptimizingCompilerException;
057import org.jikesrvm.compilers.opt.ir.Empty;
058import org.jikesrvm.compilers.opt.ir.GenericPhysicalRegisterSet;
059import org.jikesrvm.compilers.opt.ir.IR;
060import org.jikesrvm.compilers.opt.ir.Instruction;
061import org.jikesrvm.compilers.opt.ir.Operator;
062import org.jikesrvm.compilers.opt.ir.Register;
063import org.jikesrvm.compilers.opt.ir.ia32.MIR_BinaryAcc;
064import org.jikesrvm.compilers.opt.ir.ia32.MIR_FSave;
065import org.jikesrvm.compilers.opt.ir.ia32.MIR_Lea;
066import org.jikesrvm.compilers.opt.ir.ia32.MIR_Move;
067import org.jikesrvm.compilers.opt.ir.ia32.MIR_Nullary;
068import org.jikesrvm.compilers.opt.ir.ia32.MIR_TrapIf;
069import org.jikesrvm.compilers.opt.ir.ia32.MIR_UnaryNoRes;
070import org.jikesrvm.compilers.opt.ir.ia32.PhysicalDefUse;
071import org.jikesrvm.compilers.opt.ir.ia32.PhysicalRegisterSet;
072import org.jikesrvm.compilers.opt.ir.operand.MemoryOperand;
073import org.jikesrvm.compilers.opt.ir.operand.Operand;
074import org.jikesrvm.compilers.opt.ir.operand.RegisterOperand;
075import org.jikesrvm.compilers.opt.ir.operand.StackLocationOperand;
076import org.jikesrvm.compilers.opt.ir.operand.TrapCodeOperand;
077import org.jikesrvm.compilers.opt.ir.operand.ia32.IA32ConditionOperand;
078import org.jikesrvm.compilers.opt.regalloc.GenericStackManager;
079import org.jikesrvm.runtime.ArchEntrypoints;
080import org.jikesrvm.runtime.Entrypoints;
081import org.vmmagic.unboxed.Offset;
082
083/**
084 * Class to manage the allocation of the "compiler-specific" portion of
085 * the stackframe.  This class holds only the architecture-specific
086 * functions.
087 */
088public final class StackManager extends GenericStackManager {
089
  /**
   * A frame offset for the stack space used to save the floating point
   * state in the SaveVolatile protocol: 108 bytes for the x87 FNSAVE
   * area, or eight 8-byte slots when SSE2 is in full use.
   */
  private int fsaveLocation;
095
096  /**
097   * We allow the stack pointer to float from its normal position at the
098   * bottom of the frame.  This field holds the 'current' offset of the
099   * SP.
100   */
101  private int ESPOffset = 0;
102
  /**
   * Should we allow the stack pointer to float in order to avoid scratch
   * registers in move instructions?  Note: as of Feb. 02, we think this
   * is a bad idea.
   */
  private static final boolean FLOAT_ESP = false;
109
110  @Override
111  public int getFrameFixedSize() {
112    return frameSize - WORDSIZE;
113  }
114
  /**
   * Returns the size of a type of value, in bytes.
   * NOTE: For the purpose of register allocation, an x87 FLOAT_VALUE is 64 bits!
   *
   * @param type one of INT_VALUE, FLOAT_VALUE, or DOUBLE_VALUE
   * @return the size of the value, in bytes
   */
120  private static byte getSizeOfType(byte type) {
121    switch (type) {
122      case INT_VALUE:
123        return (byte) (WORDSIZE);
124      case FLOAT_VALUE:
125        if (SSE2_FULL) return (byte) BYTES_IN_FLOAT;
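        // without SSE2, fall through: an x87 float spill occupies a double-sized slot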
126      case DOUBLE_VALUE:
127        return (byte) BYTES_IN_DOUBLE;
128      default:
129        OptimizingCompilerException.TODO("getSizeOfValue: unsupported");
130        return 0;
131    }
132  }
133
134  /**
135   * @param type one of INT_VALUE, FLOAT_VALUE, or DOUBLE_VALUE
136   * @return the move operator for a type of value.
137   */
138  private static Operator getMoveOperator(byte type) {
139    switch (type) {
140      case INT_VALUE:
141        return IA32_MOV;
142      case DOUBLE_VALUE:
143        if (SSE2_FULL) return IA32_MOVSD;
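        // without SSE2, fall through: both doubles and floats use the x87 FMOV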
144      case FLOAT_VALUE:
145        if (SSE2_FULL) return IA32_MOVSS;
146        return IA32_FMOV;
147      default:
148        OptimizingCompilerException.TODO("getMoveOperator: unsupported");
149        return null;
150    }
151  }
152
153  @Override
154  public int allocateNewSpillLocation(int type) {
155
156    // increment by the spill size
157    spillPointer += PhysicalRegisterSet.getSpillSize(type);
158
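    // grow the frame if necessary so that the new spill location fits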
159    if (spillPointer + WORDSIZE > frameSize) {
160      frameSize = spillPointer + WORDSIZE;
161    }
162    return spillPointer;
163  }
164
165  @Override
166  public void insertSpillBefore(Instruction s, Register r, byte type, int location) {
167
168    Operator move = getMoveOperator(type);
169    byte size = getSizeOfType(type);
170    RegisterOperand rOp;
171    switch (type) {
172      case FLOAT_VALUE:
173        rOp = F(r);
174        break;
175      case DOUBLE_VALUE:
176        rOp = D(r);
177        break;
178      default:
179        rOp = new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD);
180        break;
181    }
182    StackLocationOperand spill = new StackLocationOperand(true, -location, size);
183    s.insertBefore(MIR_Move.create(move, spill, rOp));
184  }
185
186  @Override
187  public void insertUnspillBefore(Instruction s, Register r, byte type, int location) {
188    Operator move = getMoveOperator(type);
189    byte size = getSizeOfType(type);
190    RegisterOperand rOp;
191    switch (type) {
192      case FLOAT_VALUE:
193        rOp = F(r);
194        break;
195      case DOUBLE_VALUE:
196        rOp = D(r);
197        break;
198      default:
199        rOp = new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD);
200        break;
201    }
202    StackLocationOperand spill = new StackLocationOperand(true, -location, size);
203    s.insertBefore(MIR_Move.create(move, rOp, spill));
204  }
205
206  @Override
207  public void computeNonVolatileArea() {
208    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
209
210    if (ir.compiledMethod.isSaveVolatile()) {
211      // Record that we use every nonvolatile GPR
212      int numGprNv = PhysicalRegisterSet.getNumberOfNonvolatileGPRs();
213      ir.compiledMethod.setNumberOfNonvolatileGPRs((short) numGprNv);
214
215      // set the frame size
216      frameSize += numGprNv * WORDSIZE;
217      frameSize = align(frameSize, STACKFRAME_ALIGNMENT);
218
219      // TODO!!
220      ir.compiledMethod.setNumberOfNonvolatileFPRs((short) 0);
221
222      // Record that we need a stack frame.
223      setFrameRequired();
224
225      if (SSE2_FULL) {
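        // reserve eight 8-byte slots in which to save the (XMM) floating-point registers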
226        for (int i = 0; i < 8; i++) {
227          fsaveLocation = allocateNewSpillLocation(DOUBLE_REG);
228        }
229      } else {
        // Grab 108 bytes (same as 27 4-byte spills) in the stack
        // frame, as a place to store the floating-point state with FNSAVE
232        for (int i = 0; i < 27; i++) {
233          fsaveLocation = allocateNewSpillLocation(INT_REG);
234        }
235      }
236
237      // Map each volatile register to a spill location.
238      int i = 0;
239      for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
240        e.nextElement();
241        // Note that as a side effect, the following call bumps up the
242        // frame size.
243        saveVolatileGPRLocation[i] = allocateNewSpillLocation(INT_REG);
244      }
245
246      // Map each non-volatile register to a spill location.
247      i = 0;
248      for (Enumeration<Register> e = phys.enumerateNonvolatileGPRs(); e.hasMoreElements(); i++) {
249        e.nextElement();
250        // Note that as a side effect, the following call bumps up the
251        // frame size.
252        nonVolatileGPRLocation[i] = allocateNewSpillLocation(INT_REG);
253      }
254
255      // Set the offset to find non-volatiles.
256      int gprOffset = getNonvolatileGPROffset(0);
257      ir.compiledMethod.setUnsignedNonVolatileOffset(gprOffset);
258
259    } else {
260      // Count the number of nonvolatiles used.
261      int numGprNv = 0;
262      int i = 0;
263      for (Enumeration<Register> e = phys.enumerateNonvolatileGPRs(); e.hasMoreElements();) {
264        Register r = e.nextElement();
265        if (r.isTouched()) {
266          // Note that as a side effect, the following call bumps up the
267          // frame size.
268          nonVolatileGPRLocation[i++] = allocateNewSpillLocation(INT_REG);
269          numGprNv++;
270        }
271      }
272      // Update the OptCompiledMethod object.
273      ir.compiledMethod.setNumberOfNonvolatileGPRs((short) numGprNv);
274      if (numGprNv > 0) {
275        int gprOffset = getNonvolatileGPROffset(0);
276        ir.compiledMethod.setUnsignedNonVolatileOffset(gprOffset);
277        // record that we need a stack frame
278        setFrameRequired();
279      } else {
280        ir.compiledMethod.setUnsignedNonVolatileOffset(0);
281      }
282
283      ir.compiledMethod.setNumberOfNonvolatileFPRs((short) 0);
284
285    }
286  }
287
288  @Override
289  public void cleanUpAndInsertEpilogue() {
290
291    Instruction inst = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder();
292    for (; inst != null; inst = inst.nextInstructionInCodeOrder()) {
293      switch (inst.getOpcode()) {
        case IA32_MOV_opcode:
        case IA32_FMOV_opcode:
        case IA32_MOVSS_opcode:
        case IA32_MOVSD_opcode:
          // remove frivolous moves
          Operand result = MIR_Move.getResult(inst);
          Operand val = MIR_Move.getValue(inst);
          if (result.similar(val)) {
            inst = inst.remove();
          }
          break;
        case IA32_RET_opcode:
          if (frameIsRequired()) {
            insertEpilogue(inst);
          }
          break;
316        default:
317          break;
318      }
319    }
320    // now that the frame size is fixed, fix up the spill location code
321    rewriteStackLocations();
322  }
323
324  /**
325   * Insert an explicit stack overflow check in the prologue <em>after</em>
326   * buying the stack frame.<p>
327   *
328   * SIDE EFFECT: mutates the plg into a trap instruction.  We need to
329   * mutate so that the trap instruction is in the GC map data structures.
330   *
331   * @param plg the prologue instruction
332   */
333  private void insertNormalStackOverflowCheck(Instruction plg) {
334    if (!ir.method.isInterruptible()) {
335      plg.remove();
336      return;
337    }
338
339    if (ir.compiledMethod.isSaveVolatile()) {
340      return;
341    }
342
343    PhysicalRegisterSet phys = (PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet();
344    Register ESP = phys.getESP();
345    MemoryOperand M =
346        MemoryOperand.BD(ir.regpool.makeTROp(),
347                             Entrypoints.stackLimitField.getOffset(),
348                             (byte) WORDSIZE,
349                             null,
350                             null);
351
352    //    Trap if ESP <= active Thread Stack Limit
353    MIR_TrapIf.mutate(plg,
354                      IA32_TRAPIF,
355                      null,
356                      new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
357                      M,
358                      IA32ConditionOperand.LE(),
359                      TrapCodeOperand.StackOverflow());
360  }
361
362  /**
363   * Insert an explicit stack overflow check in the prologue <em>before</em>
   * buying the stack frame.<p>
   *
   * SIDE EFFECT: mutates the plg into a trap instruction.  We need to
366   * mutate so that the trap instruction is in the GC map data structures.
367   *
368   * @param plg the prologue instruction
369   */
370  private void insertBigFrameStackOverflowCheck(Instruction plg) {
371    if (!ir.method.isInterruptible()) {
372      plg.remove();
373      return;
374    }
375
376    if (ir.compiledMethod.isSaveVolatile()) {
377      return;
378    }
379
380    PhysicalRegisterSet phys = (PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet();
381    Register ESP = phys.getESP();
382    Register ECX = phys.getECX();
383
384    //    ECX := active Thread Stack Limit
385    MemoryOperand M =
386        MemoryOperand.BD(ir.regpool.makeTROp(),
387                             Entrypoints.stackLimitField.getOffset(),
388                             (byte) WORDSIZE,
389                             null,
390                             null);
    plg.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD), M));
392
393    //    ECX += frame Size
394    int frameSize = getFrameFixedSize();
395    plg.insertBefore(MIR_BinaryAcc.create(IA32_ADD, new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD), VM.BuildFor32Addr ? IC(frameSize) : LC(frameSize)));
396    //    Trap if ESP <= ECX
397    MIR_TrapIf.mutate(plg,
398                      IA32_TRAPIF,
399                      null,
400                      new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
401                      new RegisterOperand(ECX, PRIMITIVE_TYPE_FOR_WORD),
402                      IA32ConditionOperand.LE(),
403                      TrapCodeOperand.StackOverflow());
404  }
405
406  /**
407   * Insert the prologue for a normal method.
408   *
409   * Assume we are inserting the prologue for method B called from method
410   * A.
411   *    <ul>
412   *    <li> Perform a stack overflow check.
413   *    <li> Store a back pointer to A's frame
414   *    <li> Store B's compiled method id
415   *    <li> Adjust frame pointer to point to B's frame
416   *    <li> Save any used non-volatile registers
417   *    </ul>
418   */
419  @Override
420  public void insertNormalPrologue() {
421    PhysicalRegisterSet phys = (PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet();
422    Register ESP = phys.getESP();
423    MemoryOperand fpHome =
424        MemoryOperand.BD(ir.regpool.makeTROp(),
425                             ArchEntrypoints.framePointerField.getOffset(),
426                             (byte) WORDSIZE,
427                             null,
428                             null);
429
430    // the prologue instruction
431    Instruction plg = ir.firstInstructionInCodeOrder().nextInstructionInCodeOrder();
432    // inst is the instruction immediately after the IR_PROLOGUE
433    // instruction
434    Instruction inst = plg.nextInstructionInCodeOrder();
435
436    int frameFixedSize = getFrameFixedSize();
437    ir.compiledMethod.setFrameFixedSize(frameFixedSize);
438
439    // I. Buy a stackframe (including overflow check)
440    // NOTE: We play a little game here.  If the frame we are buying is
441    //       very small (less than 256) then we can be sloppy with the
442    //       stackoverflow check and actually allocate the frame in the guard
443    //       region.  We'll notice when this frame calls someone and take the
444    //       stackoverflow in the callee. We can't do this if the frame is too big,
445    //       because growing the stack in the callee and/or handling a hardware trap
446    //       in this frame will require most of the guard region to complete.
447    //       See libvm.C.
448    if (frameFixedSize >= 256) {
449      // 1. Insert Stack overflow check.
450      insertBigFrameStackOverflowCheck(plg);
451
452      // 2. Save caller's frame pointer
453      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome));
454
455      // 3. Set my frame pointer to current value of stackpointer
456      inst.insertBefore(MIR_Move.create(IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD)));
457
458      // 4. Store my compiled method id
459      int cmid = ir.compiledMethod.getId();
460      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, VM.BuildFor32Addr ? IC(cmid) : LC(cmid)));
461    } else {
462      // 1. Save caller's frame pointer
463      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, fpHome));
464
465      // 2. Set my frame pointer to current value of stackpointer
466      inst.insertBefore(MIR_Move.create(IA32_MOV, fpHome.copy(), new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD)));
467
468      // 3. Store my compiled method id
469      int cmid = ir.compiledMethod.getId();
470      inst.insertBefore(MIR_UnaryNoRes.create(IA32_PUSH, VM.BuildFor32Addr ? IC(cmid) : LC(cmid)));
471
472      // 4. Insert Stack overflow check.
473      insertNormalStackOverflowCheck(plg);
474    }
475
476    // II. Save any used volatile and non-volatile registers
477    if (ir.compiledMethod.isSaveVolatile()) {
478      saveVolatiles(inst);
479      saveFloatingPointState(inst);
480    }
481    saveNonVolatiles(inst);
482  }
483
484  /**
485   * Insert code into the prologue to save any used non-volatile
486   * registers.
487   *
488   * @param inst the first instruction after the prologue.
489   */
490  private void saveNonVolatiles(Instruction inst) {
491    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
492    int nNonvolatileGPRS = ir.compiledMethod.getNumberOfNonvolatileGPRs();
493
494    // Save each non-volatile GPR used by this method.
495    int n = nNonvolatileGPRS - 1;
496    for (Enumeration<Register> e = phys.enumerateNonvolatileGPRsBackwards(); e.hasMoreElements() && n >= 0; n--) {
497      Register nv = e.nextElement();
498      int offset = getNonvolatileGPROffset(n);
499      Operand M = new StackLocationOperand(true, -offset, WORDSIZE);
500      inst.insertBefore(MIR_Move.create(IA32_MOV, M, new RegisterOperand(nv, PRIMITIVE_TYPE_FOR_WORD)));
501    }
502  }
503
504  /**
505   * Insert code before a return instruction to restore the nonvolatile
506   * registers.
507   *
508   * @param inst the return instruction
509   */
510  private void restoreNonVolatiles(Instruction inst) {
511    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
512    int nNonvolatileGPRS = ir.compiledMethod.getNumberOfNonvolatileGPRs();
513
514    int n = nNonvolatileGPRS - 1;
515    for (Enumeration<Register> e = phys.enumerateNonvolatileGPRsBackwards(); e.hasMoreElements() && n >= 0; n--) {
516      Register nv = e.nextElement();
517      int offset = getNonvolatileGPROffset(n);
518      Operand M = new StackLocationOperand(true, -offset, WORDSIZE);
519      inst.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(nv, PRIMITIVE_TYPE_FOR_WORD), M));
520    }
521  }
522
523  /**
524   * Insert code into the prologue to save the floating point state.
525   *
526   * @param inst the first instruction after the prologue.
527   */
528  private void saveFloatingPointState(Instruction inst) {
529
530    if (SSE2_FULL) {
531      GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
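      // store each of the eight floating-point (XMM) registers into its slot of the save area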
532      for (int i = 0; i < 8; i++) {
533        inst.insertBefore(MIR_Move.create(IA32_MOVQ,
534            new StackLocationOperand(true, -fsaveLocation + (i * BYTES_IN_DOUBLE), BYTES_IN_DOUBLE),
535            new RegisterOperand(phys.getFPR(i), TypeReference.Double)));
536      }
537    } else {
538      Operand M = new StackLocationOperand(true, -fsaveLocation, 4);
539      inst.insertBefore(MIR_FSave.create(IA32_FNSAVE, M));
540    }
541  }
542
543  /**
   * Insert code into the epilogue, before the return instruction, to
   * restore the floating point state.
   *
   * @param inst the return instruction
547   */
548  private void restoreFloatingPointState(Instruction inst) {
549    if (SSE2_FULL) {
550      GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
551      for (int i = 0; i < 8; i++) {
552        inst.insertBefore(MIR_Move.create(IA32_MOVQ,
553            new RegisterOperand(phys.getFPR(i), TypeReference.Double),
554            new StackLocationOperand(true, -fsaveLocation + (i * BYTES_IN_DOUBLE), BYTES_IN_DOUBLE)));
555      }
556    } else {
557      Operand M = new StackLocationOperand(true, -fsaveLocation, 4);
558      inst.insertBefore(MIR_FSave.create(IA32_FRSTOR, M));
559    }
560  }
561
562  /**
   * Insert code into the prologue to save the volatile GPRs (the
   * floating-point state is saved separately).
565   *
566   * @param inst the first instruction after the prologue.
567   */
568  private void saveVolatiles(Instruction inst) {
569    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
570
571    // Save each GPR.
572    int i = 0;
573    for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
574      Register r = e.nextElement();
575      int location = saveVolatileGPRLocation[i];
576      Operand M = new StackLocationOperand(true, -location, WORDSIZE);
577      inst.insertBefore(MIR_Move.create(IA32_MOV, M, new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD)));
578    }
579  }
580
581  /**
   * Insert code before a return instruction to restore the volatile
   * GPRs.
584   *
585   * @param inst the return instruction
586   */
587  private void restoreVolatileRegisters(Instruction inst) {
588    GenericPhysicalRegisterSet phys = ir.regpool.getPhysicalRegisterSet();
589
590    // Restore every GPR
591    int i = 0;
592    for (Enumeration<Register> e = phys.enumerateVolatileGPRs(); e.hasMoreElements(); i++) {
593      Register r = e.nextElement();
594      int location = saveVolatileGPRLocation[i];
595      Operand M = new StackLocationOperand(true, -location, WORDSIZE);
596      inst.insertBefore(MIR_Move.create(IA32_MOV, new RegisterOperand(r, PRIMITIVE_TYPE_FOR_WORD), M));
597    }
598  }
599
600  /**
601   * Insert the epilogue before a particular return instruction.
602   *
603   * @param ret the return instruction.
604   */
605  private void insertEpilogue(Instruction ret) {
606    // 1. Restore any saved registers
607    if (ir.compiledMethod.isSaveVolatile()) {
608      restoreVolatileRegisters(ret);
609      restoreFloatingPointState(ret);
610    }
611    restoreNonVolatiles(ret);
612
613    // 2. Restore caller's stackpointer and framepointer
614    int frameSize = getFrameFixedSize();
615    ret.insertBefore(MIR_UnaryNoRes.create(REQUIRE_ESP, IC(frameSize)));
616    MemoryOperand fpHome =
617        MemoryOperand.BD(ir.regpool.makeTROp(),
618                             ArchEntrypoints.framePointerField.getOffset(),
619                             (byte) WORDSIZE,
620                             null,
621                             null);
622    ret.insertBefore(MIR_Nullary.create(IA32_POP, fpHome));
623  }
624
625  @Override
626  public void replaceOperandWithSpillLocation(Instruction s, RegisterOperand symb) {
627
628    // Get the spill location previously assigned to the symbolic
629    // register.
630    int location = regAllocState.getSpill(symb.getRegister());
631
632    // Create a memory operand M representing the spill location.
633    int size;
634    if (VM.BuildFor32Addr) {
635      if (SSE2_FULL) {
636        size = symb.getType().getMemoryBytes();
637        if (size < WORDSIZE)
638          size = WORDSIZE;
639      } else {
640        int type = PhysicalRegisterSet.getPhysicalRegisterType(symb.getRegister());
641        size = PhysicalRegisterSet.getSpillSize(type);
642      }
643    } else {
644      size = WORDSIZE;
645    }
    StackLocationOperand M = new StackLocationOperand(true, -location, (byte) size);
649
650    // replace the register operand with the memory operand
651    s.replaceOperand(symb, M);
652  }
653
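  /**
   * @param M the memory operand to check
   * @return {@code true} if the base or index register of the memory operand
   *  is still a symbolic (non-physical) register
   */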
654  private boolean hasSymbolicRegister(MemoryOperand M) {
655    if (M.base != null && !M.base.getRegister().isPhysical()) return true;
656    if (M.index != null && !M.index.getRegister().isPhysical()) return true;
657    return false;
658  }
659
660  /**
661   * @param s the instruction to check
662   * @return {@code true} if and only if the instruction is a MOVE instruction
663   *  that can be generated without resorting to scratch registers
664   */
665  private boolean isScratchFreeMove(Instruction s) {
666    if (s.operator() != IA32_MOV) return false;
667
668    // if we don't allow ESP to float, we will always use scratch
669    // registers in these move instructions.
670    if (!FLOAT_ESP) return false;
671
672    Operand result = MIR_Move.getResult(s);
673    Operand value = MIR_Move.getValue(s);
674
675    // TODO Remove duplication and check if code and documentation
676    // are correct.
677
678    // We need scratch registers for spilled registers that appear in
679    // memory operands.
680    if (result.isMemory()) {
681      MemoryOperand M = result.asMemory();
682      if (hasSymbolicRegister(M)) return false;
683      // We will perform this transformation by changing the MOV to a PUSH
684      // or POP.  Note that IA32 cannot PUSH/POP >WORDSIZE quantities, so
685      // disable the transformation for that case.  Also, (TODO), our
686      // assembler does not emit the prefix to allow 16-bit push/pops, so
687      // disable these too.  What's left?  WORDSIZE only.
688      if (M.size != WORDSIZE) return false;
689    }
690    if (value.isMemory()) {
691      MemoryOperand M = value.asMemory();
692      if (hasSymbolicRegister(M)) return false;
693      // We will perform this transformation by changing the MOV to a PUSH
694      // or POP.  Note that IA32 cannot PUSH/POP >WORDSIZE quantities, so
695      // disable the transformation for that case.  Also, (TODO), our
696      // assembler does not emit the prefix to allow 16-bit push/pops, so
697      // disable these too.  What's left?  WORDSIZE only.
698      if (M.size != WORDSIZE) return false;
699    }
700    // If we get here, all is kosher.
701    return true;
702  }
703
704  @Override
705  public boolean needScratch(Register r, Instruction s) {
706    // We never need a scratch register for a floating point value in an
707    // FMOV instruction.
708    if (r.isFloatingPoint() && s.operator() == IA32_FMOV) return false;
709
710    // never need a scratch register for a YIELDPOINT_OSR
711    if (s.operator() == YIELDPOINT_OSR) return false;
712
713    // Some MOVEs never need scratch registers
714    if (isScratchFreeMove(s)) return false;
715
716    // If s already has a memory operand, it is illegal to introduce
717    // another.
718    if (s.hasMemoryOperand()) return true;
719
720    // If r appears more than once in the instruction, we can't
721    // use a memory operand for all occurrences, so we will need a scratch
722    int count = 0;
723    for (Enumeration<Operand> ops = s.getOperands(); ops.hasMoreElements();) {
724      Operand op = ops.nextElement();
725      if (op.isRegister()) {
726        RegisterOperand rop = op.asRegister();
727        if (rop.getRegister() == r) {
728          count++;
729        }
730        // If the register in question is an int register (i.e. 32 bit)
        // and the VM is built for x64, we mustn't introduce a memory
732        // operand for the register. All spill locations are 64-bit,
733        // so the 32-bit operation would be converted to a quad operation
734        // because of the memory operand. This would change the semantics
735        // of the operation (e.g. for CMP or ADD).
736        // Moreover, we can't use a 32-bit memory operation because it is
        // not guaranteed that an x64 memory address would fit into 32 bits.
738        // FIXME need to review this decision before finishing x64 opt work
739        if (VM.BuildFor64Addr && rop.isInt() && rop.getRegister() == r) return true;
740      }
741    }
742    if (count > 1) return true;
743
744    // Check the architecture restrictions.
745    if (RegisterRestrictions.mustBeInRegister(r, s)) return true;
746
747    // Otherwise, everything is OK.
748    return false;
749  }
750
751  /**
752   * Before instruction s, insert code to adjust ESP so that it lies at a
753   * particular offset from its usual location.
754   *
755   * @param s the instruction before which ESP must have the desired offset
756   * @param desiredOffset the desired offset
757   */
758  private void moveESPBefore(Instruction s, int desiredOffset) {
759    PhysicalRegisterSet phys = (PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet();
760    Register ESP = phys.getESP();
761    int delta = desiredOffset - ESPOffset;
762    if (delta != 0) {
763      if (canModifyEFLAGS(s)) {
764        s.insertBefore(MIR_BinaryAcc.create(IA32_ADD, new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD), VM.BuildFor32Addr ? IC(delta) : LC(delta)));
765      } else {
766        MemoryOperand M =
767            MemoryOperand.BD(new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
768                                 Offset.fromIntSignExtend(delta),
769                                 (byte) WORDSIZE,
770                                 null,
771                                 null);
772        s.insertBefore(MIR_Lea.create(IA32_LEA, new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD), M));
773      }
774      ESPOffset = desiredOffset;
775    }
776  }
777
778  private boolean canModifyEFLAGS(Instruction s) {
779    if (PhysicalDefUse.usesEFLAGS(s.operator())) {
780      return false;
781    }
782    if (PhysicalDefUse.definesEFLAGS(s.operator())) {
783      return true;
784    }
785    if (s.operator() == BBEND) return true;
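    // otherwise keep scanning forward: EFLAGS may be clobbered here as long as
    // no later instruction in this block reads the flags before they are redefined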
786    return canModifyEFLAGS(s.nextInstructionInCodeOrder());
787  }
788
789  /**
790   * Attempt to rewrite a move instruction to a NOP.
791   *
792   * @param s the instruction to rewrite
793   * @return {@code true} if and only if the transformation applies
794   */
795  private boolean mutateMoveToNop(Instruction s) {
796    Operand result = MIR_Move.getResult(s);
797    Operand val = MIR_Move.getValue(s);
798    if (result.isStackLocation() && val.isStackLocation()) {
799      if (result.similar(val)) {
800        Empty.mutate(s, NOP);
801        return true;
802      }
803    }
804    return false;
805  }
806
807  /**
   * Rewrites a move instruction that has two memory operands, at least one
   * of which must be a stack location operand: move the SP to the
   * appropriate location and turn the move into a push or pop instruction.
811   *
812   * @param s the instruction to rewrite
813   */
814  private void rewriteMoveInstruction(Instruction s) {
815    // first attempt to mutate the move into a noop
816    if (mutateMoveToNop(s)) return;
817
818    Operand result = MIR_Move.getResult(s);
819    Operand val = MIR_Move.getValue(s);
820    if (result instanceof StackLocationOperand) {
821      if (val instanceof MemoryOperand || val instanceof StackLocationOperand) {
822        int offset = ((StackLocationOperand) result).getOffset();
823        byte size = ((StackLocationOperand) result).getSize();
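        // a PUSH decrements ESP by the operand size before storing, so
        // request ESP at the slot's offset plus the operand size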
824        offset = FPOffset2SPOffset(offset) + size;
825        moveESPBefore(s, offset);
826        MIR_UnaryNoRes.mutate(s, IA32_PUSH, val);
827      }
828    } else {
829      if (result instanceof MemoryOperand) {
830        if (val instanceof StackLocationOperand) {
831          int offset = ((StackLocationOperand) val).getOffset();
832          offset = FPOffset2SPOffset(offset);
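          // a POP loads from the address ESP points to, so request ESP at the slot's offset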
833          moveESPBefore(s, offset);
834          MIR_Nullary.mutate(s, IA32_POP, result);
835        }
836      }
837    }
838  }
839
840  /**
841   * Walks through the IR.  For each StackLocationOperand, replace the
842   * operand with the appropriate MemoryOperand.
843   */
844  private void rewriteStackLocations() {
845    // ESP is initially WORDSIZE above where the framepointer is going to be.
846    ESPOffset = getFrameFixedSize() + WORDSIZE;
847    Register ESP = ((PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet()).getESP();
848
849    boolean seenReturn = false;
850    for (Enumeration<Instruction> e = ir.forwardInstrEnumerator(); e.hasMoreElements();) {
851      Instruction s = e.nextElement();
852
853      if (s.isReturn()) {
854        seenReturn = true;
855        continue;
856      }
857
858      if (s.isBranch()) {
859        // restore ESP to home location at end of basic block.
860        moveESPBefore(s, 0);
861        continue;
862      }
863
864      if (s.operator() == BBEND) {
865        if (seenReturn) {
          // at a return, ESP will be at FrameFixedSize; control does not fall
          // through a return, so just reset the recorded offset
867          seenReturn = false;
868          ESPOffset = 0;
869        } else {
870          moveESPBefore(s, 0);
871        }
872        continue;
873      }
874
875      if (s.operator() == ADVISE_ESP) {
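        // ADVISE_ESP records the offset that ESP is known to have at this point;
        // no adjustment code is emitted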
876        ESPOffset = MIR_UnaryNoRes.getVal(s).asIntConstant().value;
877        continue;
878      }
879
880      if (s.operator() == REQUIRE_ESP) {
881        // ESP is required to be at the given offset from the bottom of the frame
882        moveESPBefore(s, MIR_UnaryNoRes.getVal(s).asIntConstant().value);
883        continue;
884      }
885
886      if (s.operator() == YIELDPOINT_PROLOGUE ||
887          s.operator() == YIELDPOINT_BACKEDGE ||
888          s.operator() == YIELDPOINT_EPILOGUE) {
889        moveESPBefore(s, 0);
890        continue;
891      }
892
893      if (s.operator() == IA32_MOV) {
894        rewriteMoveInstruction(s);
895      }
896
897      // pop computes the effective address of its operand after ESP
898      // is incremented.  Therefore update ESPOffset before rewriting
899      // stacklocation and memory operands.
900      if (s.operator() == IA32_POP) {
901        ESPOffset += WORDSIZE;
902      }
903
904      for (Enumeration<Operand> ops = s.getOperands(); ops.hasMoreElements();) {
905        Operand op = ops.nextElement();
906        if (op instanceof StackLocationOperand) {
907          StackLocationOperand sop = (StackLocationOperand) op;
908          int offset = sop.getOffset();
909          if (sop.isFromTop()) {
910            offset = FPOffset2SPOffset(offset);
911          }
912          offset -= ESPOffset;
913          byte size = sop.getSize();
914          MemoryOperand M =
915              MemoryOperand.BD(new RegisterOperand(ESP, PRIMITIVE_TYPE_FOR_WORD),
916                                   Offset.fromIntSignExtend(offset),
917                                   size,
918                                   null,
919                                   null);
920          s.replaceOperand(op, M);
921        } else if (op instanceof MemoryOperand) {
922          MemoryOperand M = op.asMemory();
923          if ((M.base != null && M.base.getRegister() == ESP) || (M.index != null && M.index.getRegister() == ESP)) {
924            M.disp = M.disp.minus(ESPOffset);
925          }
926        }
927      }
928
929      // push computes the effective address of its operand after ESP
930      // is decremented.  Therefore update ESPOffset after rewriting
931      // stacklocation and memory operands.
932      if (s.operator() == IA32_PUSH) {
933        ESPOffset -= WORDSIZE;
934      }
935    }
936  }
937
938  /**
939   * PRECONDITION: The final frameSize is calculated before calling this
940   * routine.
941   *
942   * @param fpOffset offset in bytes from the top of the stack frame
943   * @return offset in bytes from the stack pointer.
944   */
945  private int FPOffset2SPOffset(int fpOffset) {
946    // Note that SP = FP - frameSize + WORDSIZE;
    // So, FP + fpOffset = SP + frameSize - WORDSIZE + fpOffset.
949    return frameSize + fpOffset - WORDSIZE;
950  }
951
952  @Override
953  public void restoreScratchRegistersBefore(Instruction s) {
954    for (Iterator<ScratchRegister> i = scratchInUse.iterator(); i.hasNext();) {
955      ScratchRegister scratch = i.next();
956
957      if (scratch.currentContents == null) continue;
958      if (VERBOSE_DEBUG) {
959        System.out.println("RESTORE: consider " + scratch);
960      }
961      boolean removed = false;
962      boolean unloaded = false;
963      if (definedIn(scratch.scratch, s) ||
964          (s.isCall() && !s.operator().isCallSaveVolatile() && scratch.scratch.isVolatile()) ||
965          (s.operator() == IA32_FNINIT && scratch.scratch.isFloatingPoint()) ||
966          (s.operator() == IA32_FCLEAR && scratch.scratch.isFloatingPoint())) {
967        // s defines the scratch register, so save its contents before they
968        // are killed.
969        if (VERBOSE_DEBUG) {
970          System.out.println("RESTORE : unload because defined " + scratch);
971        }
972        unloadScratchRegisterBefore(s, scratch);
973
974        // update mapping information
975        if (VERBOSE_DEBUG) {
976          System.out.println("RSRB: End scratch interval " + scratch.scratch + " " + s);
977        }
978        scratchMap.endScratchInterval(scratch.scratch, s);
979        Register scratchContents = scratch.currentContents;
980        if (scratchContents != null) {
981          if (VERBOSE_DEBUG) {
982            System.out.println("RSRB: End symbolic interval " + scratch.currentContents + " " + s);
983          }
984          scratchMap.endSymbolicInterval(scratch.currentContents, s);
985        }
986
987        i.remove();
988        removed = true;
989        unloaded = true;
990      }
991
992      if (usedIn(scratch.scratch, s) ||
993          !isLegal(scratch.currentContents, scratch.scratch, s) ||
994          (s.operator() == IA32_FCLEAR && scratch.scratch.isFloatingPoint())) {
        // first spill the current contents of the scratch register to
996        // memory
997        if (!unloaded) {
998          if (VERBOSE_DEBUG) {
999            System.out.println("RESTORE : unload because used " + scratch);
1000          }
1001          unloadScratchRegisterBefore(s, scratch);
1002
1003          // update mapping information
1004          if (VERBOSE_DEBUG) {
1005            System.out.println("RSRB2: End scratch interval " + scratch.scratch + " " + s);
1006          }
1007          scratchMap.endScratchInterval(scratch.scratch, s);
1008          Register scratchContents = scratch.currentContents;
1009          if (scratchContents != null) {
1010            if (VERBOSE_DEBUG) {
1011              System.out.println("RSRB2: End symbolic interval " + scratch.currentContents + " " + s);
1012            }
1013            scratchMap.endSymbolicInterval(scratch.currentContents, s);
1014          }
1015
1016        }
1017        // s or some future instruction uses the scratch register,
1018        // so restore the correct contents.
1019        if (VERBOSE_DEBUG) {
1020          System.out.println("RESTORE : reload because used " + scratch);
1021        }
1022        reloadScratchRegisterBefore(s, scratch);
1023
1024        if (!removed) {
1025          i.remove();
1026          removed = true;
1027        }
1028      }
1029    }
1030  }
1031
1032  /**
1033   * Initialize some architecture-specific state needed for register
1034   * allocation.
1035   *
1036   * @param ir the IR that's being processed
1037   */
1038  @Override
1039  public void initForArch(IR ir) {
1040    PhysicalRegisterSet phys = (PhysicalRegisterSet)ir.regpool.getPhysicalRegisterSet();
1041
1042    // We reserve the last (bottom) slot in the FPR stack as a scratch register.
1043    // This allows us to do one push/pop sequence in order to use the
1044    // top of the stack as a scratch location
1045    phys.getFPR(7).reserveRegister();
1046  }
1047
1048  @Override
1049  public boolean isSysCall(Instruction s) {
1050    return s.operator() == IA32_SYSCALL;
1051  }
1052}