001/*
002 *  This file is part of the Jikes RVM project (http://jikesrvm.org).
003 *
004 *  This file is licensed to You under the Eclipse Public License (EPL);
005 *  You may not use this file except in compliance with the License. You
006 *  may obtain a copy of the License at
007 *
008 *      http://www.opensource.org/licenses/eclipse-1.0.php
009 *
010 *  See the COPYRIGHT.txt file distributed with this work for information
011 *  regarding copyright ownership.
012 */
013package org.jikesrvm.compilers.opt.controlflow;
014
015import java.lang.reflect.Constructor;
016import java.util.Arrays;
017import java.util.Enumeration;
018
019import org.jikesrvm.VM;
020import org.jikesrvm.compilers.opt.OptOptions;
021import org.jikesrvm.compilers.opt.driver.CompilerPhase;
022import org.jikesrvm.compilers.opt.ir.BasicBlock;
023import org.jikesrvm.compilers.opt.ir.IR;
024import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets;
025
026/**
027 * Derive relative basic block execution frequencies from branch probabilities.<p>
028 *
029 * This code assumes that the loop structure tree can be constructed for
030 * the CFG in question.  This implies that the CFG is reducible. <p>
031 *
032 * The basic algorithm is as follows:
033 * <ul>
034 * <li> Construct the loop structure tree for the CFG. </li>
035 * <li> In a postorder traversal, compute the loop multiplier for each loop.
036 *      The loop multiplier is a number such that the execution frequency of
037 *      the loop pre-header times the loop multiplier is equal to the
038 *      execution frequency of the loop head.  This can be derived by computing
039 *      the loop exit weight (the probability of exiting the loop) and applying
040 *      Kirchoff's law that flow in is equal to flow out.  Loop exit weight
041 *      can be computed in a single topological (ignoring backedges) traversal
042 *      of the nodes in the loop. </li>
043 * <li> Assign the entry node weight 1.  In a topological traversal of the CFG
044 *      (ignoring backedges), propagate the weights.  When processing a loop head,
045 *      multiply the incoming weight by the loop multiplier.</li>
046 * </ul>
047 */
048public class EstimateBlockFrequencies extends CompilerPhase {
049
050  /**
051   * The IR on which to operate.
052   */
053  private IR ir;
054
055  /**
056   * The loop structure tree of said IR
057   */
058  private LSTGraph lst;
059
060  /**
061   * Constructor for this compiler phase
062   */
063  private static final Constructor<CompilerPhase> constructor =
064      getCompilerPhaseConstructor(EstimateBlockFrequencies.class);
065
066  /**
067   * Get a constructor object for this compiler phase
068   * @return compiler phase constructor
069   */
070  @Override
071  public Constructor<CompilerPhase> getClassConstructor() {
072    return constructor;
073  }
074
075  /**
076   * Topological ordering (ignoring backedges) of CFG
077   */
078  private BasicBlock[] topOrder;
079
080  @Override
081  public String getName() {
082    return "Estimate Block Frequencies";
083  }
084
085  @Override
086  public void reportAdditionalStats() {
087    VM.sysWrite("  ");
088    VM.sysWrite(container.counter1 / container.counter2 * 100, 2);
089    VM.sysWrite("% Infrequent BBs");
090  }
091
092  /**
093   * Compute relative basic block frequencies for the argument IR based on the
094   * branch probability information on each conditional and multiway branch.<p>
095   *
096   * Assumptions:
097   * <ol>
098   *   <li>LST is valid
099   *   <li>basic block numbering is dense (compact has been called)
100   * </ol>
101   *
102   * @param _ir the IR on which to apply the phase
103   */
104  @Override
105  public void perform(IR _ir) {
106    // Prepare
107    ir = _ir;
108
109    if (ir.options.PROFILE_FREQUENCY_STRATEGY == OptOptions.PROFILE_DUMB_FREQ) {
110      setDumbFrequencies(ir);
111      return;
112    }
113
114    ir.cfg.resetTopSorted();
115    ir.cfg.buildTopSort();
116    topOrder = new BasicBlock[ir.cfg.numberOfNodes()];
117    int idx = 0;
118    for (BasicBlock ptr = ir.cfg.entry(); ptr != null; ptr = (BasicBlock) ptr.getForwardSortedNext()) {
119      topOrder[idx++] = ptr;
120      ptr.setExecutionFrequency(0f);
121      ptr.clearScratchFlag();
122    }
123
124    // Get pre-computed LST from IR.
125    lst = ir.HIRInfo.loopStructureTree;
126
127    // Compute loop multipliers
128    if (lst != null) {
129      computeLoopMultipliers(lst.getRoot());
130      for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
131        BasicBlock bb = e.nextElement();
132        bb.setExecutionFrequency(0f);
133        bb.clearScratchFlag();
134      }
135    }
136
137    // Compute execution frequency of each basic block
138    computeBlockFrequencies();
139
140    // Set infrequent bits on basic blocks
141    computeInfrequentBlocks(ir);
142  }
143
144  /**
145   * Set the frequency of each basic block to 1.0f.
146   *
147   * @param ir the IR that contains the blocks
148   */
149  private void setDumbFrequencies(IR ir) {
150    for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
151      BasicBlock bb = e.nextElement();
152      bb.setExecutionFrequency(1f);
153    }
154  }
155
156  /**
157   * Compute which blocks are infrequent.<p>
158   *
159   * Algorithm:
160   * <ol>
161   *   <li>let f = INFREQUENT_THRESHOLD.
162   *   <li>Start with S = {all basic blocks}.
163   *   <li>Sort the blocks by frequency.  Starting with the most frequent
164   *       blocks, remove blocks from S until the sum of block frequencies in S
165   *       &lt;= f.  Then blocks in S are infrequent.
166   * </ol>
167   *
168   * @param ir the governing IR.
169   */
170  private void computeInfrequentBlocks(IR ir) {
171    int i = 0;
172    float[] freq = new float[ir.getMaxBasicBlockNumber()];
173    float total = 0f;
174    // count the total frequency of all blocks
175    for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
176      BasicBlock bb = e.nextElement();
177      freq[i] = bb.getExecutionFrequency();
178      total += freq[i];
179      i++;
180    }
181    // sort the frequencies (ascending);
182    Arrays.sort(freq);
183    float f = ir.options.PROFILE_INFREQUENT_THRESHOLD;
184    float goal = (1f - f) * total;
185    total = 0f;
186    float threshold = 0f;
187    // add up the frequencies (descending) until we real the goal.
188    for (i = freq.length - 1; i >= 0 && total < goal; i--) {
189      threshold = freq[i];
190      total += threshold;
191    }
192    // go back and set infrequent bits.
193    for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) {
194      BasicBlock bb = e.nextElement();
195      if (bb.getExecutionFrequency() < threshold) {
196        bb.setInfrequent();
197        container.counter1++;
198      } else {
199        bb.clearInfrequent();
200      }
201      container.counter2++;
202    }
203  }
204
205  /**
206   * Postorder traversal of LST computing loop multiplier and loop exits
207   * for each loop.
208   *
209   * @param n a node
210   */
211  private void computeLoopMultipliers(LSTNode n) {
212    for (Enumeration<LSTNode> e = n.getChildren(); e.hasMoreElements();) {
213      computeLoopMultipliers(e.nextElement());
214    }
215    if (n != lst.getRoot()) {
216      computeMultiplier(n);
217      n.header.clearScratchFlag(); // so we won't ignore when processing enclosing loop
218    }
219  }
220
221  /**
222   * Compute the loop multiplier for this loop nest
223   *
224   * @param n starting node
225   */
226  private void computeMultiplier(LSTNode n) {
227    n.initializeLoopExits();
228    computeNodeWeights(n);
229    float loopExitWeight = computeLoopExitWeight(n);
230    n.loopMultiplier = 1.0f / loopExitWeight;
231  }
232
233  /**
234   * Propagate execution frequencies through the loop.
235   * Also records loop exit edges in loopExits.
236   *
237   * @param n starting node
238   */
239  private void computeNodeWeights(LSTNode n) {
240    n.header.setExecutionFrequency(1f);
241    int idx = 0;
242    while (topOrder[idx] != n.header) idx++;
243    for (int numNodes = n.loop.populationCount(); numNodes > 0;) {
244      if (idx >= topOrder.length) {
245        numNodes--;
246        continue;
247      }
248      BasicBlock cur = topOrder[idx++];
249      if (cur == null) {
250        numNodes--;
251        continue;
252      }
253      if (!n.loop.get(cur.getNumber())) continue; // node was not in the loop nest being processed.
254      LSTNode other = lst.getLoop(cur);
255      if (other != n) {
256        if (cur == other.header) {
257          // loop header of nested loop
258          numNodes -= other.loop.populationCount();
259        }
260        continue; // skip over nodes in nested loop.
261      }
262
263      numNodes--;
264      cur.setScratchFlag();
265      float weight = cur.getExecutionFrequency();
266      for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
267        processEdge(n, cur, wbt.curBlock(), wbt.curWeight(), weight);
268      }
269    }
270  }
271
272  private void processEdge(LSTNode n, BasicBlock source, BasicBlock target, float prob, float weight) {
273    if (target.getScratchFlag()) return; // ignore backedge
274    if (n.loop.get(target.getNumber())) {
275      LSTNode other = lst.getLoop(target);
276      if (other == n) {
277        target.augmentExecutionFrequency(prob * weight);
278      } else {
279        // header of nested loop; pass prob and weight through to targets of loop exits
280        // Algorithm: find the total loopExitWeight, then distribute prob and weight
281        //            in ratio to the exit weight for each exit.
282        //            Effectively we are treating the nested loop as an n-way branch to its loop exits.
283        target.setScratchFlag();
284        float exitWeight = computeLoopExitWeight(other);
285        for (LSTNode.Edge exit : other.loopExits) {
286          float myWeight = exit.source.getExecutionFrequency() * exit.probability;
287          float myFraction = myWeight / exitWeight;
288          processEdge(n, source, exit.target, prob * myFraction, weight);
289        }
290        target.clearScratchFlag();
291      }
292    } else {
293      n.addLoopExit(source, target, prob);
294    }
295  }
296
297  private float computeLoopExitWeight(LSTNode n) {
298    float exitWeight = 0f;
299    for (LSTNode.Edge exit : n.loopExits) {
300      exitWeight += (exit.source.getExecutionFrequency() * exit.probability);
301    }
302    // Kludge: if we think the loop has no exits, lets pretend that there is a 1%
303    //         chance of exiting to avoid getting NaN's in our computation.
304    return exitWeight == 0f ? 0.01f : exitWeight;
305  }
306
307  private void computeBlockFrequencies() {
308    ir.cfg.entry().setExecutionFrequency(1f);
309    for (BasicBlock cur : topOrder) {
310      if (cur == null || cur.isExit()) continue; // ignore exit node.
311      if (lst != null) {
312        LSTNode loop = lst.getLoop(cur);
313        if (loop != null && loop.header == cur) {
314          cur.setExecutionFrequency(cur.getExecutionFrequency() * loop.loopMultiplier);
315        }
316      }
317      float weight = cur.getExecutionFrequency();
318      cur.setScratchFlag();
319
320      for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) {
321        BasicBlock target = wbt.curBlock();
322        if (!target.getScratchFlag()) {
323          target.augmentExecutionFrequency(wbt.curWeight() * weight);
324        }
325      }
326    }
327  }
328}