001/* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013package org.jikesrvm.compilers.opt.controlflow; 014 015import java.lang.reflect.Constructor; 016import java.util.Arrays; 017import java.util.Enumeration; 018 019import org.jikesrvm.VM; 020import org.jikesrvm.compilers.opt.OptOptions; 021import org.jikesrvm.compilers.opt.driver.CompilerPhase; 022import org.jikesrvm.compilers.opt.ir.BasicBlock; 023import org.jikesrvm.compilers.opt.ir.IR; 024import org.jikesrvm.compilers.opt.ir.WeightedBranchTargets; 025 026/** 027 * Derive relative basic block execution frequencies from branch probabilities.<p> 028 * 029 * This code assumes that the loop structure tree can be constructed for 030 * the CFG in question. This implies that the CFG is reducible. <p> 031 * 032 * The basic algorithm is as follows: 033 * <ul> 034 * <li> Construct the loop structure tree for the CFG. </li> 035 * <li> In a postorder traversal, compute the loop multiplier for each loop. 036 * The loop multiplier is a number such that the execution frequency of 037 * the loop pre-header times the loop multiplier is equal to the 038 * execution frequency of the loop head. This can be derived by computing 039 * the loop exit weight (the probability of exiting the loop) and applying 040 * Kirchoff's law that flow in is equal to flow out. Loop exit weight 041 * can be computed in a single topological (ignoring backedges) traversal 042 * of the nodes in the loop. </li> 043 * <li> Assign the entry node weight 1. In a topological traversal of the CFG 044 * (ignoring backedges), propagate the weights. When processing a loop head, 045 * multiply the incoming weight by the loop multiplier.</li> 046 * </ul> 047 */ 048public class EstimateBlockFrequencies extends CompilerPhase { 049 050 /** 051 * The IR on which to operate. 052 */ 053 private IR ir; 054 055 /** 056 * The loop structure tree of said IR 057 */ 058 private LSTGraph lst; 059 060 /** 061 * Constructor for this compiler phase 062 */ 063 private static final Constructor<CompilerPhase> constructor = 064 getCompilerPhaseConstructor(EstimateBlockFrequencies.class); 065 066 /** 067 * Get a constructor object for this compiler phase 068 * @return compiler phase constructor 069 */ 070 @Override 071 public Constructor<CompilerPhase> getClassConstructor() { 072 return constructor; 073 } 074 075 /** 076 * Topological ordering (ignoring backedges) of CFG 077 */ 078 private BasicBlock[] topOrder; 079 080 @Override 081 public String getName() { 082 return "Estimate Block Frequencies"; 083 } 084 085 @Override 086 public void reportAdditionalStats() { 087 VM.sysWrite(" "); 088 VM.sysWrite(container.counter1 / container.counter2 * 100, 2); 089 VM.sysWrite("% Infrequent BBs"); 090 } 091 092 /** 093 * Compute relative basic block frequencies for the argument IR based on the 094 * branch probability information on each conditional and multiway branch.<p> 095 * 096 * Assumptions: 097 * <ol> 098 * <li>LST is valid 099 * <li>basic block numbering is dense (compact has been called) 100 * </ol> 101 * 102 * @param _ir the IR on which to apply the phase 103 */ 104 @Override 105 public void perform(IR _ir) { 106 // Prepare 107 ir = _ir; 108 109 if (ir.options.PROFILE_FREQUENCY_STRATEGY == OptOptions.PROFILE_DUMB_FREQ) { 110 setDumbFrequencies(ir); 111 return; 112 } 113 114 ir.cfg.resetTopSorted(); 115 ir.cfg.buildTopSort(); 116 topOrder = new BasicBlock[ir.cfg.numberOfNodes()]; 117 int idx = 0; 118 for (BasicBlock ptr = ir.cfg.entry(); ptr != null; ptr = (BasicBlock) ptr.getForwardSortedNext()) { 119 topOrder[idx++] = ptr; 120 ptr.setExecutionFrequency(0f); 121 ptr.clearScratchFlag(); 122 } 123 124 // Get pre-computed LST from IR. 125 lst = ir.HIRInfo.loopStructureTree; 126 127 // Compute loop multipliers 128 if (lst != null) { 129 computeLoopMultipliers(lst.getRoot()); 130 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 131 BasicBlock bb = e.nextElement(); 132 bb.setExecutionFrequency(0f); 133 bb.clearScratchFlag(); 134 } 135 } 136 137 // Compute execution frequency of each basic block 138 computeBlockFrequencies(); 139 140 // Set infrequent bits on basic blocks 141 computeInfrequentBlocks(ir); 142 } 143 144 /** 145 * Set the frequency of each basic block to 1.0f. 146 * 147 * @param ir the IR that contains the blocks 148 */ 149 private void setDumbFrequencies(IR ir) { 150 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 151 BasicBlock bb = e.nextElement(); 152 bb.setExecutionFrequency(1f); 153 } 154 } 155 156 /** 157 * Compute which blocks are infrequent.<p> 158 * 159 * Algorithm: 160 * <ol> 161 * <li>let f = INFREQUENT_THRESHOLD. 162 * <li>Start with S = {all basic blocks}. 163 * <li>Sort the blocks by frequency. Starting with the most frequent 164 * blocks, remove blocks from S until the sum of block frequencies in S 165 * <= f. Then blocks in S are infrequent. 166 * </ol> 167 * 168 * @param ir the governing IR. 169 */ 170 private void computeInfrequentBlocks(IR ir) { 171 int i = 0; 172 float[] freq = new float[ir.getMaxBasicBlockNumber()]; 173 float total = 0f; 174 // count the total frequency of all blocks 175 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 176 BasicBlock bb = e.nextElement(); 177 freq[i] = bb.getExecutionFrequency(); 178 total += freq[i]; 179 i++; 180 } 181 // sort the frequencies (ascending); 182 Arrays.sort(freq); 183 float f = ir.options.PROFILE_INFREQUENT_THRESHOLD; 184 float goal = (1f - f) * total; 185 total = 0f; 186 float threshold = 0f; 187 // add up the frequencies (descending) until we real the goal. 188 for (i = freq.length - 1; i >= 0 && total < goal; i--) { 189 threshold = freq[i]; 190 total += threshold; 191 } 192 // go back and set infrequent bits. 193 for (Enumeration<BasicBlock> e = ir.getBasicBlocks(); e.hasMoreElements();) { 194 BasicBlock bb = e.nextElement(); 195 if (bb.getExecutionFrequency() < threshold) { 196 bb.setInfrequent(); 197 container.counter1++; 198 } else { 199 bb.clearInfrequent(); 200 } 201 container.counter2++; 202 } 203 } 204 205 /** 206 * Postorder traversal of LST computing loop multiplier and loop exits 207 * for each loop. 208 * 209 * @param n a node 210 */ 211 private void computeLoopMultipliers(LSTNode n) { 212 for (Enumeration<LSTNode> e = n.getChildren(); e.hasMoreElements();) { 213 computeLoopMultipliers(e.nextElement()); 214 } 215 if (n != lst.getRoot()) { 216 computeMultiplier(n); 217 n.header.clearScratchFlag(); // so we won't ignore when processing enclosing loop 218 } 219 } 220 221 /** 222 * Compute the loop multiplier for this loop nest 223 * 224 * @param n starting node 225 */ 226 private void computeMultiplier(LSTNode n) { 227 n.initializeLoopExits(); 228 computeNodeWeights(n); 229 float loopExitWeight = computeLoopExitWeight(n); 230 n.loopMultiplier = 1.0f / loopExitWeight; 231 } 232 233 /** 234 * Propagate execution frequencies through the loop. 235 * Also records loop exit edges in loopExits. 236 * 237 * @param n starting node 238 */ 239 private void computeNodeWeights(LSTNode n) { 240 n.header.setExecutionFrequency(1f); 241 int idx = 0; 242 while (topOrder[idx] != n.header) idx++; 243 for (int numNodes = n.loop.populationCount(); numNodes > 0;) { 244 if (idx >= topOrder.length) { 245 numNodes--; 246 continue; 247 } 248 BasicBlock cur = topOrder[idx++]; 249 if (cur == null) { 250 numNodes--; 251 continue; 252 } 253 if (!n.loop.get(cur.getNumber())) continue; // node was not in the loop nest being processed. 254 LSTNode other = lst.getLoop(cur); 255 if (other != n) { 256 if (cur == other.header) { 257 // loop header of nested loop 258 numNodes -= other.loop.populationCount(); 259 } 260 continue; // skip over nodes in nested loop. 261 } 262 263 numNodes--; 264 cur.setScratchFlag(); 265 float weight = cur.getExecutionFrequency(); 266 for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) { 267 processEdge(n, cur, wbt.curBlock(), wbt.curWeight(), weight); 268 } 269 } 270 } 271 272 private void processEdge(LSTNode n, BasicBlock source, BasicBlock target, float prob, float weight) { 273 if (target.getScratchFlag()) return; // ignore backedge 274 if (n.loop.get(target.getNumber())) { 275 LSTNode other = lst.getLoop(target); 276 if (other == n) { 277 target.augmentExecutionFrequency(prob * weight); 278 } else { 279 // header of nested loop; pass prob and weight through to targets of loop exits 280 // Algorithm: find the total loopExitWeight, then distribute prob and weight 281 // in ratio to the exit weight for each exit. 282 // Effectively we are treating the nested loop as an n-way branch to its loop exits. 283 target.setScratchFlag(); 284 float exitWeight = computeLoopExitWeight(other); 285 for (LSTNode.Edge exit : other.loopExits) { 286 float myWeight = exit.source.getExecutionFrequency() * exit.probability; 287 float myFraction = myWeight / exitWeight; 288 processEdge(n, source, exit.target, prob * myFraction, weight); 289 } 290 target.clearScratchFlag(); 291 } 292 } else { 293 n.addLoopExit(source, target, prob); 294 } 295 } 296 297 private float computeLoopExitWeight(LSTNode n) { 298 float exitWeight = 0f; 299 for (LSTNode.Edge exit : n.loopExits) { 300 exitWeight += (exit.source.getExecutionFrequency() * exit.probability); 301 } 302 // Kludge: if we think the loop has no exits, lets pretend that there is a 1% 303 // chance of exiting to avoid getting NaN's in our computation. 304 return exitWeight == 0f ? 0.01f : exitWeight; 305 } 306 307 private void computeBlockFrequencies() { 308 ir.cfg.entry().setExecutionFrequency(1f); 309 for (BasicBlock cur : topOrder) { 310 if (cur == null || cur.isExit()) continue; // ignore exit node. 311 if (lst != null) { 312 LSTNode loop = lst.getLoop(cur); 313 if (loop != null && loop.header == cur) { 314 cur.setExecutionFrequency(cur.getExecutionFrequency() * loop.loopMultiplier); 315 } 316 } 317 float weight = cur.getExecutionFrequency(); 318 cur.setScratchFlag(); 319 320 for (WeightedBranchTargets wbt = new WeightedBranchTargets(cur); wbt.hasMoreElements(); wbt.advance()) { 321 BasicBlock target = wbt.curBlock(); 322 if (!target.getScratchFlag()) { 323 target.augmentExecutionFrequency(wbt.curWeight() * weight); 324 } 325 } 326 } 327 } 328}