001/* 002 * This file is part of the Jikes RVM project (http://jikesrvm.org). 003 * 004 * This file is licensed to You under the Eclipse Public License (EPL); 005 * You may not use this file except in compliance with the License. You 006 * may obtain a copy of the License at 007 * 008 * http://www.opensource.org/licenses/eclipse-1.0.php 009 * 010 * See the COPYRIGHT.txt file distributed with this work for information 011 * regarding copyright ownership. 012 */ 013package org.jikesrvm.classloader; 014 015import static org.jikesrvm.classloader.ClassLoaderConstants.BooleanTypeCode; 016import static org.jikesrvm.classloader.ClassLoaderConstants.ByteTypeCode; 017import static org.jikesrvm.classloader.ClassLoaderConstants.CharTypeCode; 018import static org.jikesrvm.classloader.ClassLoaderConstants.ClassTypeCode; 019import static org.jikesrvm.classloader.ClassLoaderConstants.DoubleTypeCode; 020import static org.jikesrvm.classloader.ClassLoaderConstants.FloatTypeCode; 021import static org.jikesrvm.classloader.ClassLoaderConstants.IntTypeCode; 022import static org.jikesrvm.classloader.ClassLoaderConstants.LongTypeCode; 023import static org.jikesrvm.classloader.ClassLoaderConstants.ShortTypeCode; 024import static org.jikesrvm.classloader.ClassLoaderConstants.VoidTypeCode; 025 026import org.jikesrvm.VM; 027import org.vmmagic.pragma.Interruptible; 028import org.vmmagic.pragma.Pure; 029 030/** <p>A Java class for parsing type descriptors and class names. The class 031 is <code>abstract</code> to eliminate the temptation to instantiate it, 032 since it contains only static methods. 033 034 <p>There are five similar kinds of descriptors and names that we have to 035 deal with. We don't have methods for parsing all of them. 036 037 <p> In this documentation, I will refer to <i>The Java Native Interface 038 Programmer's Guide and Specification</i> as the <i>JNI Guide</i>. 039 040 <p> Some of the types I discuss below are described in 12.3 of the JNI 041 Guide. 042 043 <dl> 044 <dt>Fully-qualified class names and fully-qualified interface names</dt> 045 <dd>These are the dot-separated names, such as "java.lang.String" or 046 "java.util.Map". 047 <p>We can validate these with the static method #isJavaClassName(String) 048 in this class. 049 </dd> 050 051 <dt>JNI Class Descriptor (including array classes),<br> These include the 052 internal Form of fully-qualified class names 053 and internal form of fully-qualified interface names</dt> 054 <dd>These 055 <dd>“It can be derived from a fully qualified class or interface 056 name as defined in The Java Language Specification by substituting the "." 057 character with the "/" character. For example, the JNI class descriptor 058 for <code>java.lang.String</code> is "<code>java/lang/String</code>” 059 Array classes are formed using the "[" character followed by the field 060 descriptor of the element type. The class descrpitor for "int[]" is "[I". 061 <P>We do not have an interface for parsing these right now. 062 </dd> 063 064 <dt>Field Descriptors</dt> 065 <dd>Described in 12.3.3 of the JNI Guide. 066 Examples: 067 <ul> 068 <li>"Z" for boolean<br> 069 <li> "B" for byte 070 <li>"D" for double 071 <li>"Ljava/lang/String;" for java.lang.String 072 <li> "[I" for int[]. 073 </ul> 074 </dd> 075 076 <dt>Method Descriptors</dt> 077 <dd>Described in 12.3.4 of the JNI guide. To quote: 078 079 <blockquote> 080 081 Method Descriptors are formed by placing the field descriptors of all 082 argument types in a pair of parentheses, and following that by the 083 field descriptor of the return type. There are no spaces or other 084 separator characters between the argument types. "<code>V</code>" is 085 used to denote the <code>void</code> method return type. Constructors 086 use "<code>V</code>" as their return type and use "<code><init></code>" 087 as their name. 088 </blockquote> 089 090 Example: The method with signature "<code>byte f(int i, String s)</code>" 091 has the Method Descriptor "<code>(ILjava/lang/String;)B</code>" 092 093 <dt>TypeReference names</dt> 094 <dd>Inside Jikes RVM, we use the TypeReference class to represent the 095 reference in some class file to some type (class, interface, primitive, or 096 array). We also use them to represent Void ({@code TypeReference.Void}). 097 TypeReference names are just field descriptors plus "V".</dd> 098 </dl> 099 100 */ 101 102public abstract class TypeDescriptorParsing { 103 /** Is the string <code>s</code> a legal name for a Java class or interface? 104 * This will take either fully-qualified names or names that are not fully 105 * qualified. 106 * <p> 107 * @param s The string to check for whether it's a valid name for a Java 108 * class. This is a string of the form, for example: 109 * "<code>java.lang.String</code>" 110 * @return <code>true</code> if <code>s</code> is valid, <code>false</code> 111 * otherwise. 112 * 113 * <p> 114 115 * <small><b>Implementation Question for wiser heads than mine:</b> 116 * Would it be more efficient for me to convert this to a <code>char</code> 117 * array? 118 * That's the way the example in <i>The Java Class Libraries</i> for 119 * <code>Character.isJavaIdentifier<i>*</i>()</code> is written. Or is the 120 * <code>String.charAt()</code> method inexpensive?</small> */ 121 @Interruptible 122 @Pure 123 public static boolean isJavaClassName(String s) { 124 boolean identStart = true; // pretend we just saw a . 125 for (int i = 0; i < s.length(); ++i) { 126 char c = s.charAt(i); 127 if (identStart) { 128 if (!isVMIdentifierStart(c)) { 129 return false; // failure to match identifier start. 130 } 131 identStart = false; // on to the next one. 132 continue; 133 } 134 if (c == '.' || c == '/') { 135 identStart = true; 136 continue; 137 } 138 /* We have a character that is not the first one of a VM identifier */ 139 if (!isVMIdentifierPart(c)) { 140 return false; 141 } 142 /* And on we go around the loop */ 143 } 144 // Must not finish by needing the start of another identifier. 145 return !identStart; 146 } 147 148 /** 149 * Java 1.5 relaxes the historical convention that class file identifiers 150 * (i.e. class, field, and method names) must be drawn from the characters 151 * specified by JLS identifiers (i.e. implemented by 152 * java.lang.Character.isJavaIdentifierStart()).<p> 153 * 154 * Given that, parsing rules for internal and external VM identifier 155 * dictates that identifiers may not contain the following 156 * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>, 157 * or <code>'/'</code> }. Method identifiers, excluding <code><init></code> 158 * and <code><clinit></code>, are further constrained to not include 159 * the characters <code>'<'</code> or <code>'>'</code>.<p> 160 * 161 * To avoid word boundary ambiguity, identifiers are presumed to not 162 * begin with a space character. Although not stated explicitly, this 163 * remains convention.<p> 164 * 165 * This method evaluates whether <code>c</code> is compatible as the starting 166 * character for a VM identifier. 167 * 168 * @param c character to evaluate for VM identifier compatibility 169 * @return boolean {@code true} iff <code>c</code> represents a valid VM identifier starting character 170 */ 171 @Pure 172 public static boolean isVMIdentifierStart(char c) { 173 return ((!Character.isWhitespace(c)) && isVMIdentifierPart(c)); 174 } 175 176 /** 177 * Java 1.5 relaxes the historical convention that class file identifiers 178 * (i.e. class, field, and method names) must be drawn from the characters 179 * specified by JLS identifiers (i.e. implemented by 180 * java.lang.Character.isJavaIdentifierPart()).<p> 181 * 182 * Given that, parsing rules for internal and external VM identifier 183 * dictates that identifiers may not contain the following 184 * characters: { <code>'.'</code>, <code>';'</code>, <code>'['</code>, 185 * or <code>'/'</code> }. Method identifiers, excluding <code><init></code> 186 * and <code><clinit></code>, are further constrained to not include 187 * the characters <code>'<'</code> or <code>'>'</code>.<p> 188 * 189 * This method evaluates whether <code>c</code> is compatible as a non-starting 190 * character for a VM identifier. 191 * 192 * @param c character to evaluate for VM identifier compatibility 193 * @return boolean {@code true} iff <code>c</code> represents a valid VM identifier non-starting character 194 */ 195 @Pure 196 public static boolean isVMIdentifierPart(char c) { 197 return ((c != '.') && (c != ';') && (c != '[') && (c != '/')); 198 } 199 200 /** 201 * Is this the internal form of a Java class name? (the one with the "/" 202 * instead of the "." separating components?) 203 * 204 * @param val a string as a char array 205 * @param first the start index of the string to be checked 206 * @param last the last index of the string to be checked 207 * @return {@code true} if the given char array represents an internal java class name 208 */ 209 public static boolean isJavaClassNameInternalForm(char[] val, int first, int last) { 210 if (val[first++] != ClassTypeCode) { 211 // the L 212 return false; 213 } 214 if (val[last--] != ';') { 215 // malformed("a class ('L') must end in a ';'"); 216 return false; 217 } 218 219 boolean identStart = true; // pretend we just saw a separator 220 for (int i = first; i <= last; ++i) { 221 char c = val[i]; 222 if (identStart) { 223 if (!isVMIdentifierStart(c)) { 224 return false; // failure to match identifier start. 225 } 226 identStart = false; // on to the next one. 227 continue; 228 } 229 if (c == '/') { 230 identStart = true; 231 continue; 232 } 233 /* We have a character that is not the first one of a VM identifier */ 234 if (!isVMIdentifierPart(c)) { 235 return false; 236 } 237 238 /* And on we go around the loop */ 239 } 240 // Must not finish by needing the start of another identifier. 241 return !identStart; 242 } 243 244 @Pure 245 public static boolean isValidTypeDescriptor(String s) { 246 try { 247 validateAsTypeDescriptor(s); 248 return true; 249 } catch (IllegalArgumentException iae) { 250 return false; 251 } 252 } 253 254 @Pure 255 public static boolean isValidTypeDescriptor(Atom a) { 256 try { 257 validateAsTypeDescriptor(a); 258 return true; 259 } catch (IllegalArgumentException iae) { 260 return false; 261 } 262 } 263 264 @Interruptible 265 @Pure 266 public static void validateAsTypeDescriptor(Atom a) throws IllegalArgumentException { 267 try { 268 // Atoms are always utf-8. 269 a.toUnicodeString(); 270 } catch (java.io.UTFDataFormatException udfe) { 271 IllegalArgumentException iae = 272 new IllegalArgumentException( 273 "The atom in question does not represent a valid UTF8 string, so it's not a type descriptor."); 274 iae.initCause(udfe); 275 throw iae; 276 } 277 } 278 279 /** 280 * Validates that the given String is a valid type descriptor. 281 * @param s string to check 282 * @throws IllegalArgumentException if the string is not a valid type descriptor 283 */ 284 @Interruptible 285 @Pure 286 public static void validateAsTypeDescriptor(String s) throws IllegalArgumentException { 287 char[] val = s.toCharArray(); 288 289 int i = 0; 290 if (val.length == 0) { 291 malformed("is the empty string", s); 292 } 293 294 // array dimensions precede the rest. 295 while (val[i] == '[') { 296 if (++i >= val.length) { 297 malformed("has just '[' chars", s); 298 } 299 } 300 if (VM.VerifyAssertions) { 301 // logically impossible: 302 VM._assert(i < val.length); 303 } 304 305 if (val[i] == VoidTypeCode && i != 0) { 306 malformed("can't have an array of void", s); 307 } 308 309 if (isJavaPrimitive(val[i])) { 310 // A primitive should be just 1 char long 311 if (i != val.length - 1) { 312 // if this isn't the last character, scream. 313 malformed("nothing should follow the primitive typecode '" + Character.toString(val[i]) + "'", s); 314 } 315 return; // otherwise all is well. 316 } 317 318 // logically impossible: 319 if (VM.VerifyAssertions) { 320 VM._assert(val[i] != '[' && !isJavaPrimitive(val[i])); 321 } 322 // All that's left is ClassTypeCode 323 if (val[i] != ClassTypeCode) { 324 malformed("unknown character '" + Character.toString(val[i]) + "'", s); 325 } 326 if (!isJavaClassNameInternalForm(val, i, val.length - 1)) { 327 malformed("doesn't end with a valid class name in internal form", s); 328 } 329 } 330 331 @Pure 332 private static boolean isJavaPrimitive(char c) { 333 byte b = (byte) c; 334 if (c != (char) b) { 335 return false; 336 } 337 return isJavaPrimitive(b); 338 } 339 340 @Pure 341 private static boolean isJavaPrimitive(byte b) { 342 switch (b) { 343 case VoidTypeCode: 344 case BooleanTypeCode: 345 case ByteTypeCode: 346 case ShortTypeCode: 347 case CharTypeCode: 348 case IntTypeCode: 349 case LongTypeCode: 350 case FloatTypeCode: 351 case DoubleTypeCode: 352 return true; 353 default: 354 return false; 355 } 356 } 357 358 private static void malformed(String msg, String typeName) throws IllegalArgumentException { 359 throw new IllegalArgumentException("Malformed type name" + 360 ((msg == null) ? "" : ": " + msg) + 361 ": \"" + 362 typeName + 363 "\""); 364 } 365 366 // These are test routines you can use to do unit testing on the methods in 367 // this class:: 368 // // Test isJavaClassName() 369// public static void main(String[] args) { 370// for (int i = 0; i < args.length; ++i) { 371// System.out.println(args[i] + " is " 372// + (TypeDescriptorParsing.isJavaClassName(args[i]) ? "" : "NOT " ) + "a valid Java class name."); 373// } 374// } 375 376// // Test validateAsTypeDescriptor() 377// public static void main(String[] args) { 378// for (int i = 0; i < args.length; ++i) { 379// System.out.println("Validating " + args[i] + " as a type descriptor."); 380// validateAsTypeDescriptor(args[i]); 381 382// } 383// } 384 385}