Java的系统属性有很多与底层操作系统相关,本文分析这些属性的赋值过程。
虚拟机启动
《Java Hotspot虚拟机的启动过程(三)》一文曾概述JNI_CreateJavaVM函数的工作,其中之一是加载并初始化Java核心类,如java.lang.String、java.lang.System和java.lang.Class等,与之相关的Threads类的create_vm静态函数的部分代码如下:
// Excerpt: initializes a fixed sequence of core Java classes and creates the
// initial thread group and the main thread's java.lang.Thread object.
initialize_class(vmSymbols::java_lang_String(), CHECK_0);
// Initialize java_lang.System (needed before creating the thread)
initialize_class(vmSymbols::java_lang_System(), CHECK_0);
initialize_class(vmSymbols::java_lang_ThreadGroup(), CHECK_0);
// Create the initial thread group and register it as the main thread group.
Handle thread_group = create_initial_thread_group(CHECK_0);
Universe::set_main_thread_group(thread_group());
initialize_class(vmSymbols::java_lang_Thread(), CHECK_0);
// Create the Thread object for main_thread and attach it to the native thread.
oop thread_object = create_initial_thread(thread_group, main_thread, CHECK_0);
main_thread->set_threadObj(thread_object);
// Set thread status to running since main thread has
// been started and running.
java_lang_Thread::set_thread_status(thread_object,
java_lang_Thread::RUNNABLE);
// The VM creates & returns objects of this class. Make sure it's initialized.
initialize_class(vmSymbols::java_lang_Class(), CHECK_0);
// The VM preresolves methods to these classes. Make sure that they get initialized
initialize_class(vmSymbols::java_lang_reflect_Method(), CHECK_0);
initialize_class(vmSymbols::java_lang_ref_Finalizer(), CHECK_0);
// Last step of this excerpt: hand over to call_initializeSystemClass().
call_initializeSystemClass(CHECK_0);
其中call_initializeSystemClass函数的代码如下,它在最后调用System类的initializeSystemClass静态方法来初始化System类。
// Resolves the java.lang.System class and invokes its static method named by
// vmSymbols::initializeSystemClass_name(), using the void-method signature.
// NOTE(review): TRAPS/CHECK are exception-propagation macros defined elsewhere;
// presumably CHECK returns early when an exception is pending - confirm.
static void call_initializeSystemClass(TRAPS) {
// Look up java.lang.System by symbol.
Klass* k = SystemDictionary::resolve_or_fail(vmSymbols::java_lang_System(), true, CHECK);
instanceKlassHandle klass (THREAD, k);
// The target method returns void, so the result holder is typed T_VOID.
JavaValue result(T_VOID);
JavaCalls::call_static(&result, klass, vmSymbols::initializeSystemClass_name(),
vmSymbols::void_method_signature(), CHECK);
}
下面看一下System类和initializeSystemClass方法。
System类
System类的initializeSystemClass静态方法的部分代码如下:
// Creates the static "props" table and lets the native initProperties()
// fill it before anything else in this method runs.
private static void initializeSystemClass() {
// VM might invoke JNU_NewStringPlatform() to set those encoding
// sensitive properties (user.home, user.name, boot.class.path, etc.)
// during "props" initialization, in which it may need access, via
// System.getProperty(), to the related system encoding property that
// have been initialized (put into "props") at early stage of the
// initialization. So make sure the "props" is available at the
// very beginning of the initialization and all system properties to
// be put into it directly.
props = new Properties();
initProperties(props); // initialized by the VM
// ... (some code omitted)
}
- props是Properties类型的静态成员变量;
- initProperties方法填充了props变量。
initProperties方法
initProperties方法是一个JNI方法,其对应实现定义在文件jdk/src/share/native/java/lang/System.c中。根据Java方法与JNI函数的对应规则,initProperties方法对应Java_java_lang_System_initProperties函数,其部分代码如下:
/*
 * JNI entry point matching the Java method System.initProperties(props).
 *
 * Fetches the platform property record via GetJavaProperties(), looks up the
 * "put", "remove" and "getProperty" method IDs on the class of `props`, and
 * then stores individual properties through the PUTPROP macro.
 *
 * NOTE(review): CHECK_NULL_RETURN(x, NULL) presumably returns NULL from this
 * function when x is NULL - confirm against the macro definition.
 */
JNIEXPORT jobject JNICALL
Java_java_lang_System_initProperties(JNIEnv *env, jclass cla, jobject props)
{
char buf[128];
java_props_t *sprops;
jmethodID putID, removeID, getPropID;
jobject ret = NULL;
jstring jVMVal = NULL;
/* Collect the OS-dependent property values. */
sprops = GetJavaProperties(env);
CHECK_NULL_RETURN(sprops, NULL);
/* Object put(Object, Object) on the runtime class of props. */
putID = (*env)->GetMethodID(env,
(*env)->GetObjectClass(env, props),
"put",
"(Ljava/lang/Object;Ljava/lang/Object;)Ljava/lang/Object;");
CHECK_NULL_RETURN(putID, NULL);
/* Object remove(Object) on the runtime class of props. */
removeID = (*env)->GetMethodID(env,
(*env)->GetObjectClass(env, props),
"remove",
"(Ljava/lang/Object;)Ljava/lang/Object;");
CHECK_NULL_RETURN(removeID, NULL);
/* String getProperty(String) on the runtime class of props. */
getPropID = (*env)->GetMethodID(env,
(*env)->GetObjectClass(env, props),
"getProperty",
"(Ljava/lang/String;)Ljava/lang/String;");
CHECK_NULL_RETURN(getPropID, NULL);
/* The properties below come from build-time macros, not from sprops. */
PUTPROP(props, "java.specification.version",
JDK_MAJOR_VERSION "." JDK_MINOR_VERSION);
PUTPROP(props, "java.specification.name",
"Java Platform API Specification");
PUTPROP(props, "java.specification.vendor",
JAVA_SPECIFICATION_VENDOR);
PUTPROP(props, "java.version", RELEASE);
PUTPROP(props, "java.vendor", VENDOR);
PUTPROP(props, "java.vendor.url", VENDOR_URL);
// ... (some code omitted)
}
- java_props_t 结构体在文件jdk/src/share/native/java/lang/java_props.h中定义;
- 调用GetJavaProperties函数获得系统属性并保存到java_props_t 结构体;
- 利用PUTPROP宏将上一步获得的属性(以及JDK_MAJOR_VERSION、RELEASE、VENDOR等编译期常量)填充到System类的props静态变量。
java_props_t 结构体
java_props_t 结构体的部分代码如下:
/*
 * nchar is the character type used for the path-like and user-related fields
 * below: WCHAR when WIN32 is defined, plain char otherwise.
 */
#ifdef WIN32
#include <tchar.h>
typedef WCHAR nchar;
#else
typedef char nchar;
#endif
/*
 * Record of platform-dependent values returned by GetJavaProperties() and
 * later copied into the Java system properties.
 */
typedef struct {
char *os_name; /* os.name */
char *os_version; /* os.version */
char *os_arch;
#ifdef JDK_ARCH_ABI_PROP_NAME
char *sun_arch_abi;
#endif
nchar *tmp_dir;
nchar *font_dir;
nchar *user_dir; /* user.dir */
char *file_separator; /* file.separator */
char *path_separator;
char *line_separator;
nchar *user_name;
nchar *user_home;
char *language;
char *format_language;
char *display_language;
char *script;
char *format_script;
char *display_script;
char *country;
char *format_country;
char *display_country;
char *variant;
char *format_variant;
char *display_variant;
char *encoding; /* file.encoding */
char *sun_jnu_encoding;
char *sun_stdout_encoding;
char *sun_stderr_encoding;
char *timezone;
char *printerJob;
char *graphics_env;
char *awt_toolkit;
char *unicode_encoding; /* The default endianness of unicode
i.e. UnicodeBig or UnicodeLittle */
const char *cpu_isalist; /* list of supported instruction sets */
char *cpu_endian; /* endianness of platform */
char *data_model; /* 32 or 64 bit data model */
char *patch_level; /* patches/service packs installed */
char *desktop; /* Desktop name. */
// ... (some fields omitted)
} java_props_t;
- os_name成员对应os.name属性;
- os_version成员对应os.version属性;
- user_dir成员对应user.dir属性;
- file_separator成员对应file.separator属性;
- encoding成员对应file.encoding属性;
- 其他属性限于篇幅不再赘述。
GetJavaProperties函数
GetJavaProperties函数定义在文件jdk/src/solaris/native/java/lang/java_props_md.c中,以获取user.dir属性为例,其相关代码如下,可以看到使用了getcwd库函数,其他属性的获取方法同理。
/* Current directory: becomes sprops.user_dir (the user.dir property). */
{
char buf[MAXPATHLEN];
errno = 0;
/* On failure raise java.lang.Error; on success keep a heap copy, since buf
 * is a local array that goes out of scope at the end of this block. */
if (getcwd(buf, sizeof(buf)) == NULL)
JNU_ThrowByName(env, "java/lang/Error",
"Properties init: Could not determine current working directory.");
else
/* NOTE(review): the strdup() result is not checked for NULL here. */
sprops.user_dir = strdup(buf);
}
file.encoding属性
目前对系统属性file.encoding的含义,网络上的文章众说纷纭,其中的一些甚至是完全错误的。系统属性file.encoding表示的编码是什么呢?在GetJavaProperties函数中可以看到如下代码:
/* Determine the language, country, variant, and encoding from the host,
 * and store these in the user.language, user.country, user.variant and
 * file.encoding system properties. */
/* Adopt the locale configured in the process environment. */
setlocale(LC_ALL, "");
/* First pass (LC_CTYPE): fills the format_* fields and the encoding. */
if (ParseLocale(env, LC_CTYPE,
&(sprops.format_language),
&(sprops.format_script),
&(sprops.format_country),
&(sprops.format_variant),
&(sprops.encoding))) {
/* Second pass (LC_MESSAGES): fills language/script/country/variant;
 * the encoding output is NULL, so the encoding is not recomputed. */
ParseLocale(env, LC_MESSAGES,
&(sprops.language),
&(sprops.script),
&(sprops.country),
&(sprops.variant),
NULL);
} else {
/* ParseLocale returned 0: fall back to fixed defaults. */
sprops.language = "en";
sprops.encoding = "ISO8859-1";
}
- 调用setlocale(LC_ALL, "")根据环境变量的值设置locale:对每个类别,首先取LC_ALL环境变量的值,其次取与该类别同名的环境变量(如LC_CTYPE、LC_MESSAGES),最后取LANG环境变量;
- 利用ParseLocale函数进一步解析;如果ParseLocale返回0(从下面的代码看,只发生在内存分配失败时),那么language取en,file.encoding取ISO8859-1;
- 可以使用虚拟机参数-Dfile.encoding覆盖该属性的默认值。
用于Linux的ParseLocale函数的代码如下:
/*
 * Splits the current locale name of category `cat` into language, country,
 * encoding and variant, and writes normalized values through the out-params.
 *
 * env          - JNI environment, used only to throw OutOfMemoryError here.
 * cat          - locale category passed to setlocale(cat, NULL).
 * std_language - out, may be NULL; receives "en", a mapLookup result, or a
 *                malloc'ed copy of the raw language.
 * std_script   - out, may be NULL; only written when the locale has a variant.
 * std_country  - out, may be NULL; only written when the locale has a country.
 * std_variant  - out, may be NULL; only written when the locale has a variant.
 * std_encoding - out, may be NULL; receives the normalized encoding name.
 *
 * Returns 1 on success, or 0 after throwing OutOfMemoryError when allocating
 * or reallocating the working buffers fails.
 *
 * NOTE(review): mapLookup() is defined elsewhere; from its use here it appears
 * to return non-zero and store the mapped string when the key is found -
 * confirm against its definition.
 */
static int ParseLocale(JNIEnv* env, int cat, char ** std_language, char ** std_script,
char ** std_country, char ** std_variant, char ** std_encoding) {
char *temp = NULL;
char *language = NULL, *country = NULL, *variant = NULL,
*encoding = NULL;
char *p, *encoding_variant, *old_temp, *old_ev;
char *lc;
/* Query the locale set for the category */
lc = setlocale(cat, NULL); // e.g. returns "en_US.UTF-8"
/* No locale, or the "C"/"POSIX" locale, is treated as "en_US". */
if (lc == NULL || !strcmp(lc, "C") || !strcmp(lc, "POSIX")) {
lc = "en_US";
}
/* Work on a private copy because the string is cut up in place below. */
temp = malloc(strlen(lc) + 1);
if (temp == NULL) {
JNU_ThrowOutOfMemoryError(env, NULL);
return 0;
}
/*
* locale string format in Solaris is
* <language name>_<country name>.<encoding name>@<variant name>
* <country name>, <encoding name>, and <variant name> are optional.
*/
strcpy(temp, lc); // in the example, temp now also holds "en_US.UTF-8"
/* Parse the language, country, encoding, and variant from the
* locale. Any of the elements may be missing, but they must occur
* in the order language_country.encoding@variant, and must be
* preceded by their delimiter (except for language).
*
* If the locale name (without .encoding@variant, if any) matches
* any of the names in the locale_aliases list, map it to the
* corresponding full locale name. Most of the entries in the
* locale_aliases list are locales that include a language name but
* no country name, and this facility is used to map each language
* to a default country if that's possible. It's also used to map
* the Solaris locale aliases to their proper Java locale IDs.
*/
encoding_variant = malloc(strlen(temp)+1);
if (encoding_variant == NULL) {
free(temp);
JNU_ThrowOutOfMemoryError(env, NULL);
return 0;
}
/* Move the ".encoding@variant" (or "@variant") tail into encoding_variant
 * and terminate temp at that delimiter. */
if ((p = strchr(temp, '.')) != NULL) {
strcpy(encoding_variant, p); /* Copy the leading '.' */
*p = '\0';
} else if ((p = strchr(temp, '@')) != NULL) {
strcpy(encoding_variant, p); /* Copy the leading '@' */
*p = '\0';
} else {
*encoding_variant = '\0';
}
// In the example, encoding_variant is ".UTF-8" and temp is now "en_US".
if (mapLookup(locale_aliases, temp, &p)) {
/* temp matched an alias: replace it with the mapped locale name in p. */
old_temp = temp;
temp = realloc(temp, strlen(p)+1);
if (temp == NULL) {
free(old_temp);
free(encoding_variant);
JNU_ThrowOutOfMemoryError(env, NULL);
return 0;
}
strcpy(temp, p);
old_ev = encoding_variant;
encoding_variant = realloc(encoding_variant, strlen(temp)+1);
if (encoding_variant == NULL) {
free(old_ev);
free(temp);
JNU_ThrowOutOfMemoryError(env, NULL);
return 0;
}
// check the "encoding_variant" again, if any.
if ((p = strchr(temp, '.')) != NULL) {
strcpy(encoding_variant, p); /* Copy the leading '.' */
*p = '\0';
} else if ((p = strchr(temp, '@')) != NULL) {
strcpy(encoding_variant, p); /* Copy the leading '@' */
*p = '\0';
}
}
language = temp; // language aliases the start of temp
if ((country = strchr(temp, '_')) != NULL) {
*country++ = '\0';
}
// The '_' in temp was overwritten with '\0'; in the example temp is now "en" and country is "US".
p = encoding_variant;
if ((encoding = strchr(p, '.')) != NULL) {
p[encoding++ - p] = '\0';
p = encoding;
} // The '.' was overwritten with '\0'; in the example p and encoding both point at "UTF-8".
if ((variant = strchr(p, '@')) != NULL) {
p[variant++ - p] = '\0';
}
/* Normalize the language name */
/* NOTE(review): *std_language may end up as a string literal, a mapLookup
 * result, or a malloc'ed copy; the malloc() result below is used without a
 * NULL check. */
if (std_language != NULL) {
*std_language = "en";
if (language != NULL && mapLookup(language_names, language, std_language) == 0) {
*std_language = malloc(strlen(language)+1);
strcpy(*std_language, language);
}
}
/* Normalize the country name */
/* NOTE(review): the malloc() result below is likewise not checked. */
if (std_country != NULL && country != NULL) {
if (mapLookup(country_names, country, std_country) == 0) {
*std_country = malloc(strlen(country)+1);
strcpy(*std_country, country);
}
}
/* Normalize the script and variant name. Note that we only use
* variants listed in the mapping array; others are ignored.
*/
if (variant != NULL) {
if (std_script != NULL) {
mapLookup(script_names, variant, std_script);
}
if (std_variant != NULL) {
mapLookup(variant_names, variant, std_variant);
}
}
/* Normalize the encoding name. Note that we IGNORE the string
* 'encoding' extracted from the locale name above. Instead, we use the
* more reliable method of calling nl_langinfo(CODESET). This function
* returns an empty string if no encoding is set for the given locale
* (e.g., the C or POSIX locales); we use the default ISO 8859-1
* converter for such locales.
*/
if (std_encoding != NULL) {
/* OK, not so reliable - nl_langinfo() gives wrong answers on
* Euro locales, in particular. */
if (strcmp(p, "ISO8859-15") == 0)
p = "ISO8859-15";
else
p = nl_langinfo(CODESET); // the encoding parsed from the setlocale() name is not ISO8859-15, so query nl_langinfo() instead
/* Convert the bare "646" used on Solaris to a proper IANA name */
if (strcmp(p, "646") == 0)
p = "ISO646-US";
/* return same result nl_langinfo would return for en_UK,
* in order to use optimizations. */
*std_encoding = (*p != '\0') ? p : "ISO8859-1";
/*
* Remap the encoding string to a different value for japanese
* locales on linux so that customized converters are used instead
* of the default converter for "EUC-JP". The customized converters
* omit support for the JIS0212 encoding which is not supported by
* the variant of "EUC-JP" encoding used on linux
*/
if (strcmp(p, "EUC-JP") == 0) {
*std_encoding = "EUC-JP-LINUX";
}
}
/* Release the two working buffers before returning. */
free(temp);
free(encoding_variant);
return 1;
}
综上,如果setlocale(LC_CTYPE, NULL)调用所返回的locale中的编码是ISO8859-15,那么系统属性file.encoding即是ISO8859-15,否则是nl_langinfo(CODESET)函数返回的结果;此外还有三处修正:结果为646时改为ISO646-US,为空字符串时改为ISO8859-1,为EUC-JP时改为EUC-JP-LINUX。
网友评论