From 1ce42d1001139a9168e9451dbd48a6daef95c691 Mon Sep 17 00:00:00 2001 From: Eike Rathke Date: Wed, 30 Apr 2014 16:51:05 +0000 Subject: resolve crashes with ICU 53.1 in locales with collator data, fdo#77071 ICU 53.1 changed API behavior. The documentation at https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RuleBasedCollator.html#a2f4c7eeaf020ad68e3bd9722dd272357 isn't correct anymore: "length: size of the image. If negative, the API will try to figure out the length of the image" NO, IT WILL NOT! It bails out with an error instead. Introduced a function to obtain the length for each collator data. "base: collator, usually root. The base is required to be present through the lifetime of the collator. Currently it cannot be NULL." NOT "usually root"! There's a check now that bails out if base->tailoring != CollationRoot::getRoot(errorCode). So we now use an instance created with icu::Locale::getRoot() as the base. (cherry picked from commit a3c627fe38236e82bc6008075d862b3cbfbd9ce3) Conflicts: i18npool/source/collator/collator_unicode.cxx Backported. 
make DISABLE_DYNLOADING on Android happy, fdo#77071 related (cherry picked from commit dc7ba1af236ec28d399eff833d56608fde9fb70d) Change-Id: Ia9c4e27d5ef4f8083bbe57e4e2f4b3ff63bb42ed Reviewed-on: https://gerrit.libreoffice.org/9215 Reviewed-by: Caolán McNamara Tested-by: Caolán McNamara --- diff --git a/i18npool/source/collator/collator_unicode.cxx b/i18npool/source/collator/collator_unicode.cxx index 90dd2f3b..42dfef3 100644 --- a/i18npool/source/collator/collator_unicode.cxx +++ b/i18npool/source/collator/collator_unicode.cxx @@ -84,6 +84,27 @@ const sal_uInt8* get_collator_data_zh_radical(); const sal_uInt8* get_collator_data_zh_stroke(); const sal_uInt8* get_collator_data_zh_zhuyin(); +size_t get_collator_data_ca_charset_length(); +size_t get_collator_data_dz_charset_length(); +size_t get_collator_data_hu_charset_length(); +size_t get_collator_data_ja_charset_length(); +size_t get_collator_data_ja_phonetic_alphanumeric_first_length(); +size_t get_collator_data_ja_phonetic_alphanumeric_last_length(); +size_t get_collator_data_ko_charset_length(); +size_t get_collator_data_ku_alphanumeric_length(); +size_t get_collator_data_ln_charset_length(); +size_t get_collator_data_my_dictionary_length(); +size_t get_collator_data_ne_charset_length(); +size_t get_collator_data_sid_charset_length(); +size_t get_collator_data_zh_TW_charset_length(); +size_t get_collator_data_zh_TW_radical_length(); +size_t get_collator_data_zh_TW_stroke_length(); +size_t get_collator_data_zh_charset_length(); +size_t get_collator_data_zh_pinyin_length(); +size_t get_collator_data_zh_radical_length(); +size_t get_collator_data_zh_stroke_length(); +size_t get_collator_data_zh_zhuyin_length(); + } #endif @@ -120,6 +141,7 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang:: } if (!collator && OUString::createFromAscii(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) { const sal_uInt8* (*func)() = NULL; + size_t (*funclen)() = NULL; #ifndef DISABLE_DYNLOADING 
OUStringBuffer aBuf; @@ -132,11 +154,21 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang:: aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_"); if ( rLocale.Language == "zh" ) { OUString func_base = aBuf.makeStringAndClear(); + // gencoll_rule exports the size accessor as get_<name>_length, so the "_length" suffix must follow the algorithm name (a suffix on func_base would yield the non-existent get_zh__length<alg>). if (OUString("TW HK MO").indexOf(rLocale.Country) >= 0) - func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, + { + func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, OUString(func_base + "TW_" + rAlgorithm).pData); + funclen = (size_t (*)()) osl_getFunctionSymbol(hModule, + OUString(func_base + "TW_" + rAlgorithm + "_length").pData); + } if (!func) - func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, OUString(func_base + rAlgorithm).pData); + { + func = (const sal_uInt8* (*)()) osl_getFunctionSymbol( + hModule, OUString(func_base + rAlgorithm).pData); + funclen = (size_t (*)()) osl_getFunctionSymbol( + hModule, OUString(func_base + rAlgorithm + "_length").pData); + } } else { if ( rLocale.Language == "ja" ) { // replace algorithm name to implementation name. 
@@ -149,72 +181,147 @@ Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang:: } else { aBuf.append(rAlgorithm); } - func=(const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, aBuf.makeStringAndClear().pData); + OUString func_base = aBuf.makeStringAndClear(); + OUString funclen_base = func_base + "_length"; + func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, func_base.pData); + funclen = (size_t (*)()) osl_getFunctionSymbol(hModule, funclen_base.pData); } } #else if ( rLocale.Language == "ca" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_ca_charset; + funclen = get_collator_data_ca_charset_length; + } } else if ( rLocale.Language == "dz" || rLocale.Language == "bo" ) { // 'bo' Tibetan uses the same collation rules as 'dz' Dzongkha if ( rAlgorithm == "charset" ) + { func = get_collator_data_dz_charset; + funclen = get_collator_data_dz_charset_length; + } } else if ( rLocale.Language == "hu" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_hu_charset; + funclen = get_collator_data_hu_charset_length; + } } else if ( rLocale.Language == "ja" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_ja_charset; + funclen = get_collator_data_ja_charset_length; + } else if ( rAlgorithm == "phonetic (alphanumeric first)" ) + { func = get_collator_data_ja_phonetic_alphanumeric_first; + funclen = get_collator_data_ja_phonetic_alphanumeric_first_length; + } else if ( rAlgorithm == "phonetic (alphanumeric last)" ) + { func = get_collator_data_ja_phonetic_alphanumeric_last; + funclen = get_collator_data_ja_phonetic_alphanumeric_last_length; + } #if (U_ICU_VERSION_MAJOR_NUM < 53) } else if ( rLocale.Language == "ko" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_ko_charset; + funclen = get_collator_data_ko_charset_length; + } #endif } else if ( rLocale.Language == "ku" ) { if ( rAlgorithm == "alphanumeric" ) + { func = get_collator_data_ku_alphanumeric; + funclen = 
get_collator_data_ku_alphanumeric_length; + } } else if ( rLocale.Language == "ln" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_ln_charset; + funclen = get_collator_data_ln_charset_length; + } } else if ( rLocale.Language == "my" ) { if ( rAlgorithm == "dictionary" ) + { func = get_collator_data_my_dictionary; + funclen = get_collator_data_my_dictionary_length; + } } else if ( rLocale.Language == "ne" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_ne_charset; + funclen = get_collator_data_ne_charset_length; + } } else if ( rLocale.Language == "sid" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_sid_charset; + funclen = get_collator_data_sid_charset_length; + } } else if ( rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO") ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_zh_TW_charset; + funclen = get_collator_data_zh_TW_charset_length; + } else if ( rAlgorithm == "radical" ) + { func = get_collator_data_zh_TW_radical; + funclen = get_collator_data_zh_TW_radical_length; + } else if ( rAlgorithm == "stroke" ) + { func = get_collator_data_zh_TW_stroke; + funclen = get_collator_data_zh_TW_stroke_length; + } } else if ( rLocale.Language == "zh" ) { if ( rAlgorithm == "charset" ) + { func = get_collator_data_zh_charset; + funclen = get_collator_data_zh_charset_length; + } else if ( rAlgorithm == "pinyin" ) + { func = get_collator_data_zh_pinyin; + funclen = get_collator_data_zh_pinyin_length; + } else if ( rAlgorithm == "radical" ) + { func = get_collator_data_zh_radical; + funclen = get_collator_data_zh_radical_length; + } else if ( rAlgorithm == "stroke" ) + { func = get_collator_data_zh_stroke; + funclen = get_collator_data_zh_stroke_length; + } else if ( rAlgorithm == "zhuyin" ) + { func = get_collator_data_zh_zhuyin; + funclen = get_collator_data_zh_zhuyin_length; + } } #endif - if (func) { + if (func && funclen) { const sal_uInt8* 
ruleImage=func(); - uca_base = new RuleBasedCollator(static_cast(NULL), status); + size_t ruleImageSize = funclen(); + // Not only changed ICU 53.1 the API behavior that a negative + // length (ruleImageSize) now leads to failure, but also that + // the base RuleBasedCollator passed as uca_base here needs to + // have a base->tailoring == CollationRoot::getRoot() otherwise + // the init bails out as well, as it does for the previously + // used "empty" RuleBasedCollator. + // The default collator of the en-US locale would also fulfill + // the requirement. The collator of the actual locale or the + // NULL (default) locale does not. + uca_base = static_cast(icu::Collator::createInstance( + icu::Locale::getRoot(), status)); if (! U_SUCCESS(status)) throw RuntimeException(); - collator = new RuleBasedCollator(reinterpret_cast(ruleImage), -1, uca_base, status); + collator = new RuleBasedCollator( + reinterpret_cast(ruleImage), ruleImageSize, uca_base, status); if (! U_SUCCESS(status)) throw RuntimeException(); } } diff --git a/i18npool/source/collator/gencoll_rule.cxx b/i18npool/source/collator/gencoll_rule.cxx index a801545..5ba9f5d 100644 --- a/i18npool/source/collator/gencoll_rule.cxx +++ b/i18npool/source/collator/gencoll_rule.cxx @@ -65,8 +65,10 @@ void data_write(char* file, char* name, sal_uInt8 *data, sal_Int32 len) fprintf(fp, "#ifndef DISABLE_DYNLOADING\n"); fprintf(fp, "SAL_DLLPUBLIC_EXPORT const sal_uInt8* get_%s() { return %s; }\n", name, name); + fprintf(fp, "SAL_DLLPUBLIC_EXPORT size_t get_%s_length() { return sizeof(%s); }\n", name, name); fprintf(fp, "#else\n"); fprintf(fp, "SAL_DLLPUBLIC_EXPORT const sal_uInt8* get_collator_data_%s() { return %s; }\n", name, name); + fprintf(fp, "SAL_DLLPUBLIC_EXPORT size_t get_collator_data_%s_length() { return sizeof(%s); }\n", name, name); fprintf(fp, "#endif\n"); fprintf(fp, "\n"); fprintf (fp, "}\n"); -- cgit v0.9.0.2-2-gbebe