From e58fa0ba66272c5f28828b15d06c7e42a9882b3b Mon Sep 17 00:00:00 2001 From: Jungshik Shin Date: Sat, 16 Dec 2017 04:19:27 +0000 Subject: [PATCH] Use fromUTF8() for UnicodeString construction from UTF-8 Chrome's copy of ICU is built with U_CHARSET_IS_UTF8=1 so that a |char *| buffer is treated as UTF-8 when constructing UnicodeString() regardless of the default encoding of the current locale on Linux or a non-Unicode code page on Windows. However, some Linux distros do not set U_CHARSET_IS_UTF8=1 when building ICU, and a Chromium build with system_icu crashes when Chromium is run in a non-UTF-8 locale (e.g. 'C'). To make Chromium work in a non-UTF-8 locale (which is pretty rare these days), use 'icu::UnicodeString::fromUTF8(StringPiece)' instead of 'icu::UnicodeString(const char*)'. Bug: 772655 Test: components_unittests --gtest_filter=*IDN* Test: Chromium built with system_icu does not crash in C locale. Change-Id: I0daa284ec06b8e83814fc70eb8e9e5c96444ebfa Reviewed-on: https://chromium-review.googlesource.com/831247 Reviewed-by: Peter Kasting Commit-Queue: Jungshik Shin Cr-Commit-Position: refs/heads/master@{#524586} --- components/url_formatter/idn_spoof_checker.cc | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/components/url_formatter/idn_spoof_checker.cc b/components/url_formatter/idn_spoof_checker.cc index a88c5e8f8331..aee748d8a4d5 100644 --- a/components/url_formatter/idn_spoof_checker.cc +++ b/components/url_formatter/idn_spoof_checker.cc @@ -110,8 +110,8 @@ IDNSpoofChecker::IDNSpoofChecker() { // These Cyrillic letters look like Latin. A domain label entirely made of // these letters is blocked as a simplified whole-script-spoofable. 
- cyrillic_letters_latin_alike_ = - icu::UnicodeSet(icu::UnicodeString("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); + cyrillic_letters_latin_alike_ = icu::UnicodeSet( + icu::UnicodeString::fromUTF8("[асԁеһіјӏорԛѕԝхуъЬҽпгѵѡ]"), status); cyrillic_letters_latin_alike_.freeze(); cyrillic_letters_ = @@ -141,8 +141,8 @@ IDNSpoofChecker::IDNSpoofChecker() { UParseError parse_error; diacritic_remover_.reset(icu::Transliterator::createFromRules( UNICODE_STRING_SIMPLE("DropAcc"), - icu::UnicodeString("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;" - " ł > l; ø > o; đ > d;"), + icu::UnicodeString::fromUTF8("::NFD; ::[:Nonspacing Mark:] Remove; ::NFC;" + " ł > l; ø > o; đ > d;"), UTRANS_FORWARD, parse_error, status)); // Supplement the Unicode confusable list by the following mapping. @@ -158,7 +158,7 @@ IDNSpoofChecker::IDNSpoofChecker() { // - U+0D1F (ട) => s extra_confusable_mapper_.reset(icu::Transliterator::createFromRules( UNICODE_STRING_SIMPLE("ExtraConf"), - icu::UnicodeString( + icu::UnicodeString::fromUTF8( "ӏ > l; [кĸκ] > k; п > n; [ƅь] > b; в > b; м > m; н > h; " "т > t; [шщ] > w; ട > s;"), UTRANS_FORWARD, parse_error, status)); -- 2.15.1