Fixed the mixed RTL/LTR support mode so that words are placed in the correct order within a sentence

This commit is contained in:
Mark Vejvoda 2013-11-14 22:17:30 +00:00
parent 84ea30445d
commit f0737ac682
3 changed files with 137 additions and 289 deletions

View File

@ -112,7 +112,6 @@ public:
int getSize() const;
void setSize(int size);
static std::vector<std::pair<char, int> > extract_mixed_LTR_RTL_map(string &str_);
static void bidi_cvt(string &str_);
static void resetToDefaults();

View File

@ -317,215 +317,6 @@ bool is_ASCII(const int &c) {
return !is_non_ASCII(c);
}
/**
 * Reports whether the space-delimited word that ends at end_index contains
 * at least one alphanumeric character (as classified by isalnum()).
 *
 * The word is the run of non-space characters reached by walking backwards
 * from end_index until a ' ' or the start of the string is found.
 *
 * @param str_       text to inspect (may contain multi-byte UTF-8 sequences)
 * @param end_index  index of the last character of the word to examine
 * @return true when the word holds an alphanumeric character; false when
 *         end_index is negative, the string is empty, the character at
 *         end_index is itself a space, or the word has no alphanumerics.
 */
bool prev_word_is_ASCII(const std::string &str_,int end_index) {
	if(end_index < 0 || str_.empty()) {
		return false;
	}

	// Clamp so that an index past the last character can never read out of
	// bounds; the word then simply ends at the last character.
	const int last_index = static_cast<int>(str_.size()) - 1;
	if(end_index > last_index) {
		end_index = last_index;
	}

	// Scan the word in place instead of copying it into a temporary string.
	for(int index = end_index; index >= 0 && str_[index] != ' '; --index) {
		// isalnum() is undefined for negative arguments, which is exactly
		// what the bytes of non-ASCII UTF-8 text are when char is signed,
		// so the value must go through unsigned char first.
		if(isalnum(static_cast<unsigned char>(str_[index])) != 0) {
			return true;
		}
	}
	return false;
}
/**
 * Reports whether the space-delimited word that starts at start_index
 * contains at least one alphanumeric character (as classified by isalnum()).
 *
 * The word is the run of non-space characters reached by walking forwards
 * from start_index until a ' ' or the end of the string is found.
 *
 * @param str_         text to inspect (may contain multi-byte UTF-8 sequences)
 * @param start_index  index of the first character of the word to examine
 * @return true when the word holds an alphanumeric character; false when
 *         start_index lies outside the string, the character at start_index
 *         is itself a space, or the word has no alphanumerics.
 */
bool next_word_is_ASCII(const std::string &str_,int start_index) {
	const int length = static_cast<int>(str_.size());

	// Explicit signed range check; avoids relying on a negative index being
	// converted to a huge unsigned value when compared against size().
	if(start_index < 0 || start_index >= length) {
		return false;
	}

	// Scan the word in place instead of copying it into a temporary string.
	for(int index = start_index; index < length && str_[index] != ' '; ++index) {
		// isalnum() is undefined for negative arguments, which is exactly
		// what the bytes of non-ASCII UTF-8 text are when char is signed,
		// so the value must go through unsigned char first.
		if(isalnum(static_cast<unsigned char>(str_[index])) != 0) {
			return true;
		}
	}
	return false;
}
// Pulls selected ASCII characters out of str_ and returns them together with
// the index each one occupied in the original string.
//
// Every character for which is_ASCII() is true is a candidate. A candidate is
// left in place (skipped) when the branch rules below decide that its
// neighbouring words contain no alphanumeric characters according to
// prev_word_is_ASCII() / next_word_is_ASCII(); otherwise it is recorded and
// afterwards erased from str_.
//
// @param str_  text to process; modified in place (recorded characters are erased)
// @return      (character, original index) pairs in ascending index order
//
// NOTE(review): presumably the caller re-inserts the returned characters at
// their recorded indices after bidi conversion - confirm against the caller.
vector<pair<char, int> > Font::extract_mixed_LTR_RTL_map(string &str_) {
vector<pair<char, int> > ascii_char_map;
// (An earlier attempt to pad literal "\\n" sequences with spaces is disabled.)
for (int index = 0; index < str_.size(); ++index) {
if(is_ASCII(str_[index]) == true) {
if(str_[index] == ' ') {
// A space stays in the string only when neither the word before it
// nor the word after it is reported as containing alphanumerics.
if(prev_word_is_ASCII(str_,index-1) == false) {
if(next_word_is_ASCII(str_,index+1) == false) {
continue;
}
}
}
// Character directly after a space (first character of a word):
// keep it in the string when the word starting at the following
// character is reported as having no alphanumerics.
else if(index-1 >= 0 && str_[index-1]== ' ') {
if(index+1 < str_.size() && str_[index+1] != ' ' &&
next_word_is_ASCII(str_,index+1) == false) {
continue;
}
}
// Character directly before a space (last character of a word):
// keep it in the string when the word ending at the preceding
// character is reported as having no alphanumerics.
else if(index+1 < str_.size() && str_[index+1] == ' ') {
if(index-1 >= 0 && str_[index-1] != ' ' &&
prev_word_is_ASCII(str_,index-1) == false) {
continue;
}
}
// Character inside a word whose preceding part has no alphanumerics:
// keep it when the following part has none either, or when it is
// the last character of the string.
else if(index-1 >= 0 && prev_word_is_ASCII(str_,index-1) == false) {
if(index+1 < str_.size() && next_word_is_ASCII(str_,index+1) == false) {
continue;
}
else if(index+1 >= str_.size()) {
continue;
}
}
// Character whose following part has no alphanumerics. The first inner
// test repeats the condition of the previous else-if, which was false
// to get here, so in practice only the "first character of the
// string" case below skips.
else if(index+1 < str_.size() && next_word_is_ASCII(str_,index+1) == false) {
if(index-1 >= 0 && prev_word_is_ASCII(str_,index-1) == false) {
continue;
}
else if(index-1 < 0) {
continue;
}
}
}
else {
// Characters that is_ASCII() rejects are never extracted.
continue;
}
// Not skipped above: remember the character and where it was found.
ascii_char_map.push_back(make_pair(str_[index],index));
}
// Erase from the highest recorded index down to the lowest so that the
// indices of the entries still to be erased remain valid.
for (int index = (int)ascii_char_map.size()-1; index >= 0; --index) {
str_.erase(ascii_char_map[index].second,1);
}
return ascii_char_map;
}
void Font::bidi_cvt(string &str_) {
/*
@ -612,94 +403,158 @@ void Font::bidi_cvt(string &str_) {
str_ = lines[lineIndex];
//printf("Line: %d [%s]\n",lineIndex,str_.c_str());
vector<pair<char, int> > ascii_char_map;
if(Font::fontSupportMixedRightToLeft == true) {
ascii_char_map = extract_mixed_LTR_RTL_map(str_);
vector<string> words;
if(str_.find(" ") != str_.npos) {
Tokenize(str_,words," ");
}
else {
words.push_back(str_);
}
//FriBidi C string holding the original text (that is probably with logical hebrew)
FriBidiChar *logical = NULL;
//FriBidi C string for the output text (that should be visual hebrew)
FriBidiChar *visual = NULL;
vector<string> wordList;
wordList.reserve(words.size());
vector<string> nonASCIIWordList;
nonASCIIWordList.reserve(words.size());
FriBidiStrIndex *ltov = NULL;
FriBidiStrIndex *vtol = NULL;
for(int wordIndex = 0; wordIndex < words.size(); ++wordIndex) {
//if(wordIndex > 0) {
// new_value += " ";
//}
str_ = words[wordIndex];
//C string holding the originall text (not nnecessarily as unicode)
char *ip = NULL;
//C string for the output text (not necessarily as unicode)
char *op = NULL;
//printf("Word: %d [%s]\n",wordIndex,str_.c_str());
//Size to allocate for the char arrays
int size = (int)str_.size() + 2;
//FriBidi C string holding the original text (that is probably with logical hebrew)
FriBidiChar *logical = NULL;
//FriBidi C string for the output text (that should be visual hebrew)
FriBidiChar *visual = NULL;
//Allocate memory:
//It's probably way too much, but at least it's not too little
logical = new FriBidiChar[size * 3];
visual = new FriBidiChar[size * 3];
ip = new char[size * 3];
op = new char[size * 3];
FriBidiStrIndex *ltov = NULL;
FriBidiStrIndex *vtol = NULL;
ltov = new FriBidiStrIndex[size * 3];
vtol = new FriBidiStrIndex[size * 3];
//C string holding the originall text (not nnecessarily as unicode)
char *ip = NULL;
//C string for the output text (not necessarily as unicode)
char *op = NULL;
FriBidiCharType base;
size_t len;
//Size to allocate for the char arrays
int size = (int)str_.size() + 2;
//A bool type to see if conversion succeded
fribidi_boolean log2vis;
//Allocate memory:
//It's probably way too much, but at least it's not too little
logical = new FriBidiChar[size * 3];
visual = new FriBidiChar[size * 3];
ip = new char[size * 3];
op = new char[size * 3];
//Holds information telling fribidi to use UTF-8
FriBidiCharSet char_set_num;
char_set_num = fribidi_parse_charset ("UTF-8");
ltov = new FriBidiStrIndex[size * 3];
vtol = new FriBidiStrIndex[size * 3];
//Copy the given to string into the ip string
strcpy(ip, str_.c_str());
FriBidiCharType base;
size_t len;
//Find length of originall text
len = strlen( ip );
//A bool type to see if conversion succeded
fribidi_boolean log2vis;
//Insert ip to logical as unicode (and find it's size now)
len = fribidi_charset_to_unicode (char_set_num, ip, (FriBidiStrIndex)len, logical);
//Holds information telling fribidi to use UTF-8
FriBidiCharSet char_set_num;
char_set_num = fribidi_parse_charset ("UTF-8");
base = FRIBIDI_TYPE_ON;
//Copy the given to string into the ip string
strcpy(ip, str_.c_str());
//printf("STRIPPED: [%s]\n",str_.c_str());
//Find length of originall text
len = strlen( ip );
//Convert logical text to visual
log2vis = fribidi_log2vis (logical, (FriBidiStrIndex)len, &base, visual, ltov, vtol, NULL);
//Insert ip to logical as unicode (and find it's size now)
len = fribidi_charset_to_unicode (char_set_num, ip, (FriBidiStrIndex)len, logical);
//If convertion was successful
if(log2vis)
{
//Remove bidi marks (that we don't need) from the output text
len = fribidi_remove_bidi_marks (visual, (FriBidiStrIndex)len, ltov, vtol, NULL);
base = FRIBIDI_TYPE_ON;
//Convert unicode string back to the encoding the input string was in
fribidi_unicode_to_charset ( char_set_num, visual, (FriBidiStrIndex)len ,op);
//printf("STRIPPED: [%s]\n",str_.c_str());
//Insert the output string into the result
str_ = op;
//Convert logical text to visual
log2vis = fribidi_log2vis (logical, (FriBidiStrIndex)len, &base, visual, ltov, vtol, NULL);
//printf("LOG2VIS: [%s]\n",str_.c_str());
bool is_converted = false;
//If convertion was successful
if(log2vis)
{
//Remove bidi marks (that we don't need) from the output text
len = fribidi_remove_bidi_marks (visual, (FriBidiStrIndex)len, ltov, vtol, NULL);
if(ascii_char_map.empty() == false) {
for (int index = 0; index < (int)ascii_char_map.size(); ++index) {
str_.insert(ascii_char_map[index].second,1,ascii_char_map[index].first);
//Convert unicode string back to the encoding the input string was in
fribidi_unicode_to_charset ( char_set_num, visual, (FriBidiStrIndex)len ,op);
if(string(op) != str_) {
is_converted = true;
}
//Insert the output string into the result
str_ = op;
//printf("LOG2VIS: [%s]\n",str_.c_str());
// if(ascii_char_map.empty() == false) {
// for (int index = 0; index < (int)ascii_char_map.size(); ++index) {
// str_.insert(ascii_char_map[index].second,1,ascii_char_map[index].first);
// }
// }
//printf("AFTER: [%s]\n",str_.c_str());
}
//printf("AFTER: [%s]\n",str_.c_str());
//Free allocated memory
delete [] ltov;
delete [] vtol;
delete [] visual;
delete [] logical;
delete [] ip;
delete [] op;
if(Font::fontSupportMixedRightToLeft == true) {
if(is_converted == true) {
nonASCIIWordList.push_back(str_);
if(wordIndex+1 == words.size()) {
if(nonASCIIWordList.size() > 1) {
std::reverse(nonASCIIWordList.begin(),nonASCIIWordList.end());
copy(nonASCIIWordList.begin(), nonASCIIWordList.end(), std::inserter(wordList, wordList.begin()));
}
else {
if(wordList.empty() == false) {
copy(nonASCIIWordList.begin(), nonASCIIWordList.end(), std::inserter(wordList, wordList.begin()+wordList.size()));
}
else {
wordList = nonASCIIWordList;
}
}
}
}
else {
if(nonASCIIWordList.size() > 1) {
std::reverse(nonASCIIWordList.begin(),nonASCIIWordList.end());
}
copy(nonASCIIWordList.begin(), nonASCIIWordList.end(), std::inserter(wordList, wordList.begin()));
nonASCIIWordList.clear();
wordList.push_back(str_);
}
}
else {
wordList.push_back(str_);
}
}
//Free allocated memory
delete [] ltov;
delete [] vtol;
delete [] visual;
delete [] logical;
delete [] ip;
delete [] op;
new_value += str_;
//printf("Building New Line: %d [%s]\n",lineIndex,new_value.c_str());
for(int wordIndex = 0; wordIndex < wordList.size(); ++wordIndex) {
//printf("wordIndex: %d [%s]\n",wordIndex,wordList[wordIndex].c_str());
if(wordIndex > 0) {
new_value += " ";
}
new_value += wordList[wordIndex];
}
//printf("New Line: %d [%s]\n",lineIndex,new_value.c_str());
}
str_ = new_value;
//printf("NEW: [%s]\n",str_.c_str());

View File

@ -39,7 +39,6 @@ class FontTest : public CppUnit::TestFixture {
public:
void test_bidi_newline_handling() {
string text = "\n\nHP: 9000/9000\nArmor: 0 (Stone)\nSight: 15\nProduce Slave";
string expected = text;
#ifdef HAVE_FRIBIDI
@ -47,7 +46,6 @@ public:
//printf("Expected: [%s] result[%s]\n",expected.c_str(),text.c_str());
CPPUNIT_ASSERT_EQUAL( expected,text );
#endif
}
void test_LTR_RTL_Mixed() {
Font::fontSupportMixedRightToLeft = true;
@ -56,14 +54,11 @@ public:
string expected = IntroText1;
CPPUNIT_ASSERT_EQUAL( 45,(int)IntroText1.size() );
std::vector<std::pair<char, int> > result = Font::extract_mixed_LTR_RTL_map(IntroText1);
CPPUNIT_ASSERT_EQUAL( 30, (int)result.size() );
#ifdef HAVE_FRIBIDI
IntroText1 = expected;
Font::bidi_cvt(IntroText1);
CPPUNIT_ASSERT_EQUAL( 45,(int)IntroText1.size() );
//CPPUNIT_ASSERT_EQUAL( 45,(int)IntroText1.size() );
CPPUNIT_ASSERT_EQUAL( string("לע ססובמ"),IntroText1.substr(0, 15) );
CPPUNIT_ASSERT_EQUAL( string("\"award-winning classic Glest\""),IntroText1.substr(16) );
#endif
@ -75,9 +70,6 @@ public:
string expected2 = LuaDisableSecuritySandbox;
CPPUNIT_ASSERT_EQUAL( 44,(int)LuaDisableSecuritySandbox.size() );
result = Font::extract_mixed_LTR_RTL_map(LuaDisableSecuritySandbox);
CPPUNIT_ASSERT_EQUAL( 4, (int)result.size() );
//printf("Result: [%s]\n",LuaDisableSecuritySandbox.c_str());
#ifdef HAVE_FRIBIDI
@ -129,17 +121,19 @@ public:
#endif
// This test still failing: xx IP xx
string LanIP = "כתובות IP מקומי:192.168.0.150 ( 61357 / 61357 )";
string LanIP = "כתובות IP מקומי:192.168.0.150 ( 61357 / 61357 )";
string expected5 = LanIP;
CPPUNIT_ASSERT_EQUAL( 59,(int)LanIP.size() );
//printf("LanIP [%s]\n",LanIP.c_str());
CPPUNIT_ASSERT_EQUAL( 58,(int)LanIP.size() );
#ifdef HAVE_FRIBIDI
// Font::bidi_cvt(LanIP);
//
// CPPUNIT_ASSERT_EQUAL( 59,(int)LanIP.size() );
// string expected_result5 = "abc";
//
// CPPUNIT_ASSERT_EQUAL( expected_result5,LanIP );
Font::bidi_cvt(LanIP);
CPPUNIT_ASSERT_EQUAL( 58,(int)LanIP.size() );
string expected_result5 = "192.168.0.150:ימוקמ תובותכ IP ( 61357 / 61357 )";
CPPUNIT_ASSERT_EQUAL( expected_result5,LanIP );
#endif
}
};