/**
 *
 * Description   : A simple snippet to demonstrate how to extract Arabic Words in Vala programming language
 * Vala version  : 0.7.8
 * Developed by  : Emad Al-Bloushi
 * Date          : Mon 23 Nov, 2009
 * Compile with  : valac --pkg gee-1.0 word.vala
 *
 **/

using Gee;

/**
 * Stores the word accumulated in //word// into //words// and empties the buffer.
 *
 * Nothing is stored when the buffer is empty, so runs of adjacent separators
 * never produce an empty "word".
 *
 * @param word buffer holding the characters collected so far; reset on flush
 * @param words collection receiving a copy of the finished word
 */
static void flush_word (StringBuilder word, Gee.Collection<string> words) {
    if (word.len == 0) {
        return;
    }

    words.add (word.str);
    word.truncate (0);
}

/**
 * Reads "arabic-file.txt", splits its text into words and writes the distinct
 * words to standard output and to "word_list.txt" (one word per line).
 *
 * Characters are classified by their Unicode general category: letters,
 * numbers and non-spacing marks extend the current word, while separators,
 * punctuation and symbols end it. Any category not listed in the switch is
 * skipped without ending the word.
 *
 * @param args command line arguments (unused)
 * @return 0 on success, 1 when the input cannot be read, is not valid UTF-8,
 *         or the output cannot be written
 */
static int main (string[] args) {
    string filename = "arabic-file.txt";
    string content;
    var word = new StringBuilder ();
    var word_list = new HashSet<string> ();

    try {
        FileUtils.get_contents (filename, out content);
    } catch (FileError e) {
        stderr.printf ("%s\n", e.message);
        return 1;
    }

    // get_contents () does not check the encoding, and walking a string with
    // get_char ()/next_char () is only defined for valid UTF-8.
    if (!content.validate ()) {
        stderr.printf ("%s: file content is not valid UTF-8\n", filename);
        return 1;
    }

    for (unowned string s = content; s.get_char () != 0; s = s.next_char ()) {
        unichar c = s.get_char ();

        switch (c.type ()) {
            // case UnicodeType.UPPERCASE_LETTER:
            // case UnicodeType.LOWERCASE_LETTER:
            case UnicodeType.OTHER_LETTER:
            case UnicodeType.DECIMAL_NUMBER:
            case UnicodeType.OTHER_NUMBER:
                word.append_unichar (c);
                // These two letters are treated as word-final: the word is
                // closed right after them even without a following separator.
                if (c == 'ۃ' || c == 'ة') {
                    flush_word (word, word_list);
                }
                break;

            case UnicodeType.MODIFIER_LETTER:
                // Only ARABIC TATWEEL (U+0640) is kept; other modifier
                // letters are dropped without ending the word.
                if (c == 'ـ') {
                    word.append_unichar (c);
                }
                break;

            case UnicodeType.NON_SPACING_MARK:
                word.append_unichar (c);
                break;

            case UnicodeType.FORMAT:
            case UnicodeType.CONTROL:
            case UnicodeType.SPACE_SEPARATOR:
            case UnicodeType.LINE_SEPARATOR:
            case UnicodeType.PARAGRAPH_SEPARATOR:
            case UnicodeType.CONNECT_PUNCTUATION:
            case UnicodeType.OPEN_PUNCTUATION:
            case UnicodeType.CLOSE_PUNCTUATION:
            case UnicodeType.INITIAL_PUNCTUATION:
            case UnicodeType.DASH_PUNCTUATION:
            case UnicodeType.FINAL_PUNCTUATION:
            case UnicodeType.OTHER_PUNCTUATION:
            case UnicodeType.MATH_SYMBOL:
            case UnicodeType.CURRENCY_SYMBOL:
            case UnicodeType.OTHER_SYMBOL:
                // Word boundary: store whatever has been collected so far.
                flush_word (word, word_list);
                break;

            default:
                // Remaining categories neither extend nor end a word.
                break;
        }
    }

    // The input may end without a trailing separator; keep the last word.
    flush_word (word, word_list);

    var output = new StringBuilder ();
    foreach (string s in word_list) {
        output.append (s);
        output.append_c ('\n');
        stdout.printf ("Word : %s\n", s);
    }

    try {
        FileUtils.set_contents ("word_list.txt", output.str);
    } catch (FileError e) {
        stderr.printf ("%s\n", e.message);
        return 1;
    }

    return 0;
}