chatngay_chatboard_dev/src/lib/vietnamese.ts

//Case-insensitive, accent-aware front end to isStringFound.
//sub_str is passed through unvietnamese() and lowercased.
//main_str is expanded into its unvietnamese() form plus its chuyenKhongdau()
//form, de-duplicated word by word and lowercased, so a query can hit either
//rendering of the text.
//Returns whatever isStringFound reports for the two prepared strings.
export function isFound(sub_str: string, main_str: string) {
    const needle = unvietnamese(sub_str).toLowerCase();

    const encoded_form  = unvietnamese(main_str);
    const stripped_form = chuyenKhongdau(main_str);
    const haystack = getUniqueWords(encoded_form + " " + stripped_form).toLowerCase();

    return isStringFound(needle, haystack);
}


//Tells whether every space-separated word of sub_str occurs somewhere inside main_str.
//Matching is a plain, case-sensitive substring test.
//A blank sub_str (empty or only whitespace) never matches.
function isStringFound(sub_str: string, main_str: string) : boolean{
    const query = sub_str.trim();

    //nothing to look for: report "not found" rather than a vacuous match
    if (query.length === 0) {
        return false;
    }

    //consecutive spaces produce empty words; those are ignored instead of failing the search
    return query
        .split(" ")
        .map((word) => word.trim())
        .every((word) => word.length === 0 || main_str.indexOf(word) !== -1);
}


//Rewrites str using the substitution table from getVietnameseEnglishEquivalent(),
//so each accented Vietnamese letter becomes its plain-letter code (e.g. "đ" -> "dd").
function unvietnamese(str: string){
    return replaceAll(str, getVietnameseEnglishEquivalent());
}


//credit: stackoverflow (adapted)
//Replaces, in a single pass, every occurrence in str of a key of mapObj with that key's value.
//  str    - text to transform (coerced to a string)
//  mapObj - lookup of "text to find" -> "replacement"
//Returns the transformed string; str is returned unchanged when mapObj has no usable keys.
//Keys are matched literally and case-sensitively. Matching case-insensitively while
//looking replacements up by exact key would insert the text "undefined" for any
//character whose other-case form is a key but which has no entry of its own.
function replaceAll(str: string, mapObj: { [key: string]: string }): string {
    const text = str + '';

    //an empty key would match at every position, so it is ignored
    const keys = Object.keys(mapObj).filter(function (key) {
        return key.length > 0;
    });
    if (keys.length === 0) return text;

    //escape regex metacharacters so keys such as "." or "(" are taken literally
    const pattern = keys
        .map(function (key) {
            return key.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
        })
        .join("|");
    const re = new RegExp(pattern, "g");

    return text.replace(re, function (matched) {
        const replacement = mapObj[matched];
        //defensive: never let a missing entry turn into the string "undefined"
        return replacement === undefined ? matched : replacement;
    });
}


//28-10-2015
//Lookup table from each accented Vietnamese letter to a plain-letter code.
//Original note: used in some functions around the system like
//Search->sanitizeVietnamese($txt), ListView::buildSqlEquation.
//
//How the codes are built (Telex-style):
//  letter shape : dd = đ, aa = â, aw = ă, ee = ê, oo = ô, ow = ơ, uw = ư
//  tone suffix  : s = acute, f = grave, r = hook above, x = tilde, j = dot below
//Uppercase letters map to the same code in uppercase.
function getVietnameseEnglishEquivalent(){
    const equivalents = {
        // d
        "đ": "dd",
        "Đ": "DD",

        // o, ô, ơ
        "ó": "os", "ỏ": "or", "ò": "of", "ọ": "oj", "õ": "ox",
        "ô": "oo", "ỗ": "oox", "ổ": "oor", "ồ": "oof", "ố": "oos", "ộ": "ooj",
        "ơ": "ow", "ỡ": "owx", "ớ": "ows", "ờ": "owf", "ở": "owr", "ợ": "owj",

        // O, Ô, Ơ
        "Ó": "OS", "Ỏ": "OR", "Ò": "OF", "Ọ": "OJ", "Õ": "OX",
        "Ô": "OO", "Ỗ": "OOX", "Ổ": "OOR", "Ồ": "OOF", "Ố": "OOS", "Ộ": "OOJ",
        "Ơ": "OW", "Ỡ": "OWX", "Ớ": "OWS", "Ờ": "OWF", "Ở": "OWR", "Ợ": "OWJ",

        // i
        "ì": "if", "í": "is", "ỉ": "ir", "ĩ": "ix", "ị": "ij",

        // I
        "Ì": "IF", "Í": "IS", "Ỉ": "IR", "Ĩ": "IX", "Ị": "IJ",

        // ê, e
        "ê": "ee", "ệ": "eej", "ế": "ees", "ể": "eer", "ễ": "eex", "ề": "eef",
        "é": "es", "ẹ": "ej", "ẽ": "ex", "è": "ef", "ẻ": "er",

        // Ê, E
        "Ê": "EE", "Ệ": "EEJ", "Ế": "EES", "Ể": "EER", "Ễ": "EEX", "Ề": "EEF",
        "É": "ES", "Ẹ": "EJ", "Ẽ": "EX", "È": "EF", "Ẻ": "ER",

        // a, â, ă
        "ả": "ar", "á": "as", "ạ": "aj", "ã": "ax", "à": "af",
        "â": "aa", "ẩ": "aar", "ấ": "aas", "ầ": "aaf", "ậ": "aaj", "ẫ": "aax",
        "ă": "aw", "ẳ": "awr", "ắ": "aws", "ằ": "awf", "ặ": "awj", "ẵ": "awx",

        // A, Â, Ă
        "Ả": "AR", "Á": "AS", "Ạ": "AJ", "Ã": "AX", "À": "AF",
        "Â": "AA", "Ẩ": "AAR", "Ấ": "AAS", "Ầ": "AAF", "Ậ": "AAJ", "Ẫ": "AAX",
        "Ă": "AW", "Ẳ": "AWR", "Ắ": "AWS", "Ằ": "AWF", "Ặ": "AWJ", "Ẵ": "AWX",

        // u, ư
        "ũ": "ux", "ụ": "uj", "ú": "us", "ủ": "ur", "ù": "uf",
        "ư": "uw", "ữ": "uwx", "ự": "uwj", "ứ": "uws", "ử": "uwr", "ừ": "uwf",

        // U, Ư
        "Ũ": "UX", "Ụ": "UJ", "Ú": "US", "Ủ": "UR", "Ù": "UF",
        "Ư": "UW", "Ữ": "UWX", "Ự": "UWJ", "Ứ": "UWS", "Ử": "UWR", "Ừ": "UWF",

        // y
        "ỹ": "yx", "ỵ": "yj", "ý": "ys", "ỷ": "yr", "ỳ": "yf",

        // Y
        "Ỹ": "YX", "Ỵ": "YJ", "Ý": "YS", "Ỷ": "YR", "Ỳ": "YF",
    };

    return equivalents;
}


//Strips Vietnamese diacritics ("chuyen khong dau" = convert to unaccented):
//every accented letter in txt is replaced by its plain ASCII letter of the same
//case (e.g. "ữ" -> "u", "Đ" -> "D"). Other characters are passed through to
//replaceAll unchanged.
//  txt - text to convert
//Returns the converted text.
function chuyenKhongdau(txt: string){
    //each row pairs a plain letter with every accented form that collapses to it;
    //keeping both in one row makes a missing or misplaced variant easy to spot
    const groups: Array<[string, string[]]> = [
        ["d", ["đ"]],
        ["D", ["Đ"]],
        ["o", ["ó","ỏ","ò","ọ","õ","ô","ỗ","ổ","ồ","ố","ộ","ơ","ỡ","ớ","ờ","ở","ợ"]],
        ["O", ["Ó","Ỏ","Ò","Ọ","Õ","Ô","Ỗ","Ổ","Ồ","Ố","Ộ","Ơ","Ỡ","Ớ","Ờ","Ở","Ợ"]],
        ["i", ["ì","í","ỉ","ĩ","ị"]],
        ["I", ["Ì","Í","Ỉ","Ĩ","Ị"]],
        ["e", ["ê","ệ","ế","ể","ễ","ề","é","ẹ","ẽ","è","ẻ"]],
        ["E", ["Ê","Ệ","Ế","Ể","Ễ","Ề","É","Ẹ","Ẽ","È","Ẻ"]],
        ["a", ["ả","á","ạ","ã","à","â","ẩ","ấ","ầ","ậ","ẫ","ă","ẳ","ắ","ằ","ặ","ẵ"]],
        ["A", ["Ả","Á","Ạ","Ã","À","Â","Ẩ","Ấ","Ầ","Ậ","Ẫ","Ă","Ẳ","Ắ","Ằ","Ặ","Ẵ"]],
        ["u", ["ũ","ụ","ú","ủ","ù","ư","ữ","ự","ứ","ử","ừ"]],
        //"Ữ" was previously missing here (a second "Ũ" sat in its place), so it had no mapping
        ["U", ["Ũ","Ụ","Ú","Ủ","Ù","Ư","Ữ","Ự","Ứ","Ử","Ừ"]],
        ["y", ["ỹ","ỵ","ý","ỷ","ỳ"]],
        ["Y", ["Ỹ","Ỵ","Ý","Ỷ","Ỳ"]]
    ];

    //flatten the rows into an accented-letter -> plain-letter lookup
    const mappings: { [accented: string]: string } = {};
    for (const [plain, accented_forms] of groups) {
        for (const accented of accented_forms) {
            mappings[accented] = plain;
        }
    }

    return replaceAll(txt, mappings);
}


//Removes repeated words from str, keeping the first occurrence of each in its
//original position. Words are the pieces between single spaces after trimming;
//comparison is exact (case-sensitive). The result is re-joined with single
//spaces and trimmed.
function getUniqueWords(str: string) {
    //prototype-free dictionary, so words like "constructor" are treated as ordinary keys
    const seen: { [word: string]: boolean } = Object.create(null);
    const kept: string[] = [];

    for (const word of str.trim().split(" ")) {
        if (seen[word] !== true) {
            seen[word] = true;
            kept.push(word);
        }
    }

    return kept.join(" ").trim();
}