Thứ Hai, 4 tháng 1, 2010

TCVN2UNI _New

Đây là code mà mình dùng trong một project PHP như sau:
Dữ liệu lấy ra từ DB MS SQL có font TCVN3, cần chuyển sang font Unicode để cho vào DB MySQL, rồi hiển thị ra Web.
!
<?php
/*
    thuc.lehuy@gmail.com
*/
/**
 * Reads customer rows from an MS SQL database and converts their
 * TCVN3-encoded text columns to the Unicode strings listed in $unichars.
 *
 * The code relies on the legacy mssql_* extension and is therefore kept
 * compatible with PHP 5 syntax (no type declarations, array() literals).
 */
class takenewcustomer
{
    // Byte codes of the Vietnamese letters in the TCVN3 encoding, in the same
    // order as $unichars.
    // NOTE(review): entries above 255 (the accented capitals) can never match,
    // because TCVN3ToUnicode() looks up the ord() of a single byte (0-255).
    // They look like two-byte combinations (capital base letter followed by the
    // lowercase accented code) - confirm the intended handling before relying
    // on accented capitals being converted.
    var $tcvnpos = array(184,181,182,183,185,168,190,187,188,189,198,169,202,199,200,201,203,208,204,206,207,209,170,213,210,211,212,214,221,215,216,220,222,227,223,225,226,228,171,232,229,230,231,233,172,237,234,235,236,238,243,239,241,242,244,173,248,245,246,247,249,253,250,251,252,254,174,16824,16821,16822,16823,16825,161,41406,41403,41404,41405,41414,162,41674,41671,41672,41673,41675,17872,17868,17870,17871,17873,163,41941,41938,41939,41940,41942,18909,18903,18904,18908,18910,20451,20447,20449,20450,20452,164,42216,42213,42214,42215,42217,165,42477,42474,42475,42476,42478,22003,21999,22001,22002,22004,166,42744,42741,42742,42743,42745,23037,23034,23035,23036,23038,167,);
    // Unicode characters corresponding one-to-one with the entries of $tcvnpos.
    var $unichars = array("á","à","ả","ã","ạ","ă","ắ","ằ","ẳ","ẵ","ặ","â","ấ","ầ","ẩ","ẫ","ậ","é","è","ẻ","ẽ","ẹ","ê","ế","ề","ể","ễ","ệ","í","ì","ỉ","ĩ","ị","ó","ò","ỏ","õ","ọ","ô","ố","ồ","ổ","ỗ","ộ","ơ","ớ","ờ","ở","ỡ","ợ","ú","ù","ủ","ũ","ụ","ư","ứ","ừ","ử","ữ","ự","ý","ỳ","ỷ","ỹ","ỵ","đ","Á","À","Ả","Ã","Ạ","Ă","Ắ","Ằ","Ẳ","Ẵ","Ặ","Â","Ấ","Ầ","Ẩ","Ẫ","Ậ","É","È","Ẻ","Ẽ","Ẹ","Ê","Ế","Ề","Ể","Ễ","Ệ","Í","Ì","Ỉ","Ĩ","Ị","Ó","Ò","Ỏ","Õ","Ọ","Ô","Ố","Ồ","Ổ","Ỗ","Ộ","Ơ","Ớ","Ờ","Ở","Ỡ","Ợ","Ú","Ù","Ủ","Ũ","Ụ","Ư","Ứ","Ừ","Ử","Ữ","Ự","Ý","Ỳ","Ỷ","Ỹ","Ỵ","Đ");
    // Lookup table built by mapping(): TCVN3 code => Unicode character.
    var $mapping = array();

    /**
     * Builds the conversion table when the object is created.
     *
     * Declaring an explicit constructor also stops PHP 5/7 from treating the
     * same-named method takenewcustomer() as a PHP 4-style constructor, which
     * would otherwise run the database query on every instantiation.
     */
    function __construct()
    {
        $this->mapping();
    }

    /**
     * Opens the MS SQL connection and selects the working database.
     *
     * @return resource|int The connection link, or 0 when connecting or
     *                      selecting the database fails.
     */
    function connect()
    {
        $host = "localhost";
        $database = "database";
        $username = "user";
        $pass = "passwww";
        $con = mssql_connect($host, $username, $pass);
        if (!$con) {
            return 0;
        }
        // A link without the expected database selected is reported as a
        // failure instead of being handed to the caller.
        if (!mssql_select_db($database, $con)) {
            return 0;
        }
        return $con;
    }

    /**
     * Fetches the customer rows and converts their "name" and "address"
     * columns from TCVN3 to Unicode.
     *
     * @param mixed $lastdate Currently unused; kept for caller compatibility.
     *
     * @return array List of associative rows with the keys "username", "name"
     *               and "address"; empty when the connection or the query fails.
     */
    function takenewcustomer($lastdate = null)
    {
        $rows = array();

        $link = $this->connect();
        if (!$link) {
            return $rows;
        }

        // NOTE(review): "user" is a reserved word in T-SQL and may need to be
        // written as [user] - verify against the real table name.
        $query = "select ma_kh as username,ten_kh as name,dia_chi as address from user";
        $result = mssql_query($query, $link);
        if (!is_resource($result)) {
            return $rows;
        }

        while ($row = mssql_fetch_assoc($result)) {
            $row["name"] = $this->TCVN3ToUnicode($row["name"]);
            $row["address"] = $this->TCVN3ToUnicode($row["address"]);
            $rows[] = $row;
        }
        mssql_free_result($result);

        return $rows;
    }

    /**
     * Builds $mapping by pairing each $tcvnpos code with the $unichars entry
     * at the same index. Calling it again simply rebuilds the same table.
     */
    function mapping()
    {
        $total = sizeof($this->unichars);
        for ($j = 0; $j < $total; $j++) {
            // Skip unpaired entries should the two tables ever differ in length.
            if (isset($this->tcvnpos[$j])) {
                $this->mapping[$this->tcvnpos[$j]] = $this->unichars[$j];
            }
        }
    }

    /**
     * Converts a TCVN3-encoded byte string to Unicode text.
     *
     * Bytes up to 127 (plain ASCII) and high bytes that have no entry in the
     * conversion table are copied through unchanged.
     *
     * @param string|null $value TCVN3 bytes; null is treated as an empty string.
     *
     * @return string The converted text.
     */
    function TCVN3ToUnicode($value = null)
    {
        // Build the table on demand so the method also works when the
        // constructor was bypassed (e.g. overridden in a subclass).
        if (!$this->mapping) {
            $this->mapping();
        }

        $value = (string) $value;
        $length = strlen($value);
        $str = '';
        for ($i = 0; $i < $length; $i++) {
            $ch = $value[$i];
            $code = ord($ch);
            // isset() avoids an undefined-index notice for unmapped high bytes.
            if ($code > 127 && isset($this->mapping[$code])) {
                $str .= $this->mapping[$code];
            } else {
                $str .= $ch;
            }
        }
        return $str;
    }
}
?>

Đây là nơi mình tham khảo, đối chiếu bảng mã của các loại font:
http://www.vovisoft.com...ilities/VietEncoding.htm

Không có nhận xét nào: