Open topicConvert Latin1 to UTF8



Admin (27)
30 November 2009 in 11:38, Edited: 02 December 2009 in 16:58 
Here is the full script
here take a look and learn how it was done.

<?
function DBiconvALLtoUTF($url) {
    global $CD_GLOBALS;
    dbExecute('TRUNCATE TABLE `simple`', 0);
    dbExecute('TRUNCATE TABLE `syslog`', 0);
    dbExecute('TRUNCATE TABLE `templates`', 0);
    dbExecute('TRUNCATE TABLE `cache`', 0);
    dbExecute('TRUNCATE TABLE `table_counts`', 0);
   
    $ln = array(
        'authors|authorid'      => array('login','firstname','lastname','company','address','district','city','state','comments','mail_signature'),
        'authors_pic|id'            => array('name','descr'),
        'authors_pic_cats|catid'    => array('catname','descr'),
        'authors_profile|authorid'  => showColumns('authors_profile'),
        'banners|id'                => array('[LANG]'),
        'blocker|id'                => array('reason'),
        'boxes|name'                => array('descr'),
        'cities|id'     => array('name_[LANG]'),
        'contents|id'            => array('title','teaser','option1','option3','product'),
        'contents_pic|id'         => array('name'),
        'contents_pic_cats|catid'   => array('catname_[LANG]'),
        'countries|code'            => array('name_[LANG]'),
        'dev|id'                    => array('title','original'),
        'dev_cats|catid'            => array('catname_[LANG]'),
        'districts|id'        => array('name_[LANG]'),
        'formfields|id'    => array('text','settings'),
        'formfields_options|id'  => array('value','label_[LANG]'),
        'formfields_originals|setid|location|setfield'=>array('data'),
        'formfields_settings|location'=>array('form_sets','display_sets'),
        'formfields_uploads|id'  => array('title','descr'),
        'forum_cats|catid'      => array('catname_[LANG]','descr_[LANG]'),
        'forum_clubs|id'            => array('title','descr','original'),
        'forum_posts|id'            => array('original','message'),
        'forum_threads|id'      => array('title','descr','original','message'),
        'im|id'      => array('message'),
        'im_cnt|id'     => array('folder','old_folder'),
        'lang|name'     => array('text_[LANG]','location','name'),
        'menu|id'               => array('descr_[LANG]','title_[LANG]','title2_[LANG]','keywords_[LANG]'),
        'phrases|id'                => array('[LANG]','category','name'),
        'product_assigned|id'      => array('title_[LANG]','data_[LANG]'),
        'product_groups|id'   => array('title_[LANG]'),
        'product_options|id'        => array('title_[LANG]','data_[LANG]'),
        'quiz_answers|id'         => array('answer'),
        'quiz_answers2|id'      => array('answer'),
        'quiz_questions|id'   => array('question'),
        'quiz_sets|id'        => array('title','descr','category'),
        'search|keyword'            => array('keyword'),
        'states|id'     => array('name_[LANG]'),
        'votes|id'          => array('comment')
    );
   
    $diff = array('mainid','id','setid','rate','votes','lang','publishdate','expiredate','updatedate','ip','views','viewtime','active','catid','cat','userid');

    $only = array('varchar','text','char','enum','set');
    foreach (getModules() as $m) {
        $cols = describeTable('f_'.$m);
        $al = array();
        foreach ($cols as $col => $type) {
            if (!in_array($col,$diff) && in_array($type,$only)) {
                $al[] = $col;
            }
        }
        if ($al) $ln['f_'.$m.'|id'] = $al;
    }
   
    if (!isset($_SESSION['ICONVtable']) || !is_array($_SESSION['ICONVtable'])) $_SESSION['ICONVtable'] = array();

    $langs = $CD_GLOBALS->g('langs');
   
    $types = array(
        'varchar'   => 'VARBINARY',
        'char'    => 'BINARY',
        'longtext'  => 'LONGBLOB',
        'mediumtext'=> 'MEDIUMBLOB',
        'text'    => 'BLOB',
        'tinytext'  => 'TINYBLOB'
    );
   
    $P = $CD_GLOBALS->g('P'); // prepare
    $R = $CD_GLOBALS->g('R'); // aftar collation change do it all back
   
    if ($P) {
        if (!isset($_SESSION['W']) || !is_array($_SESSION['W'])) $_SESSION['W'] = array();
    }
    elseif ($R) {
        $read = unserialize(file_get_contents('./varchars.txt'));
        dbExecute('SET NAMES \'utf8\';',true);
        dbExecute('SET CHARACTER SET \'utf8\';',true);
        dbExecute('ALTER DATABASE '.DB_NAME.' CHARACTER SET utf8;');
    }

    foreach ($ln as $table_id => $cols) {
        $ex = explode('|',$table_id);
        $table = $ex[0];
        if (in_array($table,$_SESSION['ICONVtable'])) continue;
        $key = $ex[1];
        $key2 = $ex[2];
        $key3 = $ex[3];
        $descr = describeTable($table, true);
       
        $keys = array();
        $q = dbRetrieve('SHOW KEYS FROM '.$table,true,0,0);
        while($k = dbFetch($q)) {
            array_push($keys, $k['Column_name']);
        }
       
        if ($R) {
            $is = array_key_exists($table,$read);
            DBcollation(array($table),'utf8');
        }
       
        $new_cols = array();
        foreach ($cols as $col) {
            if (strstr($col,'[LANG]')) {
                foreach ($langs as $l) {
                    $c = str_replace('[LANG]',$l,$col);
                    $new_cols[] = $c;
                    if (!in_array($c, $keys)) {
                        if ($P && substr($descr[$c],0,7)=='varchar') {
                            $sql = 'ALTER TABLE `'.$table.'` CHANGE `'.$c.'` `'.$c.'` TEXT';
                            dbExecute($sql, true);
                            $_SESSION['W'][$table][$c] = $descr[$c];
                        }
                    }
                }
            } else {
                $new_cols[] = $col
                if (!in_array($col, $keys)) {
                    if ($P && substr($descr[$col],0,7)=='varchar') {
                        $sql = 'ALTER TABLE `'.$table.'` CHANGE `'.$col.'` `'.$col.'` TEXT';
                        dbExecute($sql, true);
                        $_SESSION['W'][$table][$col] = $descr[$col];
                    }
                }
            }
        }

        $sql = 'SELECT `'.$key.'`'.($key2?',`'.$key2.'`':'').($key3?',`'.$key3.'`':'').',`'.join('`, `',$new_cols).'` FROM `'.$table.'` ORDER BY `'.$key.'`';
        $qry = dbRetrieve($sql,true,0,0);      
       
        while ($rs = dbFetch($qry)) {
            $up = array();
            foreach ($rs as $k => $v) {
                if ($k==$key || $k==$key2) continue;
                $up[] = '`'.$k.'`=\''.escape(enc_me($v)).'\'';
            }
            $s = 'UPDATE `'.$table.'` SET '.join(', ',$up).' WHERE';
            if (is_numeric($rs[$key])) {
                $s .= ' `'.$key.'`='.$rs[$key];
            } else {
                $s .= ' `'.$key.'`=\''.escape($rs[$key]).'\'';
            }
            if ($key2) $s .= ' AND `'.$key2.'`=\''.escape($rs[$key2]).'\'';
            if ($key3) $s .= ' AND `'.$key3.'`=\''.escape($rs[$key3]).'\'';
            dbExecute($s, false);
        }
        dbFreeResult($qry);
       
        if ($R) {
            foreach ($cols as $col) {
                if (strstr($col,'[LANG]')) {
                    foreach ($langs as $l) {
                        $c = str_replace('[LANG]',$l,$col);
                        if (!in_array($c, $keys)) {
                            if ($descr[$c]=='text' && $is && $read[$table][$c]) {
                                $sql = 'ALTER TABLE `'.$table.'` CHANGE `'.$c.'` `'.$c.'` '.$read[$table][$c];
                                dbExecute($sql, true);
                            }
                        }
                    }
                } else {
                    if (!in_array($col, $keys)) {
                        if ($descr[$col]=='text' && $is && $read[$table][$col]) {
                            $sql = 'ALTER TABLE `'.$table.'` CHANGE `'.$col.'` `'.$col.'` '.$read[$table][$col];
                            dbExecute($sql, true);
                        }
                    }
                }
            }
        }
               
        $_SESSION['ICONVtable'][] = $table;
        if ($CD_GLOBALS->g('P')) {
            $m = 'Preparing charset process (STEP 1/2)';   
        } elseif ($CD_GLOBALS->g('R')) {
            $m = 'Applying charset (STEP 2/2)'
        } else {
            $m = 'Changing charset';
        }
        return Redirect('?'.getURL(array('nohtml')).'&nohtml',''.$m.'. Please wait.. <br>Current table: `'.$table.'`',50, 0, '</td></tr></table></td></tr></table>');
    }

    if ($P) {
        $fp = fopen('./varchars.txt','w+');
        fwrite($fp, serialize($_SESSION['W']));
        fclose($fp);
    }
    if ($CD_GLOBALS->g('P') && $url=='doICONVdb') {
        unset($_SESSION['ICONVtable'], $_SESSION['W']);
        return Redirect('?'.getURL(array($url)).'&doICONVdb2','Next STEP!',1000, 0, '</td></tr></table></td></tr></table>');
    } else {
        unset($_SESSION['ICONVtable'], $_SESSION['W']);
        return Redirect('?'.getURL(array($url)).'&iconv=FINISHED&reset','Finished!',1000, 0, '</td></tr></table></td></tr></table>');
    }
}

function charset_decode_utf_8($s) {
    if (! ereg("[\200-\237]", $s) and ! ereg("[\241-\377]", $s)) return $s;
    $s = preg_replace("/([\340-\357])([\200-\277])([\200-\277])/e","'&#'.((ord('\\1')-224)*4096 + (ord('\\2')-128)*64 + (ord('\\3')-128)).';'",$s);
    return preg_replace("/([\300-\337])([\200-\277])/e","'&#'.((ord('\\1')-192)*64+(ord('\\2')-128)).';'", $s);
}

function DBprintCharsets() {
    $q = dbRetrieve('SHOW VARIABLES LIKE "%character_set%"',true,0,0);
    while ($r = dbFetch($q)) {
        p ($r);
    }
}

function enc_me($v) {
    global $CD_GLOBALS;
    if ($CD_GLOBALS->g('R')) {
        return html_entity_decode($v,ENT_COMPAT,'UTF-8');
    }
    elseif ($CD_GLOBALS->g('P')) {
        return charset_decode_utf_8($v);
    }
    return iconv('UTF-8', 'CP1251', $v);
}

function DBiconvLATIN2UTF8() {
    global $CD_GLOBALS, $CD_SESSION;
    // /doICONVdb/nohtml
   
    if ($CD_SESSION['UserID']!==SUPER_ADMIN) return false;

    if (isset($_GET['doICONVdb'])) {
        $CD_GLOBALS->s('P',1); // do it for all LATIN
        $CD_GLOBALS->s('R',0); // aftar collation change do it all back
        DBiconvALLtoUTF('doICONVdb');
        return true;
    }
    elseif (isset($_GET['doICONVdb2'])) {
        $CD_GLOBALS->s('P',0);
        $CD_GLOBALS->s('R',1);
        DBiconvALLtoUTF('doICONVdb2');
        $_SESSION['iconvFINITO'] = true;
        return true;
    }
    elseif ($_SESSION['iconvFINITO'] && $_GET['iconv']=='FINISHED') {
        $_SESSION['iconvFINITO'] = false;
        @unlink('./varchars.txt');
       
    }
   
    return false;
}?>


this is implemened in CMSsystem also

Admin (27)
Published: 30 November 2009 in 12:00, Edited: 30 November 2009 in 13:55 
1. change VARCHAR columns in database to TEXT or BLOB;

2. now convert data in each field using function:
<? charset_decode_utf_8()?>


3. this time you need to set names:
<?
 dbExecute('SET NAMES \'utf8\';',true);
dbExecute('SET CHARACTER SET \'utf8\';',true);
dbExecute('ALTER DATABASE '.DB_NAME.' CHARACTER SET utf8;');
 ?>


4. Convert data back to original using function:
<? html_entity_decode($v,ENT_COMPAT,'UTF-8')?>


5. Change those BLOB⁄TEXT columns back to varchars.

6. Done!

The reason why we need to convert them is because non-english letters take more space, for example russian letter Г would be encoded to & #1225; for example…

Another reason why we everytime refreshing the page is made for browser not to die because of the long process, and there is the message that shows to user: please wait, step is 1st table is that one..


I was searching the internet for this solution, many people were looking for it, and nobody gave a normal one. I hope this post will help some coders.
Thanx
Forum home   Page:  1 
Preferences
Language:  Estonian English Russian
Background:
URL mode:
Switch to AJAX or AHAH URL mode to have a music player here
Template:
Authorization
Username:
Password:
Select CMSsystem today
Newsletters
Your name:
Your Email:
 
Subscribe to our newsletters to be informed about updates, new features, improvments, suggestions, tips and special offers.
Forum's Calendar
Previous monthSeptember - 2010Next month
MOTUWDTHFRSASU
303112345
6789101112
13141516171819
20212223242526
27282930123
Do you like this design?