<!DOCTYPE HTML>
<html>
<head>
	<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
	<title>Geodesic Solutions Support Tools</title>
	
	<style type="text/css">
		p.note {
				text-align: left; 
				border: 1px solid #DDD; 
				background-color: #F5F5F5; 
				margin: 5px 0 10px 0; 
				padding: 3px;
				font-family: Arial,Helvetica, sans-serif;
				font-size:8pt;
				font-style: normal;
				line-height: normal;
				font-weight: normal;
				color: #3c3c3c;
		}
	</style>
	
</head>
<body>
<div class="actionResults">
<?php
//This script is for use by Geodesic Support Team.

//Developer/debug mode: on only when no config.php is found at the relative
//path 'config.php'.  Finding it means the script was placed in the base
//folder, so debug mode is off and the debug-only form options are not offered.
$dev = (file_exists('config.php') === false);

/**
 * Prints a message wrapped in its own <p> element, followed by a blank line.
 *
 * The text is echoed exactly as given (no HTML escaping), so callers may pass
 * markup and have to escape any untrusted data themselves.
 *
 * @param string $msg    Message (may contain HTML) to print.
 * @param bool   $isNote When true, the paragraph gets class="note".
 */
function msg ($msg, $isNote = false)
{
	$attribute = '';
	if ($isNote) {
		$attribute = ' class="note"';
	}
	echo '<p'.$attribute.'>'.$msg."</p>\n\n";
}

/**
 * Opens a MySQL connection with the settings from config.php and selects the
 * configured database.
 *
 * config.php is loaded from the current folder when it exists there,
 * otherwise from the parent folder.  It is expected to define $db_host,
 * $db_username, $db_password and $database.
 *
 * @return resource|false The MySQL link, or false when either connecting or
 *                        selecting the database fails (a message is printed
 *                        only for the latter).
 */
function dbLink ()
{
	if (!file_exists('config.php')) {
		require "../config.php";
	} else {
		require 'config.php';
	}

	$connection = mysql_connect($db_host, $db_username, $db_password);
	if (!$connection) {
		return false;
	}

	if (mysql_select_db($database, $connection)) {
		return $connection;
	}

	//connected, but the configured database could not be selected
	msg ("can't use database $database : ".mysql_error());
	return false;
}


/**
 * Converts a string from one character set to another.
 *
 * Charset names are compared case-insensitively, so e.g. 'utf-8' and 'UTF-8'
 * are recognised as the same charset and nothing is converted.
 *
 * Conversion methods are tried in this order until one succeeds:
 * mb_convert_encoding(), iconv(), and finally utf8_encode()/utf8_decode()
 * (only for the ISO-8859-1 <-> UTF-8 pair; those two functions are deprecated
 * as of PHP 8.2, which is why they are the last resort).
 *
 * A failed conversion never returns false: the original string is handed
 * back untouched and an error is triggered, so a caller that writes the
 * result to the database can not blank out the stored value by accident.
 * Note that on PHP 8+ mb_convert_encoding() throws a ValueError for an
 * unknown charset name; that exception is not caught here.
 *
 * @param string $string       Text to convert.
 * @param string $charset_from Charset the text is currently in.
 * @param string $charset_to   Charset to convert the text to.
 * @return string The converted text, or $string unchanged when there is
 *                nothing to convert or no method was able to convert it.
 */
function convertCharset ($string, $charset_from = 'ISO-8859-1', $charset_to = 'UTF-8')
{
	if (!$charset_to || !$charset_from || strcasecmp($charset_to, $charset_from) === 0) {
		//nothing to convert
		return $string;
	}
	
	$converted = false;
	if (function_exists('mb_convert_encoding')) {
		$converted = mb_convert_encoding($string, $charset_to, $charset_from);
	}
	if ($converted === false && function_exists('iconv')) {
		//mbstring missing, or it did not know one of the charsets
		$converted = iconv($charset_from, $charset_to, $string);
	}
	if ($converted === false) {
		//last resort, only able to handle ISO-8859-1 <-> UTF-8
		$from = strtoupper($charset_from);
		$to = strtoupper($charset_to);
		if ($from === 'ISO-8859-1' && $to === 'UTF-8' && function_exists('utf8_encode')) {
			$converted = utf8_encode($string);
		} else if ($from === 'UTF-8' && $to === 'ISO-8859-1' && function_exists('utf8_decode')) {
			$converted = utf8_decode($string);
		}
	}
	
	if ($converted === false) {
		//none of the methods worked, keep the text as it was rather than returning false
		trigger_error('ERROR STRING: Not able to convert string from '.$charset_from.' TO '.$charset_to.', none of normal methods seem to work on this server.');
		return $string;
	}
	
	return $converted;
}

/**
 * Converts one batch of rows of a single table to UTF-8 and prints a summary.
 *
 * Selects $utf8['count'] rows of $utf8['table'] starting at offset
 * $utf8['start'], ordered by the unique key column(s).  For every column that
 * is not part of the unique key, not empty and not numeric, the value is
 * converted from $utf8['charset'] to UTF-8 unless it already is valid UTF-8.
 * Values that are stored url-encoded are decoded, converted and encoded
 * again.  Rows with at least one changed column are written back with an
 * UPDATE, or, when $utf8['dryrun'] is on, the UPDATE is only printed.
 *
 * In non-developer mode the script is terminated with a "finished" notice
 * once the last table has been processed.
 *
 * @param array $utf8       Conversion state: 'table', 'unique' (one column
 *                          name or an array of them), 'start', 'count',
 *                          'charset', 'debug' and 'dryrun'.
 * @param array $utf8tables Map of table name => unique key column(s); its key
 *                          order decides which table comes next.
 * @param bool  $dev        Developer mode flag.
 * @return array The $utf8 state.  After a real (non dry-run) batch without
 *               errors it points at the next batch: 'start' is advanced by
 *               'count', or, when fewer rows than 'count' were found, 'table'
 *               is moved to the next table ('' after the last one) and
 *               'start' is reset to 0.  Otherwise position is left unchanged.
 */
function utf8Convert ($utf8, $utf8tables, $dev)
{
	//Text to convert, need to populate this on per-install basis, to fix things
	//like ? in columns that were not encoded..  Should be UTF-8 and "decoded", and CaSe Sensitive
	$textConvert = array(
		//'bad t?xt as unencoded' => 'bad text as unencoded',
		//below are a few more examples
		//'Otro pa?s' => 'Otro país',
		//'Espa?a' => 'España',
		//'?VILA' => 'ÁVILA',
	);
	
	$charset_from = $utf8['charset'];
	
	//A batch size below 1 would select no rows without ever advancing the
	//start offset (or be a SQL error when negative), so refuse it up front.
	if ((int)$utf8['count'] < 1 || (int)$utf8['start'] < 0) {
		msg("Can not convert: \"Records at once\" must be at least 1 and \"From Record\" can not be negative.");
		return $utf8;
	}
	
	//search/replace lists for the manual text fixes, always defined (empty
	//when there are no fixes configured)
	$search = $replace = array();
	foreach ($textConvert as $key => $val) {
		//make sure it is utf-8 already
		$key = (isUtf8($key))? $key : convertCharset($key, $charset_from);
		$val = (isUtf8($val))? $val : convertCharset($val, $charset_from);
		$search[] = $key;
		$replace[] = $val;
	}
	
	$link = dbLink();
	if (!$link) {
		msg("Error connecting to DB: ".mysql_error());
		return $utf8;
	}
	if (!is_array($utf8['unique'])) {
		//single column key, normalize to a list so both cases are handled the same
		$utf8['unique'] = array ($utf8['unique']);
	}
	$result = mysql_query("SELECT * FROM `{$utf8['table']}` ORDER BY ".implode(', ',$utf8['unique'])." LIMIT {$utf8['start']}, {$utf8['count']}", $link);
	if (!$result) {
		msg("DB error getting data, error message: ".mysql_error($link));
		mysql_close($link);
		return $utf8;
	}
	$total_count = mysql_num_rows($result);
	
	$row_count = $errors = 0;
	while ($row = mysql_fetch_assoc($result)) {
		$changes = array ();
		foreach ($row as $key => $value) {
			if (in_array($key, $utf8['unique']) || !strlen($value) || is_numeric($value)) {
				//unique shouldn't be worried about, it should be number...
				//empty and numeric values have nothing to convert either
				continue;
			}
			//msg("processing column $key...  value is ".htmlspecialchars($value));
			$url = urldecode($value);
			if ($url !== $value && urlencode($url) === $value) {
				//Decoding changed the value AND encoding it again gives back
				//exactly what is stored, so this value is stored url-encoded:
				//convert the decoded text, then encode it again.
				//msg('This row is different url decoded...');
				$isUtf8 = isUtf8($url);
				if ($isUtf8 && !$textConvert) {
					//it is already UTF-8, nothing to do
					continue;
				}
				
				if (!$isUtf8) {
					$url = convertCharset($url, $charset_from);
				}
				if ($textConvert) {
					//do any string replacements
					$url = str_replace($search, $replace, $url);
				}
				$changed = urlencode($url);
			} else {
				//plain (not url-encoded) value
				$isUtf8 = isUtf8($value);
				if ($isUtf8 && !$textConvert) {
					//it is already utf-8, skip
					continue;
				}
				$changed = $value;
				if (!$isUtf8) {
					$changed = convertCharset($changed, $charset_from);
				}
				if ($textConvert) {
					$changed = str_replace($search, $replace, $changed);
				}
				//no decoded form to show in the debug output
				$url = '';
			}
			if ($value !== $changed) {
				//changed value is different, so update it
				
				if ($utf8['debug']) {
					msg("[Debug] before/after/decoded:<br />".htmlspecialchars($value)."<br />------------<br />".htmlspecialchars($changed)."<br />------------<br />".htmlspecialchars($url),true);
				}
				$changes[] = "`$key` = '".mysql_real_escape_string($changed, $link)."'";
			}
		}
		if (count($changes)) {
			//identify the row by all of its unique key columns
			$whereParts = array();
			foreach ($utf8['unique'] as $column) {
				$whereParts[] = "`{$column}`='".mysql_real_escape_string($row[$column],$link)."'";
			}
			$sql = "UPDATE {$utf8['table']} SET ".implode(', ',$changes)." WHERE ".implode(' AND ',$whereParts)." LIMIT 1";
			if (!$utf8['dryrun']) {
				$update = mysql_query($sql, $link);
			} else {
				msg("[Dry Run] Update SQL Query:<br />".htmlspecialchars($sql),true);
				$update = true;
			}
			
			if ($update) {
				//only rows that really were (or, in a dry run, would be) updated count as converted
				$row_count++;
			} else {
				msg("DB error updating data, sql:".htmlspecialchars($sql)."<br />error message: ".mysql_error($link));
				$errors++;
			}
		}
	}
	msg("Finished processing, results of this run:<br />From record #: <strong>{$utf8['start']}</strong><br /># Records Checked: <strong>$total_count</strong><br />#Records Converted: <strong>$row_count</strong><br />", true);
	mysql_close($link);
	if (!$errors && !$utf8['dryrun']) {
		//only move on after a clean, real run, so a failed batch can be repeated
		if ($total_count < $utf8['count']) {
			//fewer rows than asked for means the end of this table was reached
			//go to next table!
			$tables = array_keys($utf8tables);
			$current = array_search($utf8['table'], $tables);
			$current++;
			$utf8['table'] = (isset($tables[$current]))? $tables[$current] : '';
			$utf8['start'] = 0;
			if (!$dev && !isset($tables[$current])) {
				die ("<h1>Convert finished!</h1>");
			}
		} else {
			$utf8['start'] += $utf8['count'];
		}
	}
	
	return $utf8;
}

/**
 * Tells whether a string is valid UTF-8.
 *
 * Uses mb_check_encoding() when the mbstring extension is available.
 * Otherwise it relies on the PCRE "u" modifier: with that modifier the
 * subject has to be valid UTF-8 or preg_match() fails instead of matching,
 * so an empty pattern matching is proof of a valid string.  Unlike a
 * hand-written byte-range pattern this accepts every valid string (ASCII
 * control characters included, same as the mbstring check) and does not
 * depend on backtracking through the whole value.
 *
 * @param string $string Text to check.
 * @return bool True when $string is valid UTF-8 (an empty string is valid).
 */
function isUtf8 ($string)
{
	if (function_exists('mb_check_encoding')) {
		//use check encoding
		return mb_check_encoding($string, 'UTF-8');
	}
	//preg_match() gives 1 for a valid subject, false for invalid UTF-8
	return preg_match('//u', $string) === 1;
}

/**
 * Tables to convert, processed in the order listed.
 * 
 * Syntax:  table_name => unique_index_key
 * 
 * If a table needs more than one column to identify a row, give the columns
 * as an array:  table_name => array('column_1','column_2').  utf8Convert
 * orders the rows by these columns and uses all of them in the WHERE clause
 * of its UPDATE queries.
 * 
 * !!Important!!  If adding a new table, turn on debug and dry run, and confirm
 * that the changes being made are correct!  Especially if doing on a live site..
 * 
 */
$utf8tables = array (
	'geodesic_classifieds'=>'id',
	'geodesic_countries' => 'country_id',
	'geodesic_states' => 'state_id',
	'geodesic_classifieds_ads_extra' => array('classified_id','question_id'),
	'geodesic_classifieds_sell_questions' => 'question_id',
	'geodesic_classifieds_sell_questions_languages' => array ('question_id','language_id'),
	'geodesic_classifieds_sell_question_choices' => 'value_id',
	'geodesic_classifieds_sell_question_types' => 'type_id',
	'geodesic_classifieds_images_urls' => 'image_id',
	'geodesic_choices' => 'choice_id',
	'geodesic_categories' => 'category_id',
	'geodesic_classifieds_categories_languages' => array('category_id','language_id'),

	'geodesic_userdata' => 'id',
	'geodesic_logins' => 'id',
	
	'geodesic_classifieds_messages_form' => 'message_id',
	'geodesic_classifieds_messages_past' => 'message_id',
	'geodesic_classifieds_subscription_choices' => 'period_id',
	'geodesic_classifieds_votes' => 'vote_id',
	'geodesic_confirm' => array('id','mdhash'),
	'geodesic_currency_types' => 'type_id',
	'geodesic_email_queue' => 'email_id',
	'geodesic_feedbacks' => array('rated_user_id','rater_user_id','auction_id'),
	'geodesic_user_communications' => 'message_id',
	
	
	//To convert more tables add them here, following the syntax described in the comment above this array
);

//Initial conversion state, used to fill in the form until a request is posted
$utf8 = array ();
$utf8['table'] = 'geodesic_classifieds';
$utf8['start'] = '0';
//if "debug" off, use default of 1000 rows at once, otherwise 10
$utf8['count'] = '1000';
if ($dev) {
	$utf8['count'] = '10';
}
$utf8['debug'] = false;
$utf8['dryrun'] = false;
$utf8['posted'] = false;

//Handle a posted conversion request: validate the input, then convert one batch.
if (isset($_POST['table']) && is_string($_POST['table']) && strlen($_POST['table'])>0) {
	//Convert stuff to UTF-8
	
	//Error counter.  Has to start as an integer: the ++ operator has no effect
	//on a boolean, so starting at false would let the conversion run on the
	//default table even after an error was reported.
	$utf8['errors'] = 0;
	if (!isset($utf8tables[$_POST['table']])) {
		msg('No table selected!  Are you done processing all the tables?  (Table is de-selected automatically once the script has gone through converting all the tables)');
		$utf8['errors'] ++;
	} else {
		$utf8['table'] = $_POST['table'];
	}
	
	$utf8['unique'] = $utf8tables[$utf8['table']];
	
	//offsets can not be negative, and a missing field counts as 0
	$utf8['start'] = (isset($_POST['start']))? max(0, (int)$_POST['start']) : 0;
	$utf8['count'] = (isset($_POST['count']))? (int)$_POST['count'] : 0;
	if ($utf8['count'] < 1) {
		//a batch of 0 rows would never advance, a negative one is a SQL error
		msg('Records at once must be a number greater than 0.');
		$utf8['errors'] ++;
	}
	$utf8['debug'] = (isset($_POST['debug']) && $_POST['debug'])? true : false;
	$utf8['dryrun'] = (isset($_POST['dryrun']) && $_POST['dryrun'])? true : false;
	$utf8['posted'] = true;
	$utf8['charset'] = (isset($_POST['charset']) && is_string($_POST['charset']) && trim($_POST['charset']))? trim($_POST['charset']) : 'ISO-8859-1';
	 
	if (!$utf8['errors']) {
		$utf8 = utf8Convert($utf8, $utf8tables, $dev);
	}
	msg('If conversion went smoothly, proceed to next set of things by clicking continue on the form.');
}

?>
</div>

<h2>UTF-8 Convert</h2>
<?php /* Banner shown only while developer/debug mode is on. */ if ($dev): ?>
<strong>Developer/Debug Mode ON</strong> (will turn off if script run from base folder)
<br /><br />
<?php endif; ?>
<p class="note">
	<strong>BEFORE starting:</strong><br /><br />
	1.  Back up the database.<br /><br />
	2.  Turn your site <strong>off</strong> so that new listings are not added after the fact.<br /><br />
	3.  In the admin at <strong>Site Setup &gt; General Settings</strong>, change charset to UTF-8.<br /><br />
	4.  In all templates, change the charset to UTF-8 if it is not already.<br /><br />
	5.  In your <strong>config.php</strong> file, make sure it uses UTF-8 charset.<br /><br />
	<strong>AFTER finished:</strong><br /><br />
	1.  Make sure your templates are using UTF-8 correctly.  Browse the site and look for anything that looks "broken".  If you previously changed the design to use another charset, you may need to update your templates by hand: this script only converts the charset of database entries, <strong>NOT of template files</strong>.
</p>

<fieldset>
	<legend>UTF-8 Convert</legend>
	
	<?php /* Conversion form.  Posts back to this same script; every value echoed into the markup is HTML-escaped. */ ?>
	<form action="" method="post">
		Next Table to Convert: 
		<?php if ($dev) { ?>
			<?php /* developer mode: any table from the list can be picked by hand */ ?>
			<select name="table">
				<option value="">Finished...</option>
				<?php 
				$tables = array_keys($utf8tables);
				foreach ($tables as $table) { 
				?>
					<option<?php if ($utf8['table']===$table) {?> selected="selected"<?php } ?>><?php echo htmlspecialchars($table, ENT_QUOTES, 'UTF-8');?></option>
				<?php } ?>
			</select>
		<?php } else { ?>
			<?php /* normal mode: the table is fixed, it is only shown and passed along */ ?>
			<?php echo htmlspecialchars($utf8['table'], ENT_QUOTES, 'UTF-8'); ?>
			<input type="hidden" name="table" value="<?php echo htmlspecialchars($utf8['table'], ENT_QUOTES, 'UTF-8');?>" />
		<?php } ?>
		<br />
		From Record:
		<input type="<?php if ($dev) { echo 'text'; } else { echo 'hidden'; } ?>" name="start" value="<?php echo htmlspecialchars($utf8['start'], ENT_QUOTES, 'UTF-8');?>" size="4" />
		<?php if (!$dev) { echo htmlspecialchars($utf8['start'], ENT_QUOTES, 'UTF-8'); } ?><br />
		<label>Records at once: <input type="text" name="count" value="<?php echo htmlspecialchars($utf8['count'], ENT_QUOTES, 'UTF-8');?>" size="4" /></label><br />
		
		<?php /* the charset field repeats raw user input, so it must be escaped before it goes into the attribute */ ?>
		<label>From charset: <input type="text" name="charset" value="<?php echo htmlspecialchars((isset($_POST['charset']) && is_string($_POST['charset']))? $_POST['charset'] : 'ISO-8859-1', ENT_QUOTES, 'UTF-8'); ?>" /></label>
		[Supported charsets <a href="http://us2.php.net/manual/en/mbstring.supported-encodings.php" onclick="window.open(this.href); return false;">here</a>]<br />
		<?php if ($dev) { ?>
			<label><input type="checkbox" name="debug" value="1" <?php if ($utf8['debug']) { ?>checked="checked"<?php } ?>/> Debug?</label><br />
			<label><input type="checkbox" name="dryrun" value="1" <?php if ($utf8['dryrun']) { ?>checked="checked"<?php } ?>/> Dry-run only (don't process changes, just display update queries)?</label><br />
		<?php } ?>
		
		<input type="submit" value="<?php if ($utf8['posted']) { ?>Continue<?php } else { ?>Start<?php } ?> Convert" />
		
	</form>
</fieldset>

</body>
</html>
