En un PHP paso de un archivo .dict a una base de datos (lo separé en 2 pasos, o 2 archivos PHP) y luego, ya con todo en la base, con este PHP lo quiero convertir a XML, previo paso por funciones para hacer correcciones. Hay muchas funciones, replace, etc., porque hay muchos códigos raros que están en idioma indio, o caracteres raros como la raíz cuadrada, etc.
Código PHP:
// Export set-up: open the database connection, load the helper libraries,
// start the MediaWiki XML document in $shtml and fetch every dictionary row.
//
// Leaves two variables for the rest of the script:
//   $shtml   the XML document built so far (header + <siteinfo>)
//   $result  the result set of the `textos` query, ordered by id
//
// NOTE(review): conectar() is defined outside this file; the value it returns
// is stored in $link but never passed to the mysql_* calls below, which
// therefore use the most recently opened connection.
$link = conectar();
// End of connection set-up

// The export can run for a long time, so lift the execution time limit.
set_time_limit(0);
include ('decoder_utf8.php');
include ('sanskrita_export_functions.php');

// XML prologue and <siteinfo> section of a MediaWiki 0.3 export file.
$shtml='<?xml version="1.0" encoding="utf-8" ?><mediawiki xmlns="http://www.mediawiki.org/xml/export-0.3/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.3/ http://www.mediawiki.org/xml/export-0.3.xsd" version="0.3" xml:lang="en">
<siteinfo>
<sitename>Sanskrita</sitename>
<base></base>
<generator>MediaWiki 1.10.0</generator>
<case>case-sensitive</case>
<namespaces>
<namespace key="-2">Media</namespace>
<namespace key="-1">Special</namespace>
<namespace key="0"/>
<namespace key="1">Talk</namespace>
<namespace key="2">User</namespace>
<namespace key="3">User talk</namespace>
<namespace key="4">Sanskrita</namespace>
<namespace key="5">Sanskrita talk</namespace>
<namespace key="6">Image</namespace>
<namespace key="7">Image talk</namespace>
<namespace key="8">MediaWiki</namespace>
<namespace key="9">MediaWiki talk</namespace>
<namespace key="10">Template</namespace>
<namespace key="11">Template talk</namespace>
<namespace key="12">Help</namespace>
<namespace key="13">Help talk</namespace>
<namespace key="14">Category</namespace>
<namespace key="15">Category talk</namespace>
</namespaces>
</siteinfo>';

$query = "SELECT pagina, hk, hksimbol, definicion, prevraiz, raiz, dictorig, id FROM textos ORDER BY id ASC ";
$result = mysql_query($query);
// Stop here when the query fails: without a result set the rest of the script
// would only emit warnings and write an XML file with no pages in it.
if ($result === false)
    die(mysql_error()."<br>");
// Main export loop: turns every row of the result set into wikitext and
// appends it to $shtml.
//
// A row whose `pagina` is not '0' closes the page that was open (if any) and
// starts a new <page>; a row whose `pagina` is '0' adds one more entry to the
// page currently open. Both kinds of row end with the same audio-link and
// <multilang> section.
//
// State that outlives one iteration and is read again after the loop:
//   $pagedict          number of the page currently open
//   $key[<page>]       comma-separated keywords collected for that page
//   $dictorig<page>    dynamically named variable holding the source text of
//                      that page (the name is built from a quoted string; a
//                      bare `dictorig` would be an undefined constant)
//
// keywordshk(), search_and_replace(), impmodule(), raizlink() and simbols()
// are defined outside this file and are called in the same order as before.
while ($rows = mysql_fetch_array($result))
{
    $page = $rows['pagina'];
    $hk = $rows['hk'];
    $hkraiz = $rows['raiz'];
    // Text that precedes the root, trimmed and without apostrophes.
    $hkprevraiz = str_replace("'", "", trim($rows['prevraiz']));
    $origtext = rtrim(str_replace("---> ", "->", $rows['dictorig']));

    if ($page != '0') // this row starts a new page
    {
        // Remember which page was open so that its trailer is written with
        // its own number, keywords and source text, whatever the numbering
        // of pages and ids looks like.
        $prevpage = isset($pagedict) ? $pagedict : null;
        $pagedict = $page;

        $dictvar = 'dictorig'.$page;
        if (!isset($$dictvar))
            $$dictvar = '';
        $$dictvar .= $origtext;
        $key[$page] = keywordshk($hk);

        if ($prevpage !== null)
        {
            $prevvar = 'dictorig'.$prevpage;
            $prevorig = isset($$prevvar) ? $$prevvar : '';
            $prevkeys = isset($key[$prevpage]) ? $key[$prevpage] : '';
            $shtml .= htmlspecialchars('<!-- <mw page="'.($prevpage*10).'"> -->');
            $shtml .= htmlspecialchars('<!-- <keywords>'.$prevkeys.'</keywords> -->');
            $shtml .= htmlspecialchars('<!-- <link></link> -->');
            $shtml .= htmlspecialchars('<!-- <progress en="" es=""> -->');
            $shtml .= "\n\n".htmlspecialchars('<!-- *** START DICT ORIGINAL ***'.$prevorig."\n\n".'*** END DICT ORIGINAL *** -->');
            $shtml .= '</text>
</revision>
</page>';
        }

        $shtml .= "\n<page>\n<title>".$hk;
        // Escape bare ampersands so the definition is valid XML text.
        // NOTE(review): assumes the stored definitions are not already
        // entity-escaped - confirm against the data.
        $def = search_and_replace(str_replace('&', '&amp;', $rows['definicion']));
        $shtml .= '</title>
<id></id>
<revision>
<id></id>
<timestamp></timestamp>
<contributor>
<username></username>
<id></id>
</contributor>
<minor/>
<text xml:space="preserve">';
        $shtml .= impmodule($hk, $hkraiz);
        $shtml .= "\n\n";
        $shtml .= "\n".htmlspecialchars('<html><div id="santitle"><santitle>');
        $shtml .= $hk;
        $shtml .= htmlspecialchars("</santitle></div></html>")."\n\n";
        $shtml .= htmlspecialchars('==<html><div id="sanskrita"><sanskrita>');
        $shtml .= $hk;
        $shtml .= htmlspecialchars('</sanskrita></div></html>');
        $shtml .= htmlspecialchars(" ''").htmlspecialchars($hkprevraiz).raizlink($hkraiz).htmlspecialchars("'' ");
    }
    else // this row is a further entry of the page currently open
    {
        $dictvar = 'dictorig'.$pagedict;
        if (!isset($$dictvar))
            $$dictvar = '';
        $$dictvar .= $origtext;
        // Keywords belong to the open page; $page is always '0' here.
        $key[$pagedict] .= ", ".keywordshk($hk);

        // The root field may hold several roots separated by the root sign;
        // link each one on its own and put the separators back. Every piece
        // is visited, and empty pieces (e.g. a leading root sign) are kept as
        // a bare separator instead of ending the loop early.
        $hkraiz1 = '';
        foreach (explode("√", $hkraiz) as $i => $raiz)
        {
            if ($i > 0)
                $hkraiz1 .= "√";
            if ($raiz !== '')
                $hkraiz1 .= raizlink($raiz);
        }

        // Escape bare ampersands so the definition is valid XML text.
        // NOTE(review): assumes the stored definitions are not already
        // entity-escaped - confirm against the data.
        $def = search_and_replace(str_replace('&', '&amp;', $rows['definicion']));

        $hksimbol = $hkprevraiz.$hkraiz1;
        if (strlen($hkraiz) < 1)
            $hksimbol = simbols($hksimbol);

        $shtml .= "\n\n";
        $shtml .= htmlspecialchars("==''").htmlspecialchars($hksimbol).htmlspecialchars("'' ");
    }

    // Common end of the heading: the word in brackets plus a commented-out
    // audio link whose path is <first byte of the word>/<word>.wav.
    $shtml .= htmlspecialchars('<font color="#616E33">[ ');
    $shtml .= $hk;
    $shtml .= htmlspecialchars(' ]</font><!--<html><a href="/wiki/images/');
    $shtml .= substr($hk, 0, 1)."/".$hk.".wav";
    $shtml .= htmlspecialchars('"><img src="/wiki/images/audio.gif" width="19" height="18" border="0" /></a></html>-->==')."\n";
    $shtml .= "\n\n";

    // The same definition is written into both language sections.
    $shtml .= htmlspecialchars('<multilang>')."\n";
    $shtml .= "\n\n";
    $shtml .= htmlspecialchars('@es|')."\n";
    $shtml .= htmlspecialchars('<!-- *** AQUI COMIENZA LA VERSION EN ESPANOL *** -->');
    $shtml .= ltrim($def)."\n";
    $shtml .= htmlspecialchars('<!-- *** AQUI TERMINA LA VERSION EN ESPANOL *** -->')."\n";
    $shtml .= htmlspecialchars('@en|')."\n";
    $shtml .= htmlspecialchars('<!-- *** ENGLISH VERSION STARTS HERE *** -->');
    $shtml .= ltrim($def)."\n";
    $shtml .= htmlspecialchars('<!-- *** ENGLISH VERSION ENDS HERE *** -->')."\n";
    $shtml .= htmlspecialchars('</multilang>')."\n";
}
// Finish the export: close the last page that is still open, end the XML
// document, write it to disk and print a download link.
//
// The page trailer (page marker, keywords, empty link/progress placeholders
// and the source text) is written as escaped HTML comments inside the page
// text, and only when a page was actually opened.
if (isset($pagedict))
{
    // Use $pagedict, the page that is open, rather than $page: $page holds the
    // `pagina` value of the last row read, which is '0' when that row was a
    // further entry of the page.
    $dictvar = 'dictorig'.$pagedict;
    $lastorig = isset($$dictvar) ? $$dictvar : '';
    $lastkeys = isset($key[$pagedict]) ? $key[$pagedict] : '';
    $shtml .= htmlspecialchars('<!-- <mw page="'.($pagedict*10).'"> -->');
    $shtml .= htmlspecialchars('<!-- <keywords>'.$lastkeys.'</keywords> -->');
    $shtml .= htmlspecialchars('<!-- <link> </link> -->');
    $shtml .= htmlspecialchars('<!-- <progress en="" es=""> -->');
    $shtml .= htmlspecialchars('<!-- *** START DICT ORIGINAL ***'.$lastorig.'*** END DICT ORIGINAL *** -->');
    $shtml .= '</text>
</revision>
</page>';
}
$shtml .= '
</mediawiki>';

// Drop the blank that follows any '>' in the generated document.
$shtml = str_replace("> ", ">", $shtml);

$sfile = "test.xml"; // path of the file to generate
// Create or truncate the file and write the whole document; stop with a
// message instead of offering a link to a file that could not be written.
if (file_put_contents($sfile, $shtml) === false)
    die("No se pudo escribir el archivo ".$sfile);
echo "<a href='".$sfile."'>Haz click aqui para bajarlo</a>";
?>