Forum Moderators: coopster
I have a 50 MB XML file with around 16,000 products that I have to parse into a MySQL database, but I'm getting a time-out error. These are my restrictions:
1. Can't change php.ini.
2. set_time_limit(0) won't work.
3. flush() or ob_flush() isn't working.
4. An .htaccess file won't work.
5. Client has php 4.
Any help'd be appreciated!
Nicolas.
<?php
// Chunked XML import driver.
//
// Each request parses one slice of the XML file, starting at the byte offset
// passed in "lastbyte". The element handlers (startTag/endTag/contents) rebuild
// each product in $GLOBALS["product_buffer"]; endTag() emits a client-side
// redirect to the next slice once 10 products have been seen in this request.
//
// NOTE(review): when resuming from a non-zero offset the parser is fed a
// fragment that starts in the middle of the document (no XML declaration or
// root element). Confirm the parser accepts such fragments for your data.

$file = "C:\\file.xml";
$chunkSize = 60000; // bytes read per request

// Resume state handed over by the previous request. Cast to int because the
// values come from the client and are used as a file offset and a counter.
$lastByte = isset($_REQUEST["lastbyte"]) ? (int) $_REQUEST["lastbyte"] : 0; // offset to resume from
if ($lastByte < 0) {
    $lastByte = 0; // a negative offset makes no sense; start from the beginning
}
$products_parsed = isset($_REQUEST["productsparsed"]) ? (int) $_REQUEST["productsparsed"] : 0; // products parsed so far

// State shared with the parser callbacks.
$GLOBALS["productsparsed"] = $products_parsed; // running total across requests
$GLOBALS["product_buffer"] = "";               // XML text of the product being rebuilt
$GLOBALS["last_byte"] = $lastByte;             // offset this chunk starts at
$GLOBALS["start_bytes"] = $lastByte;           // same offset under the key endTag() reads
$GLOBALS["actual_product_count"] = 0;          // products closed during this request

// Binary mode: byte offsets must not be affected by newline translation.
$fp = fopen($file, "rb");
if (!$fp) {
    echo "ERROR: could not open " . $file;
    exit;
}

$xml_parser = xml_parser_create();
xml_set_element_handler($xml_parser, "startTag", "endTag");
// Without this handler the text inside elements never reaches the buffer.
xml_set_character_data_handler($xml_parser, "contents");

// Go to the last product byte so the file is not read from the start again.
fseek($fp, $lastByte, SEEK_SET);
$data = fread($fp, $chunkSize);

if (!xml_parse($xml_parser, $data)) {
    if (feof($fp)) {
        echo "End of the file reached. ";
        xml_parser_free($xml_parser);
        fclose($fp);
        exit;
    } else {
        // xml_error_string() expects the parser's error code, not a line number.
        echo "ERROR: " . xml_error_string(xml_get_error_code($xml_parser)) . " on line " . xml_get_current_line_number($xml_parser) . " column " . xml_get_current_column_number($xml_parser) . " - Byte " . xml_get_current_byte_index($xml_parser);
    }
}

xml_parser_free($xml_parser);
fclose($fp);
?>
<?php
/**
 * Character-data callback: appends a piece of element text to the
 * product currently being rebuilt in $GLOBALS["product_buffer"].
 *
 * @param mixed  $parser XML parser that invoked the callback (unused).
 * @param string $data   Text to append.
 */
function contents($parser, $data)
{
    $GLOBALS["product_buffer"] = $GLOBALS["product_buffer"] . $data;
}
/**
 * Start-element callback: writes the opening tag into
 * $GLOBALS["product_buffer"].
 *
 * A PRODUCT tag starts a fresh buffer; any other tag is appended to the
 * buffer. Attributes supplied by the parser are written into the tag so
 * they are not lost from the rebuilt product.
 *
 * @param mixed  $parser  XML parser that invoked the callback (unused).
 * @param string $data    Element name as reported by the parser.
 * @param array  $attribs Attribute name => value pairs; optional so that
 *                        existing two-argument calls keep working.
 */
function startTag($parser, $data, $attribs = array())
{
    $tag = "<" . $data;
    foreach ($attribs as $name => $value) {
        // Escape the value so quotes or markup inside it cannot break the tag.
        $tag .= " " . $name . "=\"" . htmlspecialchars($value) . "\"";
    }
    $tag .= ">";

    if ($data == "PRODUCT") {
        // New product: discard whatever was buffered before.
        $GLOBALS["product_buffer"] = $tag;
    } else {
        $GLOBALS["product_buffer"] .= $tag;
    }
}
/**
 * End-element callback: writes the closing tag into
 * $GLOBALS["product_buffer"] and, for PRODUCT, updates the counters.
 *
 * When the 10th product of the current request closes, the file offset to
 * resume from is computed and changeLocation() is called to reload the page.
 *
 * @param mixed  $parser XML parser that invoked the callback.
 * @param string $data   Element name as reported by the parser.
 */
function endTag($parser, $data)
{
    if ($data != "PRODUCT") {
        $GLOBALS["product_buffer"] .= "</$data>";
        return;
    }

    // Make sure the counters exist before incrementing them.
    if (!isset($GLOBALS["productsparsed"])) {
        $GLOBALS["productsparsed"] = 0;
    }
    if (!isset($GLOBALS["actual_product_count"])) {
        $GLOBALS["actual_product_count"] = 0;
    }

    $GLOBALS["productsparsed"]++;
    $GLOBALS["product_buffer"] .= "</PRODUCT>";
    $GLOBALS["actual_product_count"]++;

    if ($GLOBALS["actual_product_count"] == 10) {
        // The parser's byte index is relative to the chunk it was fed, so the
        // chunk's starting offset must be added to get a position in the file.
        // The script stores that offset under "last_byte"; "start_bytes" is
        // honoured first in case the caller sets it.
        if (isset($GLOBALS["start_bytes"])) {
            $chunkStart = (int) $GLOBALS["start_bytes"];
        } elseif (isset($GLOBALS["last_byte"])) {
            $chunkStart = (int) $GLOBALS["last_byte"];
        } else {
            $chunkStart = 0;
        }
        // NOTE(review): confirm whether the index reported inside an end-tag
        // callback points before or after the closing tag on your PHP build.
        $lastByte = xml_get_current_byte_index($parser) + $chunkStart;

        //Process data here---(not implemented)
        changeLocation($lastByte, $GLOBALS["productsparsed"]);
    }
}
/**
 * Outputs a small HTML page whose script redirects the browser to
 * getContent.php, passing on the resume offset and the running product count.
 *
 * Both values are cast to integers before being printed, because they end up
 * inside a JavaScript string and may originate from request parameters.
 *
 * @param int $byte           File offset the next request should resume from.
 * @param int $productsParsed Number of products parsed so far.
 */
function changeLocation($byte, $productsParsed){
    $byte = (int) $byte;
    $productsParsed = (int) $productsParsed;
?>
<html>
<head>
<script type="text/javascript">
window.location = "getContent.php?lastbyte=<?php echo $byte; ?>&productsparsed=<?php echo $productsParsed; ?>";
</script>
</head>
<body>
<div id="message">
</div>
</body>
</html>
<?php
}
?>
The idea is to read the file in jumps of 60,000 bytes (each jump is sure to contain 10+ products).
When 10 products have been found, the page is reloaded and parsing starts again from the byte where the last </product> was found.
This way I won't get the time-out error.
The problem is that I get an "attribute not finished" error, and I can't solve it.
Is there any "easier" way to do this?
Thanks!