Forum Moderators: coopster
The script has to log the entire process, download the new file via ftp (or perhaps wget?), then unzip it after clearing all of the existing .txt files, then process each "¦" delimited .txt file that is extracted.
Upon processing, we run some basic checks, making sure the product is > $1.00, checking to see if the product parameters have changed or if it is a new product, etc and then add the product to the appropriate mysql categories.
Our server has 4GB memory on the back end and 8GB on the front... its definately not slow and we are running PHP5 and MYSQL 5 on Redhat Enterprise with Apache. This particular script takes over 7 hours to run. We need to significantly speed it up. I wanted to get some feedback and strategy before tackling this issue. I'm sure there are parts of this code than can be significantly sped up. I appreciate all of the feedback in advance! Let me know if we need any more information.
Here is the code:
<?php
ignore_user_abort(TRUE);
set_time_limit(0);
ini_set("memory_limit", "768M");
define('ROOT_DIR', dirname(__FILE__) . "\\..\\");
include '/var/www/html/incs/Javuh.inc';
include '/var/www/html/incs/Environ/Vriess.inc';
class MasterDB extends Environ_Vriess_DB
{
var $Database = "database";
var $User = "un";
var $Password = "pass";
}
function readfile_chunked($filename,$retbytes=true) {
//$chunksize = 1*(1024*1024); // how many bytes per chunk
$buffer = '';
$cnt =0;
// $handle = fopen($filename, 'rb');
$handle = fopen($filename, 'rb');
if ($handle == false) {
return false;
}
while (!feof($handle)) {
$buffer = fgets($handle, $chunksize);
//echo $buffer;
ob_flush();
flush();
if ($retbytes) {
$cnt += strlen($buffer);
}
}
$status = fclose($handle);
if ($retbytes && $status) {
return $cnt; // return num. bytes delivered like readfile() does.
}
return $status;
}
function vmlog($log_dir,$v)
{
$logfile = fopen($log_dir, "a");
fputs($logfile, $v);
fclose($logfile);
}
if ( !defined( "COMMISSIONJUNCTIONDB_DEFINED" ) )
{
define( "COMMISSIONJUNCTIONDB_DEFINED", 1 );
class commissionjunctionDB extends MasterDB {
function log_update($mid) {
$this->query("UPDATE OURCOMPANY_source_site SET last_updated=now() WHERE merchantid='$mid'");
}
// returns the merchantid associated with the site, requires valid site name
function get_source_siteid() {
$this->query("SELECT uss.merchantid FROM OURCOMPANY_source_site uss, commis_temp tt WHERE uss.site_name=tt.PROGRAMNAME ");
// if merchant is not found, insert into table
if ($this->num_rows() == 0) {
$temp = new MasterDB;
$temp->query("SELECT PROGRAMNAME FROM commis_temp");
$temp->next_record();
$value1 = mysql_real_escape_string($temp->f('PROGRAMNAME'));
$temp->next_record();
$temp->query("SELECT PROGRAMURL FROM commis_temp");
$temp->next_record();
$value2 = mysql_real_escape_string($temp->f('PROGRAMURL'));
$this->query("INSERT INTO OURCOMPANY_source_site (site_name, site_url, isactive, merchantLocation,last_updated ) VALUES ('$value1', '$value2', '1', 'CommissionJunction', now() ) ");
$this->query("SELECT last_insert_id() AS theid FROM merchant_category");
$this->next_record();
$theid = $this->f("theid");
$saveid = $theid;
// check if this merchant id is already defined in table
$found = false;
// searching in table for undefined merchantid starting with current siteid
while (!$found) {
$this->query("SELECT merchantid FROM OURCOMPANY_source_site WHERE merchantid='$theid'");
if ($this->num_rows() == 0)
$found = true;
else
$theid++;
}
// now we update new merchant with undefined merchantid
$this->query("UPDATE OURCOMPANY_source_site SET merchantid='$theid' WHERE siteid=$saveid");
return $theid;
}
else {
$this->next_record();
return $this->f('merchantid');
}
}
function insert_OURCOMPANYproduct($merchantid) {
// getting merchantid for this company from table
// checking if cj's advertising category contains a value, if so insert
// if not, use a default value
$theid2 = 0;
$mcid = 0;
$productid = -1;
$inserted = 0;
$updated = 0;
$count = 0;
$class_id = 0;
$temp = new MasterDB;
$temp2 = new MasterDB;
$temp->query("SELECT * FROM commis_temp");
while($temp->next_record()){
$NAME = mysql_real_escape_string($temp->f('NAME'));
$SKU = mysql_real_escape_string($temp->f('SKU'));
$ADVERTISERCATEGORY = mysql_real_escape_string($temp->f('ADVERTISERCATEGORY'));
$THIRDPARTYCATEGORY = mysql_real_escape_string($temp->f('THIRDPARTYCATEGORY'));
$BUYURL = mysql_real_escape_string($temp->f('BUYURL'));
$IMAGEURL = mysql_real_escape_string($temp->f('IMAGEURL'));
$DESCRIPTION = mysql_real_escape_string($temp->f('DESCRIPTION'));
$SALEPRICE = mysql_real_escape_string($temp->f('SALEPRICE'));
$PRICE = mysql_real_escape_string($temp->f('PRICE'));
$STARTDATE = mysql_real_escape_string($temp->f('STARTDATE'));
$ENDDATE = mysql_real_escape_string($temp->f('ENDDATE'));
$KEYWORDS = mysql_real_escape_string($temp->f('KEYWORDS'));
$MANUFACTURERID = mysql_real_escape_string($temp->f('MANUFACTURERID'));
$MANUFACTURER = mysql_real_escape_string($temp->f('MANUFACTURER'));
$PROMOTIONALTEXT = mysql_real_escape_string($temp->f('PROMOTIONALTEXT'));
$UPC = mysql_real_escape_string($temp->f('UPC'));
$IMPRESSIONURL = mysql_real_escape_string($temp->f('IMPRESSIONURL'));
$ISBN = mysql_real_escape_string($temp->f('ISBN'));
$DESCRIPTION = mysql_real_escape_string($temp->f('DESCRIPTION'));
$SALEPRICE = mysql_real_escape_string($temp->f('SALEPRICE'));
$PRICE = mysql_real_escape_string($temp->f('PRICE'));
$STARTDATE = mysql_real_escape_string($temp->f('STARTDATE'));
$ENDDATE = mysql_real_escape_string($temp->f('ENDDATE'));
$KEYWORDS = mysql_real_escape_string($temp->f('KEYWORDS'));
$MANUFACTURERID = mysql_real_escape_string($temp->f('MANUFACTURERID'));
$INSTOCK = mysql_real_escape_string($temp->f('INSTOCK'));
$CURRENCY = mysql_real_escape_string($temp->f('CURRENCY'));
if ($PRICE > 1.0)
$isactive = 1;
else
$isactive = 0;
$isnewproduct=1;
if ($PRICE > 1.0)
{
$this->query("SELECT mci.merchant_categoryid FROM merchant_category mci WHERE mci.merchantid='$merchantid' AND mci.category_text='$ADVERTISERCATEGORY'");
if ($this->num_rows() > 0) {
$this->next_record();
$merchant_categoryid = $this->f('merchant_categoryid');
}
else{
// categoryid and merchant_categoryid will be the same for all commission junction products
$temp2->query("INSERT INTO merchant_category (category_text, merchantid, merchant_categoryid) VALUES ('$ADVERTISERCATEGORY','$merchantid','$mcid')");
if($updated == 0){
$this->query("SELECT last_insert_id() AS theid FROM merchant_category");
$this->next_record();
$mcid = $this->f("theid");
$this->query("UPDATE merchant_category SET merchant_categoryid='$mcid' WHERE categoryid='$mcid'");
$updated = 1;
}
$merchant_categoryid = $mcid;
$mcid++;
}
$this->query("UPDATE OURCOMPANY_products tt2 SET tt2.product_name='$NAME', tt2.sku_number='$SKU', tt2.primary_category='$ADVERTISERCATEGORY', tt2.secondary_category='$THIRDPARTYCATEGORY', tt2.product_url='$BUYURL', tt2.image_url='$IMAGEURL', tt2.buy_url='$BUYURL', tt2.long_description='$DESCRIPTION', tt2.sale_price='$SALEPRICE', tt2.retail_price='$PRICE', tt2.begin_date='$STARTDATE', tt2.end_date='$ENDDATE', tt2.keywords='$KEYWORDS', tt2.manufacturer_part='$MANUFACTURERID', tt2.manufacturer_name='$MANUFACTURER', tt2.shipping_information='$PROMOTIONALTEXT', tt2.availability='$INSTOCK', tt2.universal_price_code='$UPC', tt2.class_id='$class_id', tt2.currency='$CURRENCY', tt2.pixel='$IMPRESSIONURL', tt2.attribute_4='$ISBN', tt2.last_updated=now(), tt2.isnewproduct=0 WHERE tt2.merchantid='$merchantid' AND tt2.sku_number = '$SKU'");
if (substr(mysql_info(),14,1) == 0) {
$this->query("INSERT INTO OURCOMPANY_products (productid, product_name, sku_number, primary_category, secondary_category, product_url, image_url, buy_url, long_description, sale_price, retail_price, begin_date, end_date, keywords, manufacturer_part, manufacturer_name, shipping_information, availability, universal_price_code, class_id, currency, pixel, attribute_4, merchantid, merchant_categoryid, isactive, last_updated, isnewproduct) VALUES ('$theid2', '$NAME', '$SKU', '$ADVERTISERCATEGORY', '$THIRDPARTYCATEGORY', '$BUYURL', '$IMAGEURL', '$BUYURL', '$DESCRIPTION', '$SALEPRICE', '$PRICE', '$STARTDATE', '$ENDDATE', '$KEYWORDS', '$MANUFACTURERID', '$MANUFACTURER', '$PROMOTIONALTEXT', '$INSTOCK', '$UPC', '$class_id', '$CURRENCY', '$IMPRESSIONURL', '$ISBN', '$merchantid', '$merchant_categoryid', '$isactive', now(), '$isnewproduct')");
if($inserted == 0){
$this->query("SELECT last_insert_id() AS theid FROM OURCOMPANY_products");
$this->next_record();
$theid2 = $this->f("theid");
$this->query("UPDATE OURCOMPANY_products SET productid=$theid2 WHERE OURCOMPANYproductid=$theid2");
$inserted = 1;
}
$theid2++;
}
}
}
return true; // true for insert
}
}
// create business layer instance
$BL_commissionjunction = new commissionjunctionDB("dbname","db-un","db-pass");
}
print "[FTP START]\n";
//---------------- FTP SECTION --------------------------
// SET FTP VARIABLES
$ftp_server = 'datatransfer.cj.com';
$ftp_user_name = 'ourcj-un';
$ftp_user_pass = 'ourcj-pw';
// SET OTHER VARIABLES
$product_dir = "/var/www/html/cron/commission_junction";
$logpath = "/var/www/html/cron/log/comissionjunction_log.txt";
// STARTING LOGGER
$logfile = fopen($logpath, "a");
fputs($logfile, "Beggining FTP ...".strftime('%c')."\n");
fclose($logfile);
// beginning ftp connection vmlog($logpath,"\nCHDIR: ".$sub_dir.""); ftp_chdir($conn_id, "outgoing/productcatalog/".$sub_dir) or die ("Can't change directory for ftp"); // getting the file in the current direcory vmlog($logpath,"[fetching]:".$file." "); if (substr_count($file, 'zip') > 0) { ftp_close($conn_id); vmlog($logpath,"\n[FTP END]"); vmlog($logpath,"\n[PURGE TXT FILES]"); exec("rm -f /var/www/html/cron/commission_junction/products/*.txt"); vmlog($logpath,"\n[PURGE DONE]"); vmlog($logpath,"\n[UNZIP START] ".strftime('%c').""); $r = exec("unzip ".$product_dir."/".$thefile." -d /var/www/html/cron/commission_junction/products"); //----------- END FTP SECTION ---------------------- $logpath = "/var/www/html/cron/log/comissionjunction_log.txt"; // getting all text files in directory vmlog($logpath,"\n[START IMPORT]:".$dir." "); while (false != ($filename = readdir($dh))) { // now we parse each file in the directory if ((substr_count($filename, '10387773')>0 ¦¦ substr_count($filename, '10387780')>0 ¦¦ substr_count($filename, '10387781')>0) ) $logfile = fopen($logpath, "a"); vmlog($logpath,"\n[PROCESS DONE].".strftime('%c')."\n"); ?> [1][edited by: phattymatty at 9:35 pm (utc) on Mar. 13, 2008]
$conn_id = ftp_connect($ftp_server) or die ("Can't connect to FTP Server : $ftp_server");
$login_result = ftp_login($conn_id, $ftp_user_name, $ftp_user_pass);
$sub_dir = $argv;
if ($sub_dir=='')
{
$sub_dir='16544';
}else{
$sub_dir=$argv[1];
};
$contents = ftp_rawlist($conn_id, "");
foreach ($contents as $file) {
$file_array = preg_split("/[\s]+/",$file,9);
$thefile = $file_array[8];
echo $thefile;
$logfile = fopen($logpath, "a");
fputs($logfile, "found file".$thefile.strftime('%c')."\n");
fclose($logfile);
}
}
$logfile = fopen($logpath, "a");
fputs($logfile, "...downloading...".$thefile.strftime('%c')."\n");
fclose($logfile);
ftp_get($conn_id, $product_dir."/".$thefile, $thefile, FTP_BINARY);
vmlog($logpath,"\n[UNZIP END]:".$r." ".strftime('%c')."");
$dir = "/var/www/html/cron/commission_junction/products";
$dh = opendir($dir);
$files1[] = $filename;
}
$file_count = 0;
foreach ($files1 as $filename) {
{
vmlog($logpath,"\n..............\n");
vmlog($logpath,"\n[SKIPPING]:".$filename." File Size Too Large - Between 150MB and 1.3GB\n");
vmlog($logpath,"\n..............\n");
}else{
if ($filename != "." && $filename != "..") {
$location = $dir."/".$filename;
vmlog($logpath,"\n[IMPORTING]:".$location."\n");
$trunc = "TRUNCATE TABLE commis_temp;";
$BL_commissionjunction->query($trunc) or die(mysql_error());
$testing = "LOAD DATA LOCAL INFILE '$location' INTO TABLE commis_temp FIELDS TERMINATED BY '¦' LINES TERMINATED BY '\\n' IGNORE 1 LINES;";
$BL_commissionjunction->query($testing) or die(mysql_error());
$mid = $BL_commissionjunction->get_source_siteid();
$logfile = fopen($logpath, "a");
fputs($logfile, "File ".$filename." opened, beginning to read products...".strftime('%c')."\n");
fclose($logfile);
$BL_commissionjunction->insert_OURCOMPANYproduct($mid);
$BL_commissionjunction->log_update($mid);
$trunc = "TRUNCATE TABLE commis_temp;";
$BL_commissionjunction->query($trunc) or die(mysql_error());
} // end if (file is not . or ..*/
fputs($logfile, "File ".$filename." opened, Ending insert/update of products...".strftime('%c')."\n\n");
fclose($logfile);
}
} //end for each file