Question

I have to parse 3 remote XML files for my shop (products, prices, categories); the largest of them is 500MB+. I need to parse them and insert the data into a MySQL database.

I can choose from 2 formats

  1. all 3 XML files compressed in one .tar.gz archive
  2. each one separately, simple .xml

so, basically I have 2 options (I think)

  1. parse the XML "on the go" while streaming
  2. download the compressed XML and then parse it

I have trouble with both.

  1. parse the XML "on the go" while streaming

        // Streams the remote XML with XMLReader and inserts one `products` row per <item> element.
        $url = "http://example.xml";
        $reader = new XMLReader();
        // open() returns false when the source cannot be read; stop here instead of looping on a dead reader.
        if (!$reader->open($url)) {
            throw new RuntimeException('Unable to open XML source: ' . $url);
        }

        // Yii framework's mysql query.
        // Values are bound as parameters instead of being concatenated into the SQL text, so quotes or
        // other special characters in the XML data can neither break the statement nor inject SQL.
        $insert = Yii::app()->db->createCommand(
            'INSERT INTO `products` (id, name, parent_id, parent_name, brand, image)
             VALUES (:id, :name, :parent_id, :parent_name, :brand, :image)'
        );

        while ($reader->read()) {
            if ($reader->nodeType != XMLReader::ELEMENT || $reader->localName != 'item') {
                continue;
            }

            // Start from known keys so an <item> that lacks a child binds NULL instead of
            // triggering an undefined-index notice.
            $item = array(
                'name'        => null,
                'parent_id'   => null,
                'parent_name' => null,
                'brand'       => null,
                'img'         => null,
            );
            $item['id'] = $reader->getAttribute('id');

            // A self-closing <item/> has no END_ELEMENT node; scanning for one would swallow
            // the items that follow it.
            if (!$reader->isEmptyElement) {
                while ($reader->read()) {
                    if ($reader->nodeType == XMLReader::ELEMENT) {
                        $name = strtolower($reader->localName);
                        if ($reader->isEmptyElement) {
                            // <brand/> has no text node to step onto.
                            $item[$name] = '';
                            continue;
                        }
                        // Step onto the child's first inner node and take its value as the field value.
                        $reader->read();
                        $item[$name] = $reader->value;
                    }
                    if ($reader->nodeType == XMLReader::END_ELEMENT && $reader->localName == 'item') {
                        break;
                    }
                }
            }

            $insert->execute(array(
                ':id'          => $item['id'],
                ':name'        => $item['name'],
                ':parent_id'   => $item['parent_id'],
                ':parent_name' => $item['parent_name'],
                ':brand'       => $item['brand'],
                ':image'       => $item['img'],
            ));
        }

        $reader->close();

This code works fine without the MySQL queries, but throws all kinds of errors once I add them, for example:

parser error : Extra content at the end of the document

  2. Download and then parse

Say I want to parse products.xml inside myxml.tar.gz. Is that even possible?

    // compress.zlib:// only removes the gzip layer, so the reader would be handed the raw tar
    // stream instead of XML. The phar:// wrapper understands tar.gz archives and can address
    // a single member (here products.xml) inside the archive.
    $url = "phar:///myxml.tar.gz/products.xml";
    $reader = new XMLReader();
    // open() returns false when the archive or the member cannot be read.
    if (!$reader->open($url)) {
        throw new RuntimeException('Unable to open XML source: ' . $url);
    }
    $reader->read();

It reports that the file is empty.

Was it helpful?

Solution

I do something similar.

I have to download a zip archive from a web service that contains 3 large compressed XML files. On init, I set the following:

ini_set('max_execution_time', 1000); // let the script run for up to 1000 seconds

ini_set('mysql.connect_timeout', 1000); // seconds to wait when connecting to MySQL

ini_set('default_socket_timeout', 1000); // seconds before socket-based streams time out

I download the zip file to temp folder:

  /**
   * Writes the downloaded archive contents to a dated folder on disk.
   *
   * The target is <tmpPath><Y_m>/<d>/products_<Y_m_d_H_i>.zip; the dated folder is
   * created on demand. $this->tmpPath is concatenated as-is, so it presumably already
   * ends with a directory separator (confirm where it is assigned).
   *
   * @param string $string Contents to write into the zip file
   * @return string Path to the written file
   * @throws RuntimeException If the folder cannot be created or the file cannot be written
   */
  private function write_to_file($string)
  {
      $base = $this->tmpPath;
      $date_folder = $base.date('Y_m').DIRECTORY_SEPARATOR.date('d');

      // is_dir() is checked again after mkdir() so that a concurrent run which created
      // the folder first is not reported as a failure.
      if (!is_dir($date_folder) && !mkdir($date_folder, 0777, TRUE) && !is_dir($date_folder)) {
          throw new RuntimeException('Unable to create folder: '.$date_folder);
      }

      $file = $date_folder.DIRECTORY_SEPARATOR.'products_'.date("Y_m_d_H_i").'.zip';

      $f = fopen($file, 'w');
      if ($f === false) {
          throw new RuntimeException('Unable to open file for writing: '.$file);
      }

      $written = fwrite($f, $string);
      fclose($f);

      // Without this check a failed write would still hand the caller a path to a
      // missing or truncated archive.
      if ($written === false) {
          throw new RuntimeException('Unable to write file: '.$file);
      }

      return $file;
  }

After this, I extract the XML files from the zip into the temp folder:

    /**
     * Extracts every entry of a zip archive into $this->tmpPath and logs the outcome.
     *
     * @param string $file Path of the zip archive to extract
     * @return bool TRUE when the archive was opened and extracted, FALSE otherwise
     */
    public function dezarhiveaza($file)
    {
        $zip = new ZipArchive;
        $res = $zip->open($file);
        if ($res !== TRUE) {
            // On failure open() yields a ZipArchive::ER_* code; keep it in the log entry.
            $this->write_log('error '.$res);
            return false;
        }

        // extractTo() reports failure through its return value, so success must not be
        // logged before checking it.
        $extracted = $zip->extractTo($this->tmpPath);
        $zip->close();

        if (!$extracted) {
            $this->write_log('error extract failed');
            return false;
        }

        $this->write_log('extract success');
        return true;
    }

Next, I extract the list of products from the XML and build MySQL INSERT queries of 1000 rows each:

/**
 * Loads <tmpPath>data/forme.xml and inserts every <Table> node into the `forme`
 * table, sending the rows to runQuery() in multi-row INSERT statements of at most
 * 1000 rows each.
 *
 * @return void
 * @throws RuntimeException If the XML file cannot be loaded or parsed
 */
private function deserializeazaForme()
{
    $formePath = $this->tmpPath."data".DIRECTORY_SEPARATOR.'forme.xml';

    $xml = simplexml_load_file($formePath);
    if ($xml === false) {
        // Without this check the xpath() call below would fail on a boolean.
        throw new RuntimeException('Unable to load XML file: '.$formePath);
    }

    $forme = $xml->xpath('//Table');
    if (!$forme) {
        return;
    }

    $insertPrefix = "INSERT INTO `forme` (`id`, `denumire`) VALUES ";
    $batchSize = 1000;
    $rows = array();

    foreach ($forme as $forma) {
        // The id is forced to an integer and the text value is escaped so that a quote in
        // the data cannot break the statement. NOTE(review): if runQuery() can take bound
        // parameters or the driver offers its own escape function, prefer that.
        $rows[] = "(".(int) $forma->id.",'".addslashes((string) $forma->denumire)."')";

        if (count($rows) === $batchSize) {
            $this->runQuery($insertPrefix.implode(",", $rows).";");
            $rows = array();
        }
    }

    // Only send the remainder when there is one; an INSERT without any VALUES tuple is invalid SQL.
    if ($rows) {
        $this->runQuery($insertPrefix.implode(",", $rows).";");
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top