Question

I'm trying to write a lot of x/y double points to a file.

The following function is the fastest solution I have come up with so far.

Are there any other ways to speed up the process?

Writing to the stringstream first and then opening the file gave a nice speed boost.

// Writes every point as an "x y" text line to the file named by `file`.
//
// All lines are first formatted into an in-memory stringstream; the file is
// opened only afterwards and receives the whole text in a single insertion.
//
// Returns false when `points` is empty or the file cannot be opened.
// Returns true otherwise, including when `file` is empty (nothing is written).
bool printPoints(const std::vector<std::pair<double, double> > &points, const std::string &file)
{
    if (points.empty())
        return false;

    // An empty file name means "nothing to write"; this still counts as success.
    if (file.empty())
        return true;

    std::stringstream ss;
    // Note the `::` before const_iterator: without it the declaration does not compile.
    for (std::vector<std::pair<double, double> >::const_iterator i = points.begin();
         i != points.end(); ++i)
    {
        ss << i->first << " " << i->second << "\n";
    }

    std::ofstream out(file.c_str());
    if (out.fail())
        return false;

    out << ss.str();
    out.close();
    return true;
}
Was it helpful?

Solution

I tested this. Writing to a stringstream buys you next to nothing. Using FILE * instead of fstream does give a reasonable improvement.

Here's my test-code:

#include <vector>
#include <utility>
#include <fstream>
#include <iostream>
#include <sstream>
#include <cstdio>

using namespace std;

// Benchmark variant 1: format every point as an "x y" line into an in-memory
// stringstream, then open the file and write the buffered text in one insertion.
//
// Returns false when `points` is empty or the file cannot be opened.
// Returns true otherwise, including when `file` is empty (nothing is written).
bool printPoints(const std::vector<std::pair<double, double> > &points, const std::string &file)
{
    typedef std::vector<std::pair<double, double> >::const_iterator PointIter;

    if (points.empty())
        return false;
    if (file == "")
        return true;

    // Build the complete output text before touching the file system.
    std::stringstream buffer;
    PointIter it = points.begin();
    const PointIter last = points.end();
    while (it != last)
    {
        buffer << it->first << " " << it->second << "\n";
        ++it;
    }

    std::ofstream out(file.c_str());
    if (!out)
        return false;

    out << buffer.str();
    out.close();
    return true;
}

// Benchmark variant 2: open the file first and stream each point straight into
// the ofstream as an "x y" line, with no intermediate buffer.
//
// Returns false when `points` is empty or the file cannot be opened.
// Returns true otherwise, including when `file` is empty (nothing is written).
bool printPoints2(const std::vector<std::pair<double, double> > &points, const std::string &file)
{
    typedef std::vector<std::pair<double, double> >::const_iterator PointIter;

    if (points.empty())
        return false;
    if (file == "")
        return true;

    std::ofstream out(file.c_str());
    if (!out)
        return false;

    const PointIter last = points.end();
    for (PointIter it = points.begin(); it != last; ++it)
        out << it->first << " " << it->second << "\n";

    out.close();
    return true;
}


// Benchmark variant 3: write each point as an "x y" line through C stdio
// (FILE* / fprintf) instead of iostreams.
//
// The format is "%g %g\n": the trailing newline separates the records, and %g
// matches the default formatting of operator<< for double, so this variant
// writes the same text as the stream-based ones.
//
// Returns false when `points` is empty, the file cannot be opened, or a write
// or the final close fails.
// Returns true otherwise, including when `file` is empty (nothing is written).
bool printPoints3(const std::vector<std::pair<double, double> > &points, const std::string &file)
{
    if (points.empty())
        return false;

    if (file.empty())
        return true;

    std::FILE *out = std::fopen(file.c_str(), "w");
    if (!out)
        return false;

    bool ok = true;
    for (std::vector<std::pair<double, double> >::const_iterator i = points.begin();
         i != points.end(); ++i)
    {
        // fprintf returns a negative value on an output error.
        if (std::fprintf(out, "%g %g\n", i->first, i->second) < 0)
        {
            ok = false;
            break;
        }
    }

    // Always close the handle; buffered data is flushed here, so a failing
    // fclose also means the file content is incomplete.
    if (std::fclose(out) != 0)
        ok = false;

    return ok;
}

// Executes the x86 `rdtsc` instruction and returns its 64-bit result.
// The instruction delivers the value as two 32-bit halves (low half via the
// "a" register constraint, high half via "d"), which are recombined here.
// GCC-style inline assembly; only builds for x86 targets.
static __inline__ unsigned long long rdtsc(void)
{
    unsigned low, high;
    __asm__ __volatile__ ("rdtsc" : "=a"(low), "=d"(high));
    const unsigned long long upper = static_cast<unsigned long long>(high) << 32;
    return static_cast<unsigned long long>(low) | upper;
}

// Benchmark driver: builds 10,000,000 points (i, 1/i), then times each of the
// three writer variants with rdtsc() and prints the elapsed tick count.
int main()
{
    const int kPointCount = 10000000;

    std::vector<std::pair<double, double> > v;
    // Allocate once up front instead of regrowing repeatedly during the fill.
    v.reserve(kPointCount);
    for (int i = 1; i <= kPointCount; i++)
    {
        // Let make_pair deduce its types: explicit template arguments are
        // unnecessary and stop compiling for lvalue arguments from C++11 on.
        v.push_back(std::make_pair(static_cast<double>(i), 1.0 / i));
    }

    unsigned long long t1, t2;

    // endl is kept on purpose: each result is flushed before the next
    // (long-running) measurement starts.
    t1 = rdtsc();
    printPoints(v, "points.txt");
    t2 = rdtsc();
    std::cout << "time = " << t2 - t1 << std::endl;

    t1 = rdtsc();
    printPoints2(v, "points2.txt");
    t2 = rdtsc();
    std::cout << "time = " << t2 - t1 << std::endl;

    t1 = rdtsc();
    printPoints3(v, "points3.txt");
    t2 = rdtsc();
    std::cout << "time = " << t2 - t1 << std::endl;
}

Results:
time = 55363637480
time = 54413392112
time = 33069402767

Obviously, the results may vary depending on the processor type, memory type, hard disk system (or network drive storage), etc, etc. But I've tested this in the past, and found similar results.

OTHER TIPS

Serializing the coordinates can be parallelized: divide the input across several threads, let each thread serialize its share, then join the results in order and write the combined output to the file. This can speed up the process.

You can gain some speed by writing all the coordinates at once in binary format:

ofstream out(file.c_str(),std::ios_base::binary);
out.write(reinterpret_cast<const char*>(points.begin()),sizeof(double)*2*points.size());

This may not work if the points are not stored contiguously in memory (the elements of a vector are). In that case, first copy them into a single vector of double (x, y, x, y, ...) and then write that vector to disk.

Would you consider a memory-mapped file? Just copy the necessary data (in its serialized representation, of course) to the memory area returned by the in-memory file mapping, then close the mapping. In my experience, this is quite a fast way to transfer large amounts of data from STL structures to a file.

typedef std::pair<double, double> PDD;

// Writes every point as an "x y" line to the file named by `file`.
//
// The points are written with a plain loop. Declaring an operator<< for
// std::pair<double, double> inside namespace std (so that ostream_iterator
// could find it) is undefined behaviour, because user code may not add new
// overloads to namespace std.
//
// Returns false when `points` is empty, `file` is empty, the file cannot be
// opened, or writing fails; returns true once all points have been written.
bool
PrintPoints(const std::vector<PDD>& points, const std::string& file)
{
  if (points.empty() || file.empty())
    return false;

  std::ofstream fout(file.c_str());
  if (!fout)
    return false;

  for (std::vector<PDD>::const_iterator it = points.begin(); it != points.end(); ++it)
    fout << it->first << ' ' << it->second << "\n";

  // Flush so that a failed write is reflected in the stream state we report.
  fout.flush();
  return !fout.fail();
}

The ofstream destructor closes the file, so no explicit close() call is needed.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top