Question

I have a text file that looks like this:

./4F6cFLnAAFc.png
./4Flj9plmKGQ.png
./4fQeHtSdw80.png
./4I9iibPLdBw.png
./4J-dRA3MGc8.png
./4mdEsouIXGM.png
./4-_-wQc3EGE.png
./595Tiga1gIg.png
./60u_ctp6UIw.png
./64bH_27Ehoc.png
./64_ZfIemRGM.png
./66pQLO--Ghk.png
./69rDtSpshAw.png
./69-YUSazuic.png

I'd like to replace the . at the beginning of each line with 'myString' using Node.js, so that each line ends up looking like: myString/4F6cFLnAAFc.png

I am trying to use a regular expression but I suspect it can be done using read and write streams.

// Attempt from the question: read filelist.txt and replace the leading "./"
// of its lines with 'myString'. As written it has no visible effect.
var fs = require('fs');

fs.readFile('filelist.txt', 'utf-8', function(err, data){
    if (err) throw err;

    console.log(typeof(data)); // string

    // String.prototype.replace returns a NEW string; the result is discarded
    // here, so `data` is unchanged and nothing is written back to disk.
    // Without the `g` flag only the first match would be replaced, and because
    // the pattern also consumes the slash(es) after the dot, that match would
    // become 'myString4F6c...' rather than 'myString/4F6c...'.
    data.replace(/\.\/+/, 'myString');

});

What regex will work or what's the best way to edit this file using node?

Was it helpful?

Solution

Here are two options for Sync or Async depending on what you want.

var fs = require('fs');

/**
 * Asynchronously reads filelist.txt, replaces the dot that starts each line
 * with 'myString', and writes the result to filelistAsync.txt.
 *
 * Returns immediately; the work happens in the fs callbacks. A read or write
 * failure is thrown from inside the corresponding callback.
 */
function readWriteAsync() {
  // Final step: report success once the output file has been written.
  const onWritten = (writeError) => {
    if (writeError) {
      throw writeError;
    }
    console.log('filelistAsync complete');
  };

  // Middle step: transform the file contents and hand them to the writer.
  const onRead = (readError, contents) => {
    if (readError) {
      throw readError;
    }
    // `m` makes ^ match at every line start; `g` replaces on all lines.
    const rewritten = contents.replace(/^\./gim, 'myString');
    fs.writeFile('filelistAsync.txt', rewritten, 'utf-8', onWritten);
  };

  fs.readFile('filelist.txt', 'utf-8', onRead);
}

/**
 * Synchronously reads filelist.txt, replaces the dot that starts each line
 * with 'myString', and writes the result to filelistSync.txt.
 *
 * Blocks until both the read and the write have finished; any fs error
 * propagates to the caller as a thrown exception.
 */
function readWriteSync() {
  // `m` makes ^ match at every line start; `g` replaces on all lines.
  const rewritten = fs
    .readFileSync('filelist.txt', 'utf-8')
    .replace(/^\./gim, 'myString');

  fs.writeFileSync('filelistSync.txt', rewritten, 'utf-8');
  console.log('readFileSync complete');
}

// Run both variants. readWriteAsync only starts its I/O and returns; its
// callbacks cannot fire until readWriteSync has finished its blocking work.
readWriteAsync();
readWriteSync();

OTHER TIPS

This regex should work: /^\.(.+)/gm

It performs a global, multi-line match (/gm) for all lines that begin with a period (^\.), captures whatever follows the period ((.+)), and replaces each matched line with 'myString' concatenated with the captured value ($1).

// Rewrites filelist.txt in place: on every line that begins with a period,
// that period is replaced by 'myString' (e.g. './a.png' -> 'myString/a.png').
const fs = require('fs');
// Relative path, resolved against the current working directory. A leading
// slash would point at the filesystem root instead.
const fileList = 'filelist.txt';

fs.readFile(fileList, 'utf-8', function (readErr, data) {
    if (readErr) throw readErr;
    // ^\. anchors on each line start (m flag); (.+) captures the rest of the
    // line so it can be re-emitted as $1 after the new prefix.
    const updated = data.replace(/^\.(.+)/gm, 'myString$1');
    fs.writeFile(fileList, updated, function (writeErr) {
        // Surface write failures instead of silently dropping them.
        if (writeErr) throw writeErr;
        console.log('Data replaced \n', updated);
    });
});

If you need to update or edit lines in a really big file, in my experience the best way is:

  • Use the readline module from Node.js; it won't cause any memory issues
  • For each line you read, you can use fs.appendFileSync to write data in another file

Code example:

// Streams ./old-data.csv line by line and appends the (possibly transformed)
// lines to new-data.csv, so the whole input is never held in memory at once.
const fs = require('fs');
const readline = require('readline');

const rd = readline.createInterface({
    input: fs.createReadStream('./old-data.csv'),
    // No `output` stream is attached: lines are only consumed through the
    // 'line' event, and attaching process.stdout could echo the input when
    // stdout is a TTY. (`console` is not a readline option.)
    // crlfDelay: Infinity makes "\r\n" always count as a single line break.
    crlfDelay: Infinity
});

// Matches rows containing at least eight commas. For a row with exactly nine
// fields, capture 1 is the first field and capture 2 is the fifth.
const regex_pattern = /(.*),.*,.*,.*,(.*),.*,.*,.*,.*/
rd.on('line', function(line) {
    // exec() both tests the line and exposes the capture groups.
    const match = regex_pattern.exec(line);
    if (match) {
      // Placeholder processing: replace these two assignments with whatever
      // strings you actually want to emit for a matching row.
      const my_string_1 = match[1];
      const my_string_2 = match[2];
      fs.appendFileSync('new-data.csv', `\n${my_string_1}`, 'utf-8')
      fs.appendFileSync('new-data.csv', `\n${my_string_2}`, 'utf-8')
    } else { // otherwise, copy the line to the new file unchanged
      fs.appendFileSync('new-data.csv', `\n${line}`, 'utf-8')
    }
});

In my case, when editing a file with more than 1 million rows, it only consumed around 10-12% CPU and very little memory.

enter image description here

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top