Question

Given a seed string, I want to find its neighbors: the strings that differ from it in at most 2 positions. Only four digits are used to build the strings (i.e. 0, 1, 2, 3). Here is an example of what I mean:

# In this example, the 'first' column
# holds the neighbors that differ in only 1 position.
# The remaining columns differ in 2 positions.

Seed = 000
100 110 120 130 101 102 103
200 210 220 230 201 202 203
300 310 320 330 301 302 303
010 011 012  013
020 021 022  023
030 031 032  033
001  
002  
003

Seed = 001
101 111 121 131 100 102 103   
201 211 221 231 200 202 203      
301 311 321 331 300 302 303      
011 010 012 013
021 020 022 023
031 030 032 033               
000
003
002     

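Hence, given a tag of length L,
we will have 3*L + 9L(L-1)/2 neighbors: 3L that differ in one position, plus 9 for each of the L(L-1)/2 pairs of positions (for L = 3 that is 9 + 27 = 36).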

But why does this code of mine fail to generate them correctly, especially when the seed string is something other than "000"?

Other approaches are also welcome, especially ones that improve speed, since we will be processing millions of seed tags of length 34 to 36.

#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
using namespace std;

// Renders IntVal as decimal text (with a leading '-' for negative values) by
// streaming it into an in-memory output stream and returning what was written.
string ConvertInt2String(int IntVal) {
    std::ostringstream formatter;
    formatter << IntVal;
    return formatter.str();
}

// Concatenates the decimal text of every element of NTg, in order and with no
// separator (for example {0, 0, 1} gives "001"). An empty vector gives "".
//
// The vector is taken by const reference so a call does not copy it, and all
// elements are formatted through a single stream rather than a new stream
// being built for every element.
string Vec2Str (const vector <int>& NTg) {
    std::ostringstream joined;
    for (vector <int>::size_type i = 0; i < NTg.size(); i++) {
        joined << NTg[i];
    }
    return joined.str();
}

// Writes each element of arg to standard output in order, following every
// element (the last one included) with sep. sep defaults to the empty string,
// in which case the elements are written back to back.
template <typename T> void  prn_vec(const std::vector < T >&arg, string sep="")
{
    typedef typename std::vector < T >::const_iterator ElemIter;
    for (ElemIter cur = arg.begin(); cur != arg.end(); ++cur) {
        cout << *cur << sep;
    }
}

// Returns a copy of arg in which one position has been overwritten with baseNo.
//   arg    - the tag to copy; it is never modified, so it is taken by const
//            reference and const vectors and temporaries are accepted too.
//   posNo  - position to overwrite. It is reduced modulo arg.size(), so a
//            value past the end wraps around instead of indexing out of range.
//   baseNo - the value stored at that position.
// An empty arg has no position to overwrite and is returned as an empty copy;
// without this guard the modulo would divide by zero.
vector <int> neighbors(const vector<int>& arg, int posNo, int baseNo) {
    vector <int> transfVec = arg;

    if (transfVec.empty()) {
        return transfVec;
    }

    //modified according to strager's first post
    transfVec[posNo % arg.size()] = baseNo;

    return transfVec;
}


// Prints candidate neighbours of a hard-coded seed tag: every tag produced by
// substituting one position goes on its own line, followed by tab-indented
// lines produced by additionally substituting a later position.
// This is the listing the question asks about; its code is kept as posted.
int main () {

    vector <int> numTag;
    numTag.push_back(0);
    numTag.push_back(0);
    numTag.push_back(1); // If "000" this code works, but not 001 or others


    // Note that in actual practice numTag can be greater than 3

     int TagLen = static_cast<int>(numTag.size());

     for ( int p=0; p< TagLen  ; p++ ) {

         // First loop generates the tags that differ from the seed at position p only
         for ( int b=1; b<=3 ; b++ ) {

             // b only covers 1..3; when b equals the seed digit, 0 is used
             // instead, so the three values tried are always the three digits
             // other than the seed's own.
             int bval = b;
             if (numTag[p] == b) {
                 bval = 0;
             }

             vector <int> nbnumTag = neighbors(numTag, p, bval);
             string SnbnumTag = Vec2Str(nbnumTag);

             cout << SnbnumTag;
             cout << "\n";


             // Second loop generates tags that also differ at a later position l

             for (int l=p+1; l < TagLen; l++) {

                 for (int  c=1; c<=3; c++) {

                     int cval = c;

                     // NOTE(review): this assignment is a no-op, because cval
                     // already equals c. When nbnumTag[l] is non-zero, the
                     // iteration where c matches it writes the digit that is
                     // already there (printing nbnumTag again), and 0 is never
                     // tried at position l. Compare with `bval = 0` above.
                     if (nbnumTag[l] == c) {
                         cval = c;
                     }
                     vector <int> nbnumTag2 = neighbors(nbnumTag, l, cval);
                     string SnbnumTag2 = Vec2Str(nbnumTag2);

                     cout << "\t" << SnbnumTag2;
                     cout << "\n";

                 }
             }


         }
     }

    return 0;
}
Was it helpful?

Solution

Would this do it? It enumerates the tree of possible strings, pruning all with >2 differences from the original.

// Recursively enumerates every string over the alphabet '0'..'3' that differs
// from the original contents of s in one or two positions, and passes each one
// to print().
//   s     - NUL-terminated, writable buffer. It is edited in place during the
//           walk and every position is restored before the call returns.
//   i     - index of the position this call decides.
//   ndiff - number of positions before i that currently differ from the
//           original string.
// print() is not defined in this snippet; it is assumed to be supplied by the
// surrounding program and to output one string.
void walk(char* s, int i, int ndiff){
  if (ndiff > 2) return;            // prune: already more than two differences
  const char c = s[i];              // original character at this position
  if (c == '\0'){
    if (ndiff > 0) print(s);        // ndiff == 0 is the seed itself; skip it
    return;
  }
  const char alphabet[] = "0123";
  for (const char* a = alphabet; *a != '\0'; ++a){
    s[i] = *a;
    walk(s, i+1, (*a == c ? ndiff : ndiff+1));
  }
  s[i] = c;                         // restore the buffer for the caller
}

// Seed whose neighbours are enumerated. It is a writable array (not a string
// literal pointer) because walk() edits the buffer in place.
char seed[] = "000";

// Entry point: walks the seed from its first position with no differences yet.
// The return type is spelled out because C++ has no implicit int.
int main(){
  walk(seed, 0, 0);
  return 0;
}

OTHER TIPS

Here's one way to do it that should work for any number of characters and length of string:

// Prints every string that differs from `base` in exactly one or exactly two
// positions, using the characters listed in `values`.
string base = "000";
char values[] = {'0', '1', '2', '3' };

// Element count of `values`, computed with sizeof so that no non-standard
// countof helper is needed.
const int valueCount = static_cast<int>(sizeof(values) / sizeof(values[0]));
const int baseLen = static_cast<int>(base.length());

for (int i = 0; i < baseLen; ++i)
{
   for (int j = 0; j < valueCount; ++j)
   {
      if (base[i] != values[j])
      {
          // One difference: position i replaced.
          string oneDiff = base;
          oneDiff[i] = values[j];
          cout << oneDiff << '\n';

          // Two differences: also replace a later position k. Starting at i+1
          // means each pair of positions is visited only once.
          for (int k = i+1; k < baseLen; ++k)
          {
              for (int l = 0; l < valueCount; ++l)
              {
                   if (oneDiff[k] != values[l])
                   {
                       // The change must go into the fresh copy; writing it
                       // into oneDiff would print the unchanged string and
                       // corrupt oneDiff for the following iterations.
                       string twoDiff = oneDiff;
                       twoDiff[k] = values[l];
                       cout << twoDiff << '\n';
                   }
              }
          }
      }
   }
}

This should be equivalent to generating all the strings within a Hamming distance of 2 over a 4-symbol alphabet. I've seen algorithms for it, but I can't find them right now. Perhaps this can serve as a pointer in the right direction.

Your problem [EDIT: the original one (see previous revisions of question)] is that in your inner loop, you're only assigning the 'next' element. A quick fix is to wrap the write in neighbors:

// Returns a copy of arg in which one position has been overwritten with baseNo.
//   arg    - the tag to copy; it is never modified.
//   posNo  - position to overwrite. It is reduced modulo arg.size(), so a
//            value past the end wraps around instead of indexing out of range.
//   baseNo - the value stored at that position.
// An empty arg has no position to overwrite and is returned as an empty copy;
// without this guard the modulo would divide by zero.
vector <int> neighbors(const vector<int>& arg, int posNo, int baseNo) {
    vector <int> transfVec = arg;

    if (transfVec.empty()) {
        return transfVec;
    }

    transfVec[posNo % arg.size()] = baseNo;

    return transfVec;
}

This fix only works when you have two or three items in your array. If you want more, you need to rewrite your algorithm as it doesn't handle cases where the length is greater than three at all. (It shouldn't need to, even. The algorithm you use is just too restrictive.)

These two if's:

 // Outer-loop check quoted from the question: when the candidate b equals the
 // seed digit, 0 is substituted for it.
 if (numTag[p] == b) {
     bval = 0;
 }

 // Inner-loop check quoted from the question: assigns cval the value c.
 if (nbnumTag[l] == c) {
     cval = c;
 }

Should instead have bodies of continue.


These two loops should start at 0:

// Loop headers as written in the question: candidates 1..3 only.
for ( int b=1; b<=3 ; b++ ) {
for (int  c=1; c<=3; c++) {

// i.e. start both candidate loops at 0 so that all of 0..3 is visited:

for ( int b=0; b<=3 ; b++ ) {
for (int  c=0; c<=3; c++) {

It looks like strager has identified the main problem: the loop conditions. Your alphabet is 0,1,2,3, so you should loop over that whole range. 0 is not a special case, as your code tries to treat it. The special case is to skip the iteration when the alphabet value equals the value in your key, which is what the continue suggested by strager accomplishes.

Below is my version of your algorithm. It has some alternative ideas for loop structures, and it avoids copying the key by modifying it in place. Note that you can also change the size of the alphabet by changing the MIN_VALUE and MAX_VALUE constants.

Here's the output for the "001" case:

101 111 121 131 102 103 100
201 211 221 231 202 203 200
301 311 321 331 302 303 300
011 012 013 010
021 022 023 020
031 032 033 030
002
003
000

And here's the code:

#include <iostream>
#include <vector>
#include <string>
#include <sstream>

using namespace std;

// Inclusive bounds of the digit alphabet. increment() wraps from MAX_VALUE
// back to MIN_VALUE.
const int MIN_VALUE = 0;
const int MAX_VALUE = 3;

// Advances ch in place to the next value and returns the new value.
// A ch equal to MAX_VALUE becomes MIN_VALUE; any other ch is increased by one.
int increment(int& ch)
{
    ch = (ch == MAX_VALUE) ? MIN_VALUE : ch + 1;
    return ch;
}

// Returns the elements of key written one after another as decimal text, with
// no separator (for example {0, 0, 1} gives "001"). An empty key gives "".
string stringKey(const vector<int>& key)
{
    ostringstream sout;
    // The index uses the vector's own unsigned size type, so it is not compared
    // against key.size() as a signed int.
    for (vector<int>::size_type i = 0; i < key.size(); ++i)
        sout << key[i];
    return sout.str();
}

// Prints the neighbours of the key 0,0,1. Each output line starts with a key
// that differs from the seed at one position, followed (space separated) by
// the keys that additionally differ at one later position.
// The key is modified in place: increment() cycles a digit through the
// alphabet, and a cycle ends when the digit is back at its starting value, so
// every position is restored before the next one is touched.
int main()
{
    vector<int> key;
    key.push_back(0);
    key.push_back(0);
    key.push_back(1);

    typedef vector<int>::size_type Pos;

    for (Pos first = 0; first < key.size(); ++first)
    {
        const int firstSeed = key[first];
        for (increment(key[first]); key[first] != firstSeed; increment(key[first]))
        {
            cout << stringKey(key);
            for (Pos second = first + 1; second < key.size(); ++second)
            {
                const int secondSeed = key[second];
                for (increment(key[second]); key[second] != secondSeed; increment(key[second]))
                {
                    cout << " " << stringKey(key);
                }
            }
            cout << endl;
        }
    }
}

I've tried to correct your algorithm, staying as close as possible to the original one:

 // Corrected version of the question's loops: both candidate loops cover all
 // four digits, and a candidate equal to the digit already in place is skipped.
 int TagLen = static_cast<int>(numTag.size());

 for ( int p=0; p< TagLen  ; p++ ) {
     // First loop generates the tags that differ from the seed at position p only
     for ( int b=0; b<=3 ; b++ ) { // Loop over all 4 elements

         int bval = b;
         if (numTag[p] == b) {
             continue; // This would reproduce the seed vector, ignore it
         }

         vector <int> nbnumTag = neighbors(numTag, p, bval);
         string SnbnumTag = Vec2Str(nbnumTag);

         cout << SnbnumTag;
         cout << "\n";

         // Second loop generates tags that also differ at a later position l
         for (int l=p+1; l < TagLen; l++) {

             for (int  c=0; c<=3; c++) { // Loop over all 4 elements

                 int cval = c;

                 if (nbnumTag[l] == c) {
                     continue; // This would reproduce nbnumTag, ignore it
                 }
                 vector <int> nbnumTag2 = neighbors(nbnumTag, l, cval);
                 string SnbnumTag2 = Vec2Str(nbnumTag2);

                 cout << "\t" << SnbnumTag2;
                 cout << "\n";
             }
         }
     }
 }

The problem is that you don't iterate over all 4 possible values (0,1,2,3); you skip 0 for some reason. What I do instead is iterate over all of them and ignore (by using a continue) any vector that is the same as the seed, or the same as the 1-position-different tag computed in phase 1.

Having said that, I believe that better algorithms than yours are proposed and it would be better to consider one of them.

Here's my ugly, hacky solution:

#include <iostream>
#include <vector>

using std::cout;
using std::endl;
using std::vector;

// Three digits, one of which is fixed at 0.
struct tri
{
    // a selects which slot (0, 1 or 2) stays 0; b and c fill the other two
    // slots in ascending slot order. Every slot is zeroed first, so an `a`
    // outside 0..2 gives 0,0,0 rather than leaving m uninitialised.
    tri(int a, int b, int c)
    {
        m[0] = 0;
        m[1] = 0;
        m[2] = 0;
        switch (a)
        {
            case 0:
                m[1] = b;
                m[2] = c;
                break;
            case 1:
                m[0] = b;
                m[2] = c;
                break;
            case 2:
                m[0] = b;
                m[1] = c;
                break;
            default:
                break;
        }
    }
    int m[3];
};

int main()
{
    vector<tri> v;
    for (int i = 0; i < 3; i++)
        for (int j = 0; j < 4; j++)
            for (int k = 0; k < 4; k++)
            {
                v.push_back(tri(i,j,k));
            }

    vector<tri>::iterator it;
    for (it = v.begin(); it != v.end(); ++it)
    {
        cout << (*it).m[0];
        cout << (*it).m[1];
        cout << (*it).m[2];
        cout << endl;
    }
}
Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top