1

Can anyone give me hint how to develop the suffix array part? I know the concept; LCP array design but I am not getting how to implement it in C? Can anyone please help? I know the uses, algorithm of the suffix array as I have read a lot on it. I want the implementation hint of the part in which I have to sort the suffixes of a string.

For example, if the string is given as 'banana', then:

Data structure should be like this:($ -> mnemonic)

banana
anana
nana
ana
na
a
$

Then, after keeping it, I need to sort it, which means the lowest substring should be at the top most point. So how to do that? Strings can be of large length. How to do this thing? Can you please give hint or link? I have tried and now thinking from your help.

1
  • 1
    Consider providing your attempt at solving the problem in a code block, and the context of your question. Commented Sep 27, 2012 at 19:50

2 Answers 2

1

You may want to have a look at this article

At the end of the article, you will find this implementation of the suffix tree:

NOTE: the following code is C++, however if you substitute the new[] and delete[] operators with C like heap allocation you could reuse it very easily.

inline bool leq(int a1, int a2,   int b1, int b2) { // lexic. order for pairs
  return(a1 < b1 || a1 == b1 && a2 <= b2); 
}                                                   // and triples
inline bool leq(int a1, int a2, int a3,   int b1, int b2, int b3) {
  return(a1 < b1 || a1 == b1 && leq(a2,a3, b2,b3)); 
}
// stably sort a[0..n-1] to b[0..n-1] with keys in 0..K from r
static void radixPass(int* a, int* b, int* r, int n, int K) 
{ // count occurrences
  int* c = new int[K + 1];                          // counter array
  for (int i = 0;  i <= K;  i++) c[i] = 0;         // reset counters
  for (int i = 0;  i < n;  i++) c[r[a[i]]]++;    // count occurences
  for (int i = 0, sum = 0;  i <= K;  i++) { // exclusive prefix sums
     int t = c[i];  c[i] = sum;  sum += t;
  }
  for (int i = 0;  i < n;  i++) b[c[r[a[i]]]++] = a[i];      // sort
  delete [] c;
}

// find the suffix array SA of s[0..n-1] in {1..K}^n
// require s[n]=s[n+1]=s[n+2]=0, n>=2
void suffixArray(int* s, int* SA, int n, int K) {
  int n0=(n+2)/3, n1=(n+1)/3, n2=n/3, n02=n0+n2; 
  int* s12  = new int[n02 + 3];  s12[n02]= s12[n02+1]= s12[n02+2]=0; 
  int* SA12 = new int[n02 + 3]; SA12[n02]=SA12[n02+1]=SA12[n02+2]=0;
  int* s0   = new int[n0];
  int* SA0  = new int[n0];

  // generate positions of mod 1 and mod  2 suffixes
  // the "+(n0-n1)" adds a dummy mod 1 suffix if n%3 == 1
  for (int i=0, j=0;  i < n+(n0-n1);  i++) if (i%3 != 0) s12[j++] = i;

  // lsb radix sort the mod 1 and mod 2 triples
  radixPass(s12 , SA12, s+2, n02, K);
  radixPass(SA12, s12 , s+1, n02, K);  
  radixPass(s12 , SA12, s  , n02, K);

  // find lexicographic names of triples
  int name = 0, c0 = -1, c1 = -1, c2 = -1;
  for (int i = 0;  i < n02;  i++) {
    if (s[SA12[i]] != c0 || s[SA12[i]+1] != c1 || s[SA12[i]+2] != c2) { 
      name++;  c0 = s[SA12[i]];  c1 = s[SA12[i]+1];  c2 = s[SA12[i]+2];
    }
    if (SA12[i] % 3 == 1) { s12[SA12[i]/3]      = name; } // left half
    else                  { s12[SA12[i]/3 + n0] = name; } // right half
  }

  // recurse if names are not yet unique
  if (name < n02) {
    suffixArray(s12, SA12, n02, name);
    // store unique names in s12 using the suffix array 
    for (int i = 0;  i < n02;  i++) s12[SA12[i]] = i + 1;
  } else // generate the suffix array of s12 directly
    for (int i = 0;  i < n02;  i++) SA12[s12[i] - 1] = i; 

  // stably sort the mod 0 suffixes from SA12 by their first character
  for (int i=0, j=0;  i < n02;  i++) if (SA12[i] < n0) s0[j++] = 3*SA12[i];
  radixPass(s0, SA0, s, n0, K);

  // merge sorted SA0 suffixes and sorted SA12 suffixes
  for (int p=0,  t=n0-n1,  k=0;  k < n;  k++) {
#define GetI() (SA12[t] < n0 ? SA12[t] * 3 + 1 : (SA12[t] - n0) * 3 + 2)
    int i = GetI(); // pos of current offset 12 suffix
    int j = SA0[p]; // pos of current offset 0  suffix
    if (SA12[t] < n0 ? 
        leq(s[i],       s12[SA12[t] + n0], s[j],       s12[j/3]) :
        leq(s[i],s[i+1],s12[SA12[t]-n0+1], s[j],s[j+1],s12[j/3+n0]))
    { // suffix from SA12 is smaller
      SA[k] = i;  t++;
      if (t == n02) { // done --- only SA0 suffixes left
        for (k++;  p < n0;  p++, k++) SA[k] = SA0[p];
      }
    } else { 
      SA[k] = j;  p++; 
      if (p == n0)  { // done --- only SA12 suffixes left
        for (k++;  t < n02;  t++, k++) SA[k] = GetI(); 
      }
    }  
  } 
  delete [] s12; delete [] SA12; delete [] SA0; delete [] s0; 
}
Sign up to request clarification or add additional context in comments.

Comments

0

This might help you.

http://code.google.com/p/code-share/source/browse/trunk/cpp/algo/suffix_array/SuffixArray.h

#ifndef _SUFFIX_ARRAY_H
#define _SUFFIX_ARRAY_H

#include<algorithm>
#include<cstring>
#include <stdexcept>

using namespace std;
template<class T>
struct comp_func
{
    bool operator()(const T l,const T r)
    {
        return strcmp(l,r) < 0;

    }
};
template<class T =char>
class SuffixArray
{
    int len_;
    T **data_;

    public:
    T *operator[](int i)
    {
        if(i<0 || i>len_)
            throw std::out_of_range("Out of range error\n");
        return data_[i];
    }
    SuffixArray(T *str):len_(strlen(str)),data_(new T*[len_])
    {
        //len_ = strlen(str);
        //data_= new T*[len];
        for(int i =0;i<len_;++i)
        {
            data_[i] = &str[i];
            cout << data_[i] << endl;
        }
        std::sort(&data_[0],&data_[len_],comp_func<T *>());
    }
    void Print()
    {
        for(int i =0;i<len_;++i)
        {
            cout << data_[i] << endl;
        }

    }
};


#endif

1 Comment

Welcome to Stack Overflow. Please note that the question is tagged C but your answer is strictly C++. You should at least note that your answer is in C++ and not C. However, generally, you should answer C questions with C code; other people might not be as tolerant of the mistake, even though you're a newcomer here. Please do take the time to read the FAQ. And welcome to SO, once more.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.