2

So I've been working on a project for school for sometime, and I've run up against a wall. My add_node function isn't working correctly, and I know why. What I'm trying to do is take in a file with multiple randomly generated letters, and create trees out of them, then make a confirmation.

The thing is that it overwrites the same node, instead of making multiple nodes. I figured this out using Visual studios debugger, but I have no idea what to implement to fix it. What happens is that instead of having multiple nodes create a tree (like gagtttca), it makes one node and overwrites it. The node becomes g, then a, etc. How would I go about adding more nodes to the tree without overwriting it? The add_node function is the very last one.

#include "stdafx.h"
#include <iostream>
#include <stack>
#include <fstream>
#include <vector>
#include <cstring>
#include <string>

using namespace std;

class myTreeNode
{
public:
    char Data;
    myTreeNode *childA;   //A's always go in child1
    myTreeNode *childT;   //T's always go in child2
    myTreeNode *childC;   //c's always go in child3
    myTreeNode *childG;   //G's always go in child4
};

class Tree
{
public:
    myTreeNode * Root;
    Tree()
    {
        Root = new myTreeNode;
        Root->Data = '-';
        Root->childA = Root->childC = Root->childG = Root->childT = NULL;
    }
    bool add_a_word(string word);
    bool is_this_word_in_the_tree(string word);
    bool add_node(myTreeNode * parent, char letter);
    bool add_words(vector<string> w);
};

bool get_words_from_the_file(char * my_file_name, vector<string> &vector_of_words);
bool get_the_reads_from_file(char * my_file_name, vector<string> &reads);
bool write_out_the_vector_to_screen(vector<string> my_vector);
bool write_out_the_vector_to_file(vector<string> my_vector, char * my_file_name);

ofstream out;

int main()
{

    out.open("my_results.txt");

    vector<string> words_in_genome;
    char * genome_file_name = "my_genome.txt";//make certain to place this file in the correct folder. Do not change path.
    if (!get_words_from_the_file(genome_file_name, words_in_genome))
        return 1;
    Tree * trees = new Tree();
    trees->add_words(words_in_genome);

    char * reads_file_name = "reads.txt";                 //make certain to place this file in the correct folder. Do not change path.
    if (!get_the_reads_from_file(reads_file_name, reads_to_be_tested))
        return 1;
    for (int i = 0; i < reads_to_be_tested.size(); i++)
    {
        out <<reads_to_be_tested[i] << " " <<    trees->is_this_word_in_the_tree(reads_to_be_tested[i]);
    }
    cout << "All done" << endl;

    //Write out a file named "myResults.txt".
    //For each read, list its sequence and either "Yes" or "No".
    //This will indicate if it does or doesn't map to the genome.

    /** Used for debugging
     cout << "words" << endl;
     write_vector_to_screen(words);
     write_vector_to_file(words,"testing.txt");
     cout << "reads" << endl;
     write_vector_to_screen(reads);
     ***/
    out.close();
}



bool get_words_from_the_file(char * my_file_name, vector<string> &vector_of_words)
{
    int i, j;
    int len = 0;
    ifstream in;
    in.open(my_file_name);
    if (!in.is_open())
    {
        cout << "I could not find " << my_file_name << endl;
        cout << "Check the location.\n";
        return false;
    }

    char * my_word = new char[11];
    while (in.peek() != EOF) { in >> my_word[0]; len++; }
    in.clear(); in.close(); in.open(my_file_name);

    for (i = 0; i<10; i++)
    {
        in >> my_word[i];
        if (my_word[i]<97) my_word[i] += 32;     //makes it lowercase
    }
    my_word[10] = '\0';
    vector_of_words.push_back(my_word);

    for (i = 1; i<(len - 10 - 1); i++)   //read until the end of the file
    {
        //shift
        for (j = 0; j<9; j++) my_word[j] = my_word[j + 1];
        in >> my_word[9];
        if (my_word[9]<97) my_word[9] += 32;     //makes it lowercase
        my_word[10] = '\0';
        cout << i << "\t" << my_word << endl; cout.flush();
        vector_of_words.push_back(my_word);
    }
    in.clear(); in.close();

    return true;
}


bool get_the_reads_from_file(char * my_file_name, vector<string> &reads)
{
    int i;
    ifstream in;
    in.open(my_file_name);
    if (!in.is_open())
    {
        cout << "The read file " << my_file_name << " could not be opened.\nCheck the location.\n";
        return false;
    }

    char * word = new char[20];                              //this is a default, we'll be looking at words of size 10

    while (in.peek() != EOF)
    {
        in.getline(word, 20, '\n');
        for (i = 0; i<10; i++) { if (word[i]<97) word[i] += 32; }     //makes it lowercase
        reads.push_back(word);
    }
    in.clear(); in.close();
    delete word;
    return true;
}

bool write_out_the_vector_to_screen(vector<string> my_vector)
{
    int i;
    for (i = 0; i < my_vector.size(); i++)
    {
        cout << my_vector[i].c_str() << endl;
    }
    return true;
}


bool write_out_the_vector_to_file(vector<string> my_vector, char * my_file_name)
{
    ofstream out;
    out.open(my_file_name);
    int i;
    for (i = 0; i<my_vector.size(); i++)
        out << my_vector[i].c_str()<< endl;
    out.clear();
    out.close();
    return true;
}


bool Tree::add_words(vector<string> w)
{
    for (int i = 0; i < w.size(); i++)
        add_a_word(w[i]);

    return true;
}


bool Tree::add_a_word(string word)
{
    myTreeNode * tempNode = new myTreeNode;
    tempNode = Root;



    if (tempNode == NULL)
    {
        cout << "The tree is empty" << endl;
    }
    else
    {
        while (tempNode != NULL)
        {
            for (int i = 0; i < word.size(); i++)
            {
                if (word[i] == 'a')
                {
                    if (tempNode->childA != NULL)
                        tempNode = tempNode->childA;
                    else
                    {
                        add_node(tempNode, word[i]);//add a node: what letter, who's my parent
                        tempNode = tempNode->childA;
                    }
                }
                else if (word[i]== 'g')
                {
                    if (tempNode->childG != NULL)
                        tempNode = tempNode->childG;
                    else
                    {
                        add_node(tempNode, word[i]);
                        tempNode = tempNode->childG;
                    }
                }
                else if (word[i] == 'c')
                {
                    if (tempNode->childC != NULL)
                        tempNode = tempNode->childG;
                    else
                    {
                        add_node(tempNode, word[i]);
                        tempNode = tempNode->childC;
                    }
                }
                else if (word[i] == 't')
                {
                    if (tempNode->childT != NULL)
                        tempNode = tempNode->childT;
                    else
                    {
                        add_node(tempNode, word[i]);
                        tempNode = tempNode->childT;
                    }

                }
                else
                {
                    cout << "The tree is full, or can't find data" << endl;
                    return NULL;
                    break;
                }
            }

        }
    }
}

bool Tree::is_this_word_in_the_tree(string word)
{
    myTreeNode * tempNode = new myTreeNode;
    tempNode = Root;


    char com1, com2, com3, com4;


    if (tempNode == NULL)
    {
        cout << "The tree is empty. Sorry" << endl;

    }
    else
    {
        while (tempNode != NULL)
        {
            for (int i = 0; i < word.size(); i++)
            {
                if (word[i] == 'a')
                {
                    if (tempNode->childA != NULL)
                    {
                        if (tempNode->childA)
                        {
                            tempNode = tempNode->childA;
                            com1 = 'y';
                        }
                    }
                    else
                    {

                        com1 = 'n';
                    }
                }

                if (word[i] == 'g')
                {
                    if (tempNode->childG != NULL)
                    {
                        if (tempNode->childG)
                        {
                            tempNode = tempNode->childG;
                            com2 = 'y';
                        }
                    }
                    else
                    {

                        com2 = 'n';
                    }
                }


                if (word[i] == 't')
                {
                    if (tempNode->childT != NULL)
                    {
                        if (tempNode->childT)
                        {
                            tempNode = tempNode->childG;
                            com3 = 'y';
                        }
                    }
                    else
                    {

                        com3 = 'n';
                    }
                }
                if (word[i] == 'c')
                {
                    if (tempNode->childC != NULL)
                    {
                        if (tempNode->childC)
                        {
                            tempNode = tempNode->childC;
                            com4 = 'y';
                        }
                    }
                    else
                    {

                        com4 = 'n';
                    }
                }

            }
            out << com1 << com2 << com3 << com4 << endl;
            if (com1 == com2 == com3 == com4)
            {
                out << "The test passed" << endl;
            }
            else
            {
                out << "The test failed" << endl;
                return false;
            }
        }
    }
    return true;

}

bool Tree::add_node(myTreeNode * parent, char letter)
{
    //Can't figure out how to fix error. Run-Time error is that it overwrites the node instead of adding it.
    //How would i make it so it's a new node every time?//
    myTreeNode * tempNode = new myTreeNode;
    tempNode = Root;
    tempNode->Data = letter;
    tempNode->childA = tempNode->childC = tempNode->childG = tempNode->childT = NULL;
    if (tempNode == NULL)
    {
        cout << "The tree is empty" << endl;
    }
    else
    {
        while (tempNode != NULL)
        {
            if (parent->childA == NULL && letter =='a')
            {
                parent->childA = tempNode;

            }
            else if (parent->childC == NULL && letter == 'c')
            {
                parent->childC = tempNode;

            }
            else if (parent->childG == NULL && letter == 'g')
            {
                parent->childG = tempNode;

            }
            else if (parent->childT == NULL && letter == 't')
            {
                parent->childT = tempNode;

            }
            else
            {
                cout<<"no"<<endl; //for testing//
                return false;
                break;

            }
        }
    }

    return true;    

}

Like I stated before, this is a project. I'm not here looking for an easy way out. I just want learn how to fix my code.

0

2 Answers 2

3

The most fundamental problem in your code is the simple obviousness that you're not comfortable using pointers. From the looks of it you may have come from other languages where the vernacular of:

Type *p = new Type;
p = Something;

was common. It is anything-but-common in C++. As in C, dynamic allocation is managed by a returned address, which is saved, cared for, and if all goes well, eventually disposed of. Those addresses are kept in pointer variables. Pointers in C++ don't hold objects; they hold addresses.

That said, I'm not going to destroy everything you wrote. I'm not going to sugar coat this; it would be shooting fish in a barrel. I'm rather going to describe what you should be doing in add_node, show you where you went wrong, and finally proffer up a simple example that eliminates much of the cruft (file io, etc) in your existing code, focusing rather on the real problem at hand: tree node management and the pointer-jockeying that is needed to accomplish it.

The Task

You should be starting at the root node, and for each successive letter in your string, move down the tree. When you encounter a path you want to take, but can't because there is no node hanging there yet, that is when you allocate a new node, hang it, move to it, and continue the process until there are no more characters in your input string.

Your Code

That said, review the comments in the following

bool Tree::add_node(myTreeNode * parent, char letter)
{
    myTreeNode * tempNode = new myTreeNode;

    // this is outright wrong. you just leaked the memory 
    //  you allocated above. this has no place here and
    //  should be removed.
    //
    // Note: the remainder of this analysis will assume you
    //  have, in fact, removed this line.
    tempNode = Root;

    // all of this belongs in your myTreeNode constructor.
    tempNode->Data = letter;
    tempNode->childA = tempNode->childC = tempNode->childG = tempNode->childT = NULL;

    // this is flat-out impossible. Assuming you fixed your incorrect
    // Root assignment mentioned above, you just allocated a new node
    // therefore this can NEVER be NULL (an exception would have thrown
    // on a failure to allocate).
    if (tempNode == NULL)
    {
        cout << "The tree is empty" << endl;
    }
    else
    {
        // This NEVER changes. Nowhere in the code below this is
        //  tempNode ever assigned a different value. this loop
        //  should not even be here. A simple if-else-if stack or
        //  a switch on letter is all that is needed.
        while (tempNode != NULL)
        {
            if (parent->childA == NULL && letter =='a')
            {
                parent->childA = tempNode;

            }
            else if (parent->childC == NULL && letter == 'c')
            {
                parent->childC = tempNode;

            }
            else if (parent->childG == NULL && letter == 'g')
            {
                parent->childG = tempNode;

            }
            else if (parent->childT == NULL && letter == 't')
            {
                parent->childT = tempNode;

            }
            else
            {
                cout<<"no"<<endl; //for testing//
                return false;
                break;

            }
        }
    }

    return true;
}

Sample Code

The following strips out all the file io, and most of the insanity regarding managing the tree. There are only two member functions, add_word and has_word (the latter used to validate something is indeed present).

What makes this code work is how a pointer-to-pointer is used in the addition and check functions add_word and has_word. For addition, we start at the root node pointer, and with each successive character in the input string, move down the tree. When a child pointer is hit that is NULL, we allocate a new node, hang it, and move on. The check function has_word does exactly the same thing, save for one difference: it doesn't hang new nodes. When it encounters a NULL where there shouldn't be one, it means something is wrong and the input word is not in the tree.

#include <iostream>
#include <random>
#include <string>

struct myTreeNode
{
    char data;

    myTreeNode *childA;
    myTreeNode *childT;
    myTreeNode *childC;
    myTreeNode *childG;

    myTreeNode( char c )
        : data(c), childA(), childT(), childC(), childG()
    {
    }

    ~myTreeNode()
    {
        delete childA;
        delete childT;
        delete childC;
        delete childG;
    }

    // squelch these
    myTreeNode(const myTreeNode&) = delete;
    myTreeNode& operator=(const myTreeNode&) = delete;
};

class Tree
{
private:
    myTreeNode *Root;

public:
    Tree() : Root( new myTreeNode('-')) { }
    ~Tree() { delete Root; }

    // squelch these
    Tree(const Tree&) = delete;
    Tree& operator =(const Tree&) = delete;


    // adds a given string into the tree if it isn't already there.
    void add_word(const std::string& word)
    {
        myTreeNode **pp = &Root;

        for (auto c : word)
        {
            c = std::tolower((unsigned int)c);
            switch(c)
            {
                case 'a':
                    pp = &(*pp)->childA;
                    break;

                case 't':
                    pp = &(*pp)->childT;
                    break;

                case 'c':
                    pp = &(*pp)->childC;
                    break;

                case 'g':
                    pp = &(*pp)->childG;
                    break;

                default:
                    std::cerr << "skipping unsupported char '" << c << "'\n";
            }

            if (!*pp)
                *pp = new myTreeNode(c);
        }
    }

    // returns true if the given string is in the tree
    bool has_word(const std::string& word)
    {
        myTreeNode **pp = &Root;

        for (auto c : word)
        {
            c = std::tolower((unsigned int)c);
            switch(c)
            {
                case 'a':
                    pp = &(*pp)->childA;
                    break;

                case 't':
                    pp = &(*pp)->childT;
                    break;

                case 'c':
                    pp = &(*pp)->childC;
                    break;

                case 'g':
                    pp = &(*pp)->childG;
                    break;


                default: // should never happen with proper input
                    return false;
            }

            if (!*pp)
                return false;
        }
        return true;
    }
};
////////////////////////////////////////////////////////////////////////////////


int main()
{
    // setup a random device and some uniform distributions
    std::random_device rd;
    std::mt19937 rng(rd());
    std::uniform_int_distribution<> dchar(0,3);
    std::uniform_int_distribution<> dlen(3,8);

    // our restricted alphabet. random indexes for creating our
    //  strings will be coming by indexing with dchar(rng)
    char s[] = {'a', 't', 'c', 'g' };


    // build set of random strings
    std::vector<std::string> strs;
    for (int i=0; i<20; ++i)
    {
        std::string str;
        int len = dlen(rng);
        for (int j=0; j<len; ++j)
            str.push_back(s[dchar(rng)]); // push random char
        strs.emplace_back(str);
    }

    // drop list of strins into tree
    Tree tree;
    for (auto const& str : strs)
    {
        std::cout << str << '\n';
        tree.add_word(str);
    }

    // now verify every string we just inserted is in the tree
    for (auto const& str : strs)
    {
        if (!tree.has_word(str))
        {
            std::cerr << "Word \"" << str << "\" should be in tree, but was NOT\n";
            std::exit(EXIT_FAILURE);
        }
    }

    std::cout << "All test words found!!\n";
    return EXIT_SUCCESS;
}

Output (varies due to random generators)

gctccgga
agtccatt
gagcg
gtggg
tca
aga
cacaggg
cga
tgga
ttatta
cagg
aac
tatttg
gccttat
acctcca
tgagac
aagacg
tgc
aaccgg
tca
All test words found!!

Summary

I strongly advise you run this in the debugger and step through it with a firm grasp on the watch-window. Follow pointer trails to see how things set up as the program progresses. There are many things I did not talk about: proper construction, initialization, Rule of Three compliance etc. I also could have (and would have had this not been an academic case) used smart pointers such as std::unique_ptr<> or std::shared_ptr<>. I sincerely hope you get something out of this. It's only going to get worse from here.

Best of luck

Sign up to request clarification or add additional context in comments.

1 Comment

I forgot to say thanks when it was due but thank you!! Got a perfect score!
0

I don't know why but this :

Root->childA = Root->childC = Root->childG = Root->childT = NULL;

Doesn't look right for me, haven't done c++ for a while and nodes but i don't think that's how you gotta do it? Will check and edit this.

1 Comment

That is one of the few lines of sanity in the posted code.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.