2

I have stored two text files into two separate arrays. Now, I am trying to compare both arrays to find duplicate values. I am having issues with my logic, and I am unable to print out the number of times a duplicate value appears.

file1 contains:

1913 2016 1 1913 186
2016 1711 32843 2016 518
3 1913 32843 32001 4
250 5 3500 6 7
8 27 73 9 10
1711 73 11 2 1.4
1.4 12 33.75278 84.38611 1913
19 1930 20 21 1947
22 1955 23 1961 23
1969 27 1995 26 27
1962 28 29 30 1970
31 31 

file2 contains:

1913 2016 32843 31 27 1.4 4 7 2 23

I am trying to find values in file2 that are duplicated in file1, and how many times.

I have the following code:

 /*
  * For every entry v of nums, walks through nums1 and prints one line for each
  * nums1 entry p that contains v, together with a running match count for v.
  *
  * NOTE(review): String.contains is a substring test, so e.g. a v of "3" also
  * matches a p of "32843"; it is not an exact value comparison.
  * NOTE(review): the println sits inside the inner loop, so a line is printed on
  * every match (with the count reached so far) instead of one total per value.
  */
 public static void findDuplicates() {

        // array for first file
        for (int n = 0; n < nums.size(); n++) {

            // matches are false by default
            boolean match = false;

            // number of matches found so far for the current value v
            int count = 0;

                String v = nums.get(n);

            // array for second file
            for (int k = 0; k < nums1.size(); k++) {

                String p = nums1.get(k);

                // the entry from the second list contains the entry from the first list as a substring
                if (p.contains(v)) {

                    // there is a match
                    match = true;

                    // match was set to true on the line above, so this condition always holds here
                    if (match) {

                    count++;

                        // printed immediately, on every single match
                        System.out.println( p + " " + "is duped" + " " + count + " " + "times");

                    }


                }

            }

        }


    }

When I compile and run this code, this is the output:

31 is duped 1 times

Could someone let me know what I am doing wrong here?

EDIT

Here is the rest of my code:

 // Numbers (kept as Strings) extracted from FILE1; assigned in main() and read by findDuplicates().
 public static ArrayList<String> nums;
 // Numbers (kept as Strings) extracted from FILE2; assigned in main() and read by findDuplicates().
 public static ArrayList<String> nums1;

    //Main method that starts the program: it reads FILE1 and then FILE2 piece by piece, extracts the
    //numbers from every piece with regular expressions, stores them in nums / nums1 and calls findDuplicates().
    //FILE1, FILE2 and count are declared outside this method (not shown here).
    //NOTE(review): FILE1/FILE2 are presumably java.util.Scanner objects, judging by hasNext()/next()/close() - confirm.
    //FileNotFoundException is declared in case the file can't be found by the computer.
    public static void main(String[] args) throws FileNotFoundException{

        //The while will help us read the content into our computer piece by piece. It will not stop until the end of assignment.csv.

        while(FILE1.hasNext()){

                //Create a String variable - TempString. We use TempString to store each piece temporarily.

                String TempString = FILE1.next();

                //Remove every comma (,) from the piece and store the new string in temp1.
                String temp1 = TempString.replaceAll("[\\,]", "");

                //Regular Expression for a decimal number: digits, one dot, digits.
                String pattern1 = "[0-9]+\\.{1}[0-9]+";

                //Compile the Regular Expression into Pattern and store it in r1 so that the computer can understand the Regular Expression.
                Pattern r1 = Pattern.compile(pattern1);


                Matcher m1 = r1.matcher(temp1);

                //Regular Expression for an integer number: one or more digits.
                String pattern2 = "[0-9]+";

                //Compile the Regular Expression into Pattern and store it in r2 so that the computer can understand the Regular Expression.
                Pattern r2 = Pattern.compile(pattern2);


                Matcher m2 = r2.matcher(temp1);

                //NOTE(review): nums is replaced by a new, empty list for every piece that is read,
                //so when this loop ends it only holds the number(s) of the last piece of FILE1.
                nums = new ArrayList<String>();



                //Recollect, m1 is used to match decimal numbers.

                if(!(m1.find())){//if a decimal number CAN'T be found

                    //We use while statement instead of if statement here.
                    //If there is only one piece per line, we can use either while statement or if statement.
                    //However, we have to use while statement if there is more than one piece per line.
                    while(m2.find()) {//if an integer number CAN be found
                        //If an Integer is found, we add 1 to Variable count.

                        count++;
                        //Even though the number (i.e., m2.group(0)) is an Integer, its data type is String. So we store it to a String variable - number.

                        String number = m2.group(0);


                        nums.add(number);

                        //If the remainder of count by 5 is zero, we display the number and advance to a new line.
                        if (count % 5 == 0){

                            System.out.println(number);

                        }
                        //Otherwise, we just display the number on the same line and divide numbers by a space.
                        else
                            System.out.print(number + " ");

                    }
                }

                //If we find a decimal number
                else{
                        //We add 1 to Variable count.



                        count++;

                        //Even though the number (i.e., m1.group(0)) is a decimal number, its data type is String. So we store it to a String variable - number.

                        String number = m1.group(0);

                        nums.add(number);

                        //If the remainder of count by 5 is zero, we display the number and advance to a new line.
                        if (count % 5 == 0) {

                            System.out.println(number);

                        }

                        //Otherwise, we just display the number on the same line and divide numbers by a space.
                        else
                            System.out.print(number + " ");

                }


        }



    FILE1.close();//Once we finish the task, we close the file.

        while(FILE2.hasNext()){

            //Create a String variable - TempString. We use TempString to store each piece temporarily.
            String TempString = FILE2.next();


            //So I use replaceAll function to eliminate comma (,) and store the new string in temp1.
            String temp1 = TempString.replaceAll("[\\,]", "");



            //Regular Expression for a decimal number: digits, one dot, digits.
            String pattern1 = "[0-9]+\\.{1}[0-9]+";

            //Compile the Regular Expression into Pattern and store it in r1 so that the computer can understand the Regular Expression.
            Pattern r1 = Pattern.compile(pattern1);

            //Match the Regular Expression with the piece (temp1) we read from FILE2.
            Matcher m1 = r1.matcher(temp1);

            //Regular Expression for an integer number: one or more digits.
            String pattern2 = "[0-9]+";

            //Compile the Regular Expression into Pattern and store it in r2 so that the computer can understand the Regular Expression.
            Pattern r2 = Pattern.compile(pattern2);

            //Match the Regular Expression with the piece (temp1) we read from FILE2.
            Matcher m2 = r2.matcher(temp1);

            //NOTE(review): nums1 is replaced by a new, empty list for every piece that is read,
            //so it never holds more than the number(s) of the current piece.
            nums1 = new ArrayList<String>();


            //We have two types of numbers - Integer and Decimal
            //Let's start with Integer.
            //Recollect, m1 is used to match decimal numbers.
            if(!(m1.find())){//if a decimal number CAN'T be found

                //We use while statement instead of if statement here.
                //If there is only one piece per line, we can use either while statement or if statement.
                //However, we have to use while statement if there is more than one piece per line.
                while(m2.find()) {//if an integer number CAN be found
                    //If an Integer is found, we add 1 to Variable count.


                    count++;
                    //Even though the number (i.e., m2.group(0)) is an Integer, its data type is String. So we store it to a String variable - number.

                    String number = m2.group(0);

                    nums1.add(number);

                    //If the remainder of count by 5 is zero, nothing is displayed (the output is commented out).
                    if (count % 5 == 0){

                        //System.out.println(number);

                    }
                    //Otherwise, only an empty line is displayed (the number itself is commented out).
                    else
                        System.out.println(/*number + " "*/);

                        }
            }

            //If we find a decimal number
            else{
                //We add 1 to Variable count.


                count++;

                //Even though the number (i.e., m1.group(0)) is a decimal number, its data type is String. So we store it to a String variable - number.

                String number = m1.group(0);

                nums1.add(number);

                //If the remainder of count by 5 is zero, nothing is displayed (the output is commented out).
                if (count % 5 == 0){

                    //System.out.println(number);
                }
                //Otherwise, only an empty line is displayed (the number itself is commented out).
                else
                    System.out.println(/*number + " "*/);

            }


            //NOTE(review): this call is inside the while loop, so findDuplicates() runs once per piece of FILE2,
            //each time with nums1 holding only the number(s) of that piece.
            findDuplicates();


        }


        FILE2.close();//Once we finish the task, we close the file.

  }

I tried to delete as much unnecessary code as I could.

EDIT

Expected output should be:

1913 is duplicated 3 times.
2016 is duplicated 2 times.
32843 is duplicated 1 times.
31 is duplicated 2 times..... 

EDIT

So I believe i've found the problem. For some reason,

String p = nums.get(k)

in my findDuplicates() method is only returning the value 31, and not the other values. I am working on solving the problem, and will post an answer when I do.

5
  • Can you also add the code where you are actually creating num and num1 arrays. Commented Oct 1, 2016 at 14:32
  • Also what is your expected output? Commented Oct 1, 2016 at 14:39
  • I have added edits @vatsalmevada Commented Oct 1, 2016 at 14:45
  • Files.readAllLines, and a split on the file should read all numbers (or a Scanner). To count occurrences, either use a map or a guava Multiset. Commented Oct 1, 2016 at 14:55
  • The current code runs in O(n^2). Why not try to store the data in 2 lists, sort it and then do a Binary Search on them. That would be far more efficient Commented Oct 1, 2016 at 14:58

3 Answers 3

1

I think the biggest issue is that the printline is inside the second for loop.
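Furthermore, I would remove the boolean and just compare the two Strings directly. Note that in Java the contents of two Strings should be compared with `v.equals(p)`; a comparison written as (p==v) checks whether both variables refer to the same object, and only appears to work when the Strings happen to be interned literals.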

So the code would look more like this:

/**
 * Prints, for every value read from the second file, how many times that
 * value occurs in the first file.
 *
 * Expects the static lists {@code nums} (first file) and {@code nums1}
 * (second file) to have been filled before this runs.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // array for second file: each of its values is looked up in the first file
    for (int n = 0; n < nums1.size(); n++) {

        // occurrences of v found in the first file
        int count = 0;

        String v = nums1.get(n);

        // array for first file
        for (int k = 0; k < nums.size(); k++) {

            String p = nums.get(k);

            // equals() compares the characters of the two Strings; == would only
            // compare references and fails for Strings that were read from a file
            if (v.equals(p)) {
                count++;
            }
        }

        // printed once per value, after the whole first file has been scanned
        System.out.println( v + " " + "is duped" + " " + count + " " + "times");
    }
}

}

With the changes I made the code runs as intended.
You can check out a live demo here.


Output:

1913 is duped 4 times
2016 is duped 3 times
32843 is duped 2 times
31 is duped 2 times
27 is duped 3 times
1.4 is duped 2 times
4 is duped 1 times
7 is duped 1 times
2 is duped 1 times
23 is duped 2 times
Sign up to request clarification or add additional context in comments.

1 Comment

@codeREXO your question is about the function that counts duplicates. this has been answered correctly here, meaning your question is answered. Now if the creation of your arrays is wrong that's an entirely different question and should be therefore asked as such.
0

You should move the System.out.println statement outside the inner loop, so that the whole second ArrayList is iterated before the number of times a value is duplicated is printed.

You also need to make a few other changes to run the program correctly

// For every value of the first list (nums), count how many entries of the
// second list (nums1) are exactly equal to it, then report the total once.
for (int n = 0; n < nums.size(); n++) {

        // a fresh counter per value, so no manual reset is needed
        int count = 0;

        String v = nums.get(n);

        // array for second file
        for (int k = 0; k < nums1.size(); k++) {

            String p = nums1.get(k);

            // equals() is an exact comparison; contains() would also accept
            // substrings, e.g. "3" inside "32843"
            if (p.equals(v)) {
                count++;
            }
        }

        // printed after the inner loop, i.e. once the whole second list has been scanned
        System.out.println( v + " " + "is duped" + " " + count + " " + "times");

    }

Even then, your logic will not work in all cases, because you are not counting how many times a number is repeated in the first file; you are only comparing the second file's numbers with the first file's. For the case you gave in the question, interchanging the two files (after modifying the code as described above) will make it work.

Comments

0

please try it on.

package stackoverflow.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class Test {

    public List<Integer> list = new ArrayList<Integer>();

    public List<Integer> dup = new ArrayList<Integer>();
    public Map<Integer, Integer> hashDup = new HashMap<Integer, Integer>();

    public void fileReader() {

        File file = new File("/home/john/Documents/file1.txt");
        List<Integer> list1 = this.output(file);
        File file2 = new File("/home/john/Documents/file2.txt");
        List<Integer> list2 = this.output(file2);
        for (int i = 0; i < list1.size(); i++) {
            int counter = 0;
            for (int j = 0; j < list2.size(); j++) {
                if (list1.get(i) == list2.get(j)) {
                    counter++;
                }
            }
            if (!hashDup.containsKey(list1.get(i))) {
                hashDup.put(list1.get(i), counter);
                System.out.println(" dup  " + list1.get(i) + " :" + counter);
            }

        }
    }

    public List<Integer> output(File file) {
        BufferedReader reader = null;

        try {
            reader = new BufferedReader(new FileReader(file));
            String text = null;

            while ((text = reader.readLine()) != null) {
                // System.out.println( text);
                String[] str = text.split(" ");
                for (String string : str) {
                    list.add(Integer.parseInt(string));
                }

            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    reader.close();
                }
            } catch (IOException e) {
            }
        }

        // print out the list
        // System.out.println(list.toString());

        return list;
    }

    public static void main(String args[]) {

        Test t = new Test();
        t.fileReader();
    }
}

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.