0

Hi, I am developing an email filter for an application that scans through emails to determine whether or not they are spam. Here is my class:

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.mail.MessagingException;
import javax.mail.internet.MimeMessage;

import ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer;

/**
 * Keyword-based spam filter.
 *
 * <p>Each call to {@link #checkSpam(MimeMessage)} counts how many (message part, keyword
 * pattern) pairs match and maps that count to a confidence level.</p>
 *
 * <p>The keyword patterns are read from the file {@code SpamWords.txt}, one regular
 * expression per line, resolved against the current working directory.</p>
 */
public class MotsClesFilter implements EmailFilter {

    final String NAME = "Filtrage par mots cles";

    /** File holding one keyword regular expression per line. */
    private static final String SPAM_WORDS_FILE = "SpamWords.txt";

    /** Number of keyword hits found by the most recent call to {@link #analyze(MimeMessage)}. */
    private int nbOccMotSpam = 0;

    /** Confidence level computed by the most recent call to {@link #checkSpam(MimeMessage)}. */
    private byte confidenceLevel;

    /**
     * @return the display name of this filter
     */
    @Override
    public String getFilterName() {
        return this.NAME;
    }

    /**
     * Analyzes the message and converts the number of keyword hits into a confidence level.
     *
     * @param message the message to inspect
     * @return the confidence level for this message
     */
    @Override
    public byte checkSpam(MimeMessage message) {
        analyze(message);
        switch (this.nbOccMotSpam) {
        case 0:
            this.confidenceLevel = 1;
            break;
        case 1:
            this.confidenceLevel = CANT_SAY;
            break;
        case 2:
            this.confidenceLevel = 50;
            break;
        case 3:
            this.confidenceLevel = 70;
            break;
        case 4:
            this.confidenceLevel = 80;
            break;
        default:
            // Five or more hits used to leave the previous message's value in place.
            // The scale above 80 is not defined in this class, so reuse the highest tier.
            // NOTE(review): confirm the intended value for more than four hits.
            this.confidenceLevel = 80;
            break;
        }
        return getConfidenceLevel();
    }

    /**
     * Counts the keyword hits in the given message and stores the result for
     * {@link #checkSpam(MimeMessage)}.
     *
     * <p>The counter is reset on every call so that hits from a previous message do not leak
     * into the current one. I/O and messaging failures (including an unsupported charset
     * declared by the message, which surfaces as an {@link IOException}) are printed and
     * leave the counter at whatever was reached so far.</p>
     *
     * @param message the message to inspect
     */
    public void analyze(MimeMessage message) {
        // Start from zero: this filter instance is reused across messages.
        this.nbOccMotSpam = 0;
        try {
            List<Pattern> motifs = loadSpamPatterns();
            String[] tabMots = EmailSplicer.getMessageContent(message);
            for (int i = 0; i < tabMots.length; i++) {
                for (int j = 0; j < motifs.size(); j++) {
                    // NOTE(review): matches() requires the WHOLE part text to match the
                    // pattern. If SpamWords.txt holds plain words, find() may be what is
                    // intended - confirm before changing.
                    if (motifs.get(j).matcher(tabMots[i]).matches()) {
                        this.nbOccMotSpam++;
                    }
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } catch (MessagingException e) {
            e.printStackTrace();
        }
    }

    /**
     * Reads the keyword file and compiles each non-empty line once.
     *
     * @return the compiled patterns, in file order
     * @throws IOException if the keyword file cannot be opened or read
     */
    private List<Pattern> loadSpamPatterns() throws IOException {
        List<Pattern> motifs = new ArrayList<Pattern>();
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(new File(SPAM_WORDS_FILE))));
        try {
            String ligne;
            // readLine() returns null at end of file; ready() only says whether a read would block.
            while ((ligne = reader.readLine()) != null) {
                if (ligne.length() == 0) {
                    // An empty pattern would match every empty message part.
                    continue;
                }
                try {
                    motifs.add(Pattern.compile(ligne));
                } catch (java.util.regex.PatternSyntaxException e) {
                    // One malformed line must not disable the whole filter.
                    System.err.println("Ignoring invalid pattern in " + SPAM_WORDS_FILE + ": " + ligne);
                }
            }
        } finally {
            // Always release the file handle, even when reading fails.
            reader.close();
        }
        return motifs;
    }

    /**
     * @return the confidence level computed by the last {@link #checkSpam(MimeMessage)} call
     */
    @Override
    public byte getConfidenceLevel() {
        return this.confidenceLevel;
    }

    /**
     * @return always {@code true}; this filter cannot be disabled
     */
    @Override
    public boolean enabled() {
        return true;
    }
}

And here is the EmailSplicer utility class that I am using:

import java.io.IOException;
import java.util.ArrayList;

import javax.mail.MessagingException;
import javax.mail.Multipart;
import javax.mail.internet.MimeMessage;

/**
 * Utility class to return all the content of a MimeMessage
 * @author Maxime Caumartin <[email protected]>
 */
public class EmailSplicer {

    /**
     * Contains the types of email parts that can be analyzed by this class.
     * @author Maxime Caumartin <[email protected]>
     */
    private enum ContentTypes
    {
        Plain("text/plain"), HTML("text/html"), Multipart("multipart"), Unknown(
                "?");

        private String  type;

        ContentTypes(String type)
        {
            this.type = type;
        }

        public static ContentTypes getType(String type)
        {
            if (type.contains(Plain.type))
                return Plain;
            if (type.contains(HTML.type))
                return HTML;
            if (type.contains(Multipart.type))
                return Multipart;
            return Unknown;
        }

    }

    /**
     * Recursive method that passes through all the parts of the Mutlipart message and returns an ArrayList<String> of the content of these parts.
     * @param multiPartMsg The Multipart that needs to be dissected.
     * @return The ArrayList<String> containing all the content of the Mutlipart message.
     * @throws MessagingException Exception thrown if the analyzer cannot read the message.
     * @throws IOException Exception thrown if the encoding type isn't valid.
     */
    private static ArrayList<String> getMutlipartContent(Multipart multiPartMsg)
            throws MessagingException, IOException
    {
        ArrayList<String> returnTable = new ArrayList<String>(
                multiPartMsg.getCount());

        for (int i = 0; i < multiPartMsg.getCount(); i++)
        {
            switch (ContentTypes.getType(multiPartMsg.getBodyPart(i)
                    .getContentType()))
            {
                case Plain:
                    returnTable.add((String) multiPartMsg.getBodyPart(i)
                            .getContent());
                    break;
                case HTML:
                    String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) multiPartMsg.getBodyPart(i)
                            .getContent()).trim();
                    if (s.length() != 0)
                        returnTable.add(s);
                    break;
                case Multipart:
                    returnTable
                            .addAll(getMutlipartContent((Multipart) multiPartMsg
                                    .getBodyPart(i).getContent()));
                    break;
                default:
            }
        }
        return returnTable;
    }

    /**
     * Returns all the content of the MimeMessage passed as a parameter. The whole content will be parsed.
     * @param message The MimeMessage containing textual information.
     * @return The array of string containing all the strings from the content of the message.
     * @throws MessagingException Exception thrown if the analyzer cannot read the message.
     * @throws IOException Exception thrown if the encoding type isn't valid.
     */
    public static String[] getMessageContent(MimeMessage message)
            throws MessagingException, IOException
    {
        String contentType = message.getContentType();

        switch (ContentTypes.getType(contentType))
        {
            case Plain:
                return new String[] { (String) message.getContent() };
            case Multipart:
                return getMutlipartContent(
                        (Multipart) message.getContent()).toArray(new String[0]);
            case HTML:
                String s = org.clapper.util.html.HTMLUtil.textFromHTML((String) message
                        .getContent()).trim();
                if (s.length() != 0)
                    return new String[] {s};
            default:
                return new String[0];
        }

    }

}

Now, when I execute the main method of the whole app, this is the exception that I get:

java.io.UnsupportedEncodingException: iso-0621-9
at sun.nio.cs.StreamDecoder.forInputStreamReader(Unknown Source)
at java.io.InputStreamReader.<init>(Unknown Source)
at com.sun.mail.handlers.text_plain.getContent(text_plain.java:82)
at javax.activation.DataSourceDataContentHandler.getContent(Unknown Source)
at javax.activation.DataHandler.getContent(Unknown Source)
at javax.mail.internet.MimeBodyPart.getContent(MimeBodyPart.java:629)
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMutlipartContent(EmailSplicer.java:69)
at ca.etsmtl.logti.log619.lab05.utilities.EmailSplicer.getMessageContent(EmailSplicer.java:101)
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.analyze(MotsClesFilter.java:66)
at ca.etsmtl.logti.log619.lab05.filter.MotsClesFilter.checkSpam(MotsClesFilter.java:34)
at ca.etsmtl.logti.log619.lab05.Application.main(Application.java:107)

Can someone enlighten me on how to fix it ?

2 Answers 2

2

ISO-0621-9 is not an encoding, and if it were it is not one that is supported by Java. I'd guess this might even be a good indicator of spam: no valid encoding => spam.

A little googling for ISO 621 shows that ISO 621 is the international standard for "Manganese ores -- Determination of metallic iron content (metallic iron content not exceeding 2 %) -- Sulphosalicylic acid photometric method"

I'd say that has little to do with computers and even less with encodings ;)

Sign up to request clarification or add additional context in comments.

2 Comments

Where in EmailSplicer can I catch the exception?
That depends on how you want to handle it. EmailSplicer might not even be the right place.
2

This document lists the supported encodings by Java.

iso-0621-9 is not in the list.

1 Comment

Link seems to be broken, but here is the 1.6 equivalent: docs.oracle.com/javase/6/docs/technotes/guides/intl/…

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.