2

I have a String such as:

Cerepedia, una apliación web

I would like to transform it into something URL valid such as:

Cerepedia,unaaplicacionweb

Note the transformation of the special (accented) characters and the removal of the spaces.

By the way, are commas allowed in URLs?

5 Answers 5

2

Have you looked at URLEncoder? It seems to do what you need, although the special characters will be transformed into percent-escaped sequences rather than stripped of their "special" properties.

Sign up to request clarification or add additional context in comments.

Comments

2

Try convertNonAscii() in the class below

public class AsciiUtils {

    /**
     * Plain ASCII replacement letters. The character at index {@code i} is the
     * replacement for the accented character at index {@code i} of
     * {@link #UNICODE}, so both strings must always have the same length.
     */
    private static final String PLAIN_ASCII = 
              "AaEeIiOoUu"    // grave
            + "AaEeIiOoUuYy"  // acute
            + "AaEeIiOoUuYy"  // circumflex
            + "AaEeIiOoUuYy"  // tilde
            + "AaEeIiOoUuYy"  // umlaut
            + "Aa"            // ring
            + "Cc"            // cedilla
            + "Nn"            // n tilde (spanish)
            ;

    /**
     * Accented characters, row by row in the same order as {@link #PLAIN_ASCII}.
     */
    private static final String UNICODE =
         "\u00C0\u00E0\u00C8\u00E8\u00CC\u00EC\u00D2\u00F2\u00D9\u00F9"             // grave
        +"\u00C1\u00E1\u00C9\u00E9\u00CD\u00ED\u00D3\u00F3\u00DA\u00FA\u00DD\u00FD" // acute
        +"\u00C2\u00E2\u00CA\u00EA\u00CE\u00EE\u00D4\u00F4\u00DB\u00FB\u0176\u0177" // circumflex
        // The tilde row used to be a copy of the circumflex row, which left every
        // tilde letter unconverted; these are the actual tilde code points.
        +"\u00C3\u00E3\u1EBC\u1EBD\u0128\u0129\u00D5\u00F5\u0168\u0169\u1EF8\u1EF9" // tilde
        +"\u00C4\u00E4\u00CB\u00EB\u00CF\u00EF\u00D6\u00F6\u00DC\u00FC\u0178\u00FF" // umlaut
        +"\u00C5\u00E5"                                                             // ring
        +"\u00C7\u00E7"                                                             // cedilla
        +"\u00D1\u00F1"                                                             // n tilde
     ;

    static {
        // Fail fast if the two tables ever drift apart; a mismatch would
        // silently map characters to the wrong letter.
        if (PLAIN_ASCII.length() != UNICODE.length()) {
            throw new IllegalStateException(
                    "PLAIN_ASCII has " + PLAIN_ASCII.length() + " entries but UNICODE has "
                    + UNICODE.length());
        }
    }

    // private constructor: utility class, cannot be instantiated
    private AsciiUtils() {      
    }


    /**
     * Replaces every accented character listed in {@link #UNICODE} with its
     * plain ASCII equivalent. All other characters, including spaces and
     * punctuation, are copied unchanged.
     *
     * <p>Only precomposed characters are recognised; a base letter followed by
     * a separate combining accent is left as it is.</p>
     *
     * @param s the string to convert; must not be {@code null}
     * @return a copy of {@code s} with the known accented letters replaced
     * @throws NullPointerException if {@code s} is {@code null}
     */
    public static String convertNonAscii(String s) {

        int n = s.length();
        // Every input character produces exactly one output character.
        StringBuilder b = new StringBuilder(n);

        for (int i = 0; i < n; i++) {
            char c = s.charAt(i);
            int pos = UNICODE.indexOf(c);
            if (pos > -1) {
              b.append(PLAIN_ASCII.charAt(pos));
            } else {
              b.append(c);
            }
        }

       return b.toString();

    }

}

1 Comment

It works if the string is a literal in the source code; however, it does not work if the string is read from a UTF-8 encoded file.
0

URLEncoder substitutes spaces with +. The ASCII class posted by Don does not remove spaces, but the following function can be used for that purpose:

/**
 * Removes every space character (U+0020) from a string. Other whitespace,
 * such as tabs and line breaks, is kept.
 *
 * @param s the string to process; must not be {@code null}
 * @return {@code s} without any space characters
 * @throws NullPointerException if {@code s} is {@code null}
 */
public static String removeSpaces(String s) {
    // String.replace performs a literal (non-regex) replacement of every
    // occurrence in a single pass, instead of re-copying the result for each token.
    return s.replace(" ", "");
}

Comments

0

Note: Don's solution works with strings written in the source code, but it does not work with strings coming from a UTF-8 encoded file.

This is the best solution I have, using URLEncoder and replacing the percent-encoded (hexadecimal) sequences afterwards:

String s = "Cerepedia, una apliación web";
String ENCODING= "uft-8";
String encoded_s = URLEncoder.encode(s,ENCODING); // Cerepedia+una+aplicaci%C3%83%C2%B3n+web
String s_hexa_free = EncodingTableUtils.replaceHexa(,ENCODING)); //  Cerepedia+una+aplicacion+web

EncodingTableUtils

import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

public class EncodingTableUtils {
    /**
     * Percent-escapes that URL-encoding with ISO-8859-1 produces for the
     * accented Spanish letters (one byte each), mapped to the plain ASCII letter.
     */
    public final static HashMap<String, String> iso88591 = new HashMap<String, String>();
    static {
        iso88591.put("%E1", "a"); // a-acute, U+00E1
        iso88591.put("%C1", "A"); // A-acute, U+00C1
        iso88591.put("%E9", "e"); // e-acute, U+00E9
        iso88591.put("%C9", "E"); // E-acute, U+00C9
        iso88591.put("%ED", "i"); // i-acute, U+00ED
        iso88591.put("%CD", "I"); // I-acute, U+00CD
        iso88591.put("%D3", "O"); // O-acute, U+00D3
        iso88591.put("%F3", "o"); // o-acute, U+00F3
        iso88591.put("%FA", "u"); // u-acute, U+00FA
        iso88591.put("%DA", "U"); // U-acute, U+00DA
        iso88591.put("%D1", "N"); // N-tilde, U+00D1
        iso88591.put("%F1", "n"); // n-tilde, U+00F1
    }
    /**
     * Percent-escapes that URL-encoding with UTF-8 produces for the accented
     * Spanish letters (two bytes each), mapped to the plain ASCII letter.
     */
    public final static HashMap<String, String> utf8 = new HashMap<String, String>();
    static {
        utf8.put("%C3%A1", "a"); // a-acute, U+00E1
        utf8.put("%C3%81", "A"); // A-acute, U+00C1
        utf8.put("%C3%A9", "e"); // e-acute, U+00E9
        utf8.put("%C3%89", "E"); // E-acute, U+00C9
        utf8.put("%C3%AD", "i"); // i-acute, U+00ED
        utf8.put("%C3%8D", "I"); // I-acute, U+00CD
        utf8.put("%C3%93", "O"); // O-acute, U+00D3
        utf8.put("%C3%B3", "o"); // o-acute, U+00F3
        utf8.put("%C3%BA", "u"); // u-acute, U+00FA
        utf8.put("%C3%9A", "U"); // U-acute, U+00DA
        utf8.put("%C3%91", "N"); // N-tilde, U+00D1
        utf8.put("%C3%B1", "n"); // n-tilde, U+00F1
    }

    /** Replacement tables keyed by lower-case charset name. */
    public final static HashMap<String, HashMap<String, String>> enc_table =
            new HashMap<String, HashMap<String, String>>();
    static {
        enc_table.put("iso-8859-1", iso88591);
        enc_table.put("utf-8", utf8);
    }


    /**
     * Replaces the percent-escapes of accented letters with the equivalent
     * unaccented ASCII letters.
     * <p>Example: with encoding UTF-8, a-acute is encoded as %C3%A1 and gets
     * replaced with a.</p>
     * <p>Escapes that are not in the table for {@code enc} are left untouched.
     * When there is no table for {@code enc} at all, {@code s} is returned
     * unchanged instead of being discarded.</p>
     *
     * @param s   usually a string coming from URLEncoder.encode; must not be
     *            {@code null}
     * @param enc the charset that was used to encode {@code s}, UTF-8 or
     *            ISO-8859-1 (case-insensitive); must not be {@code null}
     * @return {@code s} with the known escapes replaced by ASCII letters
     * @throws NullPointerException if {@code s} or {@code enc} is {@code null}
     */
    public static String convertHexaDecimal(String s, String enc) {
        // Locale.ROOT keeps the lookup independent of the default locale
        // (a Turkish locale would lower-case 'I' to a dotless i).
        HashMap<String, String> characters = enc_table.get(enc.toLowerCase(Locale.ROOT));
        if (characters == null) {
            return s;
        }
        String result = s;
        for (Map.Entry<String, String> entry : characters.entrySet()) {
            // The keys are literal text, so a plain replace is enough; no key is
            // a substring of another, which makes the iteration order irrelevant.
            result = result.replace(entry.getKey(), entry.getValue());
        }
        return result;
    }
}

EscapeChars Class

public final class EscapeChars {

  /**
  * Characters that get a '\' prefix in {@link #forRegex(String)}.
  * '&' and ':' have no special meaning in java.util.regex, but escaping a
  * non-alphabetic character is always allowed, so they are kept for
  * compatibility with earlier output.
  */
  private static final String REGEX_SPECIALS = ".\\?*+&:{}[]()^$|";

/**
  * Replace characters having special meaning in regular expressions
  * with their escaped equivalents, preceded by a '\' character, so that
  * the result matches the input text literally.
  *
  * <P>The escaped characters include :
  *<ul>
  *<li>.
  *<li>\
  *<li>?, * , and +
  *<li>&amp;
  *<li>:
  *<li>{ and }
  *<li>[ and ]
  *<li>( and )
  *<li>^ and $
  *<li>|
  *</ul>
  *
  * @param aRegexFragment the literal text to escape; must not be null
  * @return the text with every listed character preceded by a backslash
  * @throws NullPointerException if aRegexFragment is null
  */
  public static String forRegex(String aRegexFragment){
    final int length = aRegexFragment.length();
    final StringBuilder result = new StringBuilder(length + 8);

    // Walk by index rather than with a CharacterIterator: that iterator uses
    // U+FFFF as its end marker and would stop early on input containing it.
    for (int index = 0; index < length; index++) {
      final char character = aRegexFragment.charAt(index);
      if (REGEX_SPECIALS.indexOf(character) >= 0) {
        result.append('\\');
      }
      result.append(character);
    }
    return result.toString();
  }
}

Comments

-1

Try this code

 public class Test {

    /**
     * Prints a title with all of its spaces removed. Accented letters are
     * printed as they are; only the spaces are dropped.
     *
     * @param args optional words of the title to process; when none are given,
     *             the sample title "Cerepedia, una apliaci(o-acute)n web" is used
     */
    public static void main(final String[] args) {
        // The o-acute is written as a Unicode escape (U+00F3) so that the literal
        // does not depend on the encoding javac assumes for this source file.
        String str = "Cerepedia, una apliaci\u00F3n web";
        if (args != null && args.length > 0) {
            StringBuilder joined = new StringBuilder();
            for (String word : args) {
                joined.append(word).append(' ');
            }
            str = joined.toString();
        }
        System.out.println(removeSpaces(str));

    }

    /**
     * Concatenates the space-separated parts of {@code text}.
     * The buffer grows with the input instead of being fixed at 25 characters,
     * so titles of any length are handled.
     */
    private static String removeSpaces(final String text) {
        String[] parts = text.split(" ");
        StringBuilder url = new StringBuilder(text.length());
        for (String part : parts) {
            url.append(part);
        }
        return url.toString();
    }
}

1 Comment

This prints Cerepedia,unaapliaciónweb, which is not what was asked for.

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.