How do I make Java conform to JavaScript's encodeURIComponent () method?

I am trying to pass these strings in a URL that contains special characters and the only way I can get it to work is with JavaScript encodeURIComponent ("tester ΰΉ† 8 ΰΉ…") which produces "tag % C3% A6% C3% B8% C3% A5 "

Everything I try to do in Java produces different encodings and doesn't work on the other end ... Any idea how I can get a tester ΰΉ† 8 ΰΉ… coded to test% C3% A6% C3% B8% C3% A5 in Java ? Thanks in advance!

package com.mastercard.cp.sdng.domain.user;

import org.apache.commons.lang.StringUtils;

import javax.script.ScriptEngine;
import javax.script.ScriptEngineManager;
import javax.script.ScriptException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URLEncoder;

public class UrlEncodingSample
{
    public static void main(String[] args)
    {
        String userId = "dummy";
        try
        {
            validateEncoding(userId);

            userId = "testeræøΓ₯";

            validateEncoding(userId);

            userId = URLEncoder.encode(userId);

            validateEncoding(userId);
        }
        catch (UnsupportedEncodingException e)
        {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }

    }

    private static void validateEncoding(String userId) throws UnsupportedEncodingException
    {
        System.out.println("------ START TESTING WITH USER ID = '"+userId+"' ----------------------");
        System.out.println("Test URLEncoder.encode(userId): " + URLEncoder.encode(userId));
        System.out.println("Test URLEncoder.encode(userId,\"UTF-8\"): " + URLEncoder.encode(userId, "UTF-8"));
        System.out.println("Test URLEncoder.encode(userId,\"UTF-16\"): " + URLEncoder.encode(userId,"UTF-16"));
        System.out.println("Test URLEncoder.encode(userId,\"UTF-16LE\"): " + URLEncoder.encode(userId,"UTF-16LE"));
        System.out.println("Test URLEncoder.encode(userId,\"UTF-16BE\"): " + URLEncoder.encode(userId,"UTF-16BE"));

        ScriptEngine engine = new ScriptEngineManager().getEngineByName("JavaScript");
        try
        {
            System.out.println("Test engine.eval(\"encodeURIComponent(\\\"\"+userId+\"\\\")\"): " +
                    engine.eval("encodeURIComponent(\""+userId+"\")"));
        }
        catch (ScriptException e)
        {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
        System.out.println("Test encodeURIComponent(userId): " + encodeURIComponent(userId));
        try
        {
            System.out.println("TEST new URI(userId).toASCIIString(): " + new URI(userId).toASCIIString());
        }
        catch (URISyntaxException e)
        {
            e.printStackTrace();  //To change body of catch statement use File | Settings | File Templates.
        }
        System.out.println("------ END TESTING WITH USER ID = '"+userId+"' ----------------------\n\n");

    }



    public static String encodeURIComponent(String input) {
        if(StringUtils.isEmpty(input)) {
            return input;
        }

        int l = input.length();
        StringBuilder o = new StringBuilder(l * 3);
        try {
            for (int i = 0; i < l; i++) {
                String e = input.substring(i, i + 1);
                if (ALLOWED_CHARS.indexOf(e) == -1) {
                    byte[] b = e.getBytes("utf-8");
                    o.append(getHex(b));
                    continue;
                }
                o.append(e);
            }
            return o.toString();
        } catch(UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return input;
    }

    private static String getHex(byte buf[]) {
        StringBuilder o = new StringBuilder(buf.length * 3);
        for (int i = 0; i < buf.length; i++) {
            int n = (int) buf[i] & 0xff;
            o.append("%");
            if (n < 0x10) {
                o.append("0");
            }
            o.append(Long.toString(n, 16).toUpperCase());
        }
        return o.toString();
    }

    public static final String ALLOWED_CHARS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_.!~*'()";
}

      

The output of the class above:

    ------ START TESTING WITH USER ID = 'dummy' ----------------------
    Test URLEncoder.encode (userId): dummy
    Test URLEncoder.encode (userId, "UTF-8"): dummy
    Test URLEncoder.encode (userId, "UTF-16"): dummy
    Test URLEncoder.encode (userId, "UTF-16LE"): dummy
    Test URLEncoder.encode (userId, "UTF-16BE"): dummy
    Test engine.eval ("encodeURIComponent (\" "+ userId +" \ ")"): dummy
    Test encodeURIComponent (userId): dummy
    TEST new URI (userId) .toASCIIString (): dummy
    ------ END TESTING WITH USER ID = 'dummy' ----------------------


    ------ START TESTING WITH USER ID = 'tester ΰΉ† ๘ ΰΉ…' ----------------------
    Test URLEncoder.encode (userId): tester% E6% F8% E5
    Test URLEncoder.encode (userId, "UTF-8"): tester% E0% B9% 86% E0% B9% 98% E0% B9% 85
    Test URLEncoder.encode (userId, "UTF-16"): tester% FE% FF% 0E% 46% 0E% 58% 0E% 45
    Test URLEncoder.encode (userId, "UTF-16LE"): tester% 46% 0E% 58% 0E% 45% 0E
    Test URLEncoder.encode (userId, "UTF-16BE"): tester% 0E% 46% 0E% 58% 0E% 45
    Test engine.eval ("encodeURIComponent (\" "+ userId +" \ ")"): tester% e0% b9% 86% e0% b9% 98% e0% b9% 85
    Test encodeURIComponent (userId): tester% E0% B9% 86% E0% B9% 98% E0% B9% 85
    TEST new URI (userId) .toASCIIString (): tester% E0% B9% 86% E0% B9% 98% E0% B9% 85
    ------ END TESTING WITH USER ID = 'tester ΰΉ† ๘ ΰΉ…' ----------------------


    ------ START TESTING WITH USER ID = 'tester% E6% F8% E5' ----------------------
    Test URLEncoder.encode (userId): tester% 25E6% 25F8% 25E5
    Test URLEncoder.encode (userId, "UTF-8"): tester% 25E6% 25F8% 25E5
    Test URLEncoder.encode (userId, "UTF-16"): tester% FE% FF% 00% 25E6% FE% FF% 00% 25F8% FE% FF% 00% 25E5
    Test URLEncoder.encode (userId, "UTF-16LE"): tester% 25% 00E6% 25% 00F8% 25% 00E5
    Test URLEncoder.encode (userId, "UTF-16BE"): tester% 00% 25E6% 00% 25F8% 00% 25E5
    Test engine.eval ("encodeURIComponent (\" "+ userId +" \ ")"): tester% 25E6% 25F8% 25E5
    Test encodeURIComponent (userId): tester% 25E6% 25F8% 25E5
    TEST new URI (userId) .toASCIIString (): tester% E6% F8% E5
    ------ END TESTING WITH USER ID = 'tester% E6% F8% E5' ----------------------

Note. As I was writing this, it occurred to me that I could use URLEncoder.encode (userId, "UTF-8") as long as I used the corresponding decoder on the other side ... but I was still trying to find a way to encode it according to the JavaScript function encodeURIComponent, which seems to work without having to decode it from the other side. :)

+3


source to share


1 answer


According to the Mozilla Developer Docs, encodeURICompoent () uses UTF-8 for encoding. When I run this on your line, I get the% C3% A6% C3% B8% C3% A5 tester as expected. When I run the following Java code:

System.out.println(URLEncoder.encode("testeræøΓ₯", "UTF-8"));

      

It also prints% C3% A6% C3% B8% C3% A5 tester. I also checked your test and got:



    ------ START TESTING WITH USER ID = 'dummy' ----------------------
Test URLEncoder.encode(userId): dummy
Test URLEncoder.encode(userId,"UTF-8"): dummy
Test URLEncoder.encode(userId,"UTF-16"): dummy
Test URLEncoder.encode(userId,"UTF-16LE"): dummy
Test URLEncoder.encode(userId,"UTF-16BE"): dummy
Test engine.eval("encodeURIComponent(\""+userId+"\")"): dummy
Test encodeURIComponent(userId): dummy
TEST new URI(userId).toASCIIString(): dummy
------ END TESTING WITH USER ID = 'dummy' ----------------------


------ START TESTING WITH USER ID = 'testeræøΓ₯' ----------------------
Test URLEncoder.encode(userId): tester%C3%A6%C3%B8%C3%A5
Test URLEncoder.encode(userId,"UTF-8"): tester%C3%A6%C3%B8%C3%A5
Test URLEncoder.encode(userId,"UTF-16"): tester%FE%FF%00%E6%00%F8%00%E5
Test URLEncoder.encode(userId,"UTF-16LE"): tester%E6%00%F8%00%E5%00
Test URLEncoder.encode(userId,"UTF-16BE"): tester%00%E6%00%F8%00%E5
Test engine.eval("encodeURIComponent(\""+userId+"\")"): tester%C3%A6%C3%B8%C3%A5
Test encodeURIComponent(userId): tester%C3%A6%C3%B8%C3%A5
TEST new URI(userId).toASCIIString(): tester%C3%A6%C3%B8%C3%A5
------ END TESTING WITH USER ID = 'testeræøΓ₯' ----------------------


------ START TESTING WITH USER ID = 'tester%C3%A6%C3%B8%C3%A5' ----------------------
Test URLEncoder.encode(userId): tester%25C3%25A6%25C3%25B8%25C3%25A5
Test URLEncoder.encode(userId,"UTF-8"): tester%25C3%25A6%25C3%25B8%25C3%25A5
Test URLEncoder.encode(userId,"UTF-16"): tester%FE%FF%00%25C3%FE%FF%00%25A6%FE%FF%00%25C3%FE%FF%00%25B8%FE%FF%00%25C3%FE%FF%00%25A5
Test URLEncoder.encode(userId,"UTF-16LE"): tester%25%00C3%25%00A6%25%00C3%25%00B8%25%00C3%25%00A5
Test URLEncoder.encode(userId,"UTF-16BE"): tester%00%25C3%00%25A6%00%25C3%00%25B8%00%25C3%00%25A5
Test engine.eval("encodeURIComponent(\""+userId+"\")"): tester%25C3%25A6%25C3%25B8%25C3%25A5
Test encodeURIComponent(userId): tester%25C3%25A6%25C3%25B8%25C3%25A5
TEST new URI(userId).toASCIIString(): tester%C3%A6%C3%B8%C3%A5
------ END TESTING WITH USER ID = 'tester%C3%A6%C3%B8%C3%A5' ----------------------

      

This is what I would expect.

I think you need to check the file encoding for the Java source file. If you are using Eclipse the default is cp1252. The first thing I do when I install Eclipse is change the default encoding to UTF-8.

+4


source







All Articles