How can I create a java.net.URI with a query parameter that contains an ampersand in the value

75 Views Asked by At

This is Clojure but hopefully you can read it as a Java developer:

I am using this constructor to construct a URI.

When I have a foo query parameter with spaces and ampersand in the value, I can do this:

(new java.net.URI "https" "foobar.net" "/" "foo= hello&bye " nil )

but this actually creates two parameters, since the ampersand isn't escaped:

#object[java.net.URI 0x3b809711 "https://foobar.net/?foo=%20hello&bye%20"]

If I escape the ampersand manually:

user=> (new java.net.URI "https" "foobar.net" "/" "foo= hello%26bye " nil )
#object[java.net.URI 0x5c84624f "https://foobar.net/?foo=%20hello%2526bye%20"]

you can see that it double-escapes my already-escaped ampersand. What should I do here?

2

There are 2 best solutions below

7
majusebetter On BEST ANSWER

What you're trying to achieve cannot work using this constructor, since it only encodes characters which are not allowed in a URI. The % is reserved for escaping characters, so a literal % is always escaped to %25 (which is why your %26 became %2526). On the other hand, the & is a valid character used as the delimiter between query parameters, and thus it is not escaped by this constructor.

In Java I would do something like

URI.create("https://foobar.net/?foo=" + URLEncoder.encode(" Hello&bye", "UTF-8"));

or

new URI("https://foobar.net/?foo=" + URLEncoder.encode("Hello&bye", "UTF-8"));

Either way, this only escapes invalid characters in the value of the foo query parameter, so the & inside the value is encoded before the URI is parsed.

0
VGR On

Unfortunately, the URI class cannot handle this properly. There is even an open bug for it.

If your program is a Java EE application or Java EE library, you can make use of the UriBuilder class, which builds URIs correctly, including query parameters.

Here’s what I am currently using:

import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Formatter;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;

/**
 * Query portion of a URI, separated into names and values.
 * Unlike the {@code java.net.URI} class, this class properly
 * encodes names and values;  that is, {@code '&'} and {@code '='}
 * will be URI-encoded, in addition to all other characters which are
 * not in the “pchar” production of RFC 3986, Appendix A.
 * Note that {@code '+'} is written and read literally;  it is never
 * treated as an encoded space.
 * <p>
 * Unless otherwise specified, null values are not permitted as
 * arguments to any constructor or method, and will result in a
 * runtime exception.  Null query parameter names and null query
 * parameter values are not permitted.
 *
 * <p>
 * A simple example (note that the {@code '?'} separator is not part of
 * the text returned by {@link #toString()}):
 * <pre>
 * URI uri = new URI("http://example.com/app?" +
 *     new URIQuery("user", "james", "password", "insecure?123"));
 * </pre>
 *
 * <p>
 * An example of adding a query parameter to an existing URI:
 * <pre>
 * URI uri = /* ... *&#x2f;;
 * URIQuery query = URIQuery.of(uri);
 * query.add("timestamp", String.valueOf(System.currentTimeMillis()));
 * uri = query.replaceQueryOf(uri);
 * </pre>
 *
 * @see <a href="https://www.rfc-editor.org/rfc/rfc3986">RFC 3986:
 *      Uniform Resource Identifier (URI): Generic Syntax</a>
 */
public final class URIQuery
implements Serializable {
    /** Version of serialized form. */
    private static final long serialVersionUID = 1;

    /**
     * Printable ASCII characters which are always percent-encoded in
     * names and values, in addition to control characters, space,
     * DEL and all non-ASCII bytes.
     */
    private static final String ESCAPED_PUNCTUATION = "\"#%&/<>=?[]\\`^{}|";

    // SuppressWarnings is needed because compiler warns that the declared type
    // is not serializable, even though its value always is.
    /**
     * All query parameter names and values, in insertion order.
     * Invariant: no name is ever mapped to an empty list.
     */
    @SuppressWarnings("serial")
    private final Map<String, List<String>> params = new LinkedHashMap<>();

    /**
     * Creates a new instance.  Arguments must be name-value pairs.
     * For example, {@code new URIQuery("name", "value1", "name", "value2", "anotherName", "value3");}
     *
     * @param namesAndValues pairs of names and values
     *
     * @throws NullPointerException if the argument array itself is null
     * @throws IllegalArgumentException if any argument is null
     * @throws IllegalArgumentException if odd number of arguments is supplied
     */
    public URIQuery(String... namesAndValues) {
        Objects.requireNonNull(namesAndValues, "Argument cannot be null.");
        if (namesAndValues.length % 2 != 0) {
            throw new IllegalArgumentException(
                "Name without value specified:"
                + " an even number of arguments is required.");
        }
        if (Arrays.stream(namesAndValues).anyMatch(Objects::isNull)) {
            throw new IllegalArgumentException(
                "Null name or value not permitted.");
        }
        for (int i = 0; i < namesAndValues.length; i += 2) {
            add(namesAndValues[i], namesAndValues[i + 1]);
        }
    }

    /**
     * Copy constructor which makes a new instance with copies of values
     * from another instance.
     *
     * @param other instance whose names and values will be copied
     */
    public URIQuery(URIQuery other) {
        Objects.requireNonNull(other, "Argument cannot be null.");
        for (Map.Entry<String, List<String>> entry : other.params.entrySet()) {
            this.params.put(entry.getKey(), new ArrayList<>(entry.getValue()));
        }
    }

    /**
     * Two instances are equal if they are both URIQuery objects
     * which have the same names, and for each name the same values
     * in the same order.  The relative order of different names
     * is not significant.
     *
     * @param obj argument to check for equality;  may be null
     *
     * @return true if argument is a URIQuery with the same names and values,
     *         false otherwise
     */
    @Override
    public boolean equals(Object obj) {
        return (obj instanceof URIQuery other &&
            this.params.equals(other.params));
    }

    /**
     * Generates a hash code based on this query's names and values.
     *
     * @return numeric hash code for this instance
     */
    @Override
    public int hashCode() {
        return params.hashCode();
    }

    /**
     * Returns a string describing this object's data.
     *
     * @return textual representation of this object
     */
    public String paramString() {
        return getClass().getName() + params;
    }

    /**
     * Returns a raw query identical to the text returned by
     * {@link #toRawQueryString()}.
     *
     * @return this object as a query string
     */
    @Override
    public String toString() {
        return toRawQueryString();
    }

    /**
     * Returns this object's names and values, in order, as a fully escaped
     * URI query component.  The returned text does not include a
     * leading {@code '?'}.
     *
     * @return this object's data as a query string
     */
    public String toRawQueryString() {
        return appendTo(new StringBuilder()).toString();
    }

    /**
     * Returns all names present in this query.
     *
     * @return new list of this object's names
     */
    public List<String> names() {
        return new ArrayList<>(params.keySet());
    }

    /**
     * Returns all values for the specified name.  If no values are
     * present for the specified name, an empty list is returned.
     *
     * @param paramName name of query parameter whose values are desired
     *
     * @return new, possibly empty list containing all values
     *         associated with the specified name
     */
    public List<String> get(String paramName) {
        Objects.requireNonNull(paramName, "Name cannot be null.");

        List<String> values = params.get(paramName);
        return (values != null) ? new ArrayList<>(values) : new ArrayList<>();
    }

    /**
     * Replaces the values for a specified name.  Supplying an empty
     * collection removes the name entirely.
     *
     * @param paramName name of query parameter whose values will be replaced
     * @param values new values to associate with specified name
     *
     * @throws IllegalArgumentException if any value is null
     */
    public void set(String paramName,
                    Collection<String> values) {

        Objects.requireNonNull(paramName, "Name cannot be null.");
        Objects.requireNonNull(values, "Value list cannot be null.");
        if (values.stream().anyMatch(Objects::isNull)) {
            throw new IllegalArgumentException("Null value not permitted.");
        }

        if (values.isEmpty()) {
            params.remove(paramName);
        } else {
            params.put(paramName, new ArrayList<>(values));
        }
    }

    /** Removes all names and values. */
    public void clear() {
        params.clear();
    }

    /**
     * Deletes all values for a specific name.
     *
     * @param paramName name whose values will be deleted
     */
    public void removeAll(String paramName) {
        Objects.requireNonNull(paramName, "Name cannot be null.");
        params.remove(paramName);
    }

    /**
     * Deletes a single value from a particular name.  If the value
     * occurs more than once, only its first occurrence is deleted.
     * If the value is not associated with the specified name,
     * no action is taken.
     *
     * @param paramName name whose value will be deleted
     * @param value value to delete
     */
    public void remove(String paramName,
                       String value) {

        Objects.requireNonNull(paramName, "Name cannot be null.");
        Objects.requireNonNull(value, "Value cannot be null.");

        List<String> values = params.get(paramName);
        if (values != null) {
            values.remove(value);

            // Preserve the invariant that no name maps to an empty list.
            if (values.isEmpty()) {
                params.remove(paramName);
            }
        }
    }

    /**
     * Adds values to the existing values for a name.  If no values
     * are supplied, this method does nothing.
     *
     * @param paramName name with which the specified values will be associated
     * @param valuesToAdd new values to add to specified name
     *
     * @throws IllegalArgumentException if any value is null
     */
    public void add(String paramName,
                    String... valuesToAdd) {

        Objects.requireNonNull(paramName, "Name cannot be null.");
        Objects.requireNonNull(valuesToAdd, "Value list cannot be null.");

        if (Arrays.stream(valuesToAdd).anyMatch(Objects::isNull)) {
            throw new IllegalArgumentException("Null value not permitted.");
        }
        if (valuesToAdd.length == 0) {
            // Do not register a name which has no values: it would appear
            // in names() and equals() but never in the query string.
            return;
        }
        List<String> values =
            params.computeIfAbsent(paramName, k -> new ArrayList<>());
        Collections.addAll(values, valuesToAdd);
    }

    /**
     * Adds a list of values to the existing values for a name.  If the
     * collection is empty, this method does nothing.
     *
     * @param paramName name with which the specified values will be associated
     * @param valuesToAdd new values to add to specified name
     *
     * @throws IllegalArgumentException if any value is null
     */
    public void add(String paramName,
                    Collection<String> valuesToAdd) {

        Objects.requireNonNull(paramName, "Name cannot be null.");
        Objects.requireNonNull(valuesToAdd, "Value list cannot be null.");

        if (valuesToAdd.stream().anyMatch(Objects::isNull)) {
            throw new IllegalArgumentException("Null value not permitted.");
        }
        if (valuesToAdd.isEmpty()) {
            // Do not register a name which has no values: it would appear
            // in names() and equals() but never in the query string.
            return;
        }
        List<String> values =
            params.computeIfAbsent(paramName, k -> new ArrayList<>());
        values.addAll(valuesToAdd);
    }

    /**
     * Obtains the hexadecimal value of a character.  Throws an exception
     * if the character is not an ASCII hex digit.
     *
     * @param index index in string of character to convert to a number
     * @param s string whose character will be converted
     *
     * @return value from 0 to 15 inclusive
     *
     * @throws IllegalArgumentException if specified character is not
     *                                  a hex character
     * @throws IndexOutOfBoundsException if index is not valid for
     *                                   specified string
     */
    private static int hexDigitAt(int index,
                                  CharSequence s) {
        char c = s.charAt(index);
        // Character.digit also accepts non-ASCII digits (for example,
        // fullwidth forms), which are not valid in a percent-escape.
        int digit = (c < 128) ? Character.digit(c, 16) : -1;
        if (digit < 0) {
            throw new IllegalArgumentException(
                String.format(
                    "Character '%s' (\\u%04x) at position %d " +
                    "is not a hex digit.", c, (int) c, index));
        }
        return digit;
    }

    /**
     * Decodes a URI as a String.  Unlike the URI class, allows even
     * a name or value which itself conforms to the syntax of a URI
     * to be treated as a single value.
     * <p>
     * Each run of consecutive percent-escapes is decoded as one UTF-8
     * byte sequence.  Characters which are not part of an escape
     * are copied unchanged.
     *
     * @param s string to decode
     *
     * @return URI-decoded string
     *
     * @throws URISyntaxException if string contains a truncated escape
     *                            or an invalid UTF-8 sequence
     * @throws IllegalArgumentException if a {@code '%'} is followed by
     *                                  a character which is not a hex digit
     */
    private static String uriDecode(CharSequence s)
    throws URISyntaxException {
        // Can't use this because s might itself be a URI with a scheme.
        //return URI.create(s).getSchemeSpecificPart();
        int len = s.length();
        StringBuilder decoded = new StringBuilder(len);

        // Every escape occupies three characters, so no run of escapes
        // can produce more than len / 3 bytes.
        ByteBuffer escapedBytes = ByteBuffer.allocate(len / 3);

        // Charset.decode silently substitutes U+FFFD for bad input;
        // a reporting decoder lets invalid sequences be rejected instead.
        CharsetDecoder utf8 = StandardCharsets.UTF_8.newDecoder()
            .onMalformedInput(CodingErrorAction.REPORT)
            .onUnmappableCharacter(CodingErrorAction.REPORT);

        int i = 0;
        while (i < len) {
            char c = s.charAt(i);
            if (c != '%') {
                decoded.append(c);
                i++;
                continue;
            }

            int runStart = i;
            escapedBytes.clear();
            while (i < len && s.charAt(i) == '%') {
                if (i > len - 3) {
                    throw new URISyntaxException(s.toString(),
                        "Malformed URI: '%' at position " + i +
                        " does not have two characters after it.", i);
                }
                int high = hexDigitAt(i + 1, s);
                int low = hexDigitAt(i + 2, s);
                escapedBytes.put((byte) ((high << 4) | low));
                i += 3;
            }
            escapedBytes.flip();

            try {
                decoded.append(utf8.decode(escapedBytes));
            } catch (CharacterCodingException e) {
                URISyntaxException invalid = new URISyntaxException(
                    s.toString(),
                    "Invalid UTF-8 sequence in escaped bytes starting"
                    + " at position " + runStart, runStart);
                invalid.initCause(e);
                throw invalid;
            }
        }

        return decoded.toString();
    }

    /**
     * Creates a new instance from the query portion of a
     * {@code java.net.URI} object.  An empty instance (that is,
     * an instance with no names or values) will be returned
     * if the URI argument has no query or an empty query.
     * Empty pairs, such as the one between the two ampersands
     * in {@code "a=1&&b=2"}, are ignored.  A pair with no {@code '='}
     * is treated as a name whose value is the empty string.
     *
     * @param uri uri whose query will be parsed
     *
     * @return new {@code URIQuery} instance, possibly empty
     *
     * @throws URISyntaxException if URI contains invalid UTF-8 sequence
     */
    public static URIQuery of(URI uri)
    throws URISyntaxException {
        Objects.requireNonNull(uri, "URI cannot be null.");

        URIQuery q = new URIQuery();
        String pairs = uri.getRawQuery();
        if (pairs == null) {
            return q;
        }

        for (String pair : pairs.split("&")) {
            if (pair.isEmpty()) {
                // String.split already drops trailing empty pairs;
                // skip leading and interior ones too, for consistency.
                continue;
            }
            String[] nameAndValue = pair.split("=", 2);
            String name = uriDecode(nameAndValue[0]);
            String value =
                (nameAndValue.length > 1) ? uriDecode(nameAndValue[1]) : "";
            q.params.computeIfAbsent(name,
                k -> new ArrayList<>()).add(value);
        }
        return q;
    }

    /**
     * URI-encodes a query name or value.  Escapes are written
     * with lowercase hex digits.
     *
     * @param s name or value to encode
     * @param uri destination to which encoded name/value will be appended
     */
    private static void encode(String s,
                               StringBuilder uri) {

        for (byte b : s.getBytes(StandardCharsets.UTF_8)) {
            // Non-ASCII bytes are negative, so "b <= 32" covers them
            // as well as control characters and space.
            boolean unsafe = (b <= 32 || b >= 127 ||
                ESCAPED_PUNCTUATION.indexOf(b) >= 0);
            if (unsafe) {
                uri.append('%');
                uri.append(Character.forDigit((b >> 4) & 0xf, 16));
                uri.append(Character.forDigit(b & 0xf, 16));
            } else {
                uri.append((char) b);
            }
        }
    }

    /**
     * Adds a fully escaped query string based on this object's data
     * to the specified StringBuilder.
     *
     * @param uri destination to which this object's names and values
     *            will be appended
     *
     * @return StringBuilder argument
     */
    private StringBuilder appendTo(StringBuilder uri) {
        String sep = "";
        for (Map.Entry<String, List<String>> entry : params.entrySet()) {
            String name = entry.getKey();

            for (String value : entry.getValue()) {
                uri.append(sep);
                encode(name, uri);
                uri.append('=');
                encode(value, uri);
                sep = "&";
            }
        }

        return uri;
    }

    /**
     * Creates a new URI with the same components as the specified URI,
     * except that its query is replaced with this object's data.
     * The scheme, authority and path are carried over in their raw
     * (still escaped) form, so escapes such as {@code %2F} in the path
     * are preserved.  If this object has no names, the returned URI
     * has no query.
     *
     * @param uri uri whose query will be replaced
     *
     * @return new URI instance whose query contains only this object's
     *         names and values
     *
     * @throws IllegalArgumentException if the URI is opaque (for example,
     *                                  a {@code mailto:} URI), since
     *                                  such a URI has no query component
     */
    public URI replaceQueryOf(URI uri) {
        Objects.requireNonNull(uri, "URI cannot be null.");
        if (uri.isOpaque()) {
            throw new IllegalArgumentException(
                "Opaque URI cannot have a query: " + uri);
        }

        // In a hierarchical URI, the first '?' or '#' ends the path.
        // Cutting the text there keeps scheme, authority and path exactly
        // as written, rather than decoding and re-encoding them.
        String ascii = uri.toASCIIString();
        int baseEnd = ascii.length();
        int fragmentStart = ascii.indexOf('#');
        if (fragmentStart >= 0) {
            baseEnd = fragmentStart;
        }
        int queryStart = ascii.indexOf('?');
        if (queryStart >= 0 && queryStart < baseEnd) {
            baseEnd = queryStart;
        }

        StringBuilder newURI = new StringBuilder();
        newURI.append(ascii, 0, baseEnd);

        if (!params.isEmpty()) {
            newURI.append('?');
            appendTo(newURI);
        }

        String fragment = uri.getRawFragment();
        if (fragment != null) {
            newURI.append('#').append(fragment);
        }

        return URI.create(newURI.toString());
    }

    /**
     * Tests this class.
     *
     * @param args command-line arguments, each of which will be treated
     *             as a URI whose query will be decoded
     *
     * @throws URISyntaxException if any argument's query cannot be decoded
     */
    public static void main(String[] args)
    throws URISyntaxException {
        for (String arg : args) {
            URI uri = URI.create(arg);
            System.out.println(URIQuery.of(uri));
        }

        URI uri = URI.create("https://www.example.com/Search");
        URIQuery query = new URIQuery();
        query.set("HideInStock", List.of("false"));
        query.set("HidePreorder", List.of("false"));
        query.set("HideSoldOut", List.of("true"));
        query.set("InventoryStatus", List.of("i,p",
                                             "i%p",
                                             "i#p",
                                             "i<p",
                                             "i:p",
                                             "i[p",
                                             "i{p",
                                             "i|p",
                                             "i\\p",
                                             "i^p",
                                             "i`p",
                                             "i\"p",
                                             "i$p"));
        query.set("Brand", List.of("16675"));
        query.set("PageSize", List.of("50"));
        query.set("SortOrder", List.of("New"));
        query.set("Company", List.of("311"));
        query.set("utm_source", List.of("retail_news"));
        query.set("utm_medium", List.of("email"));
        query.set("utm_campaign", List.of("Enzyklop\u00e4die"));
        query.set("utm_content", List.of(""));
        uri = query.replaceQueryOf(uri);
        System.out.println(uri);
    }
}