URL encoding

You are encouraged to solve this task according to the task description, using any language you may know.
- Task
Provide a function or mechanism to convert a provided string into URL encoding representation.
In URL encoding, special characters, control characters and extended characters are converted into a percent symbol followed by a two-digit hexadecimal code. For example, a space character encodes into %20 within the string.
For the purposes of this task, every character except 0-9, A-Z and a-z requires conversion, so the following characters all require conversion by default:
- ASCII control codes (Character ranges 00-1F hex (0-31 decimal) and 7F hex (127 decimal))
- ASCII symbols (Character ranges 32-47 decimal (20-2F hex))
- ASCII symbols (Character ranges 58-64 decimal (3A-40 hex))
- ASCII symbols (Character ranges 91-96 decimal (5B-60 hex))
- ASCII symbols (Character ranges 123-126 decimal (7B-7E hex))
- Extended characters with character codes of 128 decimal (80 hex) and above.
- Example
The string "http://foo bar/
" would be encoded as "http%3A%2F%2Ffoo%20bar%2F
".
- Variations
- Lowercase escapes are legal, as in "
http%3a%2f%2ffoo%20bar%2f
". - Special characters have different encodings for different standards:
- RFC 3986, Uniform Resource Identifier (URI): Generic Syntax, section 2.3, says to preserve "-._~".
- HTML 5, section 4.10.22.5 URL-encoded form data, says to preserve "-._*", and to encode space " " to "+".
- encodeURI function in Javascript will preserve "-._~" (RFC 3986) and ";,/?:@&=+$!*'()#".
- Options
It is permissible to use an exception string (containing a set of symbols that do not need to be converted). However, this is an optional feature and is not a requirement of this task.
- Related tasks
11l
// Percent-encode every character of `s` except 0-9, a-z, A-Z and "_.-~".
// Characters that must be encoded are collected in `buf` and emitted as the
// upper-case hex of their UTF-8 bytes, so multi-byte characters come out as
// several %XX triplets.
F url_encode(s)
   V r = ‘’
   V buf = ‘’

   F flush_buf() // this function is needed because strings in 11l are UTF-16 encoded
      I @buf != ‘’
         V bytes = @buf.encode(‘utf-8’)
         L(b) bytes
            @r ‘’= ‘%’hex(b).zfill(2)
         @buf = ‘’

   L(c) s
      I c C (‘0’..‘9’, ‘a’..‘z’, ‘A’..‘Z’, ‘_’, ‘.’, ‘-’, ‘~’)
         // a safe character ends the pending run of characters to encode
         flush_buf()
         r ‘’= c
      E
         buf ‘’= c

   // emit whatever is still pending at the end of the input
   flush_buf()
   R r
// Demo: the task's example plus a URL containing Cyrillic text.
print(url_encode(‘http://foo bar/’))
print(url_encode(‘https://ru.wikipedia.org/wiki/Транспайлер’))
- Output:
http%3A%2F%2Ffoo%20bar%2F https%3A%2F%2Fru.wikipedia.org%2Fwiki%2F%D0%A2%D1%80%D0%B0%D0%BD%D1%81%D0%BF%D0%B0%D0%B9%D0%BB%D0%B5%D1%80
Action!
; Decides whether character c has to be percent-encoded.
; Returns 0 when c is an ASCII letter or digit, or when it occurs in the
; exception list ex; returns 1 otherwise.
; ex is read as a length-prefixed array: ex(0) is the number of entries
; and ex(1)..ex(ex(0)) are the characters that are exempt from encoding.
BYTE FUNC MustEncode(CHAR c CHAR ARRAY ex)
BYTE i
IF c>='a AND c<='z OR c>='A AND c<='Z OR c>='0 AND c<='9 THEN
RETURN (0)
FI
; skip the search entirely for an empty exception list
IF ex(0)>0 THEN
FOR i=1 TO ex(0)
DO
IF ex(i)=c THEN
RETURN (0)
FI
OD
FI
RETURN (1)
; Appends character c to the length-prefixed string s: increments the
; length stored in s(0) and stores c at the new last position.
; No capacity check is performed here.
PROC Append(CHAR ARRAY s CHAR c)
s(0)==+1
s(s(0))=c
RETURN
; URL-encodes the string in into out.
;   in          - source string (length in in(0))
;   out         - destination string; reset to empty before encoding
;   ex          - characters that are copied through unencoded (see MustEncode)
;   spaceToPlus - when 1, a space (code 32) is written as '+' instead of %20
; Every other character for which MustEncode returns non-zero is written as
; '%' followed by two upper-case hex digits.
PROC Encode(CHAR ARRAY in,out,ex BYTE spaceToPlus)
CHAR ARRAY hex=['0 '1 '2 '3 '4 '5 '6 '7 '8 '9 'A 'B 'C 'D 'E 'F]
BYTE i
CHAR c
out(0)=0
FOR i=1 TO in(0)
DO
c=in(i)
IF spaceToPlus=1 AND c=32 THEN
Append(out,'+)
ELSEIF MustEncode(c,ex) THEN
Append(out,'%)
; high nibble first, then low nibble
Append(out,hex(c RSH 4))
Append(out,hex(c&$0F))
ELSE
Append(out,c)
FI
OD
RETURN
PROC EncodeRaw(CHAR ARRAY in,out)
Encode(in,out,"",0)
RETURN
PROC EncodeRFC3986(CHAR ARRAY in,out)
Encode(in,out,"-._~",0)
RETURN
PROC EncodeHTML5(CHAR ARRAY in,out)
Encode(in,out,"-._*",1)
RETURN
; Prints the length-prefixed string a one character at a time, combining
; each character code with $80 before output.
; NOTE(review): presumably this selects inverse video on the Atari - confirm.
PROC PrintInv(CHAR ARRAY a)
BYTE i
IF a(0)>0 THEN
FOR i=1 TO a(0)
DO
Put(a(i)%$80)
OD
FI
RETURN
PROC Test(CHAR ARRAY in)
CHAR ARRAY out(256)
PrintInv("input ")
PrintF(" %S%E",in)
EncodeRaw(in,out)
PrintInv("encoded ")
PrintF(" %S%E",out)
EncodeRFC3986(in,out)
PrintInv("RFC 3986")
PrintF(" %S%E",out)
EncodeHTML5(in,out)
PrintInv("HTML 5 ")
PrintF(" %S%E%E",out)
RETURN
PROC Main()
Test("http://foo bar/")
Test("http://www.rosettacode.org/wiki/URL_encoding")
Test("http://foo bar/*_-.html")
RETURN
- Output:
Screenshot from Atari 8-bit computer
input http://foo bar/ encoded http%3A%2F%2Ffoo%20bar%2F RFC 3986 http%3A%2F%2Ffoo%20bar%2F HTML 5 http%3A%2F%2Ffoo+bar%2F input http://www.rosettacode.org/wiki/URL_encoding encoded http%3A%2F%2Fwww%2Erosettacode%2Eorg%2Fwiki%2FURL%5Fencoding RFC 3986 http%3A%2F%2Fwww.rosettacode.org%2Fwiki%2FURL_encoding HTML 5 http%3A%2F%2Fwww.rosettacode.org%2Fwiki%2FURL_encoding input http://foo bar/*_-.html encoded http%3A%2F%2Ffoo%20bar%2F%2A%5F%2D%2Ehtml RFC 3986 http%3A%2F%2Ffoo%20bar%2F%2A_-.html HTML 5 http%3A%2F%2Ffoo+bar%2F*_-.html
Ada
with AWS.URL;
with Ada.Text_IO; use Ada.Text_IO;
procedure Encode is
Normal : constant String := "http://foo bar/";
begin
Put_Line (AWS.URL.Encode (Normal));
end Encode;
- Output:
http%3A%2F%2Ffoo%20bar%2F
ALGOL 68
BEGIN
# encodes the specified url - 0-9, A-Z and a-z are unchanged, #
# everything else is converted to %xx where xx are hex-digits #
PROC encode url = ( STRING url )STRING:
IF url = "" THEN "" # empty string #
ELSE
# non-empty string #
# ensure result will be big enough for a string of all encodable #
# characters #
STRING hex digits = "0123456789ABCDEF";
[ 1 : ( ( UPB url - LWB url ) + 1 ) * 3 ]CHAR result;
INT r pos := 0;
FOR u pos FROM LWB url TO UPB url DO
CHAR c = url[ u pos ];
IF ( c >= "0" AND c <= "9" )
OR ( c >= "A" AND c <= "Z" )
OR ( c >= "a" AND c <= "z" )
THEN
# no need to encode this character #
result[ r pos +:= 1 ] := c
ELSE
# must encode #
INT c code = ABS c;
result[ r pos +:= 1 ] := "%";
result[ r pos +:= 1 ] := hex digits[ ( c code OVER 16 ) + 1 ];
result[ r pos +:= 1 ] := hex digits[ ( c code MOD 16 ) + 1 ]
FI
OD;
result[ 1 : r pos ]
FI; # encode url #
# task test case #
print( ( encode url( "http://foo bar/" ), newline ) )
END
- Output:
http%3A%2F%2Ffoo%20bar%2F
Apex
EncodingUtil.urlEncode('http://foo bar/', 'UTF-8')
http%3A%2F%2Ffoo+bar%2F
AppleScript
AST URL encode "http://foo bar/"
- Output:
"http%3A%2F%2Ffoo%20bar%2F"
Applesoft BASIC
100 URL$ = "http://foo bar/"
110 GOSUB 140"URL ENCODE URL$ RETURNS R$
120 PRINT R$;
130 END
140 LET R$ = ""
150 LET L = LEN (URL$)
160 IF NOT L THEN RETURN
170 LET H$ = "0123456789ABCDEF"
180 FOR I = 1 TO L
190 LET C$ = MID$ (URL$,I,1)
200 LET C = ASC (C$)
210 IF C < ASC ("0") OR C > ASC ("Z") + 32 OR C > ASC ("9") AND C < ASC ("A") OR C > ASC ("Z") AND C < ASC ("A") + 32 THEN H = INT (C / 16):C$ = "%" + MID$ (H$,H + 1,1) + MID$ (H$,C - H * 16 + 1,1)
220 LET R$ = R$ + C$
230 NEXT I
240 RETURN
Arturo
encoded: encode.url.slashes "http://foo bar/"
print encoded
- Output:
http%3A%2F%2Ffoo+bar%2F
AutoHotkey
MsgBox, % UriEncode("http://foo bar/")
; Modified from http://goo.gl/0a0iJq
; Percent-encodes Uri after converting it to UTF-8.
;   Uri      - text to encode
;   Reserved - characters that are passed through unencoded in addition to
;              the unreserved set below; pass "" to encode all of them
; Returns the encoded string. Bytes that are not in Unreserved or Reserved
; are written as "%" followed by two upper-case hex digits.
UriEncode(Uri, Reserved:="!#$&'()*+,/:;=?@[]") {
Unreserved := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~"
; StrPut without a target returns the size needed for the UTF-8 copy
VarSetCapacity(Var, StrPut(Uri, "UTF-8"), 0)
StrPut(Uri, &Var, "UTF-8")
; walk the buffer byte by byte; the loop ends at the first zero byte
While (Code := NumGet(Var, A_Index - 1, "UChar")) {
If InStr(Unreserved . Reserved, Chr(Code)) {
Encoded .= Chr(Code)
}
Else {
Encoded .= Format("%{:02X}", Code)
}
}
Return Encoded
}
AWK
BEGIN {
for (i = 0; i <= 255; i++)
ord[sprintf("%c", i)] = i
}
# Encode string with application/x-www-form-urlencoded escapes.
# escape(str): return str with every character other than 0-9, A-Z and a-z
# replaced by "%" and its two-digit upper-case hex code.
# c, len, res and i are local variables (extra parameters); callers pass
# only str.  i is declared local so the loop does not clobber a global i.
# Relies on the global ord[] table being filled in before the first call.
function escape(str, c, len, res, i) {
	len = length(str)
	res = ""
	for (i = 1; i <= len; i++) {
		c = substr(str, i, 1);
		if (c ~ /[0-9A-Za-z]/)
		#if (c ~ /[-._*0-9A-Za-z]/)
			res = res c
		#else if (c == " ")
		#	res = res "+"
		else
			res = res "%" sprintf("%02X", ord[c])
	}
	return res
}
# Escape every line of input.
{ print escape($0) }
The array ord[]
uses idea from Character codes#AWK.
To follow the rules for HTML 5, uncomment the two lines that convert " " to "+", and use the regular expression that preserves "-._*".
Bash
# urlencode STRING
# Write STRING to stdout with every byte except ASCII alphanumerics and
# ".~_-" replaced by %xx (lower-case hex). No trailing newline is printed.
urlencode() {
    local LC_ALL=C # support unicode: walk bytes, not characters
    local input=$1 pos=0 ch
    while (( pos < ${#input} )); do
        ch=${input:pos:1}
        # HTML5 variant: emit '+' for ' ' before this test
        if [[ $ch == [[:alnum:].~_-] ]]; then
            printf '%s' "$ch"
        else
            # a leading quote makes printf use the character's numeric value
            printf '%%%02x' "'$ch"
        fi
        pos=$(( pos + 1 ))
    done
}
urlencode "http://foo bar/"
- Output:
http%3a%2f%2ffoo%20bar%2f
To produce upper-case hex codes, replace %02x
with %02X
See also: BashFAQ/071
BBC BASIC
PRINT FNurlencode("http://foo bar/")
END
REM Returns url$ with every character outside 0-9, A-Z and a-z replaced
REM by "%" followed by its two-digit upper-case hexadecimal code.
DEF FNurlencode(url$)
LOCAL c%, i%
REM url$ is rewritten in place, so LEN(url$) is re-read on every pass
WHILE i% < LEN(url$)
i% += 1
c% = ASCMID$(url$, i%)
IF c%<&30 OR c%>&7A OR c%>&39 AND c%<&41 OR c%>&5A AND c%<&61 THEN
REM splice "%XX" in place of the character; the two inserted hex digits
REM are alphanumeric, so later iterations leave them untouched
url$ = LEFT$(url$,i%-1) + "%" + RIGHT$("0"+STR$~c%,2) + MID$(url$,i%+1)
ENDIF
ENDWHILE
= url$
- Output:
http%3A%2F%2Ffoo%20bar%2F
Bracmat
( ( encode
= encoded exceptions octet string
. !arg:(?exceptions.?string)
& :?encoded
& @( !string
: ?
( %@?octet ?
& !encoded
( !octet
: ( ~<0:~>9
| ~<A:~>Z
| ~<a:~>z
)
| @(!exceptions:? !octet ?)
& !octet
| "%" d2x$(asc$!octet)
)
: ?encoded
& ~
)
)
| str$!encoded
)
& out$"without exceptions:
"
& out$(encode$(."http://foo bar/"))
& out$(encode$(."mailto:Ivan"))
& out$(encode$(."Aim <ivan.aim@email.com>"))
& out$(encode$(."mailto:Irma"))
& out$(encode$(."User <irma.user@mail.com>"))
& out$(encode$(."http://foo.bar.com/~user-name/_subdir/*~.html"))
& out$"
with RFC 3986 rules:
"
& out$(encode$("-._~"."http://foo bar/"))
& out$(encode$("-._~"."mailto:Ivan"))
& out$(encode$("-._~"."Aim <ivan.aim@email.com>"))
& out$(encode$("-._~"."mailto:Irma"))
& out$(encode$("-._~"."User <irma.user@mail.com>"))
& out$(encode$("-._~"."http://foo.bar.com/~user-name/_subdir/*~.html"))
);
- Output:
without exceptions: http%3A%2F%2Ffoo%20bar%2F mailto%3AIvan Aim%20%3Civan%2Eaim%40email%2Ecom%3E mailto%3AIrma User%20%3Cirma%2Euser%40mail%2Ecom%3E http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml with RFC 3986 rules: http%3A%2F%2Ffoo%20bar%2F mailto%3AIvan Aim%20%3Civan.aim%40email.com%3E mailto%3AIrma User%20%3Cirma.user%40mail.com%3E http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html
C
#include <ctype.h>
#include <stdio.h>
#include <string.h>
char rfc3986[256] = {0};
char html5[256] = {0};
/* caller responsible for memory */
/*
 * Percent-encode the NUL-terminated string s into enc.
 *
 * tb is a 256-entry table indexed by byte value: a non-zero entry is the
 * character to emit for that byte, a zero entry means the byte is written
 * as "%XX" (upper-case hex).
 *
 * enc must have room for 3 * strlen(s) + 1 bytes; it is always
 * NUL-terminated on return, including for empty input.
 */
void encode(const char *s, char *enc, char *tb)
{
	*enc = '\0';
	for (; *s; s++) {
		/* plain char may be signed: go through unsigned char so bytes
		 * >= 0x80 index inside the table and print as two hex digits */
		unsigned char c = (unsigned char)*s;

		if (tb[c]) {
			*enc++ = tb[c];
			*enc = '\0';
		} else {
			enc += sprintf(enc, "%%%02X", (unsigned)c);
		}
	}
}
/* Fill both lookup tables, then print the RFC 3986 encoding of the sample URL. */
int main()
{
	const char url[] = "http://foo bar/";
	char enc[(strlen(url) * 3) + 1];
	int i = 0;

	while (i < 256) {
		/* characters preserved by both standards */
		int common = isalnum(i) || i == '-' || i == '.' || i == '_';

		rfc3986[i] = (common || i == '~') ? i : 0;

		if (common || i == '*')
			html5[i] = i;
		else if (i == ' ')
			html5[i] = '+';
		else
			html5[i] = 0;

		i++;
	}

	encode(url, enc, rfc3986);
	puts(enc);

	return 0;
}
C#
using System;
namespace URLEncode
{
internal class Program
{
private static void Main(string[] args)
{
Console.WriteLine(Encode("http://foo bar/"));
}
private static string Encode(string uri)
{
return Uri.EscapeDataString(uri);
}
}
}
- Output:
http%3A%2F%2Ffoo%20bar%2F
C++
using Qt 4.6 as a library
#include <QByteArray>
#include <iostream>
int main( ) {
QByteArray text ( "http://foo bar/" ) ;
QByteArray encoded( text.toPercentEncoding( ) ) ;
std::cout << encoded.data( ) << '\n' ;
return 0 ;
}
- Output:
http%3A%2F%2Ffoo%20bar%2F
Clojure
Using Java's URLEncoder:
(import 'java.net.URLEncoder)
(URLEncoder/encode "http://foo bar/" "UTF-8")
- Output:
"http%3A%2F%2Ffoo+bar%2F"
ColdFusion
Common Lisp
(defun needs-encoding-p (char)
  "Return true unless CHAR is a digit in radix 36 (0-9, A-Z or a-z)."
  (not (digit-char-p char 36)))
(defun encode-char (char)
  "Return \"%\" followed by the char-code of CHAR in upper-case hex,
zero-padded to at least two digits."
  (format nil "%~2,'0X" (char-code char)))
(defun url-encode (url)
  "Return a copy of the string URL in which every character for which
NEEDS-ENCODING-P is true has been replaced by the result of ENCODE-CHAR.
The pieces are written to a string stream instead of being passed to APPLY,
so the length of URL is not limited by CALL-ARGUMENTS-LIMIT."
  (with-output-to-string (out)
    (map nil
         (lambda (char)
           (write-string (if (needs-encoding-p char)
                             (encode-char char)
                             (string char))
                         out))
         url)))
(url-encode "http://foo bar/")
- Output:
"http%3A%2F%2Ffoo%20bar%2F"
Crystal
The standard library URI
class provides methods for both the RFC 3986 and HTML 5 standards. The RFC 3986 method defaults to replacing space characters with %20
but will replace them with +
instead if the optional parameter space_to_plus
is true. The HTML 5 method has the opposite default behavior.
require "uri"
puts URI.encode("http://foo bar/")
puts URI.encode("http://foo bar/", space_to_plus: true)
puts URI.encode_www_form("http://foo bar/")
puts URI.encode_www_form("http://foo bar/", space_to_plus: false)
- Output:
http://foo%20bar/ http://foo+bar/ http%3A%2F%2Ffoo+bar%2F http%3A%2F%2Ffoo%20bar%2F
D
import std.stdio, std.uri;
void main() {
writeln(encodeComponent("http://foo bar/"));
}
- Output:
http%3A%2F%2Ffoo%20bar%2F
Delphi
{ Returns URL with every character other than 0-9, A-Z and a-z replaced by
  '%' followed by a two-digit upper-case hex code.
  The text is converted to UTF-8 first and encoded byte by byte, so a
  non-ASCII character yields one %XX triplet per UTF-8 byte instead of
  being truncated to its low byte. }
function EncodeURL(URL: string): string;
var I: integer;
    Bytes: UTF8String;
begin
Result:='';
Bytes:=UTF8Encode(URL);
for I:=1 to Length(Bytes) do
 if Bytes[I] in ['0'..'9', 'A'..'Z', 'a'..'z'] then Result:=Result+Char(Ord(Bytes[I]))
 else Result:=Result+'%'+IntToHex(Ord(Bytes[I]),2);
end;
procedure EncodeAndShowURL(Memo: TMemo; URL: string);
var ES: string;
begin
Memo.Lines.Add('Unencoded URL: '+URL);
ES:=EncodeURL(URL);
Memo.Lines.Add('Encoded URL: '+ES);
Memo.Lines.Add('');
end;
procedure ShowEncodedURLs(Memo: TMemo);
begin
EncodeAndShowURL(Memo,'http://foo bar/');
EncodeAndShowURL(Memo,'https://rosettacode.org/wiki/URL_encoding');
EncodeAndShowURL(Memo,'https://en.wikipedia.org/wiki/Pikes_Peak_granite');
end;
- Output:
Unencoded URL: http://foo bar/ Encoded URL: http%3A%2F%2Ffoo%20bar%2F Unencoded URL: https://rosettacode.org/wiki/URL_encoding Encoded URL: https%3A%2F%2Frosettacode%2Eorg%2Fwiki%2FURL%5Fencoding Unencoded URL: https://en.wikipedia.org/wiki/Pikes_Peak_granite Encoded URL: https%3A%2F%2Fen%2Ewikipedia%2Eorg%2Fwiki%2FPikes%5FPeak%5Fgranite Elapsed Time: 11.734 ms.
EasyLang
# Returns h as two upper-case hexadecimal digits
# (high nibble h div 16 first, then low nibble h mod 16).
func$ tohex h .
for c in [ h div 16 h mod 16 ]
# 48 is the code of "0"; values 10..15 are shifted by 7 more to reach "A".."F"
c += 48
if c >= 58 : c += 7
r$ &= strchar c
.
return r$
.
# Returns s$ with every character other than 0-9, A-Z and a-z replaced
# by "%" followed by the result of tohex for its character code.
func$ urlenc s$ .
for c$ in strchars s$
c = strcode c$
if c >= 48 and c <= 57 or c >= 65 and c <= 90 or c >= 97 and c <= 122
# alphanumeric: keep c$ unchanged
#
else
c$ = "%" & tohex c
.
r$ &= c$
.
return r$
.
print urlenc "http://foo bar/"
- Output:
http%3A%2F%2Ffoo%20bar%2F
Elixir
iex(1)> URI.encode("http://foo bar/", &URI.char_unreserved?/1)
"http%3A%2F%2Ffoo%20bar%2F"
Erlang
Built in, http_uri:encode/1
accepts lists and binary:
1> http_uri:encode("http://foo bar/"). "http%3A%2F%2Ffoo%20bar%2F"
Unicode
If you are URL-encoding Unicode data, however, http_uri:encode/1
will produce incorrect results:
1> http_uri:encode("étanchéité d une terrasse"). "étanchéité%20d%20une%20terrasse"
You should use the built-in (non-documented) edoc_lib:escape_uri/1
instead:
1> edoc_lib:escape_uri("étanchéité d une terrasse"). "%c3%a9tanch%c3%a9it%c3%a9%20d%20une%20terrasse"
And for binary you will need to take care and use unicode:characters_to_{list,binary}/1
:
1> unicode:characters_to_binary(edoc_lib:escape_uri(unicode:characters_to_list(<<"étanchéité d une terrasse"/utf8>>))). <<"%c3%a9tanch%c3%a9it%c3%a9%20d%20une%20terrasse">>
F#
open System
[<EntryPoint>]
let main args =
printfn "%s" (Uri.EscapeDataString(args.[0]))
0
- Output:
>URLencoding.exe "http://foo bar/" http%3A%2F%2Ffoo%20bar%2F
Factor
Factor's built-in URL encoder doesn't encode :
or /
. However, we can write our own predicate quotation that tells (url-encode)
what characters to exclude.
USING: combinators.short-circuit unicode urls.encoding.private ;
: my-url-encode ( str -- encoded )
[ { [ alpha? ] [ "-._~" member? ] } 1|| ] (url-encode) ;
"http://foo bar/" my-url-encode print
- Output:
http%3A%2F%2Ffoo%20bar%2F
Free Pascal
{ Returns data with every character other than 0-9, A-Z and a-z replaced
  by '%' followed by its two-digit upper-case hex code.
  Digits are kept as-is, as the task requires. }
function urlEncode(data: string): AnsiString;
var
  ch: AnsiChar;
begin
  Result := '';
  for ch in data do begin
    if ch in ['0'..'9', 'A'..'Z', 'a'..'z'] then
      Result := Result + ch
    else
      Result := Result + '%' + IntToHex(Ord(ch), 2);
  end;
end;
FreeBASIC
' Table of percent-escapes indexed by byte value (0..255).
' Hex(n, 2) zero-pads to two digits so that e.g. byte 10 becomes "%0A"
' rather than "%A".
Dim Shared As String lookUp(256)
For cadena As Integer = 0 To 255
    lookUp(cadena) = "%" + Hex(cadena, 2)
Next cadena
' Returns cadena with every character other than 0-9, A-Z and a-z replaced
' by its entry in the lookUp table.
Function string2url(cadena As String) As String
    Dim As String encoded, ch
    Dim As Integer idx = 1
    While idx <= Len(cadena)
        ch = Mid(cadena, idx, 1)
        If Instr( "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", ch) = 0 Then
            ' not alphanumeric: substitute the precomputed escape
            encoded &= lookUp(Asc(ch))
        Else
            encoded &= ch
        End If
        idx += 1
    Wend
    Return encoded
End Function
Dim As String URL = "http://foo bar/"
Print "Supplied URL '"; URL; "'"
Print "URL encoding '"; string2url(URL); "'"
Sleep
- Output:
Supplied URL 'http://foo bar/' URL encoding 'http%3A%2F%2Ffoo%20bar%2F'
Frink
Frink has a built-in URLEncode[string, encoding="UTF-8"]
function that correctly encodes strings (including high Unicode characters) for inclusion in a URL:
println[URLEncode["http://foo bar/"]]
- Output:
http%3A%2F%2Ffoo+bar%2F
FutureBasic
In addition to the generic alphanumeric character set used in the demo code below, FB offers several special character sets for URL encoding: fn CharacterSetURLFragmentAllowedSet fn CharacterSetURLHostAllowedSet fn CharacterSetURLPasswordAllowedSet fn CharacterSetURLPathAllowedSet fn CharacterSetURLQueryAllowedSet fn CharacterSetURLUserAllowedSet Users can also create custom character strings with: fn CharacterSetWithCharactersInString( CFStringRef string ) = CFCharacterSetRef
include "NSLog.incl"
local fn PercentEncodeURLString( urlStr as CFStringRef ) as CFStringRef
CFStringRef encodedStr = fn StringByAddingPercentEncodingWithAllowedCharacters( urlStr, fn CharacterSetAlphanumericSet )
end fn = encodedStr
NSLog( @"%@", fn PercentEncodeURLString( @"http://foo bar/" ) )
NSLog( @"%@", fn PercentEncodeURLString( @"http://www.rosettacode.org/wiki/URL_encoding" ) )
HandleEvents
- Output:
http%3A%2F%2Ffoo%20bar%2F http%3A%2F%2Fwww%2Erosettacode%2Eorg%2Fwiki%2FURL%5Fencoding
Go
package main
import (
"fmt"
"net/url"
)
func main() {
fmt.Println(url.QueryEscape("http://foo bar/"))
}
- Output:
http%3A%2F%2Ffoo+bar%2F
Groovy
def normal = "http://foo bar/"
def encoded = URLEncoder.encode(normal, "utf-8")
println encoded
- Output:
http%3A%2F%2Ffoo+bar%2F
Haskell
import qualified Data.Char as Char
import Text.Printf
encode :: Char -> String
encode c
| c == ' ' = "+"
| Char.isAlphaNum c || c `elem` "-._~" = [c]
| otherwise = printf "%%%02X" c
urlEncode :: String -> String
urlEncode = concatMap encode
main :: IO ()
main = putStrLn $ urlEncode "http://foo bar/"
- Output:
http%3A%2F%2Ffoo+bar%2F
Icon and Unicon
- Output:
text = "http://foo bar/" encoded = "http%3A%2F%2Ffoo%20bar%2F"
J
J has a urlencode in the gethttp package, but this task requires that all non-alphanumeric characters be encoded.
Here's an implementation that does that:
require'strings convert'
urlencode=: rplc&((#~2|_1 47 57 64 90 96 122 I.i.@#)a.;"_1'%',.hfd i.#a.)
Example use:
urlencode 'http://foo bar/'
http%3A%2F%2Ffoo%20bar%2F
Java
Java includes the URLEncoder and URLDecoder classes for this specific task.
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
URLEncoder.encode("http://foo bar/", StandardCharsets.UTF_8)
Alternately, you could implement this with a basic for-loop.
/**
 * URL-encodes {@code string}.
 *
 * <p>The text is converted to UTF-8 and processed byte by byte, so every
 * escape is exactly two hex digits, including for characters above U+00FF.
 * The characters {@code - . _ ~ *} and ASCII letters and digits are kept,
 * a space becomes {@code +}, and every other byte becomes {@code %xx}
 * (lower-case hex).
 *
 * @param string the text to encode
 * @return the encoded text
 */
String encode(String string) {
    StringBuilder encoded = new StringBuilder();
    for (byte octet : string.getBytes(java.nio.charset.StandardCharsets.UTF_8)) {
        /* mask to 0..255: Java bytes are signed */
        char character = (char) (octet & 0xFF);
        switch (character) {
            /* rfc3986 and html5 */
            case '-', '.', '_', '~', '*' -> encoded.append(character);
            case ' ' -> encoded.append('+');
            default -> {
                if (alphanumeric(character))
                    encoded.append(character);
                else {
                    encoded.append("%");
                    encoded.append("%02x".formatted((int) character));
                }
            }
        }
    }
    return encoded.toString();
}
/** Returns true when {@code character} is an ASCII letter (A-Z, a-z) or digit (0-9). */
boolean alphanumeric(char character) {
    if ('0' <= character && character <= '9')
        return true;
    if ('A' <= character && character <= 'Z')
        return true;
    return 'a' <= character && character <= 'z';
}
http%3a%2f%2ffoo+bar%2f
JavaScript
Confusingly, there are 3 different URI encoding functions in JavaScript: escape()
, encodeURI()
, and encodeURIComponent()
. Each of them encodes a different set of characters. See this article and this article for more information and comparisons.
var normal = 'http://foo/bar/';
var encoded = encodeURIComponent(normal);
jq
jq has a built-in function, @uri, for "percent-encoding". It preserves the characters that RFC 3986 mandates be preserved, but also preserves the following five characters: !'()*
To address the task requirement, therefore, we can first use @uri and then convert the exceptional characters. (For versions of jq with regex support, this could be done using gsub, but here we perform the translation directly.)
Note that @uri also converts multibyte characters to multi-triplets, e.g.
"á" | @uri
produces: "%C3%A1"
def url_encode:
# The helper function checks whether the input corresponds to one of the characters: !'()*
def recode: . as $c | [33,39,40,41,42] | index($c);
def hex: if . < 10 then 48 + . else 55 + . end;
@uri
| explode
# 37 ==> "%", 50 ==> "2"
| map( if recode then (37, 50, ((. - 32) | hex)) else . end )
| implode;
Examples:
"http://foo bar/" | @uri
produces: "http%3A%2F%2Ffoo%20bar%2F"
"http://foo bar/" | @uri == url_encode
produces: true
To contrast the difference between "@uri" and "url_encode", we compare the characters that are unaltered:
[range(0;1024) | [.] | implode | if @uri == . then . else empty end] | join(null)
produces: "!'()*-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"
[range(0;1024) | [.] | implode | if url_encode == . then . else empty end] | join(null)
produces: "-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"
Julia
#version 1.0.1
import HTTP.URIs: escapeuri
dcd = "http://foo bar/"
enc = escapeuri(dcd)
println(dcd, " => ", enc)
- Output:
http://foo bar/ => http%3A%2F%2Ffoo%20bar%2F
Kotlin
// version 1.1.2
import java.net.URLEncoder
fun main(args: Array<String>) {
val url = "http://foo bar/"
println(URLEncoder.encode(url, "utf-8")) // note: encodes space to + not %20
}
- Output:
http%3A%2F%2Ffoo+bar%2F
Ksh
# url_encode ARGS...
# Print the arguments (joined as "$*") URL-encoded, followed by a newline,
# using the %(url)q format of the shell's printf builtin.
url_encode()
{
	printf "%(url)q\n" "$*"
}

url_encode "http://foo bar/"
url_encode "https://ru.wikipedia.org/wiki/Транспайлер"
# the backtick is escaped: unescaped inside double quotes it would start a
# command substitution instead of being passed through literally
url_encode "google.com/search?q=\`Abdu'l-Bahá"
- Output:
http%3A%2F%2Ffoo%20bar%2F https%3A%2F%2Fru.wikipedia.org%2Fwiki%2F%D0%A2%D1%80%D0%B0%D0%BD%D1%81%D0%BF%D0%B0%D0%B9%D0%BB%D0%B5%D1%80 google.com%2Fsearch%3Fq%3D%60Abdu%27l-Bah%C3%A1
langur
val urlEncode = fn(s) {
replace(
s,
by=re/[^A-Za-z0-9]/,
with=fn r:join(map(s2b(r), by=fn b:"%{{b:X02}}")),
)
}
writeln urlEncode("https://some website.com/")
- Output:
https%3A%2F%2Fsome%20website%2Ecom%2F
Lasso
bytes('http://foo bar/') -> encodeurl
-> http%3A%2F%2Ffoo%20bar%2F
Liberty BASIC
' Table of percent-escapes indexed by character code (0..255).
' The hex value is left-padded with "0" so that e.g. code 10 becomes "%0A"
' rather than "%A".
dim lookUp$( 256)
for i =0 to 255
    lookUp$( i) ="%" +right$( "0" +dechex$( i), 2)
next i
string$ ="http://foo bar/"
print "Supplied string '"; string$; "'"
print "As URL '"; url$( string$); "'"
end
function url$( i$)
for j =1 to len( i$)
c$ =mid$( i$, j, 1)
if instr( "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", c$) then
url$ =url$ +c$
else
url$ =url$ +lookUp$( asc( c$))
end if
next j
end function
Supplied string 'http://foo bar/' As URL 'http%3A%2F%2Ffoo%20bar%2F'
Lingo
Lingo implements old-school URL encoding (with spaces encoded as "+") out of the box:
put urlencode("http://foo bar/")
-- "http%3a%2f%2ffoo+bar%2f"
For RFC 3986 URL encoding (i.e. without the "+" fuss) a custom function is needed - which might call the above function and then just replace all "+" with "%20".
LiveCode
urlEncode("http://foo bar/")
-- http%3A%2F%2Ffoo+bar%2F
Lua
-- Returns the percent-escape for the first byte of chr: "%" followed by
-- exactly two upper-case hex digits (zero-padded, so "\n" gives "%0A").
function encodeChar(chr)
	return string.format("%%%02X", string.byte(chr))
end
-- Returns str with every non-alphanumeric character (per the %w class)
-- replaced by the result of encodeChar.
function encodeString(str)
	-- the parentheses drop gsub's second result (the substitution count)
	return (string.gsub(str, "[^%w]", encodeChar))
end
-- will print "http%3A%2F%2Ffoo%20bar%2F"
print(encodeString("http://foo bar/"))
M2000 Interpreter
Module Checkit {
Function decodeUrl$(a$) {
DIM a$()
a$()=Piece$(a$, "%")
if len(a$())=1 then =str$(a$):exit
k=each(a$(),2)
acc$=str$(a$(0))
While k {
acc$+=str$(Chr$(Eval("0x"+left$(a$(k^),2)))+Mid$(a$(k^),3))
}
=string$(acc$ as utf8dec)
}
Group Parse$ {
all$, c=1
tc$=""
Enum UrlType {None=0, RFC3986, HTML5}
variation
TypeData=("","-._~","-._*")
Function Next {
.tc$<=mid$(.all$,.c,1)
.c++
=.tc$<>""
}
Value {
=.tc$
}
Function DecodeOne$ {
if .tc$="" then exit
if .tc$ ~"[A-Za-z0-9]" then =.tc$ : exit
If .tc$=" " Then =if$(.variation=.HTML5->"+","%20") :exit
if instr(.TypeData#val$(.variation),.tc$)>0 then =.tc$ :exit
="%"+hex$(asc(.tc$), 1)
}
Function Decode$ {
acc$=""
.c<=1
While .Next() {
acc$+=.DecodeOne$()
}
=acc$
}
Set () {
\\ using optional argument
var=.None
Read a$, ? var
a$=chr$(string$(a$ as utf8enc))
.variation<=var
.all$<=a$
.c<=1
}
}
\\ MAIN
Parse$()="http://foo bar/"
Print Quote$(Parse.Decode$())
Parse.variation=Parse.HTML5
Print Quote$(Parse.Decode$())
Parse.variation=Parse.RFC3986
Print Quote$(Parse.Decode$())
Parse$(Parse.RFC3986) ={mailto:"Irma User" <irma.user@mail.com>}
Print Quote$(Parse.Decode$())
Parse$(Parse.RFC3986) ={http://foo.bar.com/~user-name/_subdir/*~.html}
m=each(Parse.UrlType)
while m {
Parse.variation=eval(m)
Print Quote$(Parse.Decode$())
Print decodeUrl$(Parse.Decode$())
}
}
CheckIt
"http%3A%2F%2Ffoo%20bar%2F" "http%3A%2F%2Ffoo+bar%2F" "mailto%3A%22Irma%20User%22%20%3Cirma.user%40mail.com%3E" "http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml" "http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html" "http%3A%2F%2Ffoo.bar.com%2F%7Euser-name%2F_subdir%2F*%7E.html"
Maple
URL:-Escape("http://foo bar/");
- Output:
"http%3A%2F%2Ffoo%20bar%2F"
Mathematica /Wolfram Language
(* URLEncoding[url] replaces every character of url other than 0-9, a-z and
   A-Z by one "%xx" escape per UTF-8 byte (lower-case hex).
   IntegerString[..., 16, 2] zero-pads each byte to two digits so that
   bytes below 16 are encoded as e.g. "%0a" rather than "%a". *)
URLEncoding[url_] :=
 StringReplace[url,
  x : Except[
     Join[CharacterRange["0", "9"], CharacterRange["a", "z"],
      CharacterRange["A", "Z"]]] :>
   StringJoin[("%" ~~ #) & /@
     IntegerString[ToCharacterCode[x, "UTF8"], 16, 2]]]
Example use:
URLEncoding["http://foo bar/"]
- Output:
http%3a%2f%2ffoo%20bar%2f
MATLAB / Octave
function u = urlencoding(s)
  % URLENCODING  Percent-encode a character array.
  %   u = urlencoding(s) returns s with every character other than 0-9,
  %   A-Z and a-z replaced by '%' followed by its upper-case hex code,
  %   zero-padded to two digits.
  %   The alphanumeric test is an explicit ASCII range check so the code
  %   runs in both MATLAB and Octave (isalnum is Octave-only).
  u = '';
  for k = 1:length(s)
    c = s(k);
    if (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
      u(end+1) = c;
    else
      % the second argument pads to two digits: char(10) -> '0A', not 'A'
      u = [u, '%', dec2hex(double(c), 2)];
    end
  end
end
Usage:
octave:3> urlencoding('http://foo bar/') ans = http%3A%2F%2Ffoo%20bar%2F
NetRexx
/* NetRexx */
options replace format comments java crossref symbols nobinary
/* -------------------------------------------------------------------------- */
testcase()
say
say 'RFC3986'
testcase('RFC3986')
say
say 'HTML5'
testcase('HTML5')
say
return
/* -------------------------------------------------------------------------- */
method encode(url, varn) public static
variation = varn.upper
opts = ''
opts['RFC3986'] = '-._~'
opts['HTML5'] = '-._*'
rp = ''
loop while url.length > 0
parse url tc +1 url
select
when tc.datatype('A') then do
rp = rp || tc
end
when tc == ' ' then do
if variation = 'HTML5' then
rp = rp || '+'
else
rp = rp || '%' || tc.c2x
end
otherwise do
if opts[variation].pos(tc) > 0 then do
rp = rp || tc
end
else do
rp = rp || '%' || tc.c2x
end
end
end
end
return rp
/* -------------------------------------------------------------------------- */
method testcase(variation = '') public static
url = [ -
'http://foo bar/' -
, 'mailto:"Ivan Aim" <ivan.aim@email.com>' -
, 'mailto:"Irma User" <irma.user@mail.com>' -
, 'http://foo.bar.com/~user-name/_subdir/*~.html' -
]
loop i_ = 0 to url.length - 1
say url[i_]
say encode(url[i_], variation)
end i_
return
- Output:
http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan%2Eaim%40email%2Ecom%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma%2Euser%40mail%2Ecom%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml RFC3986 http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html HTML5 http://foo bar/ http%3A%2F%2Ffoo+bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan+Aim%22+%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma+User%22+%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F%7Euser-name%2F_subdir%2F*%7E.html
NewLISP
;; simple encoder
;; (source http://www.newlisp.org/index.cgi?page=Code_Snippets)
;; Replace every character of str other than a-z, A-Z and 0-9 by "%" and
;; its upper-case hex code. "%02X" zero-pads to two digits; the previous
;; "%2X" padded with a space, producing e.g. "% A" for a newline.
(define (url-encode str)
  (replace {([^a-zA-Z0-9])} str (format "%%%02X" (char $1)) 0))
(url-encode "http://foo bar/")
Nim
import cgi
echo encodeUrl("http://foo/bar/")
- Output:
http%3A%2F%2Ffoo%2Fbar%2F
Oberon-2
MODULE URLEncoding;
IMPORT
Out := NPCT:Console,
ADT:StringBuffer,
URI := URI:String;
VAR
encodedUrl: StringBuffer.StringBuffer;
BEGIN
encodedUrl := NEW(StringBuffer.StringBuffer,512);
URI.AppendEscaped("http://foo bar/","",encodedUrl);
Out.String(encodedUrl.ToString());Out.Ln
END URLEncoding.
- Output:
http%3A%2F%2Ffoo%20bar%2F
Objeck
use FastCgi;
bundle Default {
class UrlEncode {
function : Main(args : String[]) ~ Nil {
url := "http://foo bar/";
UrlUtility->Encode(url)->PrintLine();
}
}
}
Objective-C
NSString *normal = @"http://foo bar/";
NSString *encoded = [normal stringByAddingPercentEscapesUsingEncoding:NSUTF8StringEncoding];
NSLog(@"%@", encoded);
The Core Foundation function CFURLCreateStringByAddingPercentEscapes()
provides more options.
NSString *normal = @"http://foo bar/";
NSString *encoded = [normal stringByAddingPercentEncodingWithAllowedCharacters:[NSCharacterSet alphanumericCharacterSet]];
NSLog(@"%@", encoded);
For encoding for various parts of the URL, the allowed character sets [NSCharacterSet URLUserAllowedCharacterSet]
, [NSCharacterSet URLPasswordAllowedCharacterSet]
, [NSCharacterSet URLHostAllowedCharacterSet]
, [NSCharacterSet URLPathAllowedCharacterSet]
, [NSCharacterSet URLQueryAllowedCharacterSet]
, or [NSCharacterSet URLFragmentAllowedCharacterSet]
are provided.
OCaml
Using the library ocamlnet from the interactive loop:
$ ocaml
# #use "topfind";;
# #require "netstring";;
# Netencoding.Url.encode "http://foo bar/" ;;
- : string = "http%3A%2F%2Ffoo+bar%2F"
ooRexx
The solution shown at Rexx (version 1) is a valid ooRexx program.
version 2 uses constructs not supported by ooRexx:
url.=; and $ as variable symbol
PascalABC.NET
##
function URLEncode(s: string) := System.Uri.EscapeDataString(s);
Println(URLEncode('http://foo bar/'));
- Output:
http%3A%2F%2Ffoo%20bar%2F
Perl
# urlencode($string)
# Returns $string URL-encoded in the form-data style: the characters
# -A-Za-z0-9_.!~*'() are kept, a space becomes "+", and everything else
# becomes %XX (upper-case hex).
# Character strings are converted to UTF-8 bytes first, so a wide character
# yields one %XX per byte instead of a single escape with 3+ hex digits.
sub urlencode {
    my $s = shift;
    utf8::encode($s) if utf8::is_utf8($s);
    $s =~ s/([^-A-Za-z0-9_.!~*'() ])/sprintf("%%%02X", ord($1))/eg;
    $s =~ tr/ /+/;
    return $s;
}
print urlencode('http://foo bar/')."\n";
- Output:
http%3A%2F%2Ffoo+bar%2F
use URI::Escape;
my $s = 'http://foo/bar/';
print uri_escape($s);
Use standard CGI module:
use 5.10.0;
use CGI;
my $s = 'http://foo/bar/';
say $s = CGI::escape($s);
say $s = CGI::unescape($s);
Phix
--
-- demo\rosetta\encode_url.exw
-- ===========================
--
with javascript_semantics

-- Returns the hex digit character ('0'..'9', 'A'..'F') for nibble b (0..15).
function nib(integer b)
    return b+iff(b<=9?'0':'A'-10)
end function

-- URL-encodes s.
--  exclusions: characters to copy through unencoded (default none)
--  spaceplus:  when non-zero, a space is written as '+' instead of %20
-- Every other character outside 0-9, A-Z and a-z becomes '%' followed by
-- two upper-case hex digits.
function encode_url(string s, string exclusions="", integer spaceplus=0)
    string res = ""
    for i=1 to length(s) do
        integer ch = s[i]
        if ch=' ' and spaceplus then
            ch = '+'
        elsif not find(ch,exclusions)
          and (ch<'0'
           or (ch>'9' and ch<'A')
           or (ch>'Z' and ch<'a')
           or  ch>'z') then
            res &= '%'
            res &= nib(floor(ch/#10))
            -- the low nibble is appended by the shared "res &= ch" below
            ch = nib(and_bits(ch,#0F))
        end if
        res &= ch
    end for
    return res
end function

printf(1,"%s\n",{encode_url("http://foo bar/")})
{} = wait_key()
- Output:
http%3A%2F%2Ffoo%20bar%2F
PHP
<?php
$s = 'http://foo/bar/';
$s = rawurlencode($s);
?>
There is also urlencode()
, which also encodes spaces as "+" signs
PicoLisp
# Returns Str with every character other than 0-9, A-Z and a-z replaced by
# "%" and its hex code. 'pad' zero-fills the code to two digits so that
# e.g. a linefeed becomes "%0A" instead of "%A".
(de urlEncodeTooMuch (Str)
   (pack
      (mapcar
         '((C)
            (if (or (>= "9" C "0") (>= "Z" (uppc C) "A"))
               C
               (list '% (pad 2 (hex (char C)))) ) )
         (chop Str) ) ) )
Test:
: (urlEncodeTooMuch "http://foo bar/") -> "http%3A%2F%2Ffoo%20bar%2F"
Pike
Protocols.HTTP.uri_encode( "http://foo bar/" );
- Output:
http%3A%2F%2Ffoo%20bar%2F
Powershell
[uri]::EscapeDataString('http://foo bar/')
http%3A%2F%2Ffoo%20bar%2F
PureBasic
URL$ = URLEncoder("http://foo bar/")
Python
# quote() lives in urllib.parse on Python 3 and in urllib on Python 2.
try:
    from urllib.parse import quote
except ImportError:
    from urllib import quote

s = 'http://foo/bar/'
# safe='' overrides the default safe='/' so that slashes are encoded as well,
# as the task requires.
s = quote(s, safe='')
There is also urllib.quote_plus() (urllib.parse.quote_plus() in Python 3)
, which also encodes spaces as "+" signs
R
R has a built-in
URLencode("http://foo bar/")
function, but it doesn't fully follow RFC guidelines, so we have to use another R package to accomplish the task:
library(RCurl)
curlEscape("http://foo bar/")
Racket
#lang racket
(require net/uri-codec)
(uri-encode "http://foo bar/")
Raku
(formerly Perl 6)
my $url = 'http://foo bar/';
say $url.subst(/<-alnum>/, *.ord.fmt("%%%02X"), :g);
- Output:
http%3A%2F%2Ffoo%20bar%2F
REALbasic
Using the built-in encoding method, which doesn't permit exceptions:
Dim URL As String = "http://foo bar/"
URL = EncodeURLComponent(URL)
Print(URL)
With optional exceptions. A "ParamArray" is an array of zero or more additional arguments passed by the caller:
Function URLEncode(Data As String, ParamArray Exceptions() As String) As String
  ' Percent-encodes Data. ASCII digits, letters, "-", "." and "_" are kept,
  ' as is any single character passed in Exceptions; everything else
  ' becomes "%" followed by two hexadecimal digits.
  Dim buf As String
  For i As Integer = 1 To Data.Len
    Dim char As String = Data.Mid(i, 1)
    Select Case Asc(char)
    Case 48 To 57, 65 To 90, 97 To 122, 45, 46, 95
      buf = buf + char
    Else
      If Exceptions.IndexOf(char) > -1 Then
        buf = buf + char
      Else
        ' Pad on the LEFT: Hex() returns a single digit for codes below 16,
        ' so a linefeed must become "%0A" (padding on the right gave "%A0").
        buf = buf + "%" + Right("00" + Hex(Asc(char)), 2)
      End If
    End Select
  Next
  Return buf
End Function
'usage
Dim s As String = URLEncode("http://foo bar/") ' no exceptions
Dim t As String = URLEncode("http://foo bar/", "!", "?", ",") ' with exceptions
REXX
version 1
/* Rexx */
/* Driver: runs the test cases three times - first with no variation,      */
/* then passing RFC3986, then passing HTML5.  RFC3986 and HTML5 are never   */
/* assigned here, so each evaluates to its own (uppercase) name, which is   */
/* what gets printed as a heading and passed on to testcase.                */
do
call testcase
say
say RFC3986
call testcase RFC3986
say
say HTML5
call testcase HTML5
say
return
end
exit
/* -------------------------------------------------------------------------- */
/* encode(url, variation): returns url percent-encoded.                     */
/*   variation - optional; 'RFC3986' keeps "-._~", 'HTML5' keeps "-._*"      */
/*               and turns a blank into "+".  Anything else (or nothing)    */
/*               keeps only alphanumerics.                                  */
encode:
procedure
do
/* only the first word of the second argument is used, uppercased */
parse arg url, varn .
parse upper var varn variation
/* make sure both symbols are uninitialized so they evaluate to their own names */
drop RFC3986 HTML5
/* default for every tail is the empty string, i.e. no extra preserved characters */
opts. = ''
opts.RFC3986 = '-._~'
opts.HTML5 = '-._*'
rp = ''
/* consume url one character at a time: tc gets the first character, url the rest */
do while length(url) > 0
parse var url tc +1 url
select
/* alphanumerics are copied unchanged */
when datatype(tc, 'A') then do
rp = rp || tc
end
when tc == ' ' then do
if variation = HTML5 then
rp = rp || '+'
else
rp = rp || '%' || c2x(tc)
end
otherwise do
/* opts.variation is looked up by the VALUE of variation */
if pos(tc, opts.variation) > 0 then do
rp = rp || tc
end
else do
rp = rp || '%' || c2x(tc)
end
end
end
end
return rp
end
exit
/* -------------------------------------------------------------------------- */
/* testcase(variation): prints each sample URL followed by its encoding     */
/* under the given variation (passed straight through to encode).           */
testcase:
procedure
do
parse arg variation
X = 0
url. = ''
/* url.0 holds the number of samples, url.1 .. url.n the samples themselves */
X = X + 1; url.0 = X; url.X = 'http://foo bar/'
X = X + 1; url.0 = X; url.X = 'mailto:"Ivan Aim" <ivan.aim@email.com>'
X = X + 1; url.0 = X; url.X = 'mailto:"Irma User" <irma.user@mail.com>'
X = X + 1; url.0 = X; url.X = 'http://foo.bar.com/~user-name/_subdir/*~.html'
do i_ = 1 to url.0
say url.i_
say encode(url.i_, variation)
end i_
return
end
- Output:
http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan%2Eaim%40email%2Ecom%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma%2Euser%40mail%2Ecom%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml RFC3986 http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html HTML5 http://foo bar/ http%3A%2F%2Ffoo+bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan+Aim%22+%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma+User%22+%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F%7Euser-name%2F_subdir%2F*%7E.html
version 2
/*REXX program encodes a URL text, blanks ──► +, preserves -._* and -._~ */
/*The samples are stored in url.1 .. url.4; every other url.j is null, which ends the loop.*/
url.=; url.1= 'http://foo bar/'
url.2= 'mailto:"Ivan Aim" <ivan.aim@email.com>'
url.3= 'mailto:"Irma User" <irma.user@mail.com>'
url.4= 'http://foo.bar.com/~user-name/_subdir/*~.html'
do j=1 while url.j\==''; say
say ' original: ' url.j
say ' encoded: ' URLencode(url.j)
end /*j*/
exit /*stick a fork in it, we're all done. */
/*──────────────────────────────────────────────────────────────────────────────────────*/
/*URLencode(text): returns text with alphanumerics kept, each blank turned into "+" and  */
/*every other character turned into % followed by its hexadecimal code.                  */
/*The only exception: the exact 4-character sequences held in t1 ("-._~") and t2         */
/*("-._*") are copied through unchanged when they occur contiguously in the text.        */
/*NOTE(review): a lone "-", ".", "_", "~" or "*" is therefore still encoded (the listed  */
/*output shows %2E, %7E, ...); confirm whether per-character preservation was intended.  */
URLencode: procedure; parse arg $,,z; t1= '-._~' /*get args, null Z.*/
skip=0; t2= '-._*'
do k=1 for length($); _=substr($, k, 1) /*get a character. */
if skip\==0 then do; skip=skip-1 /*skip t1 or t2 ? */
iterate /*skip a character.*/
end
select
when datatype(_, 'A') then z=z || _ /*is alphanumeric ?*/
when _==' ' then z=z'+' /*is it a blank ?*/
when substr($, k, 4)==t1 |, /*is it t1 or t2 ?*/
substr($, k, 4)==t2 then do; skip=3 /*skip 3 characters*/
z=z || substr($, k, 4)
end
otherwise z=z'%'c2x(_) /*special character*/
end /*select*/
end /*k*/
return z
output when using the default input:
original: http://foo bar/ encoded: http%3A%2F%2Ffoo+bar%2F original: mailto:"Ivan Aim" <ivan.aim@email.com> encoded: mailto%3A%22Ivan+Aim%22+%3Civan%2Eaim%40email%2Ecom%3E original: mailto:"Irma User" <irma.user@mail.com> encoded: mailto%3A%22Irma+User%22+%3Cirma%2Euser%40mail%2Ecom%3E original: http://foo.bar.com/~user-name/_subdir/*~.html encoded: http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml
Ruby
CGI.escape encodes all characters except '-.0-9A-Z_a-z'.
require 'cgi'
puts CGI.escape("http://foo bar/").gsub("+", "%20")
# => "http%3A%2F%2Ffoo%20bar%2F"
Programs should not call URI.escape (alias URI.encode), because it fails to encode some characters. URI.escape has been obsolete since Ruby 1.9.2.
URI.encode_www_form_component is a method introduced in Ruby 1.9.2. It obeys HTML 5 and encodes all characters except '-.0-9A-Z_a-z' and '*'.
require 'uri'
puts URI.encode_www_form_component("http://foo bar/").gsub("+", "%20")
# => "http%3A%2F%2Ffoo%20bar%2F"
Run BASIC
' Percent-encode urlIn$ into url$: digits and ASCII letters are copied,
' every other character becomes "%" plus its two-digit hexadecimal code.
urlIn$ = "http://foo bar/"
for i = 1 to len(urlIn$)
    a$ = mid$(urlIn$,i,1)
    ' right$("0"+...,2) keeps the leading zero for codes below 16 (e.g. %0A)
    if (a$ >= "0" and a$ <= "9") _
    or (a$ >= "A" and a$ <= "Z") _
    or (a$ >= "a" and a$ <= "z") then url$ = url$ + a$ else url$ = url$ + "%" + right$("0" + dechex$(asc(a$)), 2)
next i
print urlIn$;" -> ";url$
http://foo bar/ -> http%3A%2F%2Ffoo%20bar%2F
Rust
const INPUT: &str = "http://foo bar/";
const MAX_CHAR_VAL: u32 = std::char::MAX as u32;

/// Percent-encodes every character of `input` except ASCII letters and digits.
///
/// Characters outside that set are written as one `%XX` escape per byte of
/// their UTF-8 encoding, always with two uppercase hexadecimal digits.
fn url_encode(input: &str) -> String {
    // Scratch space for the UTF-8 bytes of one character (at most four).
    let mut buff = [0u8; 4];
    input
        .chars()
        .map(|ch| match ch as u32 {
            0..=47 | 58..=64 | 91..=96 | 123..=MAX_CHAR_VAL => ch
                .encode_utf8(&mut buff)
                .bytes()
                // `{:02X}` keeps the leading zero: byte 0x0A must become
                // "%0A", not "%A".
                .map(|byte| format!("%{:02X}", byte))
                .collect::<String>(),
            _ => ch.to_string(),
        })
        .collect()
}

fn main() {
    println!("{}", url_encode(INPUT));
}
- Output:
http%3A%2F%2Ffoo%20bar%2F
Scala
import java.net.{URLDecoder, URLEncoder}
import scala.compat.Platform.currentTime
// Demo program: encodes a sample URL with java.net.URLEncoder and checks the result.
object UrlCoded extends App {
val original = """http://foo bar/"""
// URLEncoder yields form encoding: the space comes back as '+' (asserted below).
val encoded: String = URLEncoder.encode(original, "UTF-8")
assert(encoded == "http%3A%2F%2Ffoo+bar%2F", s"Original: $original not properly encoded: $encoded")
// Turn the '+' into %20 to obtain plain percent-encoding.
val percentEncoding = encoded.replace("+", "%20")
assert(percentEncoding == "http%3A%2F%2Ffoo%20bar%2F", s"Original: $original not properly percent-encoded: $percentEncoding")
// Both spellings must decode to the same string.
assert(URLDecoder.decode(encoded, "UTF-8") == URLDecoder.decode(percentEncoding, "UTF-8"))
println(s"Successfully completed without errors. [total ${currentTime - executionStart} ms]")
}
Seed7
The library encoding.s7i defines functions to handle URL encoding and percent-encoding. The function toPercentEncoded encodes every character except 0-9, A-Z, a-z and the characters '-', '.', '_', '~'. The function toUrlEncoded works like toPercentEncoded, except that it encodes a space as '+'. Both functions work on byte sequences (characters beyond '\255\' raise the exception RANGE_ERROR). To encode Unicode characters, first convert the string to UTF-8 with toUtf8.
$ include "seed7_05.s7i";
include "encoding.s7i";
# Writes the sample URL twice: first percent-encoded, then URL-encoded.
const proc: main is func
begin
writeln(toPercentEncoded("http://foo bar/"));
writeln(toUrlEncoded("http://foo bar/"));
end func;
- Output:
http%3A%2F%2Ffoo%20bar%2F http%3A%2F%2Ffoo+bar%2F
Sidef
# Form-style URL encoding of str: letters, digits and -_.!~*'() are kept,
# every other character except the space is replaced by %XX (uppercase hex),
# and finally each space is replaced by '+'.
func urlencode(str) {
# the space is part of the negated class so that it survives until the next step
str.gsub!(%r"([^-A-Za-z0-9_.!~*'() ])", {|a| "%%%02X" % a.ord});
str.gsub!(' ', '+');
return str;
}
say urlencode('http://foo bar/');
- Output:
http%3A%2F%2Ffoo+bar%2F
Standard ML
(* urlEncode str: percent-encodes every character of str that is not an
   ASCII letter or digit, as "%" followed by two uppercase hex digits. *)
fun urlEncode str =
  let
    (* Int.fmt emits no leading zero, so pad to two digits:
       #"\n" must become "%0A", not "%A". *)
    fun charToHex c = "%" ^ StringCvt.padLeft #"0" 2 (Int.fmt StringCvt.HEX (ord c))
    fun escapeChar c = if Char.isAlphaNum c then Char.toString c else charToHex c
  in
    String.concat (map escapeChar (explode str))
  end
- Output:
> urlEncode "http://foo bar/"; val it = "http%3A%2F%2Ffoo%20bar%2F": string
Tcl
# Encode all except "unreserved" characters; use UTF-8 for extended chars.
# See http://tools.ietf.org/html/rfc3986 §2.4 and §2.5
#
# Returns str converted to UTF-8 with every byte outside -A-Za-z0-9._~
# replaced by %XX (two uppercase hex digits).
proc urlEncode {str} {
set uStr [encoding convertto utf-8 $str]
# Newlines are excluded from the regexp and handled by [string map] at the end.
set chRE {[^-A-Za-z0-9._~\n]}; # Newline is special case!
# Each match is replaced by a [format ...] command text (\0 is the matched
# character); the commands are then executed by [subst] below.
set replacement {%[format "%02X" [scan "\\\0" "%c"]]}
return [string map {"\n" "%0A"} [subst [regsub -all $chRE $uStr $replacement]]]
}
Demonstrating:
# The trailing euro sign (U+20AC) exercises the UTF-8 path: it encodes as %E2%82%AC.
puts [urlEncode "http://foo bar/\u20ac"]
- Output:
http%3A%2F%2Ffoo%20bar%2F%E2%82%AC
TUSCRIPT
$$ MODE TUSCRIPT
text="http://foo bar/"
BUILD S_TABLE spez_char="::>/:</::<%:"
spez_char=STRINGS (text,spez_char)
LOOP/CLEAR c=spez_char
c=ENCODE(c,hex),c=concat("%",c),spez_char=APPEND(spez_char,c)
ENDLOOP
url_encoded=SUBSTITUTE(text,spez_char,0,0,spez_char)
print "text: ", text
PRINT "encoded: ", url_encoded
- Output:
text: http://foo bar/ encoded: http%3A%2F%2Ffoo%20bar%2F
UNIX Shell
Unfortunately, ksh does not support the "'$c" syntax.
# urlencode STRING
#   Prints STRING URL-encoded: ASCII letters, digits, '-', '_' and '.' are
#   kept, a space becomes '+', every other character becomes %XX.
#   The string is consumed one character at a time: "rest" is the string
#   without its first character and "c" is what remains after stripping
#   "rest" from the end.
#   NOTE: "rest2" is expanded unquoted as a pattern, so glob characters
#   (*, ?, [) in the input can upset the character splitting.
function urlencode
{
    typeset decoded=$1 encoded= rest= c=
    typeset rest2= bug='rest2=${rest}'

    if [[ -z ${BASH_VERSION} ]]; then
        # bug /usr/bin/sh HP-UX 11.00
        # Probe whether stripping "rest" really leaves a single character;
        # if not, switch to a pattern made of as many '?' as "rest" is long.
        typeset _decoded='xyz%26xyz'
        rest="${_decoded#?}"
        c="${_decoded%%${rest}}"
        if (( ${#c} != 1 )); then
            typeset qm='????????????????????????????????????????????????????????????????????????'
            typeset bug='(( ${#rest} > 0 )) && typeset -L${#rest} rest2="${qm}" || rest2=${rest}'
        fi
    fi

    rest="${decoded#?}"
    eval ${bug}
    c="${decoded%%${rest2}}"
    decoded="${rest}"

    while [[ -n ${c} ]]; do
        case ${c} in
        # The range must be A-Z, not A-z: A-z also spans [ \ ] ^ _ and `,
        # which would then be emitted unencoded.  '_' is listed explicitly
        # so it is still passed through as before.
        [-_.a-zA-Z0-9])
            ;;
        ' ')
            c='+'
            ;;
        *)
            c=$(printf "%%%02X" "'$c")
            ;;
        esac

        encoded="${encoded}${c}"

        rest="${decoded#?}"
        eval ${bug}
        c="${decoded%%${rest2}}"
        decoded="${rest}"
    done

    if [[ -n ${BASH_VERSION:-} ]]; then
        \echo -E "${encoded}"
    else
        print -r -- "${encoded}"
    fi
}
VBScript
' UrlEncode(url): returns url with every character other than 0-9, A-Z and
' a-z replaced by one %XX escape per pair of hexadecimal digits of its code.
Function UrlEncode(url)
    Dim i, j, n, ChrHex
    For i = 1 To Len(url)
        n = Asc(Mid(url,i,1))
        If (n >= 48 And n <= 57) Or (n >= 65 And n <= 90) _
        Or (n >= 97 And n <= 122) Then
            UrlEncode = UrlEncode & Mid(url,i,1)
        Else
            ChrHex = Hex(n)
            ' Hex() drops leading zeros.  With an odd number of digits the
            ' loop below would skip the character entirely (codes below 16)
            ' or mis-pair the digits, so pad to an even length first.
            If Len(ChrHex) Mod 2 = 1 Then ChrHex = "0" & ChrHex
            For j = 0 to (Len(ChrHex) / 2) - 1
                UrlEncode = UrlEncode & "%" & Mid(ChrHex,(2*j) + 1,2)
            Next
        End If
    Next
End Function

WScript.Echo UrlEncode("http://foo baré/")
- Output:
http%3A%2F%2Ffoo%20bar%C3%A9%2F
V (Vlang)
import net.urllib
// Prints the sample URL query-escaped; as the listed output shows, the space comes out as '+'.
fn main() {
println(urllib.query_escape("http://foo bar/"))
}
- Output:
http%3A%2F%2Ffoo+bar%2F
Wren
import "./fmt" for Fmt
// Percent-encodes 'url' byte by byte: ASCII digits and letters are copied,
// every other byte becomes '%' followed by two uppercase hex digits.
var urlEncode = Fn.new { |url|
    var res = ""
    for (b in url.bytes) {
        if ((b >= 48 && b <= 57) || (b >= 65 && b <= 90) || (b >= 97 && b <= 122)) {
            res = res + String.fromByte(b)
        } else {
            // The '0' flag pads with zeros instead of spaces, so bytes below
            // 16 come out as e.g. "%0A" rather than "% A".
            res = res + Fmt.swrite("\%$02X", b)
        }
    }
    return res
}
var urls = [
"http://foo bar/",
"mailto:\"Ivan Aim\" <ivan.aim@email.com>",
"mailto:\"Irma User\" <irma.user@mail.com>",
"http://foo.bar.com/~user-name/_subdir/*~.html"
]
for (url in urls) System.print(urlEncode.call(url))
- Output:
http%3A%2F%2Ffoo%20bar%2F mailto%3A%22Ivan%20Aim%22%20%3Civan%2Eaim%40email%2Ecom%3E mailto%3A%22Irma%20User%22%20%3Cirma%2Euser%40mail%2Ecom%3E http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml
XPL0
code Text=12;
string 0; \use zero-terminated strings
\Encode copies S0 into the local buffer S1: digits, ASCII letters and the
\ terminating 0 are copied as they are; any other character is written as
\ '%' followed by its two hexadecimal digits. The address of S1 is returned.
\NOTE: S1 holds 80 characters and no length check is made while filling it.
func Encode(S0); \Encode URL string and return its address
char S0;
char HD, S1(80); \BEWARE: very temporary string space returned
int C, I, J;
[HD:= "0123456789ABCDEF"; \hex digits
I:= 0; J:= 0;
repeat C:= S0(I); I:= I+1;
if C>=^0 & C<=^9 ! C>=^A & C<=^Z ! C>=^a & C<=^z ! C=0
then [S1(J):= C; J:= J+1] \simply pass char to S1
else [S1(J):= ^%; J:= J+1; \encode char into S1
S1(J):= HD(C>>4); J:= J+1;
S1(J):= HD(C&$0F); J:= J+1;
];
until C=0;
return S1;
];
Text(0, Encode("http://foo bar/"))
- Output:
http%3A%2F%2Ffoo%20bar%2F
Yabasic
rem encode_url$(s$, exclusions$, spaceplus)
rem   Returns s$ percent-encoded.
rem   exclusions$ - characters to pass through unchanged
rem   spaceplus   - when non-zero, a space is emitted as "+"
rem   Everything other than 0-9, A-Z, a-z and the exclusions becomes %XX.
sub encode_url$(s$, exclusions$, spaceplus)
    local res$, i, ch$
    for i=1 to len(s$)
        ch$ = mid$(s$, i, 1)
        if ch$ = " " and spaceplus then
            ch$ = "+"
        elsif not instr(exclusions$, ch$) and (ch$ < "0" or (ch$ > "9" and ch$ < "A") or (ch$ > "Z" and ch$ < "a") or ch$ > "z") then
            res$ = res$ + "%"
            rem pad to two digits so codes below 16 give e.g. %0A
            ch$ = right$("0" + upper$(hex$(asc(ch$))), 2)
        end if
        res$ = res$ + ch$
    next i
    return res$
end sub
print encode_url$("http://foo bar/")
zkl
Using lib cURL:
var CURL=Import("zklCurl");
CURL.urlEncode("http://foo bar/") //--> "http%3A%2F%2Ffoo%20bar%2F"
- Programming Tasks
- Solutions by Programming Task
- String manipulation
- Encodings
- 11l
- Action!
- Ada
- AWS
- ALGOL 68
- Apex
- AppleScript
- AppleScript Toolbox
- Applesoft BASIC
- Arturo
- AutoHotkey
- AWK
- Bash
- BBC BASIC
- Bracmat
- C
- C sharp
- C++
- Clojure
- ColdFusion
- Common Lisp
- Crystal
- D
- Delphi
- SysUtils,StdCtrls
- EasyLang
- Elixir
- Erlang
- F Sharp
- Factor
- Free Pascal
- FreeBASIC
- Frink
- FutureBasic
- Go
- Groovy
- Haskell
- Icon
- Unicon
- Icon Programming Library
- J
- Java
- JavaScript
- Jq
- Julia
- Kotlin
- Ksh
- Langur
- Lasso
- Liberty BASIC
- Lingo
- LiveCode
- Lua
- M2000 Interpreter
- Maple
- Mathematica
- Wolfram Language
- MATLAB
- Octave
- NetRexx
- NewLISP
- Nim
- Oberon-2
- Objeck
- Objective-C
- OCaml
- OoRexx
- PascalABC.NET
- Perl
- Phix
- PHP
- PicoLisp
- Pike
- Powershell
- PureBasic
- Python
- R
- Racket
- Raku
- REALbasic
- REXX
- Ruby
- Run BASIC
- Rust
- Scala
- Seed7
- Sidef
- Standard ML
- Tcl
- TUSCRIPT
- UNIX Shell
- VBScript
- V (Vlang)
- Wren
- Wren-fmt
- XPL0
- Yabasic
- Zkl