URL encoding
You are encouraged to solve this task according to the task description, using any language you may know.
The task is to provide a function or mechanism to convert a provided string into URL encoding representation.
In URL encoding, special characters, control characters and extended characters are converted into a percent symbol followed by a two-digit hexadecimal code, so a space character encodes into %20 within the string.
For the purposes of this task, every character except 0-9, A-Z and a-z requires conversion, so the following characters all require conversion by default:
- ASCII control codes (Character ranges 00-1F hex (0-31 decimal) and 7F (127 decimal))
- ASCII symbols (Character ranges 32-47 decimal (20-2F hex))
- ASCII symbols (Character ranges 58-64 decimal (3A-40 hex))
- ASCII symbols (Character ranges 91-96 decimal (5B-60 hex))
- ASCII symbols (Character ranges 123-126 decimal (7B-7E hex))
- Extended characters with character codes of 128 decimal (80 hex) and above.
The string "http://foo bar/" would be encoded as "http%3A%2F%2Ffoo%20bar%2F".
- Lowercase escapes are legal, as in "http%3a%2f%2ffoo%20bar%2f".
- Some standards give different rules: RFC 3986, Uniform Resource Identifier (URI): Generic Syntax, section 2.3, says that "-._~" should not be encoded. HTML 5, section URL-encoded form data, says to preserve "-._*", and to encode space " " to "+". The options below provide for utilization of an exception string, enabling preservation (non encoding) of particular characters to meet specific standards.
It is permissible to use an exception string (containing a set of symbols that do not need to be converted). However, this is an optional feature and is not a requirement of this task.
- See also
<lang Ada>with AWS.URL; with Ada.Text_IO; use Ada.Text_IO; procedure Encode is
Normal : constant String := "http://foo bar/";
Put_Line (AWS.URL.Encode (Normal));
end Encode;</lang>
- Output:
<lang AutoHotkey>MsgBox, % UriEncode("http://foo bar/")
; Modified from http://goo.gl/0a0iJq
UriEncode(Uri) { VarSetCapacity(Var, StrPut(Uri, "UTF-8"), 0) StrPut(Uri, &Var, "UTF-8") f := A_FormatInteger SetFormat, IntegerFast, H While Code := NumGet(Var, A_Index - 1, "UChar") If (Code >= 0x30 && Code <= 0x39 ; 0-9 || Code >= 0x41 && Code <= 0x5A ; A-Z || Code >= 0x61 && Code <= 0x7A) ; a-z Res .= Chr(Code) Else Res .= "%" . SubStr(Code + 0x100, -1) SetFormat, IntegerFast, %f% Return, Res }</lang>
<lang AppleScript>AST URL encode "http://foo bar/"</lang>
- Output:
<lang awk>BEGIN { for (i = 0; i <= 255; i++) ord[sprintf("%c", i)] = i }
# Encode string with application/x-www-form-urlencoded escapes.
# Percent-encode str and return the result.
#
# ASCII letters and digits are copied through unchanged; every other
# character is replaced by "%XX" (two uppercase hex digits), using the
# global ord[] table to map a character to its code.
#
# Only str is passed by callers. The remaining parameters are awk's way of
# declaring local variables; i is listed there so the loop index does not
# leak into (or clobber) a global i.
function escape(str, c, len, res, i) {
	len = length(str)
	res = ""
	for (i = 1; i <= len; i++) {
		c = substr(str, i, 1)
		if (c ~ /[0-9A-Za-z]/)
		#if (c ~ /[-._*0-9A-Za-z]/)
			res = res c
		#else if (c == " ")
		#	res = res "+"
		else
			res = res "%" sprintf("%02X", ord[c])
	}
	return res
}
# Escape every line of input.
{ print escape($0) }</lang>
The array ord[]
uses idea from Character codes#AWK.
To follow the rules for HTML 5, uncomment the two lines that convert " " to "+", and use the regular expression that preserves "-._*".
<lang bbcbasic> PRINT FNurlencode("http://foo bar/")
END DEF FNurlencode(url$) LOCAL c%, i% WHILE i% < LEN(url$) i% += 1 c% = ASCMID$(url$, i%) IF c%<&30 OR c%>&7A OR c%>&39 AND c%<&41 OR c%>&5A AND c%<&61 THEN url$ = LEFT$(url$,i%-1) + "%" + RIGHT$("0"+STR$~c%,2) + MID$(url$,i%+1) ENDIF ENDWHILE = url$</lang>
- Output:
<lang bracmat>( ( encode
= encoded exceptions octet string . !arg:(?exceptions.?string) & :?encoded & @( !string : ? ( %@?octet ? & !encoded ( !octet : ( ~<0:~>9 | ~<A:~>Z | ~<a:~>z ) | @(!exceptions:? !octet ?) & !octet | "%" d2x$(asc$!octet) ) : ?encoded & ~ ) ) | str$!encoded ) & out$"without exceptions:
& out$(encode$(."http://foo bar/")) & out$(encode$(."mailto:Ivan")) & out$(encode$(."Aim <ivan.aim@email.com>")) & out$(encode$(."mailto:Irma")) & out$(encode$(."User <irma.user@mail.com>")) & out$(encode$(."http://foo.bar.com/~user-name/_subdir/*~.html")) & out$"
with RFC 3986 rules: "
& out$(encode$("-._~"."http://foo bar/")) & out$(encode$("-._~"."mailto:Ivan")) & out$(encode$("-._~"."Aim <ivan.aim@email.com>")) & out$(encode$("-._~"."mailto:Irma")) & out$(encode$("-._~"."User <irma.user@mail.com>")) & out$(encode$("-._~"."http://foo.bar.com/~user-name/_subdir/*~.html"))
); </lang>
- Output:
without exceptions: http%3A%2F%2Ffoo%20bar%2F mailto%3AIvan Aim%20%3Civan%2Eaim%40email%2Ecom%3E mailto%3AIrma User%20%3Cirma%2Euser%40mail%2Ecom%3E http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml with RFC 3986 rules: http%3A%2F%2Ffoo%20bar%2F mailto%3AIvan Aim%20%3Civan.aim%40email.com%3E mailto%3AIrma User%20%3Cirma.user%40mail.com%3E http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html
<lang c>#include <stdio.h>
#include <ctype.h>
char rfc3986[256] = {0}; char html5[256] = {0};
/*
 * Percent-encode the NUL-terminated string s into enc.
 *
 * tb is a 256-entry table indexed by byte value: a non-zero entry is the
 * character to emit for that byte (the byte itself, or a substitute such as
 * '+' for ' '); a zero entry means the byte is written as "%XX" with two
 * uppercase hex digits.
 *
 * Caller is responsible for memory: enc must have room for
 * 3 * strlen(s) + 1 bytes.
 */
void encode(const char *s, char *enc, char *tb)
{
	for (; *s; s++) {
		/* Go through unsigned char: plain char may be signed, in which
		 * case a byte >= 0x80 would index tb[] negatively and be
		 * printed sign-extended as "%FFFFFFxx". */
		unsigned char c = (unsigned char)*s;

		if (tb[c])
			*enc++ = tb[c];
		else
			enc += sprintf(enc, "%%%02X", c);
	}
	/* Always terminate, so an empty input yields an empty string. */
	*enc = '\0';
}
int main() { const char url[] = "http://foo bar/"; char enc[(strlen(url) * 3) + 1];
int i; for (i = 0; i < 256; i++) { rfc3986[i] = isalnum(i)||i == '~'||i == '-'||i == '.'||i == '_' ? i : 0; html5[i] = isalnum(i)||i == '*'||i == '-'||i == '.'||i == '_' ? i : (i == ' ') ? '+' : 0; }
encode(url, enc, rfc3986); puts(enc);
return 0; }</lang>
using Qt 4.6 as a library <lang cpp>#include <QByteArray>
#include <iostream>
int main( ) {
QByteArray text ( "http://foo bar/" ) ; QByteArray encoded( text.toPercentEncoding( ) ) ; std::cout << encoded.data( ) << '\n' ; return 0 ;
- Output:
<lang c sharp>using System;
namespace URLEncode {
internal class Program { private static void Main(string[] args) { Console.WriteLine(Encode("http://foo bar/")); }
private static string Encode(string uri) { return Uri.EscapeDataString(uri); } }
- Output:
Using Java's URLEncoder: <lang clojure>(import 'java.net.URLEncoder) (URLEncoder/encode "http://foo bar/" "UTF-8")</lang>
- Output:
<lang d>import std.stdio, std.uri;
void main() {
writeln(encodeComponent("http://foo bar/"));
<lang elixir>iex(1)> URI.encode("http://foo bar/", &(URI.char_unreserved?/1)) "http%3A%2F%2Ffoo%20bar%2F"</lang>
Built in
33> http_uri:encode("http://foo bar/"). "http%3A%2F%2Ffoo%20bar%2F"
<lang fsharp>open System
[<EntryPoint>] let main args =
printfn "%s" (Uri.EscapeDataString(args.[0])) 0</lang>
- Output:
>URLencoding.exe "http://foo bar/" http%3A%2F%2Ffoo%20bar%2F
<lang go>package main
import (
"fmt" "net/url"
func main() {
fmt.Println(url.QueryEscape("http://foo bar/"))
- Output:
<lang Haskell>import qualified Data.Char as Char import Text.Printf
encode :: Char -> String encode c
| c == ' ' = "+" | Char.isAlphaNum c || c `elem` "-._~" = [c] | otherwise = printf "%%%02X" c
urlEncode :: String -> String urlEncode = concatMap encode
main :: IO () main = putStrLn $ urlEncode "http://foo bar/"</lang>
- Output:
Icon and Unicon
<lang Icon>link hexcvt
procedure main() write("text = ",image(u := "http://foo bar/")) write("encoded = ",image(ue := encodeURL(u))) end
procedure encodeURL(s) #: encode data for inclusion in a URL/URI static en initial { # build lookup table for everything
en := table() every en[c := !string(~(&digits++&letters))] := "%"||hexstring(ord(c),2) every /en[c := !string(&cset)] := c }
every (c := "") ||:= en[!s] # re-encode everything return c end </lang>
- Output:
text = "http://foo bar/" encoded = "http%3A%2F%2Ffoo%20bar%2F"
J has a urlencode in the gethttp package, but this task requires that all non-alphanumeric characters be encoded.
Here's an implementation that does that:
<lang j>require'strings convert' urlencode=: rplc&((#~2|_1 47 57 64 90 96 122 I.i.@#)a.;"_1'%',.hfd i.#a.)</lang>
Example use:
<lang j> urlencode 'http://foo bar/' http%3A%2F%2Ffoo%20bar%2F</lang>
The built-in URLEncoder in Java converts the space " " into a plus-sign "+" instead of "%20": <lang java>import java.io.UnsupportedEncodingException; import java.net.URLEncoder;
public class Main {
public static void main(String[] args) throws UnsupportedEncodingException { String normal = "http://foo bar/"; String encoded = URLEncoder.encode(normal, "utf-8"); System.out.println(encoded); }
- Output:
Confusingly, there are 3 different URI encoding functions in JavaScript: escape(), encodeURI(), and encodeURIComponent(). Each of them encodes a different set of characters. See this article and this article for more information and comparisons.
<lang javascript>var normal = 'http://foo/bar/';
var encoded = encodeURIComponent(normal);</lang>
jq has a built-in function, @uri, for "percent-encoding". It preserves the characters that RFC 3986 mandates be preserved, but also preserves the following five characters: !'()*
To address the task requirement, therefore, we can first use @uri and then convert the exceptional characters. (For versions of jq with regex support, this could be done using gsub, but here we perform the translation directly.)
Note that @uri also converts multibyte characters to multi-triplets, e.g. <lang jq>"á" | @uri</lang> produces: "%C3%A1" <lang jq>def url_encode:
# The helper function checks whether the input corresponds to one of the characters: !'()* def recode: . as $c | [33,39,40,41,42] | index($c); def hex: if . < 10 then 48 + . else 55 + . end; @uri | explode # 37 ==> "%", 50 ==> "2" | map( if recode then (37, 50, ((. - 32) | hex)) else . end ) | implode;</lang>
<lang jq>"http://foo bar/" | @uri</lang> produces: "http%3A%2F%2Ffoo%20bar%2F"
<lang jq>"http://foo bar/" | @uri == url_encode</lang> produces: true
To contrast the difference between "@uri" and "url_encode", we compare the characters that are unaltered:
<lang jq>[range(0;1024) | [.] | implode | if @uri == . then . else empty end] | join(null)</lang> produces: "!'()*-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"
<lang jq>[range(0;1024) | [.] | implode | if url_encode == . then . else empty end] | join(null)</lang> produces: "-.0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz~"
<lang Julia> using URIParser
dcd = "http://foo bar/" enc = escape(dcd)
println(dcd, " => ", enc) </lang>
- Output:
http://foo bar/ => http%3A%2F%2Ffoo%20bar%2F
<lang Lasso>bytes('http://foo bar/') -> encodeurl</lang> -> http%3A%2F%2Ffoo%20bar%2F
Liberty BASIC
<lang lb>
dim lookUp$( 256)
for i =0 to 256 lookUp$( i) ="%" +dechex$( i) next i
string$ ="http://foo bar/"
print "Supplied string '"; string$; "'" print "As URL '"; url$( string$); "'"
function url$( i$)
for j =1 to len( i$) c$ =mid$( i$, j, 1) if instr( "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz", c$) then url$ =url$ +c$ else url$ =url$ +lookUp$( asc( c$)) end if next j
end function </lang>
Supplied string 'http://foo bar/' As URL 'http%3A%2F%2Ffoo%20bar%2F'
<lang NetRexx>/* NetRexx */ options replace format comments java crossref symbols nobinary
/* -------------------------------------------------------------------------- */
testcase() say say 'RFC3986' testcase('RFC3986') say say 'HTML5' testcase('HTML5') say return
/* -------------------------------------------------------------------------- */ method encode(url, varn) public static
variation = varn.upper opts = opts['RFC3986'] = '-._~' opts['HTML5'] = '-._*'
rp = loop while url.length > 0 parse url tc +1 url select when tc.datatype('A') then do rp = rp || tc end when tc == ' ' then do if variation = 'HTML5' then rp = rp || '+' else rp = rp || '%' || tc.c2x end otherwise do if opts[variation].pos(tc) > 0 then do rp = rp || tc end else do rp = rp || '%' || tc.c2x end end end end
return rp
/* -------------------------------------------------------------------------- */ method testcase(variation = ) public static
url = [ - 'http://foo bar/' - , 'mailto:"Ivan Aim" <ivan.aim@email.com>' - , 'mailto:"Irma User" <irma.user@mail.com>' - , 'http://foo.bar.com/~user-name/_subdir/*~.html' - ]
loop i_ = 0 to url.length - 1 say url[i_] say encode(url[i_], variation) end i_
- Output:
http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan%2Eaim%40email%2Ecom%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma%2Euser%40mail%2Ecom%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml RFC3986 http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html HTML5 http://foo bar/ http%3A%2F%2Ffoo+bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan+Aim%22+%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma+User%22+%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F%7Euser-name%2F_subdir%2F*%7E.html
<lang maple>URL:-Escape("http://foo bar/");</lang>
- Output:
<lang mathematica>URLEncoding[url_] :=
StringReplace[url, x : Except[ Join[CharacterRange["0", "9"], CharacterRange["a", "z"], CharacterRange["A", "Z"]]] :> StringJoin[("%" ~~ #) & /@ IntegerString[ToCharacterCode[x, "UTF8"], 16]]]</lang>
Example use:
<lang mathematica>URLEncoding["http://foo bar/"]</lang>
- Output:
MATLAB / Octave
<lang MATLAB>function u = urlencoding(s) u = ; for k = 1:length(s), if isalnum(s(k)) u(end+1) = s(k); else u=[u,'%',dec2hex(s(k)+0)]; end; end end</lang> Usage:
octave:3> urlencoding('http://foo bar/') ans = http%3A%2F%2Ffoo%20bar%2F
<lang NewLISP>;; simple encoder
(define (url-encode str)
  ;; Return str with every character outside [a-zA-Z0-9] replaced by "%XX".
  ;; The captured character is available as $1 inside the replacement
  ;; expression; (char $1) gives its numeric code.
  ;; "%02X" zero-pads to two hex digits, so codes below 16 (e.g. newline)
  ;; come out as "%0A" rather than the space-padded "% A".
  (replace {([^a-zA-Z0-9])} str (format "%%%02X" (char $1)) 0))
(url-encode "http://foo bar/")</lang>
<lang nim>import cgi
echo encodeUrl("http://foo/bar/")</lang>
<lang objeck> use FastCgi;
bundle Default {
class UrlEncode { function : Main(args : String[]) ~ Nil { url := "http://foo bar/"; UrlUtility->Encode(url)->PrintLine(); } }
} </lang>
<lang objc>NSString *normal = @"http://foo bar/"; NSString *encoded = [normal stringByAddingPercentEscapesUsingEncoding:NSUTF8StringEncoding]; NSLog(@"%@", encoded);</lang>
The Core Foundation function CFURLCreateStringByAddingPercentEscapes()
provides more options.
<lang objc>NSString *normal = @"http://foo bar/"; NSString *encoded = [normal stringByAddingPercentEncodingWithAllowedCharacters:[NSCharacterSet alphanumericCharacterSet]]; NSLog(@"%@", encoded);</lang>
For encoding for various parts of the URL, the allowed character sets [NSCharacterSet URLUserAllowedCharacterSet]
, [NSCharacterSet URLPasswordAllowedCharacterSet]
, [NSCharacterSet URLHostAllowedCharacterSet]
, [NSCharacterSet URLPathAllowedCharacterSet]
, [NSCharacterSet URLQueryAllowedCharacterSet]
, or [NSCharacterSet URLFragmentAllowedCharacterSet]
are provided.
Using the library ocamlnet from the interactive loop:
<lang ocaml>$ ocaml
# #use "topfind";;
# #require "netstring";;
# Netencoding.Url.encode "http://foo bar/" ;;
- : string = "http%3A%2F%2Ffoo+bar%2F"</lang>
The solution shown at Rexx is a valid ooRexx program.
<lang perl>sub urlencode {
my $s = shift; $s =~ s/([^-A-Za-z0-9_.!~*'() ])/sprintf("%%%02X", ord($1))/eg; $s =~ tr/ /+/; return $s;
print urlencode('http://foo bar/')."\n"; </lang>
- Output:
<lang perl>use URI::Escape;
my $s = 'http://foo/bar/'; print uri_escape($s);</lang>
Use standard CGI module: <lang perl>use 5.10.0; use CGI;
my $s = 'http://foo/bar/'; say $s = CGI::escape($s); say $s = CGI::unescape($s);</lang>
Perl 6
<lang perl6>my $url = 'http://foo bar/';
say $url.subst(/<-alnum>/, *.ord.fmt("%%%02X"), :g);</lang>
- Output:
<lang php><?php
$s = 'http://foo/bar/';
$s = rawurlencode($s);
There is also urlencode()
, which also encodes spaces as "+" signs
<lang PicoLisp>(de urlEncodeTooMuch (Str)
(pack (mapcar '((C) (if (or (>= "9" C "0") (>= "Z" (uppc C) "A")) C (list '% (hex (char C))) ) ) (chop Str) ) ) )</lang>
: (urlEncodeTooMuch "http://foo bar/") -> "http%3A%2F%2Ffoo%20bar%2F"
<lang Powershell> [uri]::EscapeDataString('http://foo bar/')
http%3A%2F%2Ffoo%20bar%2F </lang>
<lang PureBasic>URL$ = URLEncoder("http://foo bar/")</lang>
<lang python>import urllib
s = 'http://foo/bar/'
s = urllib.quote(s)</lang>
There is also urllib.quote_plus()
, which also encodes spaces as "+" signs
R has a built-in
<lang R>URLencode("http://foo bar/")</lang>
function, but it doesn't fully follow RFC guidelines, so we have to use another R package to accomplish the task:
<lang R> library(RCurl) curlEscape("http://foo bar/") </lang>
<lang racket>
#lang racket
(require net/uri-codec) (uri-encode "http://foo bar/") </lang>
Using the built-in encoding method, which doesn't permit exceptions: <lang vb>
Dim URL As String = "http://foo bar/" URL = EncodeURLComponent(URL) Print(URL)
With exceptions: <lang vb> Function URLEncode(URL As String, Exceptions As String = "") As String
For i As Integer = 0 To 255 If InStr(Exceptions, Chr(i)) > 0 Then Continue For i Dim char As String = Chr(127) + Right("00" + Hex(i), 2) URL = ReplaceAll(URL, Chr(i), char) If i = 47 Then i = 57 If i = 64 Then i = 90 If i = 96 Then i = 122 If i = 126 Then i = 128 Next URL = ReplaceAll(URL, Chr(127), "%") Return URL
End Function </lang>
version 1
<lang REXX>/* Rexx */ do
call testcase say say RFC3986 call testcase RFC3986 say say HTML5 call testcase HTML5 say return
end exit
/* -------------------------------------------------------------------------- */ encode: procedure do
parse arg url, varn . parse upper var varn variation drop RFC3986 HTML5 opts. = opts.RFC3986 = '-._~' opts.HTML5 = '-._*'
rp = do while length(url) > 0 parse var url tc +1 url select when datatype(tc, 'A') then do rp = rp || tc end when tc == ' ' then do if variation = HTML5 then rp = rp || '+' else rp = rp || '%' || c2x(tc) end otherwise do if pos(tc, opts.variation) > 0 then do rp = rp || tc end else do rp = rp || '%' || c2x(tc) end end end end
return rp
end exit
/* -------------------------------------------------------------------------- */ testcase: procedure do
parse arg variation X = 0 url. = X = X + 1; url.0 = X; url.X = 'http://foo bar/' X = X + 1; url.0 = X; url.X = 'mailto:"Ivan Aim" <ivan.aim@email.com>' X = X + 1; url.0 = X; url.X = 'mailto:"Irma User" <irma.user@mail.com>' X = X + 1; url.0 = X; url.X = 'http://foo.bar.com/~user-name/_subdir/*~.html'
do i_ = 1 to url.0 say url.i_ say encode(url.i_, variation) end i_
end </lang>
- Output:
http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan%2Eaim%40email%2Ecom%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma%2Euser%40mail%2Ecom%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml RFC3986 http://foo bar/ http%3A%2F%2Ffoo%20bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan%20Aim%22%20%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma%20User%22%20%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F~user-name%2F_subdir%2F%2A~.html HTML5 http://foo bar/ http%3A%2F%2Ffoo+bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan+Aim%22+%3Civan.aim%40email.com%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma+User%22+%3Cirma.user%40mail.com%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo.bar.com%2F%7Euser-name%2F_subdir%2F*%7E.html
version 2
<lang rexx>/*REXX pgm encodes an URL text, blanks──►+, preserves -._* and -._~ */ url.1 = 'http://foo bar/' url.2 = 'mailto:"Ivan Aim" <ivan.aim@email.com>' url.3 = 'mailto:"Irma User" <irma.user@mail.com>' url.4 = 'http://foo.bar.com/~user-name/_subdir/*~.html' URLs = 4
do j=1 for URLs; say say url.j say URLencode(url.j) end /*j*/
exit /*stick a fork in it, we're done.*/ /*──────────────────────────────────URLENCODE subroutine────────────────*/ URLencode: procedure; parse arg yyy; t1= '-._~' ; skip=0
t2= '-._*' ; z=
do k=1 for length(yyy); _=substr(yyy,k,1) /*pickoff 1char*/ if skip\==0 then do /*skip t1 | t2?*/ skip=skip-1 /*skip a char. */ iterate end select when datatype(_,'A') then z=z || _ /*alphanumeric?*/ when _==' ' then z=z'+' /*is a blank ? */ when substr(yyy,k,4)==t1 |, /*t1 or t2 ? */ substr(yyy,k,4)==t2 then do; skip=3 /*skip 3 chars.*/ z=z || substr(yyy,k,4) end otherwise z=z'%'c2x(_) /*special char.*/ end /*select*/ end /*k*/
return z</lang>
- Output:
when using the default input
http://foo bar/ http%3A%2F%2Ffoo+bar%2F mailto:"Ivan Aim" <ivan.aim@email.com> mailto%3A%22Ivan+Aim%22+%3Civan%2Eaim%40email%2Ecom%3E mailto:"Irma User" <irma.user@mail.com> mailto%3A%22Irma+User%22+%3Cirma%2Euser%40mail%2Ecom%3E http://foo.bar.com/~user-name/_subdir/*~.html http%3A%2F%2Ffoo%2Ebar%2Ecom%2F%7Euser%2Dname%2F%5Fsubdir%2F%2A%7E%2Ehtml
CGI.escape encodes all characters except '-.0-9A-Z_a-z'.
<lang ruby>require 'cgi' puts CGI.escape("http://foo bar/").gsub("+", "%20")
# => "http%3A%2F%2Ffoo%20bar%2F"</lang>
Programs should not call URI.escape
(alias URI.encode
), because it fails to encode some characters. URI.escape
is obsolete since Ruby 1.9.2.
URI.encode_www_form_component is a new method from Ruby 1.9.2. It obeys HTML 5 and encodes all characters except '-.0-9A-Z_a-z' and '*'.
<lang ruby>require 'uri' puts URI.encode_www_form_component("http://foo bar/").gsub("+", "%20")
# => "http%3A%2F%2Ffoo%20bar%2F"</lang>
<lang runbasic>urlIn$ = "http://foo bar/"
for i = 1 to len(urlIn$)
a$ = mid$(urlIn$,i,1) if (a$ >= "0" and a$ <= "9") _ or (a$ >= "A" and a$ <= "Z") _ or (a$ >= "a" and a$ <= "z") then url$ = url$ + a$ else url$ = url$ + "%"+dechex$(asc(a$))
next i print urlIn$;" -> ";url$</lang>
http://foo bar/ -> http%3A%2F%2Ffoo%20bar%2F
<lang scala>import java.net.{URLDecoder, URLEncoder}
import scala.compat.Platform.currentTime
object UrlCoded extends App {
val original = """http://foo bar/""" val encoded: String = URLEncoder.encode(original, "UTF-8")
assert(encoded == "http%3A%2F%2Ffoo+bar%2F", s"Original: $original not properly encoded: $encoded")
val percentEncoding = encoded.replace("+", "%20") assert(percentEncoding == "http%3A%2F%2Ffoo%20bar%2F", s"Original: $original not properly percent-encoded: $percentEncoding")
assert(URLDecoder.decode(encoded, "UTF-8") == URLDecoder.decode(percentEncoding, "UTF-8"))
println(s"Successfully completed without errors. [total ${currentTime - executionStart} ms]")
The library encoding.s7i defines functions to handle URL respectively percent encoding. The function toPercentEncoded encodes every character except 0-9, A-Z, a-z and the characters '-', '.', '_', '~'. The function toUrlEncoded works like toPercentEncoded and additionally encodes a space with '+'. Both functions work for byte sequences (characters beyond '\255\' raise the exception RANGE_ERROR). To encode Unicode characters it is necessary to convert them to UTF-8 with striToUtf8 before.<lang seed7>$ include "seed7_05.s7i";
include "encoding.s7i";
const proc: main is func
begin writeln(toPercentEncoded("http://foo bar/")); writeln(toUrlEncoded("http://foo bar/"));
end func;</lang>
- Output:
http%3A%2F%2Ffoo%20bar%2F http%3A%2F%2Ffoo+bar%2F
<lang ruby>func urlencode(str) {
str.gsub!(%r"([^-A-Za-z0-9_.!~*'() ])", {|a| "%%%02X" % a.ord}); str.gsub!(' ', '+'); return str;
say urlencode('http://foo bar/');</lang>
- Output:
<lang tcl># Encode all except "unreserved" characters; use UTF-8 for extended chars.
# See http://tools.ietf.org/html/rfc3986 §2.4 and §2.5
proc urlEncode {str} {
set uStr [encoding convertto utf-8 $str] set chRE {[^-A-Za-z0-9._~\n]}; # Newline is special case! set replacement {%[format "%02X" [scan "\\\0" "%c"]]} return [string map {"\n" "%0A"} [subst [regsub -all $chRE $uStr $replacement]]]
}</lang> Demonstrating: <lang tcl>puts [urlEncode "http://foo bar/"]</lang>
- Output:
<lang tuscript> $$ MODE TUSCRIPT text="http://foo bar/" BUILD S_TABLE spez_char="::>/:</::<%:" spez_char=STRINGS (text,spez_char) LOOP/CLEAR c=spez_char c=ENCODE(c,hex),c=concat("%",c),spez_char=APPEND(spez_char,c) ENDLOOP url_encoded=SUBSTITUTE(text,spez_char,0,0,spez_char) print "text: ", text PRINT "encoded: ", url_encoded </lang>
- Output:
text: http://foo bar/ encoded: http%3A%2F%2Ffoo%20bar%2F
UNIX Shell
<lang bash>function urlencode { typeset decoded=$1 encoded= rest= c= typeset rest2= bug='rest2=${rest}'
if [[ -z ${BASH_VERSION} ]]; then # bug /usr/bin/sh HP-UX 11.00 typeset _decoded='xyz%26xyz' rest="${_decoded#?}" c="${_decoded%%${rest}}" if (( ${#c} != 1 )); then typeset qm='????????????????????????????????????????????????????????????????????????' typeset bug='(( ${#rest} > 0 )) && typeset -L${#rest} rest2="${qm}" || rest2=${rest}' fi fi
rest="${decoded#?}" eval ${bug} c="${decoded%%${rest2}}" decoded="${rest}"
while [[ -n ${c} ]]; do case ${c} in [-a-zA-z0-9.]) ;; ' ') c='+' ;; *) c=$(printf "%%%02X" "'$c") ;; esac
rest="${decoded#?}" eval ${bug} c="${decoded%%${rest2}}" decoded="${rest}" done
if [[ -n ${BASH_VERSION:-} ]]; then \echo -E "${encoded}" else print -r -- "${encoded}" fi } </lang>
<lang VBScript>Function UrlEncode(url) For i = 1 To Len(url) n = Asc(Mid(url,i,1)) If (n >= 48 And n <=57) Or (n >= 65 And n <= 90) _ Or (n >= 97 And n <= 122) Then UrlEncode = UrlEncode & Mid(url,i,1) Else UrlEncode = UrlEncode & "%" & Hex(Asc(Mid(url,i,1))) End If Next End Function
WScript.Echo UrlEncode("http://foo bar/")</lang>
- Output:
<lang XPL0>code Text=12; string 0; \use zero-terminated strings
func Encode(S0); \Encode URL string and return its address char S0; char HD, S1(80); \BEWARE: very temporary string space returned int C, I, J; [HD:= "0123456789ABCDEF"; \hex digits I:= 0; J:= 0; repeat C:= S0(I); I:= I+1;
if C>=^0 & C<=^9 ! C>=^A & C<=^Z ! C>=^a & C<=^z ! C=0 then [S1(J):= C; J:= J+1] \simply pass char to S1 else [S1(J):= ^%; J:= J+1; \encode char into S1 S1(J):= HD(C>>4); J:= J+1; S1(J):= HD(C&$0F); J:= J+1; ];
until C=0; return S1; ];
Text(0, Encode("http://foo bar/"))</lang>
- Output:
Using lib cURL: <lang zkl>var CURL=Import("zklCurl"); CURL.urlEncode("http://foo bar/") //--> "http%3A%2F%2Ffoo%20bar%2F"</lang>
- Programming Tasks
- Solutions by Programming Task
- String manipulation
- Ada
- AutoHotkey
- AppleScript
- AppleScript Toolbox
- Bracmat
- C
- C++
- C sharp
- Clojure
- D
- Elixir
- Erlang
- F Sharp
- Go
- Haskell
- Icon
- Unicon
- Icon Programming Library
- J
- Java
- JavaScript
- Jq
- Julia
- Lasso
- Liberty BASIC
- NetRexx
- Maple
- Mathematica
- Octave
- Nim
- Objeck
- Objective-C
- OCaml
- OoRexx
- Perl
- Perl 6
- PicoLisp
- Powershell
- PureBasic
- Python
- R
- Racket
- REALbasic
- Ruby
- Scala
- Seed7
- Sidef
- Tcl
- UNIX Shell
- VBScript
- XPL0
- Zkl