URL encoding: Difference between revisions

3,448 bytes added ,  1 month ago
m (Bash: pure bash solution, no dependencies)
 
(18 intermediate revisions by 10 users not shown)
Line 26:
;Variations:
* Lowercase escapes are legal, as in "<code><nowiki>http%3a%2f%2ffoo%20bar%2f</nowiki></code>".
* Special characters have different encodings for different standards:
* Some standards give different rules: RFC 3986, ''Uniform Resource Identifier (URI): Generic Syntax'', section 2.3, says that "-._~" should not be encoded. HTML 5, section [http://www.whatwg.org/specs/web-apps/current-work/multipage/association-of-controls-and-forms.html#url-encoded-form-data 4.10.22.5 URL-encoded form data], says to preserve "-._*", and to encode space " " to "+". The options below provide for utilization of an exception string, enabling preservation (non encoding) of particular characters to meet specific standards.
** RFC 3986, ''Uniform Resource Identifier (URI): Generic Syntax'', section 2.3, says to preserve "-._~".
 
** HTML 5, section [http://www.whatwg.org/specs/web-apps/current-work/multipage/association-of-controls-and-forms.html#url-encoded-form-data 4.10.22.5 URL-encoded form data], says to preserve "-._*", and to encode space " " to "+".
** [https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/encodeURI#description encodeURI] function in JavaScript will preserve "-._~" (RFC 3986) and ";,/?:@&=+$!*'()#".
 
;Options:
Line 242 ⟶ 244:
<pre>"http%3A%2F%2Ffoo%20bar%2F"</pre>
 
=={{header|Applesoft BASIC}}==
<syntaxhighlight lang="gwbasic"> 100 URL$ = "http://foo bar/"
105 REM  DEMO: ENCODE URL$ WITH THE SUBROUTINE AT 140 AND PRINT THE RESULT R$
110 GOSUB 140"URL ENCODE URL$ RETURNS R$
120 PRINT R$;
130 END
135 REM  SUBROUTINE 140-240: IN URL$, OUT R$ (PERCENT-ENCODED COPY OF URL$)
140 LET R$ = ""
150 LET L = LEN (URL$)
155 REM  EMPTY INPUT: RETURN WITH R$ STILL EMPTY
160 IF NOT L THEN RETURN
165 REM  H$ IS THE HEX DIGIT LOOKUP TABLE, INDEXED FROM 1
170 LET H$ = "0123456789ABCDEF"
180 FOR I = 1 TO L
190 LET C$ = MID$ (URL$,I,1)
200 LET C = ASC (C$)
205 REM  ENCODE WHEN C IS NOT IN 0-9, A-Z OR A+32..Z+32 (LOWER CASE); H = HIGH NIBBLE, C - H * 16 = LOW NIBBLE
210 IF C < ASC ("0") OR C > ASC ("Z") + 32 OR C > ASC ("9") AND C < ASC ("A") OR C > ASC ("Z") AND C < ASC ("A") + 32 THEN H = INT (C / 16):C$ = "%" + MID$ (H$,H + 1,1) + MID$ (H$,C - H * 16 + 1,1)
215 REM  APPEND EITHER THE ORIGINAL CHARACTER OR ITS %XX REPLACEMENT
220 LET R$ = R$ + C$
230 NEXT I
240 RETURN</syntaxhighlight>
=={{header|Arturo}}==
 
Line 255 ⟶ 273:
 
; Modified from http://goo.gl/0a0iJq
UriEncode(Uri, Reserved:="!#$&'()*+,/:;=?@[]") {
Unreserved := "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_.~"
{
VarSetCapacity(Var, StrPut(Uri, "UTF-8"), 0)
StrPut(Uri, &Var, "UTF-8")
While (Code := NumGet(Var, A_Index - 1, "UChar")) {
f := A_FormatInteger
If InStr(Unreserved . Reserved, Chr(Code)) {
SetFormat, IntegerFast, H
Encoded .= Chr(Code)
While Code := NumGet(Var, A_Index - 1, "UChar")
}
If (Code >= 0x30 && Code <= 0x39 ; 0-9
Else {
|| Code >= 0x41 && Code <= 0x5A ; A-Z
||Encoded Code >.= 0x61 &&Format("%{:02X}", Code <= 0x7A) ; a-z
}
Res .= Chr(Code)
}
Else
Return Encoded
Res .= "%" . SubStr(Code + 0x100, -1)
SetFormat, IntegerFast, %f%
Return, Res
}</syntaxhighlight>
 
Line 304 ⟶ 320:
=={{header|Bash}}==
<syntaxhighlight lang="bash">urlencode() {
local LC_ALL=C # support unicode: loop bytes, not characters
local c i n=${#1}
for (( i=0; i<n; i++ )); do
Line 530 ⟶ 546:
}</syntaxhighlight>
<pre>http%3A%2F%2Ffoo%20bar%2F</pre>
 
=={{header|Delphi}}==
{{works with|Delphi|6.0}}
{{libheader|SysUtils,StdCtrls}}
 
 
<syntaxhighlight lang="Delphi">
 
function EncodeURL(URL: string): string;
{Returns a copy of URL in which every character other than}
{an ASCII digit or letter is replaced by "%" plus its two-digit hex code}
const KeepAsIs = ['0'..'9', 'A'..'Z', 'a'..'z'];
var Inx: integer;
    Ch: char;
begin
Result:='';
for Inx:=1 to Length(URL) do
	begin
	Ch:=URL[Inx];
	{Characters in the keep set pass through unchanged; all others are escaped}
	if Ch in KeepAsIs then Result:=Result+Ch
	else Result:=Result+'%'+IntToHex(byte(Ch),2);
	end;
end;
 
procedure EncodeAndShowURL(Memo: TMemo; URL: string);
{Appends three lines to Memo: the original URL, its encoded form}
{as produced by EncodeURL, and an empty separator line}
begin
Memo.Lines.Add('Unencoded URL: '+URL);
Memo.Lines.Add('Encoded URL: '+EncodeURL(URL));
Memo.Lines.Add('');
end;
 
procedure ShowEncodedURLs(Memo: TMemo);
{Runs EncodeAndShowURL over a fixed list of sample URLs, in order}
const Samples: array [0..2] of string = (
	'http://foo bar/',
	'https://rosettacode.org/wiki/URL_encoding',
	'https://en.wikipedia.org/wiki/Pikes_Peak_granite');
var Inx: integer;
begin
for Inx:=Low(Samples) to High(Samples) do
	EncodeAndShowURL(Memo,Samples[Inx]);
end;
 
 
 
</syntaxhighlight>
{{out}}
<pre>
Unencoded URL: http://foo bar/
Encoded URL: http%3A%2F%2Ffoo%20bar%2F
 
Unencoded URL: https://rosettacode.org/wiki/URL_encoding
Encoded URL: https%3A%2F%2Frosettacode%2Eorg%2Fwiki%2FURL%5Fencoding
 
Unencoded URL: https://en.wikipedia.org/wiki/Pikes_Peak_granite
Encoded URL: https%3A%2F%2Fen%2Ewikipedia%2Eorg%2Fwiki%2FPikes%5FPeak%5Fgranite
 
Elapsed Time: 11.734 ms.
</pre>
 
 
=={{header|Elixir}}==
Line 640 ⟶ 706:
http%3A%2F%2Ffoo+bar%2F
</pre>
 
 
=={{header|FutureBasic}}==
<pre>
In addition to the generic alphanumeric character set used in the demo code below, FB offers several special character sets for URL encoding:
 
fn CharacterSetURLFragmentAllowedSet
fn CharacterSetURLHostAllowedSet
fn CharacterSetURLPasswordAllowedSet
fn CharacterSetURLPathAllowedSet
fn CharacterSetURLQueryAllowedSet
fn CharacterSetURLUserAllowedSet
 
Users can also create custom character strings with:
 
fn CharacterSetWithCharactersInString( CFStringRef string ) = CFCharacterSetRef
 
</pre>
<syntaxhighlight lang="futurebasic">
include "NSLog.incl"
 
// Returns a percent-encoded copy of urlStr.
// Only characters in the alphanumeric character set are allowed through unescaped.
local fn PercentEncodeURLString( urlStr as CFStringRef ) as CFStringRef
// The allowed set is passed explicitly; a different CharacterSet function here would change which characters stay unescaped
CFStringRef encodedStr = fn StringByAddingPercentEncodingWithAllowedCharacters( urlStr, fn CharacterSetAlphanumericSet )
end fn = encodedStr
 
// Demo: log the encoded form of two sample URLs
NSLog( @"%@", fn PercentEncodeURLString( @"http://foo bar/" ) )
NSLog( @"%@", fn PercentEncodeURLString( @"http://www.rosettacode.org/wiki/URL_encoding" ) )

// NOTE(review): presumably starts the application event loop so the log output stays visible - confirm
HandleEvents
</syntaxhighlight>
{{out}}
<pre>
http%3A%2F%2Ffoo%20bar%2F
http%3A%2F%2Fwww%2Erosettacode%2Eorg%2Fwiki%2FURL%5Fencoding
</pre>
 
 
=={{header|Go}}==
Line 656 ⟶ 758:
http%3A%2F%2Ffoo+bar%2F
</pre>
 
 
=={{header|Groovy}}==
Line 729 ⟶ 832:
 
=={{header|Java}}==
Java includes the ''URLEncoder'' and ''URLDecoder'' classes for this specific task.
 
<syntaxhighlight lang="java">
The built-in URLEncoder in Java converts the space " " into a plus-sign "+" instead of "%20":
<syntaxhighlight lang="java">import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
import java.nio.charset.StandardCharsets;
 
</syntaxhighlight>
public class Main
<syntaxhighlight lang="java">
{
URLEncoder.encode("http://foo bar/", StandardCharsets.UTF_8)
public static void main(String[] args) throws UnsupportedEncodingException
</syntaxhighlight>
{
Alternatively, you could implement this with a basic for-loop.
String normal = "http://foo bar/";
<syntaxhighlight lang="java">
String encoded = URLEncoder.encode(normal, "utf-8");
String encode(String string) {
System.out.println(encoded);
StringBuilder encoded = new StringBuilder();
for (char character : string.toCharArray()) {
switch (character) {
/* rfc3986 and html5 */
case '-', '.', '_', '~', '*' -> encoded.append(character);
case ' ' -> encoded.append('+');
default -> {
if (alphanumeric(character))
encoded.append(character);
else {
encoded.append("%");
encoded.append("%02x".formatted((int) character));
}
}
}
}
return encoded.toString();
}</syntaxhighlight>
}
 
boolean alphanumeric(char character) {
{{out}}
return (character >= 'A' && character <= 'Z')
<pre>http%3A%2F%2Ffoo+bar%2F</pre>
|| (character >= 'a' && character <= 'z')
|| (character >= '0' && character <= '9');
}
</syntaxhighlight>
<pre>
http%3a%2f%2ffoo+bar%2f
</pre>
 
=={{header|JavaScript}}==
Line 841 ⟶ 966:
 
=={{header|langur}}==
<syntaxhighlight lang="langur">val .urlEncode = fn(.s) {
{{works with|langur|0.9.6}}
replace(
<syntaxhighlight lang="langur">val .urlEncode = f(.s) replace(
.s, re/[^A-Za-z0-9]/,
ffn(.s2) for{ .bjoin in"", s2b(.s2)map {fn _for.b: ~= $"%\{{.b:X02;}}", s2b .s2 },
)
)
}
 
val .original = "https://some website.com/"
 
writeln .original
writeln .urlEncode(.original)</syntaxhighlight>
 
This should work with non-ASCII characters as well (assuming that's valid).
 
We pass a custom anonymous function to replace(), which uses the s2b() function (returning UTF-8 bytes from a string), and a for loop adding them all.
 
writeln .urlEncode("https://some website.com/")</syntaxhighlight>
The :X02 is an interpolation modifier, generating a hexadecimal code of at least 2 characters, padded with zeroes.
 
{{out}}
<pre>https://some website.com
https%3A%2F%2Fsome%20website%2Ecom%2F</pre>
 
=={{header|Lasso}}==
Line 1,623 ⟶ 1,739:
works like ''toPercentEncoded'' and additionally encodes a space with '+'.
Both functions work for byte sequences (characters beyond '\255\' raise the exception RANGE_ERROR).
To encode Unicode characters it is necessary to convert them to UTF-8 with [https://seed7.sourceforge.net/libraries/unicode.htm#toUtf8(in_string) toUtf8] before.
<syntaxhighlight lang="seed7">$ include "seed7_05.s7i";
include "encoding.s7i";
 
Line 1,630 ⟶ 1,747:
writeln(toPercentEncoded("http://foo bar/"));
writeln(toUrlEncoded("http://foo bar/"));
end func;</syntaxhighlight>{{out}}
{{out}}
http%3A%2F%2Ffoo%20bar%2F
http%3A%2F%2Ffoo+bar%2F
Line 1,753 ⟶ 1,871:
<pre>http%3A%2F%2Ffoo%20bar%C3%A9%2F</pre>
 
=={{header|V (Vlang)}}==
<syntaxhighlight lang="v (vlang)">import net.urllib
fn main() {
println(urllib.query_escape("http://foo bar/"))
Line 1,765 ⟶ 1,883:
=={{header|Wren}}==
{{libheader|Wren-fmt}}
<syntaxhighlight lang="wren">import "./fmt" for Fmt
 
var urlEncode = Fn.new { |url|
890

edits