CloudFlare suffered a massive security issue affecting all of its customers, including Rosetta Code. All passwords not changed since February 19th 2017 have been expired, and session cookie longevity will be reduced until late March.--Michael Mol (talk) 05:15, 25 February 2017 (UTC)

Extract file extension

From Rosetta Code
Extract file extension is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Filename extensions are a rudimentary but commonly used way of identifying file types.

Task[edit]

Write a function or program that

  • takes one string argument representing the path/URL to a file
  • returns the filename extension according to the below specification, or an empty string if the filename has no extension.


If your programming language (or standard library) has built-in functionality for extracting a filename extension, show how it would be used and how exactly its behavior differs from this specification.

Specification

For the purposes of this task, a filename extension

  • occurs at the very end of the filename
  • consists of a period, followed solely by one or more ASCII letters or digits (A-Z, a-z, 0-9)
Test cases
Input Output Comment
http://example.com/download.tar.gz .gz
CharacterModel.3DS .3DS
.desktop .desktop
document empty string
document.txt_backup empty string, because _ is not a letter or number
/etc/pam.d/login empty string, as the period is in the parent directory name rather than the filename

ALGOL 68[edit]

Works with: ALGOL 68G version Any - tested with release 2.8.win32
# yields the file extension found at the very end of a pathname, or "" when   #
# there is none. An extension is a "." followed by one or more letters/digits #
OP EXTENSION = ( STRING pathname )STRING:
BEGIN
    INT first = LWB pathname;
    INT last  = UPB pathname;
    STRING result := "";
    # a pathname of 0 or 1 characters cannot hold "." plus a letter/digit     #
    IF first < last THEN
        IF isalnum( pathname[ last ] ) THEN
            # walk back over the trailing run of letters and digits           #
            INT scan := last;
            WHILE scan > first AND isalnum( pathname[ scan ] ) DO
                scan -:= 1
            OD;
            # only a "." immediately before that run makes it an extension    #
            IF pathname[ scan ] = "." THEN
                result := pathname[ scan : ]
            FI
        FI
    FI;
    result
END ; # EXTENSION #
 
# prints one line reporting the extension EXTENSION found for pathname and    #
# whether it equals the expected extension                                    #
PROC test extension = ( STRING pathname, STRING expected extension )VOID:
BEGIN
    STRING actual  = EXTENSION pathname;
    STRING verdict = IF actual = expected extension THEN "" ELSE "NOT" FI;
    STRING report  = pathname + " got extension: (" + actual + ") " + verdict + " as expected";
    write( ( report, newline ) )
END ; # test extension #
 
 
main:
(   # each row pairs a pathname with the extension it is expected to have     #
    [,]STRING cases = ( ( "http://example.com/download.tar.gz", ".gz"      )
                      , ( "CharacterModel.3DS",                 ".3DS"     )
                      , ( ".desktop",                           ".desktop" )
                      , ( "document",                           ""         )
                      , ( "document.txt_backup",                ""         )
                      , ( "/etc/pam.d/login",                   ""         )
                      );
    FOR row FROM 1 LWB cases TO 1 UPB cases DO
        test extension( cases[ row, 1 ], cases[ row, 2 ] )
    OD
)
Output:
http://example.com/download.tar.gz got extension: (.gz)  as expected
CharacterModel.3DS got extension: (.3DS)  as expected
.desktop got extension: (.desktop)  as expected
document got extension: ()  as expected
document.txt_backup got extension: ()  as expected
/etc/pam.d/login got extension: ()  as expected

ALGOL W[edit]

begin
 % extracts a file-extension from the end of a pathname.  %
 % The file extension is defined as a dot followed by one or more letters  %
 % or digits. As Algol W only has fixed length strings we limit the  %
 % extension to 32 characters and the pathname to 256 (the longest string  %
 % allowed by Algol W)  %
 % The result includes the leading "." and is blank padded to 32 characters  %
 % a result of all blanks means no extension was found  %
string(32) procedure extension( string(256) value pathname ) ;
begin
 
 % index of the pathname character currently being examined (0 based)  %
integer pathPos;
 
 % position to the previous character in the pathname  %
procedure prev  ; pathPos := pathPos - 1;
 % get the character at pathPos from pathname  %
string(1) procedure ch ; pathname( pathPos // 1 );
 % checks for a letter or digit - assumes the letters are contiguous  %
 % in the character set - not true for EBCDIC  %
logical procedure isLetterOrDigit( string(1) value c ) ;
( c <= "z" and c >= "a" ) or ( c <= "Z" and c >= "A" )
or ( c <= "9" and c >= "0" ) ;
 
 % find the length of the pathname with trailing blanks removed  %
 % on exit pathPos indexes the last non-blank character, or is -1 if the  %
 % pathname is entirely blank  %
pathPos := 255;
while pathPos >= 0 and ch = " " do prev;
 
 % extract the extension if possible  %
if pathPos <= 0
then ""  % no extension: 0 or 1 character pathname  %
else if not isLetterOrDigit( ch )
then ""  % no extension: last character not a letter/digit  %
else begin
 % skip back over the trailing letters and digits - the scan stops at  %
 % position 0 without testing that character, so a pathname that starts  %
 % with the "." is still handled by the test below  %
while pathPos > 0 and isLetterOrDigit( ch ) do prev;
if ch not = "."
then ""  % no extension: letters/digits not preceded by "."  %
else begin
 % have an extension  %
string(32) ext;
ext := " ";
 % algol W substring lengths must be compile-time constants  %
 % hence the loop to copy the extension characters  %
 % copying starts at the "." and continues into the trailing blanks of  %
 % pathname, stopping after 32 characters or at the end of pathname  %
for charPos := 0 until 31 do begin
if pathPos <= 255 then begin
ext( charPos // 1 ) := pathname( pathPos // 1 );
pathPos := pathPos + 1
end
end for_charPos ;
ext
end
end
 
end extension ;
 
 
 % test the extension procedure  %
 % writes the first 40 characters of pathname, the first 16 characters of  %
 % the extension found and whether it equals expectedExtension  %
procedure testExtension( string(256) value pathname
 ; string(32) value expectedExtension
) ;
begin
string(32) ext;
ext := extension( pathname );
write( pathname( 0 // 40 )
, " -> ("
, ext( 0 // 16 )
, ") "
, if ext = expectedExtension then "" else "NOT"
, " as expected"
)
end ; % testExtension %
 
 % the six test cases from the task description  %
testExtension( "http://example.com/download.tar.gz", ".gz" );
testExtension( "CharacterModel.3DS", ".3DS" );
testExtension( ".desktop", ".desktop" );
testExtension( "document", "" );
testExtension( "document.txt_backup", "" );
testExtension( "/etc/pam.d/login", "" );
 
end.
Output:
http://example.com/download.tar.gz       -> (.gz             )  as expected
CharacterModel.3DS                       -> (.3DS            )  as expected
.desktop                                 -> (.desktop        )  as expected
document                                 -> (                )  as expected
document.txt_backup                      -> (                )  as expected
/etc/pam.d/login                         -> (                )  as expected

AWK[edit]

The following code shows two methods.

The first one was provided by an earlier contributor and shows a little more awk syntax and builtins (albeit with a bug fixed: it was testing for underscores in the extension but not other characters such as hyphens). It can be adjusted to allow any character in the extension other than /, \, : or . by replacing [^a-zA-Z0-9] with [\\/\\\\:\\.].

 
BEGIN {
    # Sample file names; none contains a blank, so they can be packed into
    # one blank-separated string and unpacked with split().
    samples = "picture.jpg"
    samples = samples " http://mywebsite.com/picture/image.png"
    samples = samples " myuniquefile.longextension"
    samples = samples " IAmAFileWithoutExtension"
    samples = samples " /path/to.my/file"
    samples = samples " file.odd_one"
    count = split(samples, names, " ")

    # Print each name left-justified in 40 columns, then its quoted extension.
    for (k = 1; k <= count; k++)
        printf("%-40s '%s'\n", names[k], extract_ext(names[k]))
    exit(0)
}
# Returns the extension of fn WITHOUT the leading period, or "" when fn has
# no period in its last path component or the text after the last period
# contains anything other than ASCII letters and digits.
# dot and had_dot are locals.
function extract_ext(fn,    dot, had_dot) {
    # Drop everything up to and including each ":", "\" or "/" in turn,
    # leaving only the last path component.
    while (match(fn, /[\\\/:]/))
        fn = substr(fn, RSTART + 1)

    # Drop everything up to and including each "." in turn, remembering
    # whether at least one was seen.
    had_dot = 0
    while ((dot = index(fn, ".")) > 0) {
        fn = substr(fn, dot + 1)
        had_dot = 1
    }

    if (!had_dot || fn ~ /[^a-zA-Z0-9]/)
        return("")
    return(fn)
}
 

The second method is shorter and dispenses with the need to search for and remove the path components first. It too can be modified to allow all valid extensions (not just those described in the specification), by replacing \\.[A-Za-z0-9]+$ with \\.[^\\/\\\\:\\.]+$.

 
BEGIN {
    # Same sample names as the first method, unpacked from one
    # blank-separated string (no name contains a blank).
    n = split("picture.jpg " \
              "http://mywebsite.com/picture/image.png " \
              "myuniquefile.longextension " \
              "IAmAFileWithoutExtension " \
              "/path/to.my/file " \
              "file.odd_one", files, " ")

    k = 0
    while (++k <= n)
        printf("%-40s '%s'\n", files[k], extract_ext(files[k]))
    exit(0)
}
# Returns the extension of fn WITHOUT the leading period, or "" if fn does
# not end in a period followed by one or more ASCII letters or digits.
# pos is a local.
function extract_ext(fn,    pos) {
    # The character class must be the specification's [A-Za-z0-9]; the more
    # permissive "\\.[^\\/\\\\:\\.]+$" form would accept names such as
    # "file.odd_one", which the task requires to have no extension.
    pos = match(fn, "\\.[A-Za-z0-9]+$")
    if (pos == 0) {
        return ("")
    }
    # pos is the index of the period, so the extension text starts after it
    return (substr(fn, pos + 1))
}
 

Both examples give the output:

picture.jpg                              'jpg'
http://mywebsite.com/picture/image.png   'png'
myuniquefile.longextension               'longextension'
IAmAFileWithoutExtension                 ''
/path/to.my/file                         ''
file.odd_one                             ''

C[edit]

#include <assert.h>
#include <ctype.h>
#include <string.h>
#include <stdio.h>
 
/* Returns a pointer to the extension of 'string', including its leading '.'.
 * An extension is the last '.' in 'string' followed by one or more
 * alphanumeric characters (as judged by isalnum) running to the end.
 * If no extension is found, returns a pointer to the end of 'string'
 * (i.e. to its terminating '\0', an empty string).
 * 'string' must not be NULL. The result points into 'string'. */

char* file_ext(const char *string)
{
    assert(string != NULL);
    char *end = (char*) string + strlen(string);
    char *ext = strrchr(string, '.');

    /* No period at all, or a period with nothing after it ("file."):
     * the specification needs at least one character after the period. */
    if (ext == NULL || ext[1] == '\0')
        return end;

    /* Everything after the last period must be a letter or a digit; this
     * also rejects a period that sits in a parent directory name. */
    for (const char *iter = ext + 1; *iter != '\0'; iter++) {
        if (!isalnum((unsigned char)*iter))
            return end;
    }

    return ext;
}
 
/* Runs file_ext over the task's test cases. Each mismatch is reported on
 * stderr; the exit status is 0 when every case matches and 1 otherwise. */
int main(void)
{
    /* Each entry pairs an input path with the extension expected for it. */
    static const struct {
        const char *path;
        const char *expected;
    } cases[] = {
        { "http://example.com/download.tar.gz", ".gz"      },
        { "CharacterModel.3DS",                 ".3DS"     },
        { ".desktop",                           ".desktop" },
        { "document",                           ""         },
        { "document.txt_backup",                ""         },
        { "/etc/pam.d/login",                   ""         },
    };
    const size_t count = sizeof cases / sizeof cases[0];
    int failed = 0;

    for (size_t k = 0; k < count; k++) {
        const char *got = file_ext(cases[k].path);
        if (strcmp(got, cases[k].expected) == 0)
            continue;
        fprintf(stderr, "expected '%s' for '%s', got '%s'\n",
                cases[k].expected, cases[k].path, got);
        failed = 1;
    }
    return failed;
}

C++[edit]

#include <string>
#include <algorithm>
#include <iostream>
#include <vector>
#include <regex>
 
// Returns the text after the last '.' in filename (the period itself is not
// included) when that text consists only of the characters A-Z, a-z and 0-9.
// Returns an empty string when there is no '.', or when any other character
// follows the last '.'.
std::string findExtension ( const std::string & filename ) {
   const std::string::size_type dot = filename.rfind ( '.' ) ;
   if ( dot == std::string::npos )
      return "" ;

   const std::string tail = filename.substr ( dot + 1 ) ;
   const bool allAlnum = std::all_of ( tail.begin( ) , tail.end( ) ,
      [] ( char c ) {
         return ( c >= 'A' && c <= 'Z' )
             || ( c >= 'a' && c <= 'z' )
             || ( c >= '0' && c <= '9' ) ;
      } ) ;
   return allAlnum ? tail : std::string( ) ;
}
 
// Prints every sample file name together with the extension found for it.
int main( ) {
   const std::vector<std::string> filenames {
      "picture.jpg" ,
      "http://mywebsite.com/picture/image.png" ,
      "myuniquefile.longextension" ,
      "IAmAFileWithoutExtension" ,
      "/path/to.my/file" ,
      "file.odd_one" ,
      "thisismine."
   } ;
   for ( const std::string & name : filenames )
      std::cout << name << " has extension : " << findExtension( name ) << " !\n" ;
   return 0 ;
}
 
Output:
picture.jpg has extension : jpg !
http://mywebsite.com/picture/image.png has extension : png !
myuniquefile.longextension has extension : longextension !
IAmAFileWithoutExtension has extension :  !
/path/to.my/file has extension :  !
file.odd_one has extension :  !
thisismine. has extension :  !

C#[edit]

/// <summary>
/// Returns the extension at the end of <paramref name="filename"/>,
/// including its leading period, or the empty string if there is none.
/// An extension is a period followed by one or more of A-Z, a-z, 0-9.
/// </summary>
public static string FindExtension(string filename) {
    // Walk back over the trailing run of ASCII letters and digits.
    int pos = filename.Length - 1;
    while (pos >= 0) {
        char c = filename[pos];
        bool alnum = (c >= '0' && c <= '9')
                  || (c >= 'A' && c <= 'Z')
                  || (c >= 'a' && c <= 'z');
        if (!alnum) break;
        pos--;
    }
    // pos is now -1 (whole name was letters/digits) or the index of the
    // first other character counted from the end. That character must be a
    // period, and it must not be the last character of the name.
    bool hasExtension = pos >= 0
                     && filename[pos] == '.'
                     && pos + 1 < filename.Length;
    return hasExtension ? filename.Substring(pos) : "";
}

Using regular expressions (C# 6)

/// <summary>
/// Returns the text matched by <c>\.[A-Za-z0-9]+$</c> in
/// <paramref name="filename"/>, or the empty string when nothing matches.
/// </summary>
public static string FindExtension(string filename) {
    Match found = Regex.Match(filename, @"\.[A-Za-z0-9]+$");
    return found.Success ? found.Value : "";
}

Emacs Lisp[edit]

(file-name-extension "foo.txt")
=>
"txt"

A missing extension (nil) is distinguished from an empty extension (""), but wrapping the call in (or ... "") gives "" for both if desired.

(file-name-extension "foo.") => ""
(file-name-extension "foo") => nil

An Emacs backup ~ or .~NUM~ are not part of the extension, but otherwise any characters are allowed.

(file-name-extension "foo.txt~")        => "txt"
(file-name-extension "foo.txt.~1.234~") => "txt"

Forth[edit]

This example needs updating due to a modification in the task. Please examine and fix the code if needed, then remove this message.
Details: The format of a suffix has been clarified, and the test-cases have been replaced with new ones.
\ invalid? leaves a true flag when character c is neither a letter nor a digit.
\ The character is upper-cased first so a single A..Z range test covers both
\ cases; "within" excludes its upper bound, hence the 1+ on Z and on 9.
: invalid? ( c -- f )
toupper dup [char] A [char] Z 1+ within
swap [char] 0 [char] 9 1+ within or 0= ;
\ extension takes a string addr1 u1 and leaves the substring addr2 u2 holding
\ its extension, period included. When there is no extension the result is a
\ zero-length string positioned at the end of the input. An empty input is
\ returned unchanged.
: extension ( addr1 u1 -- addr2 u2 )
dup 0= if exit then
\ stack becomes: addr1 u1 addr1 end, where end is one past the last character
2dup over +
\ step the scan pointer back until it passes the start of the string or
\ reaches a character that is not a letter or digit
begin 1- 2dup <= while dup c@ invalid? until then
\ no '.' found
2dup - 0> if 2drop dup /string exit then
\ invalid char
dup c@ [char] . <> if 2drop dup /string exit then
\ turn the scan pointer into an offset from addr1
swap -
\ '.' is last char
2dup 1+ = if drop dup then
/string ;
 
\ type.quoted displays the string addr u between single quotes.
: type.quoted ( addr u -- )
[char] ' emit type [char] ' emit ;
\ test displays the quoted string, then " => ", then its quoted extension,
\ and ends the line.
: test ( addr u -- )
2dup type.quoted ." => " extension type.quoted cr ;
\ tests runs test on each sample file name in turn.
: tests
s" picture.jpg" test
s" http://mywebsite.com/picture/image.png" test
s" myuniquefile.longextension" test
s" IAmAFileWithoutExtension" test
s" /path/to.my/file" test
s" file.odd_one" test
s" IDontHaveAnExtension." test ;
Output:
cr tests
'picture.jpg' => '.jpg'
'http://mywebsite.com/picture/image.png' => '.png'
'myuniquefile.longextension' => '.longextension'
'IAmAFileWithoutExtension' => ''
'/path/to.my/file' => ''
'file.odd_one' => ''
'IDontHaveAnExtension.' => ''
 ok

Fortran[edit]

The plan is to scan backwards from the end of the text until a non-extensionish character is encountered. If it is a period, then a valid file extension has been spanned. Otherwise, no extension. Yet again the "no specification" on the possibility of shortcut evaluation of compound logical expressions prevents the structured use of a DO WHILE(L1 > 0 & TEXT(L1:L1)etc) loop because the possible evaluation of both parts of the expression means that the second part may attempt to access character zero of a text. So, the compound expression has to be broken into two separate parts.

The source incorporates a collection of character characterisations via suitable spans of a single sequence of characters. Unfortunately, the PARAMETER statement does not allow its constants to appear in EQUIVALENCE statements, so the text is initialised by DATA statements, and thus loses the protection of read-only given to constants defined via PARAMETER statements. The statements are from a rather more complex text scanning scheme, as all that are needed here are the symbols of GOODEXT.

The text scan could instead check for a valid character via something like ("a" <= C & C <= "z") | ("A" <= C & C <= "Z") | ("0" <= C & C <= "9") but this is not just messy but unreliable - in EBCDIC for example there are gaps in the sequence of letters that are occupied by other symbols. So instead, a test via INDEX into a sequence of all the valid symbols. If one was in a hurry, for eight-bit character codes, an array GOODEXT of 256 logical values could be indexed by the numerical value of the character.
!     TEXTGNASH holds a set of character-class strings that are all overlaid,
!     via EQUIVALENCE, on one 72-character work area CHARACTER(1:72), plus the
!     function FEXT that extracts a file name's extension. Only GOODEXT
!     (positions 8 to 69: the digits, then capitals, then lower case) is used
!     by FEXT; the other names come from a larger text-scanning scheme.
      MODULE TEXTGNASH	!Some text inspection.
CHARACTER*10 DIGITS !Integer only.
CHARACTER*11 DDIGITS !With a full stop masquerading as a decimal point.
CHARACTER*13 SDDIGITS !Signed decimal digits.
CHARACTER*4 EXPONENTISH !With exponent parts.
CHARACTER*17 NUMBERISH !The complete mix.
CHARACTER*16 HEXLETTERS !Extended for base sixteen.
CHARACTER*62 DIGILETTERS !File nameish but no .
CHARACTER*26 LITTLELETTERS,BIGLETTERS !These are well-known.
CHARACTER*52 LETTERS !The union thereof.
CHARACTER*66 NAMEISH !Allowing digits and . and _ as well.
CHARACTER*3 ODDITIES !And allow these in names also.
CHARACTER*1 CHARACTER(72) !Prepare a work area.
EQUIVALENCE !Whose components can be fingered.
1 (CHARACTER( 1),EXPONENTISH,NUMBERISH), !Start with numberish symbols that are not nameish.
2 (CHARACTER( 5),SDDIGITS), !Since the sign symbols are not nameish.
3 (CHARACTER( 7),DDIGITS,NAMEISH), !Computerish names might incorporate digits and a .
4 (CHARACTER( 8),DIGITS,HEXLETTERS,DIGILETTERS), !A proper name doesn't start with a digit.
5 (CHARACTER(18),BIGLETTERS,LETTERS), !Just with a letter.
6 (CHARACTER(44),LITTLELETTERS), !The second set.
7 (CHARACTER(70),ODDITIES) !Tack this on the end.
!     Only four of the overlaid strings are initialised; between them they
!     fill all 72 positions, and so give every other name its value too.
DATA EXPONENTISH /"eEdD"/ !These on the front.
DATA SDDIGITS /"+-.0123456789"/ !Any of these can appear in a floating point number.
DATA BIGLETTERS /"ABCDEFGHIJKLMNOPQRSTUVWXYZ"/ !Simple.
DATA LITTLELETTERS /"abcdefghijklmnopqrstuvwxyz"/ !Subtly different.
DATA ODDITIES /"_:#"/ !Allow these in names also. This strains := usage!
 
CHARACTER*62 GOODEXT !These are all the characters allowed
EQUIVALENCE (CHARACTER(8),GOODEXT)
c PARAMETER (GOODEXT = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" !for an approved
c 1 //"abcdefghijklmnopqrstuvwxyz" !file "extension" part
c 2 //"0123456789") !Of a file name.
INTEGER MEXT !A fixed bound.
PARAMETER (MEXT = 28) !This should do.
CONTAINS
!     FEXT returns the extension of FNAME, period included, as a fixed-length
!     CHARACTER*(MEXT) value, so the result is blank-padded (or cut short) to
!     MEXT characters. An all-blank result means no extension was found.
!     The scan runs backwards from LEN(FNAME), so trailing spaces in FNAME
!     count as invalid characters and give no extension.
!     NOTE(review): a name ending in a bare period appears to yield "." since
!     nothing requires a character after the period - confirm if that matters.
CHARACTER*(MEXT) FUNCTION FEXT(FNAME) !Return the file extension part.
CHARACTER*(*) FNAME !May start with the file's path name blather.
INTEGER L1,L2 !Fingers to the text.
L2 = LEN(FNAME) !The last character of the file name.
L1 = L2 !Starting at the end...
10 IF (L1.GT.0) THEN !Damnit, can't rely on DO WHILE(safe & test)
IF (INDEX(GOODEXT,FNAME(L1:L1)).GT.0) THEN !So do the two parts explicitly.
L1 = L1 - 1 !Well, that was a valid character for an extension.
GO TO 10 !So, move back one and try again.
END IF !Until the end of valid stuff.
IF (FNAME(L1:L1).EQ.".") THEN !Stopped here. A proper introduction?
L1 = L1 - 1 !Yes. Include the period.
GO TO 20 !And escape.
END IF !Otherwise, not valid stuff.
END IF !Keep on moving back.
L1 = L2 !If we're here, no period was found.
20 FEXT = FNAME(L1 + 1:L2) !The text of the extension.
END FUNCTION FEXT !Possibly, blank.
END MODULE TEXTGNASH !Enough for this.
 
!     POKE writes the result of FEXT for two collections of sample file names
!     to unit 6 in list-directed format, then lists the characters FEXT
!     accepts in an extension. Each name is passed as a literal so that its
!     length is exact and no trailing spaces reach FEXT.
PROGRAM POKE
USE TEXTGNASH
 
!     The earlier test collection.
WRITE (6,*) FEXT("Picture.jpg")
WRITE (6,*) FEXT("http://mywebsite.com/picture/image.png")
WRITE (6,*) FEXT("myuniquefile.longextension")
WRITE (6,*) FEXT("IAmAFileWithoutExtension")
WRITE (6,*) FEXT("/path/to.my/file")
WRITE (6,*) FEXT("file.odd_one")
WRITE (6,*)
WRITE (6,*) "Now for the new test collection..."
WRITE (6,*) FEXT("http://example.com/download.tar.gz")
WRITE (6,*) FEXT("CharacterModel.3DS")
WRITE (6,*) FEXT(".desktop")
WRITE (6,*) FEXT("document")
WRITE (6,*) FEXT("document.txt_backup")
WRITE (6,*) FEXT("/etc/pam.d/login")
WRITE (6,*) "Approved characters: ",GOODEXT
END

The output cheats a little, in that trailing spaces appear just as blankly as no spaces. The result of FEXT could be presented to TRIM (if that function is available), or the last non-blank could be found. With F2003, a scheme to enable character variables to be redefined to take on a current length is available, and so trailing spaces could no longer appear. This facility would also solve the endlessly annoying question of "how long is long enough", manifested in parameter MEXT being what might be a perfect solution. Once, three was the maximum extension length (not counting the period), then perhaps six, but now, what?

 .jpg
 .png
 .longextension




 Now for the new test collection...
 .gz
 .3DS
 .desktop



 Approved characters:
 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz

Note that if FEXT were presented with a file name containing trailing spaces, it would declare no extension to be present.

FreeBASIC[edit]

' FB 1.05.0 Win64
 
' Reports whether s compares within "a".."z", "A".."Z" or "0".."9".
' Callers in this program pass a single character.
Function isAlphaNum(s As String) As Boolean
  If s >= "a" AndAlso s <= "z" Then Return True
  If s >= "A" AndAlso s <= "Z" Then Return True
  Return (s >= "0" AndAlso s <= "9")
End Function
 
' Returns the text after the last "." in filePath (the period itself is not
' included) when that text is non-empty and every character of it passes
' isAlphaNum. Returns "" otherwise.
Function extractFileExt(filePath As String) As String
  Dim dotPos As Integer = InstrRev(filePath, ".")
  ' dotPos = 0: no period (this also covers an empty path).
  ' dotPos = Len(filePath): the period is the last character.
  If dotPos = 0 OrElse dotPos = Len(filePath) Then Return ""

  Dim suffix As String = Mid(filePath, dotPos + 1)
  Dim k As Integer = 1
  While k <= Len(suffix)
    If isAlphaNum(Mid(suffix, k, 1)) = False Then Return ""
    k += 1
  Wend
  Return suffix
End Function
 
' The six test cases from the task description.
Dim filePaths(1 To 6) As String = _
{ _
  "http://example.com/download.tar.gz", _
  "CharacterModel.3DS", _
  ".desktop", _
  "document", _
  "document.txt_backup", _
  "/etc/pam.d/login" _
}

' Two-column table: the path, then its extension starting at column 40.
Print "File path"; Tab(40); "Extension"
Print "========="; Tab(40); "========="
Print
For i As Integer = LBound(filePaths) To UBound(filePaths)
  Dim shown As String = extractFileExt(filePaths(i))
  If shown = "" Then shown = "(empty string)"
  Print filePaths(i); Tab(40); shown
Next
Print
Print "Press any key to quit"
Sleep
Output:
File path                              Extension
=========                              =========

http://example.com/download.tar.gz     gz
CharacterModel.3DS                     3DS
.desktop                               desktop
document                               (empty string)
document.txt_backup                    (empty string)
/etc/pam.d/login                       (empty string)

Go[edit]

package main
 
import "fmt"
 
// Ext returns the filename extension at the very end of path, including its
// leading period, or "" if there is none. An extension is a period followed
// by one or more ASCII letters or digits (A-Z, a-z, 0-9).
func Ext(path string) string {
	// Scan backwards: only letters and digits may appear before the period
	// is reached, so any other byte (such as '/' or '_') means no extension.
	for i := len(path) - 1; i >= 0; i-- {
		c := path[i]
		switch {
		case c == '.':
			if i == len(path)-1 {
				// A bare trailing period has no letters or digits after it.
				return ""
			}
			return path[i:]
		case '0' <= c && c <= '9', 'A' <= c && c <= 'Z', 'a' <= c && c <= 'z':
			// Valid extension character: keep scanning.
		default:
			return ""
		}
	}
	return ""
}
 
func main() {
type testcase struct {
input string
output string
}
 
tests := []testcase{
{"http://example.com/download.tar.gz", ".gz"},
{"CharacterModel.3DS", ".3DS"},
{".desktop", ".desktop"},
{"document", ""},
{"document.txt_backup", ""},
{"/etc/pam.d/login", ""},
}
 
for _, testcase := range tests {
ext := Ext(testcase.input)
if ext != testcase.output {
panic(fmt.Sprintf("expected %q for %q, got %q",
testcase.output, testcase.input, ext))
}
}
}

Haskell[edit]

module FileExtension
   where

-- | Return the filename extension at the very end of the given path,
-- including its leading period, or the empty string if there is none.
--
-- An extension is a period followed solely by one or more ASCII letters or
-- digits. Any other character after the last period (for example '/', '_'
-- or '-') means there is no extension, as does a period with nothing
-- after it.
myextension :: String -> String
myextension s
   | '.' `notElem` s                    = ""
   | null extension                     = ""   -- period is the last character
   | not ( all isAsciiAlnum extension ) = ""
   | otherwise                          = '.' : extension
   where
      -- the text after the last period
      extension = reverse ( takeWhile ( /= '.' ) $ reverse s )
      -- written out by hand so the module needs no imports
      isAsciiAlnum c = ( c >= 'a' && c <= 'z' )
                    || ( c >= 'A' && c <= 'Z' )
                    || ( c >= '0' && c <= '9' )
 
Output:
map myextension ["http://example.com/download.tar.gz", "CharacterModel.3DS", ".desktop", "document", "document.txt_backup", "/etc/pam.d/login"]
[".gz",".3DS",".desktop","","",""]

J[edit]

This example needs updating due to a modification in the task. Please examine and fix the code if needed, then remove this message.
Details: The format of a suffix has been clarified, and the test-cases have been replaced with new ones.

Implementation:

NB. ext y  returns the part of y matched by the regular expression
NB. [.][a-zA-Z0-9]+$  (a period followed by letters/digits at the end of y).
NB. It is a fork: rxmatch locates the match, ] passes y through unchanged and
NB. ;@rxfrom extracts the matched characters from y.
require'regex'
ext=: '[.][a-zA-Z0-9]+$'&rxmatch ;@rxfrom ]

Obviously most of the work here is done by the regex implementation (pcre, if that matters - and this particular kind of expression tends to be a bit more concise expressed in perl than in J...).

Perhaps of interest is that this is an example of a J fork - here we have three verbs separated by spaces. Unlike a unix system fork (which spins up child process which is an almost exact clone of the currently running process), a J fork is three independently defined verbs. The two verbs on the edge get the fork's argument and the verb in the middle combines those two results.

The left verb uses rxmatch to find the beginning position of the match and its length. The right verb is the identity function. The middle verb extracts the desired characters from the original argument. (For a non-match, the length of the "match" is zero so the empty string is extracted.)


Alternative non-regex Implementation

NB. ext y  selects characters of y with a boolean mask (the leading #~ ).
NB. A position qualifies when it holds a '.' and no character from there to
NB. the end of y lies outside '.',AlphaNum_j_ ; the outer +./\ then keeps that
NB. position and everything after it.
NB. NOTE(review): periods are themselves allowed in the tail, so the result
NB. appears to start at the FIRST qualifying period - confirm against the
NB. updated task specification.
ext=: #~ [: +./\ e.&'.' *. [: -. [: +./\. -.@e.&('.',AlphaNum_j_)

Task examples:

   ext 'picture.jpg'
.jpg
ext 'http://mywebsite.com/picture/image.png'
.png
Examples=: 'picture.jpg';'http://mywebsite.com/picture/image.png';'myuniquefile.longextension';'IAmAFileWithoutExtension';'/path/to.my/file';'file.odd_one'
ext each Examples
┌────┬────┬──────────────┬┬┬┐
│.jpg│.png│.longextension││││
└────┴────┴──────────────┴┴┴┘

Java[edit]

/**
 * Prints each sample file name followed by its extension (leading period
 * included), or the text "null" when the name has no extension.
 */
public class Test {

    public static void main(String[] args) {
        String[] filenames = {
            "http://example.com/download.tar.gz",
            "CharacterModel.3DS",
            ".desktop",
            "document",
            "document.txt_backup",
            "/etc/pam.d/login"
        };

        for (int k = 0; k < filenames.length; k++) {
            String name = filenames[k];
            // Text from the last period onwards; empty when there is no period.
            int dot = name.lastIndexOf('.');
            String suffix = (dot < 0) ? "" : name.substring(dot);
            // Accept it only if it is a period plus one or more letters/digits.
            String shown = suffix.matches("\\.[a-zA-Z0-9]+") ? suffix : "null";
            System.out.println(name + " -> " + shown);
        }
    }
}
Output:
http://example.com/download.tar.gz -> .gz
CharacterModel.3DS -> .3DS
.desktop -> .desktop
document -> null
document.txt_backup -> null
/etc/pam.d/login -> null

jq[edit]

This example needs updating due to a modification in the task. Please examine and fix the code if needed, then remove this message.
Details: The format of a suffix has been clarified, and the test-cases have been replaced with new ones.

Pending resolution of the inconsistency in the task description as of this writing, the following definitions exclude the delimiting period.

In the first section, a version intended for jq version 1.4 is presented. A simpler definition using "match", a regex feature of subsequent versions of jq, is then given.

Works with: jq version 1.4
# Emits the text after the last "." of the input string (the period itself is
# excluded) when that text contains only ASCII letters and digits; emits ""
# otherwise, including when the input has no period.
def file_extension:
  # true iff no codepoint of the input string falls outside a-z, A-Z, 0-9
  def alphanumeric:
    explode
    | map(select(((97 <= . and . <= 122) or (65 <= . and . <= 90) or (48 <= . and . <= 57)) | not))
    | length == 0;
  rindex(".") as $dot
  | if $dot == null then ""
    else .[1+$dot:] as $suffix
    | if ($suffix | alphanumeric) then $suffix # or ".\($suffix)" if the period is wanted
      else ""
      end
    end;
Works with: jq version 1.5
# Emits the letters/digits captured after the final "." of the input string
# (the period itself is excluded), or "" when the pattern does not match.
def file_extension:
  ( match( "\\.([a-zA-Z0-9]*$)" ) | .captures[0].string ) // "" ;

Examples:

Using either version above gives the same results.

# Feed each sample file name through file_extension and report the result.
# ("myuniquefile.longextension" is listed twice.)
"picture.jpg",
"myuniquefile.longextension",
"http://mywebsite.com/picture/image.png",
"myuniquefile.longextension",
"IAmAFileWithoutExtension",
"/path/to.my/file",
"file.odd_one"
| "\(.) has extension: \"\(file_extension)\""
Output:
$ jq -r -n -f Extract_file_extension.jq
picture.jpg has extension: "jpg"
myuniquefile.longextension has extension: "longextension"
http://mywebsite.com/picture/image.png has extension: "png"
myuniquefile.longextension has extension: "longextension"
IAmAFileWithoutExtension has extension: ""
/path/to.my/file has extension: ""
file.odd_one has extension: ""

Kotlin[edit]

// version 1.0.6
 
/** Matches any single character that is not an ASCII letter or digit. */
val r = Regex("[^a-zA-Z0-9]")

/**
 * Returns the extension of the file named by [path], with its leading
 * period, or "" when there is none.
 *
 * The file name is the part of [path] after the last '/'; when [path] has no
 * '/', it is the part after the last '\'. The extension is whatever follows
 * the last '.' of that file name, provided it holds no character matched
 * by [r].
 */
fun extractFileExtension(path: String): String {
    if (path.isEmpty()) return ""

    val afterSlash = path.substringAfterLast('/')
    // Unchanged text means there was no '/', so try '\' as the separator.
    val fileName = if (afterSlash == path) path.substringAfterLast('\\') else afterSlash

    val dot = fileName.lastIndexOf('.')
    if (dot < 0) return ""

    val ext = fileName.substring(dot + 1)
    return if (r.containsMatchIn(ext)) "" else "." + ext
}
 
/** Prints each sample path, padded to 37 columns, with its extension. */
fun main(args: Array<String>) {
    listOf(
        "http://example.com/download.tar.gz",
        "CharacterModel.3DS",
        ".desktop",
        "document",
        "document.txt_backup",
        "/etc/pam.d/login",
        "c:\\programs\\myprogs\\myprog.exe",        // using back-slash as delimiter
        "c:\\programs\\myprogs\\myprog.exe_backup"  // ditto
    ).forEach { path ->
        val ext = extractFileExtension(path)
        val shown = if (ext.isEmpty()) "(empty string)" else ext
        println("${path.padEnd(37)} -> $shown")
    }
}
Output:
http://example.com/download.tar.gz    -> .gz
CharacterModel.3DS                    -> .3DS
.desktop                              -> .desktop
document                              -> (empty string)
document.txt_backup                   -> (empty string)
/etc/pam.d/login                      -> (empty string)
c:\programs\myprogs\myprog.exe        -> .exe
c:\programs\myprogs\myprog.exe_backup -> (empty string)

Lua[edit]

-- Lua pattern docs at http://www.lua.org/manual/5.1/manual.html#5.4.1
-- Returns the trailing "." plus one or more %w characters of filename,
-- or "" when filename does not end that way.
function fileExt (filename)
    local ext = filename:match("(%.%w+)$")
    if ext then
        return ext
    end
    return ""
end
 
-- The six test cases from the task description.
local testCases = {
    "http://example.com/download.tar.gz",
    "CharacterModel.3DS",
    ".desktop",
    "document",
    "document.txt_backup",
    "/etc/pam.d/login"
}
-- Print each case followed by its extension in double quotes.
for index = 1, #testCases do
    local example = testCases[index]
    print(string.format('%s -> "%s"', example, fileExt(example)))
end
Output:
http://example.com/download.tar.gz -> ".gz"
CharacterModel.3DS -> ".3DS"
.desktop -> ".desktop"
document -> ""
document.txt_backup -> ""
/etc/pam.d/login -> ""

Oforth[edit]

If extension is not valid, returns null, not "". Easy to change if "" is required.

// fileExt takes a string s and returns its extension including the period,
// or null when s has no period or the text after the last period fails the
// character test.
: fileExt( s -- t ) 
| i |
// i is the index of the last '.' in s; no period at all means no extension
s lastIndexOf('.') dup ->i ifNull: [ null return ]
// every character after the period must satisfy isAlpha
// NOTE(review): the sample output accepts ".3DS", so isAlpha presumably
// admits digits as well - confirm against the Oforth documentation
s extract(i 1+, s size) conform(#isAlpha) ifFalse: [ null return ]
// the extension runs from the period to the end of s
s extract(i, s size)
;
Output:
>"http://example.com/download.tar.gz" fileExt .
.gz ok
>
ok
>"CharacterModel.3DS" fileExt .
.3DS ok
>
ok
>".desktop" fileExt .
.desktop ok
>"document" fileExt .
null ok
>"document.txt_backup" fileExt .
null ok
>"/etc/pam.d/login" fileExt .
null ok
>

Perl[edit]

Translation of: Perl 6
# Returns the filename extension at the end of the given path, including its
# leading period, or '' if the path does not end in a period followed by one
# or more ASCII letters or digits.
sub extension {
    my $path = shift;
    # Capture explicitly and test whether the match succeeded. Reading $&
    # after a FAILED match would return whatever the last successful match
    # left behind (possibly one made by the caller), not an empty string.
    return $path =~ / ( \. [a-z0-9]+ ) $ /xi ? $1 : '';
}

Testing:

# Print each test path next to its extension in single quotes.
my @paths = qw[
    http://example.com/download.tar.gz
    CharacterModel.3DS
    .desktop
    document
    document.txt_backup
    /etc/pam.d/login
];
foreach my $path (@paths) {
    printf "%-35s %-11s\n", $path, "'" . extension($path) . "'";
}
Output:
http://example.com/download.tar.gz  '.gz'      
CharacterModel.3DS                  '.3DS'     
.desktop                            '.desktop' 
document                            ''         
document.txt_backup                 ''         
/etc/pam.d/login                    ''

Perl 6[edit]

The built-in IO::Path class has an .extension method:

say $path.IO.extension;

Contrary to this task's specification, it

  • doesn't include the dot in the output
  • doesn't restrict the extension to letters and numbers.


Here's a custom implementation which does satisfy the task requirements:

# Returns the extension at the end of $path, including its leading period,
# or the empty string when $path does not end in a period followed by one or
# more ASCII letters or digits (matched case-insensitively).
sub extension (Str $path --> Str) {
    my $found = $path ~~ /:i '.' <[a..z0..9]>+ $ /;
    $found ?? ~$found !! '';
}

Testing:

# For each test path print the path, the custom extension() result and the
# built-in IO::Path .extension result side by side.
for <
    http://example.com/download.tar.gz
    CharacterModel.3DS
    .desktop
    document
    document.txt_backup
    /etc/pam.d/login
> -> $path {
    printf "%-35s %-11s %-12s\n", $path, extension($path).perl, $path.IO.extension.perl;
}
Output:
http://example.com/download.tar.gz  ".gz"       "gz"        
CharacterModel.3DS                  ".3DS"      "3DS"       
.desktop                            ".desktop"  "desktop"   
document                            ""          ""          
document.txt_backup                 ""          "txt_backup"
/etc/pam.d/login                    ""          ""

Phix[edit]

-- Returns the extension at the very end of filename, including its leading
-- period, or "" if there is none. An extension is a period followed solely
-- by one or more ASCII letters or digits (A-Z, a-z, 0-9).
function getExtension(string filename)
    for i=length(filename) to 1 by -1 do
        integer ch = filename[i]
        if ch='.' then
            -- a bare trailing period has no letters/digits after it
            if i=length(filename) then exit end if
            return filename[i..$]
        end if
        -- anything other than a letter or digit before the period is
        -- reached (not just \ / and _) means there is no extension
        if not ((ch>='0' and ch<='9')
             or (ch>='A' and ch<='Z')
             or (ch>='a' and ch<='z')) then
            exit
        end if
    end for
    return ""
end function
 
-- Sample names from both the earlier and the current task description.
constant tests = {"mywebsite.com/picture/image.png",
                  "http://mywebsite.com/picture/image.png",
                  "myuniquefile.longextension",
                  "IAmAFileWithoutExtension",
                  "/path/to.my/file",
                  "file.odd_one",
                  "http://example.com/download.tar.gz",
                  "CharacterModel.3DS",
                  ".desktop",
                  "document",
                  "document.txt_backup",
                  "/etc/pam.d/login"}
-- Show each name alongside the extension found for it.
for t=1 to length(tests) do
    string name = tests[t]
    printf(1,"%s ==> %s\n",{name,getExtension(name)})
end for
Output:
mywebsite.com/picture/image.png ==> .png
http://mywebsite.com/picture/image.png ==> .png
myuniquefile.longextension ==> .longextension
IAmAFileWithoutExtension ==>
/path/to.my/file ==>
file.odd_one ==>
http://example.com/download.tar.gz ==> .gz
CharacterModel.3DS ==> .3DS
.desktop ==> .desktop
document ==>
document.txt_backup ==>
/etc/pam.d/login ==>

The builtin get_file_extension() could also be used, however that routine differs from the task description in that "libglfw.so.3.1" => "so", and all results are lowercase even if the input is not.

PowerShell[edit]

# Returns the extension of $file, including its leading period, or an empty
# result when there is none. The candidate comes from
# [System.IO.Path]::GetExtension and is kept only if it is a period followed
# solely by one or more ASCII letters or digits.
function extension($file){
    $ext = [System.IO.Path]::GetExtension($file)
    if (-not [String]::IsNullOrEmpty($ext)) {
        # Reject ANY character outside A-Z, a-z, 0-9 (not only "_").
        # \A and \z anchor to the true start and end of the string.
        if ($ext -notmatch '\A\.[A-Za-z0-9]+\z') {$ext = ""}
    }
    $ext
}
# Run the task's six test cases through extension, one result per line.
@(
    "http://example.com/download.tar.gz",
    "CharacterModel.3DS",
    ".desktop",
    "document",
    "document.txt_backup",
    "/etc/pam.d/login"
) | ForEach-Object { extension $_ }

Output:

.gz
.3DS
.desktop


Python[edit]

Uses re.search.

import re
def extractExt(url):
    """Return the filename extension at the very end of ``url``.

    The extension is a period followed solely by one or more ASCII letters
    or digits (A-Z, a-z, 0-9). The returned string includes the period.
    An empty string is returned when ``url`` has no such extension.
    """
    # \Z anchors at the true end of the string. A plain '$' would also match
    # just before a trailing newline, so "name.txt\n" would wrongly report
    # ".txt" even though the extension is not at the very end.
    m = re.search(r'\.[A-Za-z0-9]+\Z', url)
    return m.group(0) if m else ""
 

Racket[edit]

 
#lang racket
 
;; Note that for a real implementation, Racket has a
;; `filename-extension` in its standard library, but don't use it here
;; since it requires a proper name (fails on ""), returns a byte-string,
;; and handles path values so might run into problems with unicode
;; string inputs.
 
;; Returns the extension at the end of string x, including its leading
;; period, or "" when x does not end in a period followed by one or more
;; alphanumeric characters. The pattern's empty alternative guarantees a
;; match at the end of every string, so the capture group is always present.
(define (string-extension x)
  (let ([hit (regexp-match #px"(\\.[[:alnum:]]+|)$" x)])
    (second hit)))
 
;; The six test cases from the task description.
(define examples
  (list "http://example.com/download.tar.gz"
        "CharacterModel.3DS"
        ".desktop"
        "document"
        "document.txt_backup"
        "/etc/pam.d/login"))

;; Print each example, padded to 34 columns, next to its extension.
(for-each
 (lambda (x)
   (printf "~a | ~a\n" (~a x #:width 34) (string-extension x)))
 examples)
 
Output:
http://example.com/download.tar.gz | .gz
CharacterModel.3DS                 | .3DS
.desktop                           | .desktop
document                           | 
document.txt_backup                | 
/etc/pam.d/login                   | 

REXX[edit]

Using this paraphrased Rosetta Code task's definition that:

a legal file extension   only   consists of mixed-case Latin letters and/or decimal digits.

/*REXX pgm extracts the file extension (defined above from the RC task) from a file name*/
/* Input : an optional file name on the command line; without one, the six test cases  */
/*         from the task description are used.                                         */
/* Output: one line per file name showing its extension (with the leading period), or  */
/*         [null] when the name has no valid extension.                                */
@.= /*define default value for the @ array.*/
parse arg fID /*obtain any optional arguments from CL*/
if fID\=='' then @.1 = fID /*use the filename from the C.L. */
else do /*No filename given? Then use defaults.*/
@.1 = 'http://example.com/download.tar.gz'
@.2 = 'CharacterModel.3DS'
@.3 = '.desktop'
@.4 = 'document'
@.5 = 'document.txt_backup'
@.6 = '/etc/pam.d/login'
end
 
/* The loop stops at the first @.j still holding the null default set above; x is      */
/* reset to null at the start of every iteration.                                      */
do j=1 while @.j\==''; x= /*process (all of) the file name(s). */
p=lastpos(., @.j) /*find the last position of a period. */
if p\==0 then x=substr(@.j, p+1) /*Found a dot? Then get stuff after it*/
if \datatype(x, 'A') then x= /*Not upper/lowercase letters | digits?*/
if x=='' then x= " [null]" /*use a better name for a "null" ext.*/
else x= . || x /*prefix the extension with a period. */
say 'file extension=' left(x, 20) "for file name=" @.j
end /*j*/ /*stick a fork in it, we're all done. */

output   when using the default (internal) inputs:

file extension= .gz                  for file name= http://example.com/download.tar.gz
file extension= .3DS                 for file name= CharacterModel.3DS
file extension= .desktop             for file name= .desktop
file extension=  [null]              for file name= document
file extension=  [null]              for file name= document.txt_backup
file extension=  [null]              for file name= /etc/pam.d/login

Ruby[edit]

# Sample paths run through Ruby's built-in File.extname, to show how the
# built-in behaves.
names = [
  "http://example.com/download.tar.gz",
  "CharacterModel.3DS",
  ".desktop",
  "document",
  "/etc/pam.d/login",
]

# Print the inspect form of what File.extname reports for each sample.
names.each do |name|
  p File.extname(name)
end
 

output

".gz"
".3DS"
""
""
""

Apparently, the built-in method does not consider ".desktop" to be a file extension (on Linux).

Scala[edit]

package rosetta
 
/** Helpers for recognising and extracting filename extensions. */
object FileExt {

  /** A period followed solely by ASCII letters/digits at the very end of the input.
    *
    * `\z` (absolute end of input) is used instead of `$`, because `$` also
    * matches just before a final line terminator and would report ".txt"
    * for "a.txt\n".
    */
  private val ext = """\.[A-Za-z0-9]+\z""".r

  /** Tells whether `fileName` ends with a period followed by one of `extensions`,
    * compared case-insensitively.
    *
    * Case folding uses `Locale.ROOT` so the result does not depend on the
    * JVM's default locale (under a Turkish locale "ZIP".toLowerCase is "zıp",
    * which would never match "zip").  The file name is lower-cased once,
    * outside the loop over the extensions.
    *
    * @param fileName   the file name or path to test
    * @param extensions candidate extensions, without the leading period
    * @return true if any candidate matches the end of `fileName`
    */
  def isExt(fileName: String, extensions: List[String]) = {
    val lowerName = fileName.toLowerCase(java.util.Locale.ROOT)
    extensions.exists { e => lowerName.endsWith("." + e.toLowerCase(java.util.Locale.ROOT)) }
  }

  /** Returns the extension of `url` (leading period included), or "" if it has none.
    *
    * @param url path or URL of a file
    */
  def extractExt(url: String) = ext.findFirstIn(url).getOrElse("")

}
 
/** Self-checking demonstration of FileExt: every table entry is asserted and
  * then printed together with the extension that extractExt reports for it.
  */
object FileExtTest extends App {
  // Extensions (without the leading period) that isExt is asked to recognise.
  val testExtensions: List[String] =
    List("zip", "rar", "7z", "gz", "archive", "A##", "tar.bz2")

  // File name -> whether it should end with one of testExtensions.
  val isExtTestFiles: Map[String, Boolean] = Map(
    "MyData.a##" -> true,
    "MyData.tar.Gz" -> true,
    "MyData.gzip" -> false,
    "MyData.7z.backup" -> false,
    "MyData..." -> false,
    "MyData" -> false,
    "MyData_v1.0.tar.bz2" -> true,
    "MyData_v1.0.bz2" -> false
  )

  // Path or URL -> the extension extractExt should return for it.
  val extractExtTestFiles: Map[String, String] = Map(
    "http://example.com/download.tar.gz" -> ".gz",
    "CharacterModel.3DS" -> ".3DS",
    ".desktop" -> ".desktop",
    "document" -> "",
    "document.txt_backup" -> "",
    "/etc/pam.d/login" -> "",
    "/etc/pam.d/login.a" -> ".a",
    "/etc/pam.d/login." -> "",
    "picture.jpg" -> ".jpg",
    "http://mywebsite.com/picture/image.png" -> ".png",
    "myuniquefile.longextension" -> ".longextension",
    "IAmAFileWithoutExtension" -> "",
    "/path/to.my/file" -> "",
    "file.odd_one" -> "",
    // Extra, with unicode
    "café.png" -> ".png",
    "file.resumé" -> "",
    // with unicode combining characters
    "cafe\u0301.png" -> ".png",
    "file.resume\u0301" -> ""
  )

  println("isExt() tests:")
  isExtTestFiles.foreach { case (file, expected) =>
    assert(FileExt.isExt(file, testExtensions) == expected, "Assertion failed for: " + file)
    println("File: " + file + " -> Extension: " + FileExt.extractExt(file))
  }

  println("\nextractExt() tests:")
  extractExtTestFiles.foreach { case (url, expected) =>
    assert(FileExt.extractExt(url) == expected, "Assertion failed for: " + url)
    println("Url: " + url + " -> Extension: " + FileExt.extractExt(url))
  }
}

output

Url: picture.jpg -> Extension: .jpg
Url: document.txt_backup -> Extension: 
Url: .desktop -> Extension: .desktop
Url: CharacterModel.3DS -> Extension: .3DS
Url: file.resumé -> Extension: 
Url: document -> Extension: 
Url: café.png -> Extension: .png
Url: /etc/pam.d/login. -> Extension: 
Url: http://mywebsite.com/picture/image.png -> Extension: .png
Url: IAmAFileWithoutExtension -> Extension: 
Url: /etc/pam.d/login -> Extension: 
Url: /etc/pam.d/login.a -> Extension: .a
Url: file.odd_one -> Extension: 
Url: /path/to.my/file -> Extension: 
Url: myuniquefile.longextension -> Extension: .longextension
Url: café.png -> Extension: .png
Url: file.resumé -> Extension: 
Url: http://example.com/download.tar.gz -> Extension: .gz

sed[edit]

-n -re 's:.*(\.[A-Za-z0-9]+)$:\1:p'

Example of use:

# Print "<name>: <extension>" for each sample name; the extension is empty
# when sed finds no ".<letters/digits>" suffix (-n plus the p flag print the
# line only when the substitution succeeded).
for F in "http://example.com/download.tar.gz" "CharacterModel.3DS" ".desktop" "document" "document.txt_backup" "/etc/pam.d/login"
do
    # "$F" is quoted so names containing spaces or glob characters reach sed
    # unchanged, and printf is used because echo may interpret a name that
    # starts with '-' or contains backslashes.
    EXT=$(printf '%s\n' "$F" | sed -n -re 's:.*(\.[A-Za-z0-9]+)$:\1:p')
    echo "$F: $EXT"
done
 
Output:
http://example.com/download.tar.gz: .gz
CharacterModel.3DS: .3DS
.desktop: .desktop
document: 
document.txt_backup: 
/etc/pam.d/login: 

Sidef[edit]

# Returns the extension of `filename` as a string.
# The pattern looks for a period followed by one or more letters or digits
# (the /i flag makes a-z case-insensitive) anchored at the very end of the
# string by \z.  The match result is converted with to_s; judging by the
# sample output this gives "" when nothing matched.
func extension(filename) {
filename.match(/(\.[a-z0-9]+)\z/i).to_s
}
 
# The six inputs from the task's table of test cases.
var files = [
'http://example.com/download.tar.gz',
'CharacterModel.3DS',
'.desktop',
'document',
'document.txt_backup',
'/etc/pam.d/login',
]
 
# Print each input next to the extension found for it, in two left-aligned
# columns.  NOTE(review): `dump` appears to yield the quoted form seen in
# the sample output -- confirm against the Sidef documentation.
files.each {|f|
printf("%-36s -> %-11s\n", f.dump, extension(f).dump)
}
Output:
"http://example.com/download.tar.gz" -> ".gz"      
"CharacterModel.3DS"                 -> ".3DS"     
".desktop"                           -> ".desktop" 
"document"                           -> ""         
"document.txt_backup"                -> ""         
"/etc/pam.d/login"                   -> ""         

Tcl[edit]

Tcl's built-in file extension command almost does this already, except that it accepts any character after the dot. Just for fun, we'll enhance the built-in with a new subcommand that applies the limitation specified for this problem.

# assert expr --
#   For "static" assertions that throw nice errors.  Evaluates $expr in the
#   caller's scope; when it is false, throws an {ASSERT ERROR} whose message
#   holds the expression text and, if it can be produced, the same text
#   with the caller's variables substituted (commands are not substituted).
proc assert {expr} {
    if {[uplevel 1 [list expr $expr]]} {
        return
    }
    set report "{$expr}"
    # Best effort only: a failing substitution must not hide the assertion.
    catch {append report " {[uplevel 1 [list subst -noc $expr]]}"}
    tailcall throw {ASSERT ERROR} $report
}
 
# file_ext file --
#   Returns the trailing ".<letters/digits>" of $file, or the empty string
#   when $file does not end in such an extension.
proc file_ext {file} {
    # -inline yields the list of matches (empty when nothing matched);
    # element 0 is the whole match, and lindex on an empty list gives "".
    return [lindex [regexp -inline -nocase {\.[a-z0-9]+$} $file] 0]
}
 
# Add "ext" as a subcommand of the built-in [file] ensemble: fetch the
# current subcommand map, point "ext" at ::file_ext, and install the map again.
set map [namespace ensemble configure file -map]
dict set map ext ::file_ext
namespace ensemble configure file -map $map
 
# and a test:
# Each pair below is an input followed by the extension expected for it
# ("" denotes the empty string).
foreach {file ext} {
http://example.com/download.tar.gz .gz
CharacterModel.3DS .3DS
.desktop .desktop
document ""
document.txt_backup ""
/etc/pam.d/login ""
} {
set res ""
# assert evaluates the expression in this scope, so it sees $file and $ext.
assert {[file ext $file] eq $ext}
}

TUSCRIPT[edit]

 
$$ MODE DATA
$$ testcases=*
http://example.com/download.tar.gz
CharacterModel.3DS
.desktop
document
document.txt_backup
/etc/pam.d/login
picture.jpg
http://mywebsite.com/picture/image.png
myuniquefile.longextension
IamAFileWithoutExtension
path/to.my/file
file.odd_one
thisismine
 
$$ MODE TUSCRIPT,{}
 
- Character group A0, built from the two DATA lines that follow.
- NOTE(review): {&a} and {\0} are presumably the letter and digit classes -- confirm against the TUSCRIPT manual.
BUILD C_GROUP A0 = *
DATA {&a}
DATA {\0}
 
- Search table used to pick out the extension.
- NOTE(review): the pattern appears to mean a period, then one or more A0 characters, then end of string -- confirm.
BUILD S_TABLE legaltokens=*
DATA :.{1-00}{C:A0}{]}:
 
- For every test case, extract the part matching legaltokens; cases that yield
- an empty result are skipped, so only names with an extension are printed.
LOOP testcase=testcases
extension=STRINGS (testcase,legaltokens,0,0)
IF (extension=="") CYCLE
PRINT testcase, " has extension ", extension
ENDLOOP
 

Output:

http://example.com/download.tar.gz has extension .gz
CharacterModel.3DS has extension .3DS
.desktop has extension .desktop
picture.jpg has extension .jpg
http://mywebsite.com/picture/image.png has extension .png
myuniquefile.longextension has extension .longextension

VBScript[edit]

' Returns the extension of fname with its leading period, or "" when the text
' after the last period (as reported by FileSystemObject.GetExtensionName) is
' empty or contains anything other than letters and digits.
Function fileExt(fname)
    Dim fso, alnumOnly, candidate

    Set fso = CreateObject("Scripting.FileSystemObject")
    Set alnumOnly = New RegExp
    alnumOnly.Pattern = "^[A-Za-z0-9]+$" 'Only alphanumeric characters are allowed

    candidate = fso.GetExtensionName(fname)
    If alnumOnly.Test(candidate) Then
        fileExt = "." & candidate
    Else
        fileExt = ""
    End If
End Function
 
'Real Start of Program
' The six inputs from the task's table of test cases.
Dim samples, sample
samples = Array( _
    "http://example.com/download.tar.gz", _
    "CharacterModel.3DS", _
    ".desktop", _
    "document", _
    "document.txt_backup", _
    "/etc/pam.d/login")

' Show each name followed by its extension in angle brackets, so that an
' empty result is still visible.
For Each sample In samples
    WScript.Echo "NAME:", sample
    WScript.Echo " EXT:", "<" & fileExt(sample) & ">"
Next
Output:
NAME: http://example.com/download.tar.gz
 EXT: <.gz>
NAME: CharacterModel.3DS
 EXT: <.3DS>
NAME: .desktop
 EXT: <.desktop>
NAME: document
 EXT: <>
NAME: document.txt_backup
 EXT: <>
NAME: /etc/pam.d/login
 EXT: <>

zkl[edit]

The File object has a method splitFileName that does just that, returning a list of the parts. The method knows about the OS it was compiled on (Unix, Windows).

// Returns the extension of name (leading period included) as reported by
// File.splitFileName, or "" when that extension contains any character other
// than a period, an ASCII letter or a digit.
fcn extractFileExtension(name){
   // Every character an extension may contain.  The digit range is closed
   // with "]" like the two letter ranges (it was previously written with a
   // mismatched ")"), so that all of "0".."9" are accepted.
   var [const] valid=Walker.chain(".",["a".."z"],["A".."Z"],["0".."9"]).pump(String);
   ext:=File.splitFileName(name)[-1];   // the last part of the split is the extension
   // Whatever is left after removing the valid characters is, by construction,
   // invalid; a non-empty remainder means this is not an extension.
   if(ext - valid) ext="";
   ext
}
// Run the task's six test inputs through extractFileExtension and print each
// name (right-aligned in a 35-character column) next to its extension.
foreach nm in (T("http://example.com/download.tar.gz","CharacterModel.3DS",
".desktop","document",
"document.txt_backup","/etc/pam.d/login")){
println("%35s : %s".fmt(nm,extractFileExtension(nm)));
}
Output:

Note: on Unix, .desktop is a hidden file, not an extension.

 http://example.com/download.tar.gz : .gz
                 CharacterModel.3DS : .3DS
                           .desktop : 
                           document : 
                document.txt_backup : 
                   /etc/pam.d/login :