Search a list: Difference between revisions

From Rosetta Code
Content added Content deleted
(C++)
Line 114: Line 114:
Washington is not in haystack
Washington is not in haystack
5 Bush
5 Bush
</pre>

=={{header|C++}}==
{{works with|g++|4.1.2 20061115 (prerelease) (Debian 4.1.1-21)}}

The following code shows three different ways to solve the task.

<cpp>
#include <string>
#include <algorithm>
#include <iterator>
#include <cstddef>
#include <exception>
#include <iostream>

// an exception to throw (actually, throwing an exception in this case is generally considered bad style, but it's part of the task)
class not_found: public std::exception
{
public:
not_found(std::string const& s): text(s + " not found") {}
char const* what() const throw() { return text.c_str(); }
~not_found() throw() {}
private:
std::string text;
};

// needle search function, C-style interface version using standard library
std::size_t get_index(std::string* haystack, int haystack_size, std::string needle)
{
std::size_t index = std::find(haystack, haystack+haystack_size, needle) - haystack;
if (index == haystack_size)
throw not_found(needle);
else
return index;
}

// needle search function, completely generic style, needs forward iterators
// (works with any container, but inefficient if not random-access-iterator)
template<typename FwdIter>
typename std::iterator_traits<FwdIter>::difference_type fwd_get_index(FwdIter first, FwdIter last, std::string needle)
{
FwdIter elem = std::find(first, last, needle);
if (elem == last)
throw not_found(needle);
else
return std::distance(first, elem);
}

// needle search function, implemented directly, needs only input iterator, works efficiently with all sequences
template<typename InIter>
typename std::iterator_traits<InIter>::difference_type generic_get_index(InIter first, InIter last, std::string needle)
{
typename std::iterator_traits<InIter>::difference_type index = 0;
while (first != last && *first != needle)
{
++index;
++first;
}
if (first == last)
throw not_found(needle);
else
return index;
}

// ----------------------------------------------------------------------------------------------------------------------------------

// a sample haystack (content copied from Haskell example)
std::string haystack[] = { "Zig", "Zag", "Wally", "Ronald", "Bush", "Krusty", "Charlie", "Bush", "Bozo" };

// some useful helper functions
template<typename T, std::size_t sz> T* begin(T (&array)[sz]) { return array; }
template<typename T, std::size_t sz> T* end(T (&array)[sz]) { return array + sz; }
template<typename T, std::size_t sz> std::size_t size(T (&array)[sz]) { return sz; }

// test function searching a given needle with each of the methods
void test(std::string const& needle)
{
std::cout << "-- C style interface --\n";
try
{
std::size_t index = get_index(haystack, size(haystack), needle);
std::cout << needle << " found at index " << index << "\n";
}
catch(std::exception& exc) // better catch standard exceptions as well; me might e.g. run out of memory
{
std::cout << exc.what() << "\n";
}

std::cout << "-- generic interface, first version --\n";
try
{
std::size_t index = fwd_get_index(begin(haystack), end(haystack), needle);
std::cout << needle << " found at index " << index << "\n";
}
catch(std::exception& exc) // better catch standard exceptions as well; me might e.g. run out of memory
{
std::cout << exc.what() << "\n";
}

std::cout << "-- generic interface, second version --\n";
try
{
std::size_t index = generic_get_index(begin(haystack), end(haystack), needle);
std::cout << needle << " found at index " << index << "\n";
}
catch(std::exception& exc) // better catch standard exceptions as well; me might e.g. run out of memory
{
std::cout << exc.what() << "\n";
}
}

int main()
{
std::cout << "\n=== Word which only occurs once ===\n";
test("Wally");
std::cout << "\n=== Word occuring multiple times ===\n";
test("Bush");
std::cout << "\n=== Word not occuring at all ===\n";
test("Goofy");
}
</cpp>

Output (note that in C++, indices start at 0):
<pre>

=== Word which only occurs once ===
-- C style interface --
Wally found at index 2
-- generic interface, first version --
Wally found at index 2
-- generic interface, second version --
Wally found at index 2

=== Word occuring multiple times ===
-- C style interface --
Bush found at index 4
-- generic interface, first version --
Bush found at index 4
-- generic interface, second version --
Bush found at index 4

=== Word not occuring at all ===
-- C style interface --
Goofy not found
-- generic interface, first version --
Goofy not found
-- generic interface, second version --
Goofy not found
</pre>
</pre>


Line 134: Line 282:


*Main> map (ap (,) (flip findIndex haystack . (==))) needles
*Main> map (ap (,) (flip findIndex haystack . (==))) needles
[("Washington",Nothing),("Bush",Just 4)]
[("Washington",Nothing),("Bush",Just 4)]


=={{header|J}}==
=={{header|J}}==

Revision as of 16:20, 2 October 2008



Task
Search a list
You are encouraged to solve this task according to the task description, using any language you may know.

Find the index of a string (needle) in an array of strings (haystack), or else raise an exception if the needle is missing. If there is more then one occurrence then return smallest i such that haystack[i] = needle.

Ada

<ada> with Ada.Strings.Unbounded; use Ada.Strings.Unbounded; with Ada.Text_IO; use Ada.Text_IO;

procedure Test_List_Index is

  Not_In : exception;
  
  type List is array (Positive range <>) of Unbounded_String;
  
  function Index (Haystack : List; Needle : String) return Positive is
  begin
     for Index in Haystack'Range loop
        if Haystack (Index) = Needle then
           return Index;
        end if;
     end loop;
     raise Not_In;
  end Index;
     -- Functions to create lists
  function "+" (X, Y : String) return List is
  begin
     return (1 => To_Unbounded_String (X), 2 => To_Unbounded_String (Y));
  end "+";
  
  function "+" (X : List; Y : String) return List is
  begin
     return X & (1 => To_Unbounded_String (Y));
  end "+";
  
  Haystack : List := "Zig"+"Zag"+"Wally"+"Ronald"+"Bush"+"Krusty"+"Charlie"+"Bush"+"Bozo";
  procedure Check (Needle : String) is
  begin
     Put (Needle);
     Put_Line ("at" & Positive'Image (Index (Haystack, Needle)));
  exception
     when Not_In => Put_Line (" is not in");
  end Check;

begin

  Check ("Washington");
  Check ("Bush");

end Test_List_Index; </ada> Sample output:

Washington is not in
Bushat 5

ALGOL 68

Using a FORMAT "value error" exception

FORMAT hay stack := $c("Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo")$;

FILE needle exception; STRING ref needle;
associate(needle exception, ref needle);

PROC index = (FORMAT haystack, REF STRING needle)INT:(
  INT out;
  ref needle := needle;
  getf(needle exception,(haystack, out));
  out
);

[]STRING needles = ("Washington","Bush");
FOR i TO UPB needles DO
  STRING needle := needles[i];
  on value error(needle exception, (REF FILE f)BOOL: value error);
    printf(($d" "gl$,index(hay stack, needle), needle));
    end on value error;
  value error:
    printf(($g" "gl$,needle, "is not in haystack"));
  end on value error: reset(needle exception)
OD

Output:

Washington is not in haystack
5 Bush

Using a manual FOR loop with no exception

[]STRING hay stack = ("Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo");

PROC index = ([]STRING hay stack, STRING needle)INT:(
  INT index;
  FOR i FROM LWB hay stack TO UPB hay stack DO
    index := i;
    IF hay stack[index] = needle THEN
      found
    FI
  OD;
  else:
    LWB hay stack - 1
  EXIT
  found:
    index
);

[]STRING needles = ("Washington","Bush");
FOR i TO UPB needles DO
  STRING needle := needles[i];
  INT result = index(hay stack, needle);
  IF result >= LWB hay stack THEN
    printf(($d" "gl$, result, needle))
  ELSE
    printf(($g" "gl$,needle, "is not in haystack"))
  FI
OD

Output:

Washington is not in haystack
5 Bush

C++

Works with: g++ version 4.1.2 20061115 (prerelease) (Debian 4.1.1-21)

The following code shows three different ways to solve the task.

<cpp>

  1. include <string>
  2. include <algorithm>
  3. include <iterator>
  4. include <cstddef>
  5. include <exception>
  6. include <iostream>

// an exception to throw (actually, throwing an exception in this case is generally considered bad style, but it's part of the task) class not_found: public std::exception { public:

 not_found(std::string const& s): text(s + " not found") {}
 char const* what() const throw() { return text.c_str(); }
 ~not_found() throw() {}

private:

 std::string text;

};

// needle search function, C-style interface version using standard library std::size_t get_index(std::string* haystack, int haystack_size, std::string needle) {

 std::size_t index = std::find(haystack, haystack+haystack_size, needle) - haystack;
 if (index == haystack_size)
   throw not_found(needle);
 else
   return index;

}

// needle search function, completely generic style, needs forward iterators // (works with any container, but inefficient if not random-access-iterator) template<typename FwdIter>

typename std::iterator_traits<FwdIter>::difference_type fwd_get_index(FwdIter first, FwdIter last, std::string needle)

{

 FwdIter elem = std::find(first, last, needle);
 if (elem == last)
   throw not_found(needle);
 else
   return std::distance(first, elem);

}

// needle search function, implemented directly, needs only input iterator, works efficiently with all sequences template<typename InIter>

typename std::iterator_traits<InIter>::difference_type generic_get_index(InIter first, InIter last, std::string needle)

{

 typename std::iterator_traits<InIter>::difference_type index = 0;
 while (first != last && *first != needle)
 {
   ++index;
   ++first;
 }
 if (first == last)
   throw not_found(needle);
 else
   return index;

}

// ----------------------------------------------------------------------------------------------------------------------------------

// a sample haystack (content copied from Haskell example) std::string haystack[] = { "Zig", "Zag", "Wally", "Ronald", "Bush", "Krusty", "Charlie", "Bush", "Bozo" };

// some useful helper functions template<typename T, std::size_t sz> T* begin(T (&array)[sz]) { return array; } template<typename T, std::size_t sz> T* end(T (&array)[sz]) { return array + sz; } template<typename T, std::size_t sz> std::size_t size(T (&array)[sz]) { return sz; }

// test function searching a given needle with each of the methods void test(std::string const& needle) {

 std::cout << "-- C style interface --\n";
 try
 {
   std::size_t index = get_index(haystack, size(haystack), needle);
   std::cout << needle << " found at index " << index << "\n";
 }
 catch(std::exception& exc) // better catch standard exceptions as well; me might e.g. run out of memory
 {
   std::cout << exc.what() << "\n";
 }
 std::cout << "-- generic interface, first version --\n";
 try
 {
   std::size_t index = fwd_get_index(begin(haystack), end(haystack), needle);
   std::cout << needle << " found at index " << index << "\n";
 }
 catch(std::exception& exc) // better catch standard exceptions as well; me might e.g. run out of memory
 {
   std::cout << exc.what() << "\n";
 }
 std::cout << "-- generic interface, second version --\n";
 try
 {
   std::size_t index = generic_get_index(begin(haystack), end(haystack), needle);
   std::cout << needle << " found at index " << index << "\n";
 }
 catch(std::exception& exc) // better catch standard exceptions as well; me might e.g. run out of memory
 {
   std::cout << exc.what() << "\n";
 }

}

int main() {

 std::cout << "\n=== Word which only occurs once ===\n";
 test("Wally");
 std::cout << "\n=== Word occuring multiple times ===\n";
 test("Bush");
 std::cout << "\n=== Word not occuring at all ===\n";
 test("Goofy");

} </cpp>

Output (note that in C++, indices start at 0):


=== Word which only occurs once ===
-- C style interface --
Wally found at index 2
-- generic interface, first version --
Wally found at index 2
-- generic interface, second version --
Wally found at index 2

=== Word occuring multiple times ===
-- C style interface --
Bush found at index 4
-- generic interface, first version --
Bush found at index 4
-- generic interface, second version --
Bush found at index 4

=== Word not occuring at all ===
-- C style interface --
Goofy not found
-- generic interface, first version --
Goofy not found
-- generic interface, second version --
Goofy not found

Haskell

Libraries and data:

import Data.List

haystack=["Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo"]
needles = ["Washington","Bush"]

I use 'lambda' notation for readability.

Find 'just' an index:
*Main> map (\x -> (x,findIndex (==x) haystack)) needles
[("Washington",Nothing),("Bush",Just 4)]

Want to know if there are there more Bushes hiding in the haystack?

*Main> map (\x -> (x,findIndices (==x) haystack)) needles
[("Washington",[]),("Bush",[4,7])]

To be complete. Here is the 'point free' version of the task:

import Control.Monad
import Control.Arrow
*Main> map (ap (,) (flip findIndex haystack . (==))) needles
[("Washington",Nothing),("Bush",Just 4)]

J

J has a general and optimized lookup function, i.. For example:

   H =: ;:'Zig Zag Wally Ronald Bush Krusty Charlie Bush Bozo'   NB.  Haystack
   N =: ;:'Washington Bush'                                      NB.  Needles
   
   H i. N 
9 3

Note that the arguments to i. can be anything (ie either or both may be scalars, lists, multidimensional arrays, etc).

To format output similar to the other examples, one might write:

   H ;:^:_1@(](>@{:@]|."_1(,.>@{.))i.({;(~:_1+#))1|.'is not in haystack';":&.>@i.@#@[) N
Washington is not in haystack
4 Bush                       

To elaborate a bit: Array-oriented languages (like J) consume the input and produce the output in toto.

That is, all the results are produced simultaneously; consequently, throwing an exception for any part of the input would prohibit producing any output at all.

And while it is both possible and simple to treat the input item by item, this is significantly slower and loses the great advantage of array processing.

Therefore these languages generally produce a special, but conforming, output for "bad" inputs (in this case, an index past the end of the list). Then the functions which consume these outputs may be left untouched (as the special outputs are already in their domain) or may be extended simply.

In this case, there is only one function which formats and prints the results, and its treatment of "good" and "bad" outputs is identical (it cannot distinguish the two). It is simply that the outputs of previous functions have been arranged such that the results are conformable.

Java

for Lists, they have an indexOf() method: <java>import java.util.List; import java.util.Arrays;

List<String> haystack = Arrays.asList("Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo");

for (String needle : new String[]{"Washington","Bush"}) {

   int index = haystack.indexOf(needle);
   if (index < 0)
       System.out.println(needle + " is not in haystack");
   else
       System.out.println(index + " " + needle);

}</java>

for arrays, you have to do it manually: <java>String[] haystack = {"Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo"};

OUTERLOOP: for (String needle : new String[]{"Washington","Bush"}) {

   for (int i = 0; i < haystack.length; i++)
       if (needle.equals(haystack[i])) {
           System.out.println(i + " " + needle);
           continue OUTERLOOP;
       }
   System.out.println(needle + " is not in haystack");

}</java>

Output:

Washington is not in haystack
4 Bush

MAXScript

haystack=#("Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo")

for needle in #("Washington","Bush") do
(
    index = findItem haystack needle
    
    if index == 0 then
    (
        format "% is not in haystack\n" needle
    )
    else
    (
        format "% %\n" index needle
    )
)

Output:

Washington is not in haystack
5 Bush

Python

<python> haystack=["Zig","Zag","Wally","Ronald","Bush","Krusty","Charlie","Bush","Bozo"]

for needle in ("Washington","Bush"):

 try:
   print haystack.index(needle), needle
 except ValueError, value_error:
   print needle,"is not in haystack"

</python> Output:

Washington is not in haystack
4 Bush