Rosetta Code/Find bare lang tags: Difference between revisions
Content added Content deleted
(Added Haskell solution) |
(→{{header|Haskell}}: Cleaned up code significantly and added Media Wiki bonus) |
||
Line 100: | Line 100: | ||
There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly. |
There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly. |
||
This solution can be compiled into a program that will either take space-delimited list of files as its argument, or take input from STDIN if no arguments are provided. |
This solution can be compiled into a program that will either take space-delimited list of files as its argument, or take input from STDIN if no arguments are provided. Additionally, if you specify the -w flag in the first argument, it will take a list of Rosetta Code wiki pages and search them. Note that the page names must be as they appear in your URL bar -- underscores in place of spaces. |
||
<lang Haskell>import System.Environment |
<lang Haskell>import System.Environment |
||
import Network.HTTP |
|||
import Text.Printf |
import Text.Printf |
||
import Text.Regex.TDFA |
import Text.Regex.TDFA |
||
Line 109: | Line 110: | ||
import qualified Data.Map as Map |
import qualified Data.Map as Map |
||
{-| Takes a string and cuts out the text matched in the MatchText array. -} |
|||
splitByMatches :: String -> [MatchText String] -> [String] |
splitByMatches :: String -> [MatchText String] -> [String] |
||
splitByMatches str matches = foldr |
splitByMatches str matches = foldr splitHead [str] matches |
||
⚫ | |||
let before = take (matchOffset).head $ acc |
|||
where before = take (matchOffset).head$ acc |
|||
after = drop (matchOffset + matchLen).head$ acc |
|||
matchOffset = fst.snd.(!0)$ match |
|||
matchLen = snd.snd.(!0)$ match |
|||
⚫ | |||
) [str] matches |
|||
{-| Takes a string and splits it into the different languages used. All text |
|||
⚫ | |||
⚫ | |||
splitByLanguage str = Map.fromList.zip langs $ splitByMatches str allMatches |
|||
⚫ | |||
⚫ | |||
⚫ | |||
{-| Takes a string and counts the number of time a valid, but bare, lang tag |
{-| Takes a string and counts the number of time a valid, but bare, lang tag |
||
appears. It does not attempt to ignore valid tags inside lang blocks. -} |
appears. It does not attempt to ignore valid tags inside lang blocks. -} |
||
countBareLangTags :: String -> Int |
countBareLangTags :: String -> Int |
||
countBareLangTags = matchCount (makeRegex "<lang[[:space:]]*>" :: Regex) |
countBareLangTags = matchCount (makeRegex "<lang[[:space:]]*>" :: Regex) |
||
{-| Takes a string and counts the number of bare lang tags per section of the |
|||
⚫ | |||
⚫ | |||
countByLanguage str = Map.fromList.filter ((>0).snd)$ zip langs counts |
|||
where counts = map countBareLangTags.splitByMatches str$ allMatches |
|||
⚫ | |||
⚫ | |||
⚫ | |||
main = do |
main = do |
||
Line 142: | Line 144: | ||
content <- readFile (head args) |
content <- readFile (head args) |
||
return ([content],[""]) |
return ([content],[""]) |
||
else if (args !! 0) == "-w" then do |
|||
-- If there's more than one argument and the first one is the -w option, |
|||
-- use the rest of the arguments as page titles and load them from the wiki. |
|||
contents <- mapM getPageContent.tail$ args |
|||
return (contents, if length args > 2 then tail args else [""]) |
|||
else do |
else do |
||
-- Otherwise, read all the files and display their file names. |
-- Otherwise, read all the files and display their file names. |
||
contents <- mapM readFile args |
contents <- mapM readFile args |
||
return (contents, args) |
return (contents, args) |
||
let |
let tagsPerLang = map countByLanguage contents |
||
let tagsWithFiles = zipWith |
let tagsWithFiles = zipWith addFileToTags files tagsPerLang |
||
let |
let combinedFiles = Map.unionsWith combine tagsWithFiles |
||
printBareTags |
printBareTags combinedFiles |
||
where |
where addFileToTags file = Map.map (flip (,) [file]) |
||
combine cur next = (fst cur + fst next, snd cur ++ snd next) |
|||
insertItem = Map.insertWith (\(newC,newF) (oldC,oldF) -> (oldC+newC,oldF++newF)) |
|||
printBareTags :: Map.Map String (Int,[String]) -> IO () |
printBareTags :: Map.Map String (Int,[String]) -> IO () |
||
Line 158: | Line 164: | ||
let numBare = Map.foldr ((+).fst) 0 tags |
let numBare = Map.foldr ((+).fst) 0 tags |
||
printf "%d bare language tags:\n\n" numBare |
printf "%d bare language tags:\n\n" numBare |
||
mapM_ (\(lang,(count,files)) -> |
|||
printf "%d in %s%s\n" count |
|||
if lang == "" then "no language" else lang |
(if lang == "" then "no language" else lang) |
||
(filesString files) |
|||
) (Map.toAscList tags) |
|||
filesString :: [String] -> String |
filesString :: [String] -> String |
||
filesString [] = "" |
filesString [] = "" |
||
filesString ("":rest) = filesString rest |
|||
filesString files = " ("++listString files++")" |
filesString files = " ("++listString files++")" |
||
where listString [file] = "[["++file++"]]" |
where listString [file] = "[["++file++"]]" |
||
listString (file:files) = "[["++file++"]], "++listString files |
listString (file:files) = "[["++file++"]], "++listString files |
||
getPageContent :: String -> IO String |
|||
getPageContent title = do |
|||
response <- simpleHTTP.getRequest$ url |
|||
getResponseBody response |
|||
where url = "http://rosettacode.org/mw/index.php?action=raw&title="++title</lang> |
|||
Here are the input files I used to test: |
Here are the input files I used to test: |
||
Line 211: | Line 226: | ||
2 in Perl ([[example1.wiki]], [[example2.wiki]]) |
2 in Perl ([[example1.wiki]], [[example2.wiki]]) |
||
</nowiki></pre> |
</nowiki></pre> |
||
Additionally, I tested with [[100_doors]] and [[Huffman_coding]]. The following resulted: |
|||
<pre> |
|||
5 bare language tags: |
|||
1 in no language ([[100_doors]]) |
|||
1 in C ([[Huffman_coding]]) |
|||
1 in CoffeeScript ([[Huffman_coding]]) |
|||
1 in Perl ([[Huffman_coding]]) |
|||
1 in PostScript ([[100_doors]]) |
|||
</pre> |
|||
=={{header|Perl}}== |
=={{header|Perl}}== |