Rosetta Code/Find bare lang tags: Difference between revisions

Content added Content deleted
(Added Haskell solution)
(→‎{{header|Haskell}}: Cleaned up code significantly and added Media Wiki bonus)
Line 100: Line 100:
There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly.
There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly.


This solution can be compiled into a program that will either take space-delimited list of files as its argument, or take input from STDIN if no arguments are provided. The Media Wiki API bonus is not attempted.
This solution can be compiled into a program that will either take space-delimited list of files as its argument, or take input from STDIN if no arguments are provided. Additionally, if you specify the -w flag in the first argument, it will take a list of Rosetta Code wiki pages and search them. Note that the page names must be as they appear in your URL bar -- underscores in place of spaces.


<lang Haskell>import System.Environment
<lang Haskell>import System.Environment
import Network.HTTP
import Text.Printf
import Text.Printf
import Text.Regex.TDFA
import Text.Regex.TDFA
Line 109: Line 110:
import qualified Data.Map as Map
import qualified Data.Map as Map


{-| Takes a string and cuts out the text matched in the MatchText array. -}
splitByMatches :: String -> [MatchText String] -> [String]
splitByMatches :: String -> [MatchText String] -> [String]
splitByMatches str matches = foldr (\match acc ->
splitByMatches str matches = foldr splitHead [str] matches
where splitHead match acc = before:after:(tail acc)
let before = take (matchOffset).head $ acc
after = drop (matchOffset + matchLen).head $ acc
where before = take (matchOffset).head$ acc
matchOffset = fst.snd.(!0) $ match
after = drop (matchOffset + matchLen).head$ acc
matchLen = snd.snd.(!0) $ match
matchOffset = fst.snd.(!0)$ match
matchLen = snd.snd.(!0)$ match
in before:after:(tail acc)
) [str] matches

{-| Takes a string and splits it into the different languages used. All text
before the language headers is put into the key "" -}
splitByLanguage :: String -> Map.Map String String
splitByLanguage str = Map.fromList.zip langs $ splitByMatches str allMatches
where langs = "":(map (fst.(!1)) allMatches)
allMatches = matchAllText (makeRegex headerRegex :: Regex) str
headerRegex = "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="


{-| Takes a string and counts the number of time a valid, but bare, lang tag
{-| Takes a string and counts the number of time a valid, but bare, lang tag
appears. It does not attempt to ignore valid tags inside lang blocks. -}
appears. It does not attempt to ignore valid tags inside lang blocks. -}
countBareLangTags :: String -> Int
countBareLangTags :: String -> Int
countBareLangTags = matchCount (makeRegex "<lang[[:space:]]*>" :: Regex)
countBareLangTags = matchCount (makeRegex "<lang[[:space:]]*>" :: Regex)

{-| Takes a string and counts the number of bare lang tags per section of the
text. All tags before the first section are put into the key "". -}
countByLanguage :: String -> Map.Map String Int
countByLanguage str = Map.fromList.filter ((>0).snd)$ zip langs counts
where counts = map countBareLangTags.splitByMatches str$ allMatches
langs = "":(map (fst.(!1)) allMatches)
allMatches = matchAllText (makeRegex headerRegex :: Regex) str
headerRegex = "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="


main = do
main = do
Line 142: Line 144:
content <- readFile (head args)
content <- readFile (head args)
return ([content],[""])
return ([content],[""])
else if (args !! 0) == "-w" then do
-- If there's more than one argument and the first one is the -w option,
-- use the rest of the arguments as page titles and load them from the wiki.
contents <- mapM getPageContent.tail$ args
return (contents, if length args > 2 then tail args else [""])
else do
else do
-- Otherwise, read all the files and display their file names.
-- Otherwise, read all the files and display their file names.
contents <- mapM readFile args
contents <- mapM readFile args
return (contents, args)
return (contents, args)
let bareTagMaps = map (Map.map countBareLangTags.splitByLanguage) $ contents
let tagsPerLang = map countByLanguage contents
let tagsWithFiles = zipWith (\tags file -> Map.map (addFile file) tags) bareTagMaps files
let tagsWithFiles = zipWith addFileToTags files tagsPerLang
let allBareTags = foldl combineMaps Map.empty tagsWithFiles
let combinedFiles = Map.unionsWith combine tagsWithFiles
printBareTags allBareTags
printBareTags combinedFiles
where addFile file count = (count, if count>0 && file/="" then [file] else [])
where addFileToTags file = Map.map (flip (,) [file])
combineMaps = Map.foldrWithKey insertItem
combine cur next = (fst cur + fst next, snd cur ++ snd next)
insertItem = Map.insertWith (\(newC,newF) (oldC,oldF) -> (oldC+newC,oldF++newF))
printBareTags :: Map.Map String (Int,[String]) -> IO ()
printBareTags :: Map.Map String (Int,[String]) -> IO ()
Line 158: Line 164:
let numBare = Map.foldr ((+).fst) 0 tags
let numBare = Map.foldr ((+).fst) 0 tags
printf "%d bare language tags:\n\n" numBare
printf "%d bare language tags:\n\n" numBare
flip mapM_ (Map.toAscList tags) (\(lang,(count,files)) ->
mapM_ (\(lang,(count,files)) ->
if count <= 0 then return () else printf "%d in %s%s\n" count (
printf "%d in %s%s\n" count
if lang == "" then "no language" else lang) (filesString files))
(if lang == "" then "no language" else lang)
(filesString files)
) (Map.toAscList tags)


filesString :: [String] -> String
filesString :: [String] -> String
filesString [] = ""
filesString [] = ""
filesString ("":rest) = filesString rest
filesString files = " ("++listString files++")"
filesString files = " ("++listString files++")"
where listString [file] = "[["++file++"]]"
where listString [file] = "[["++file++"]]"
listString (file:files) = "[["++file++"]], "++listString files</lang>
listString (file:files) = "[["++file++"]], "++listString files

getPageContent :: String -> IO String
getPageContent title = do
response <- simpleHTTP.getRequest$ url
getResponseBody response
where url = "http://rosettacode.org/mw/index.php?action=raw&title="++title</lang>


Here are the input files I used to test:
Here are the input files I used to test:
Line 211: Line 226:
2 in Perl ([[example1.wiki]], [[example2.wiki]])
2 in Perl ([[example1.wiki]], [[example2.wiki]])
</nowiki></pre>
</nowiki></pre>

Additionally, I tested with [[100_doors]] and [[Huffman_coding]]. The following resulted:
<pre>
5 bare language tags:

1 in no language ([[100_doors]])
1 in C ([[Huffman_coding]])
1 in CoffeeScript ([[Huffman_coding]])
1 in Perl ([[Huffman_coding]])
1 in PostScript ([[100_doors]])
</pre>


=={{header|Perl}}==
=={{header|Perl}}==