Rosetta Code/Find bare lang tags: Difference between revisions

→‎{{header|Haskell}}: Cleaned up code significantly and added Media Wiki bonus
(Added Haskell solution)
(→‎{{header|Haskell}}: Cleaned up code significantly and added Media Wiki bonus)
Line 100:
There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly.
 
This solution can be compiled into a program that will either take a space-delimited list of files as its arguments, or take input from STDIN if no arguments are provided. Additionally, if you specify the -w flag in the first argument, it will take a list of Rosetta Code wiki pages and search them. Note that the page names must be as they appear in your URL bar -- underscores in place of spaces.
 
<lang Haskell>import System.Environment
import Network.HTTP
import Text.Printf
import Text.Regex.TDFA
Line 109 ⟶ 110:
import qualified Data.Map as Map
 
{-| Takes a string and cuts out the text matched in the MatchText array,
    returning the pieces of text that surround the matches.
    Every match is read at index 0 as (text, (offset, length)). The matches
    are folded from the right, and each one splits the piece at the head of
    the accumulator, so n matches yield n+1 pieces.
    NOTE(review): this assumes the matches are in ascending, non-overlapping
    order with offsets into the original string -- confirm against the regex
    engine in use. -}
splitByMatches :: String -> [MatchText String] -> [String]
splitByMatches str matches  =  foldr splitHead [str] matches
    where -- Split the head piece around one match; the pieces already
          -- produced by later matches stay untouched behind it.
          splitHead match (current:rest)  =  before:after:rest
              where before  =  take matchOffset current
                    after   =  drop (matchOffset + matchLen) current
                    (matchOffset, matchLen)  =  snd (match ! 0)
          -- Not reachable: the fold starts from [str] and only ever grows.
          splitHead _ []  =  []
 
{-| Breaks a string into its language sections, keyed by the language named
    in each section header. The text in front of the first header is stored
    under the key "". -}
splitByLanguage :: String -> Map.Map String String
splitByLanguage str  =  Map.fromList (zip sectionNames sections)
    where headers        =  matchAllText (makeRegex headerPattern :: Regex) str
          -- Group 1 of every header match is the language name.
          sectionNames   =  "" : [fst (header ! 1) | header <- headers]
          sections       =  splitByMatches str headers
          headerPattern  =  "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="
 
{-| Counts how often a valid, but bare, lang tag (one that names no language)
    occurs in a string. Tags that sit inside a lang block are not filtered
    out; they are counted like any other. -}
countBareLangTags :: String -> Int
countBareLangTags text  =  matchCount bareLangTag text
    where bareLangTag  =  makeRegex "<lang[[:space:]]*>" :: Regex
 
{-| Takes a string and counts the number of bare lang tags per section of the
    text. All tags before the first section are put into the key "".
    Sections without any bare tag are left out of the result. When the same
    language header occurs more than once, the counts of its sections are
    added together instead of the last section replacing the earlier ones. -}
countByLanguage :: String -> Map.Map String Int
countByLanguage str  =  Map.fromListWith (+) . filter ((>0) . snd) $ zip langs counts
    where -- One count per piece of text between (and around) the headers.
          counts  =  map countBareLangTags . splitByMatches str $ allMatches
          -- Group 1 of every header match is the language name; the leading
          -- "" labels the text in front of the first header.
          langs  =  "" : map (fst . (!1)) allMatches
          allMatches  =  matchAllText (makeRegex headerRegex :: Regex) str
          headerRegex  =  "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="
 
main = do
Line 142 ⟶ 144:
content <- readFile (head args)
return ([content],[""])
else if (args !! 0) == "-w" then do
-- If there's more than one argument and the first one is the -w option,
-- use the rest of the arguments as page titles and load them from the wiki.
contents <- mapM getPageContent.tail$ args
return (contents, if length args > 2 then tail args else [""])
else do
-- Otherwise, read all the files and display their file names.
contents <- mapM readFile args
return (contents, args)
let bareTagMapstagsPerLang = map (Map.map countBareLangTags.splitByLanguage) $countByLanguage contents
let tagsWithFiles = zipWith (\tagsaddFileToTags filefiles -> Map.map (addFile file) tags) bareTagMaps filestagsPerLang
let allBareTagscombinedFiles = foldl combineMaps Map.emptyunionsWith combine tagsWithFiles
printBareTags allBareTagscombinedFiles
where addFileaddFileToTags file count = Map.map (flip (count, if count>0 && file/="" then) [file] else [])
combineMapscombine cur next = Map.foldrWithKey(fst cur + fst next, snd cur ++ snd insertItemnext)
insertItem = Map.insertWith (\(newC,newF) (oldC,oldF) -> (oldC+newC,oldF++newF))
printBareTags :: Map.Map String (Int,[String]) -> IO ()
Line 158 ⟶ 164:
let numBare = Map.foldr ((+).fst) 0 tags
printf "%d bare language tags:\n\n" numBare
flip mapM_ (Map.toAscList tags) (\(lang,(count,files)) ->
if count <= 0 then return () else printf "%d in %s%s\n" count (
(if lang == "" then "no language" else lang) (filesString files))
(filesString files)
) (Map.toAscList tags)
 
{-| Formats a list of file (or page) names as a parenthesised, comma-separated
    list of wiki links with a leading space, e.g. " ([[a]], [[b]])".
    Empty names are skipped wherever they occur in the list; when no name is
    left the result is the empty string. -}
filesString :: [String] -> String
filesString files  =  case filter (not . null) files of
        []     ->  ""
        named  ->  " (" ++ listString named ++ ")"
    where -- foldr1 is safe here: this branch only sees a non-empty list.
          listString  =  foldr1 (\link rest -> link ++ ", " ++ rest) . map wikiLink
          wikiLink file  =  "[[" ++ file ++ "]]"
 
{-| Fetches the raw wiki text of a Rosetta Code page over HTTP. The title is
    appended to the request URL exactly as it is given. -}
getPageContent :: String -> IO String
getPageContent title  =  simpleHTTP (getRequest rawPageUrl) >>= getResponseBody
    where rawPageUrl  =  "http://rosettacode.org/mw/index.php?action=raw&title=" ++ title</lang>
 
Here are the input files I used to test:
Line 211 ⟶ 226:
2 in Perl ([[example1.wiki]], [[example2.wiki]])
</nowiki></pre>
 
Additionally, I tested with [[100_doors]] and [[Huffman_coding]]. The following resulted:
<pre>
5 bare language tags:
 
1 in no language ([[100_doors]])
1 in C ([[Huffman_coding]])
1 in CoffeeScript ([[Huffman_coding]])
1 in Perl ([[Huffman_coding]])
1 in PostScript ([[100_doors]])
</pre>
 
=={{header|Perl}}==