Rosetta Code/Find bare lang tags: Difference between revisions

Line 100:

There are actually many different Regex packages available for Haskell. For this example, I chose TDFA, a very fast POSIX ERE engine. To change engines, simply change the import statement. If you use a Perl-style RE engine, you'll have to modify the expressions slightly.

This solution can be compiled into a program that will either take space-delimited list of files as its argument, or take input from STDIN if no arguments are provided. ~~The~~ ~~Media~~ ~~Wiki~~ ~~API~~ ~~bonus~~ is ~~not~~ ~~attempted~~.

This solution can be compiled into a program that will either take space-delimited list of files as its argument, or take input from STDIN if no arguments are provided. Additionally, if you specify the -w flag in the first argument, it will take a list of Rosetta Code wiki pages and search them. Note that the page names must be as they appear in your URL bar -- underscores in place of spaces.

<lang Haskell>import System.Environment

import Network.HTTP

import Text.Printf

import Text.Regex.TDFA

Line 109:

Line 110:

import qualified Data.Map as Map

{-| Takes a string and cuts out the text matched in the MatchText array. -}

splitByMatches :: String -> [MatchText String] -> [String]

splitByMatches str matches = foldr ~~(\match~~ ~~acc~~ ->

splitByMatches str matches = foldr splitHead [str] matches

⚫

where splitHead match acc = before:after:(tail acc)

let before = take (matchOffset).head $ acc

~~after~~ = ~~drop~~ (matchOffset ~~+ matchLen~~).head $ acc

where before = take (matchOffset).head$ acc

~~matchOffset~~ = ~~fst.snd.~~(~~!0)~~ $ ~~match~~

after = drop (matchOffset + matchLen).head$ acc

~~matchLen~~ = ~~snd~~.snd.(!0) $ match

matchOffset = fst.snd.(!0)$ match

matchLen = snd.snd.(!0)$ match

⚫

in before:after:(tail acc)

) [str] matches

{-| Takes a string and splits it into the different languages used. All text

⚫

before the ~~language~~ ~~headers~~ is put into the key "" -}

⚫

~~splitByLanguage~~ :: String -> Map.Map String ~~String~~

splitByLanguage str = Map.fromList.zip langs $ splitByMatches str allMatches

⚫

~~where~~ langs = "":(map (fst.(!1)) allMatches)

⚫

allMatches = matchAllText (makeRegex headerRegex :: Regex) str

⚫

headerRegex = "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="

{-| Takes a string and counts the number of time a valid, but bare, lang tag

appears. It does not attempt to ignore valid tags inside lang blocks. -}

countBareLangTags :: String -> Int

countBareLangTags = matchCount (makeRegex "<lang[[:space:]]*>" :: Regex)

{-| Takes a string and counts the number of bare lang tags per section of the

⚫

text. All tags before the first section are put into the key "". -}

⚫

countByLanguage :: String -> Map.Map String Int

countByLanguage str = Map.fromList.filter ((>0).snd)$ zip langs counts

where counts = map countBareLangTags.splitByMatches str$ allMatches

⚫

langs = "":(map (fst.(!1)) allMatches)

⚫

allMatches = matchAllText (makeRegex headerRegex :: Regex) str

⚫

headerRegex = "==[[:space:]]*{{[[:space:]]*header[[:space:]]*\\|[[:space:]]*([^ }]*)[[:space:]]*}}[^=]*=="

main = do

Line 142:

Line 144:

content <- readFile (head args)

return ([content],[""])

else if (args !! 0) == "-w" then do

-- If there's more than one argument and the first one is the -w option,

-- use the rest of the arguments as page titles and load them from the wiki.

contents <- mapM getPageContent.tail$ args

return (contents, if length args > 2 then tail args else [""])

else do

-- Otherwise, read all the files and display their file names.

contents <- mapM readFile args

return (contents, args)

let ~~bareTagMaps~~ = map ~~(Map.map countBareLangTags.splitByLanguage) $~~ contents

let tagsPerLang = map countByLanguage contents

let tagsWithFiles = zipWith ~~(\tags~~ ~~file~~ ~~-> Map.map (addFile file) tags) bareTagMaps files~~

let tagsWithFiles = zipWith addFileToTags files tagsPerLang

let ~~allBareTags~~ = ~~foldl combineMaps~~ Map.~~empty~~ tagsWithFiles

let combinedFiles = Map.unionsWith combine tagsWithFiles

printBareTags ~~allBareTags~~

printBareTags combinedFiles

where ~~addFile~~ file ~~count~~ = (~~count~~, ~~if count>0 && file/="" then~~ [file~~] else [~~])

where addFileToTags file = Map.map (flip (,) [file])

~~combineMaps~~ = ~~Map.foldrWithKey~~ ~~insertItem~~

combine cur next = (fst cur + fst next, snd cur ++ snd next)

insertItem = Map.insertWith (\(newC,newF) (oldC,oldF) -> (oldC+newC,oldF++newF))

printBareTags :: Map.Map String (Int,[String]) -> IO ()

Line 158:

Line 164:

let numBare = Map.foldr ((+).fst) 0 tags

printf "%d bare language tags:\n\n" numBare

~~flip~~ mapM_ ~~(Map.toAscList tags)~~ (\(lang,(count,files)) ->

mapM_ (\(lang,(count,files)) ->

~~if count <= 0 then return () else~~ printf "%d in %s%s\n" count (

printf "%d in %s%s\n" count

if lang == "" then "no language" else lang~~) (filesString files)~~)

(if lang == "" then "no language" else lang)

(filesString files)

) (Map.toAscList tags)

filesString :: [String] -> String

filesString [] = ""

filesString ("":rest) = filesString rest

filesString files = " ("++listString files++")"

where listString [file] = "[["++file++"]]"

listString (file:files) = "[["++file++"]], "++listString files~~</lang>~~

listString (file:files) = "[["++file++"]], "++listString files

getPageContent :: String -> IO String

getPageContent title = do

response <- simpleHTTP.getRequest$ url

getResponseBody response

where url = "http://rosettacode.org/mw/index.php?action=raw&title="++title</lang>

Here are the input files I used to test:

Line 211:

Line 226:

2 in Perl ([[example1.wiki]], [[example2.wiki]])

</nowiki></pre>

Additionally, I tested with [[100_doors]] and [[Huffman_coding]]. The following resulted:

<pre>

5 bare language tags:

1 in no language ([[100_doors]])

1 in C ([[Huffman_coding]])

1 in CoffeeScript ([[Huffman_coding]])

1 in Perl ([[Huffman_coding]])

1 in PostScript ([[100_doors]])

</pre>

=={{header|Perl}}==