Word frequency: Difference between revisions

m
No edit summary
Line 2,359:
 
=={{header|FutureBasic}}==
Task said: "Feel free to explicitly state the thoughts behind the program decisions." Thus the heavy comments.
<lang futurebasic>
include "NSLog.incl"
include "Tlbx CFCharacterSet.incl"
 
local fn WordFrequency( textStr as CFStringRef, caseSensitive as Boolean, ascendingOrder as Boolean ) as CFStringRef
'~'1
CFStringRef wrd, resultStr = NULL wrd
CFDictionaryRef dict
 
// Break out capitalized words during search or not, as determined by the caseSensitive Boolean function input parameter
if caseSensitive == NO then textStr = fn StringLowercaseString( textStr )
 
// Trim non-alphabetic characters from string and separate individual words with a space
CFStringRef tempStr = fn ArrayComponentsJoinedByString( fn StringComponentsSeparatedByCharactersInSet( textStr, fn CharacterSetInvertedSet( fn CharacterSetLetterSet ) ), @" " )
 
CFMutableCharacterSetRef separators = fn CFCharacterSetCreateMutable( 0 )
// Prepare separators to parse string into array
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet )
CFMutableCharacterSetRef separators = fn CFCharacterSetCreateMutable( 0 )MutableCharacterSetInit
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet )
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet ) // Informally, this set is the set of all non-whitespace characters used to separate linguistic units in scripts, such as periods, dashes, parentheses, and so on.
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet ) // A character set containing all the whitespace and newline characters. A character set containing characters in Unicode General Category Z*, U+000A ~ U+000D, and U+0085.
 
// Create array of separated words
CFArrayRef tempArr = fn StringComponentsSeparatedByCharactersInSet( tempStr, separators )
 
CFRelease( separators )
// Create a counted set with each word and its frequency
CountedSetRef freqencies = fn CountedSetWithArray( tempArr )
EnumeratorRef enumRef = fn CountedSetObjectEnumerator( freqencies )
 
CFArrayRef array = fn EnumeratorAllObjects( enumRef )
// Enumerate each word-frequency pair in the counted set...
EnumeratorRef enumRef = fn CountedSetObjectEnumerator( freqencies )
 
// .. and use it to create array of words in counted set
CFArrayRef array = fn EnumeratorAllObjects( enumRef )
 
// Create an empty mutable array
CFMutableArrayRef wordArr = fn MutableArrayWithCapacity( 0 )
 
// Create word counter
NSInteger totalWords = 0
// Enumerate each word, get its frequency, create its own key/value pair dictionary, add each dictionary into master array
for wrd in array
totalWords++
// Create dictionary with frequency and matching word
dict = @{ @"count":fn NumberWithUnsignedInteger( fn CountedSetCountForObject( freqencies, wrd ) ), @"object":wrd }
// Add each dictionary to the master mutable array, checking for a valid word by length
if ( fn StringLength( wrd ) != 0 )
MutableArrayAddObject( wordArr, dict )
Line 2,389 ⟶ 2,407:
next
 
// Store the total words as a global application property
AppSetProperty( @"totalWords", fn StringWithFormat( @"%d", totalWords - 1 ) )
 
// Sort the array in ascending or descending order as determined by the ascendingOrder Boolean function input parameter
SortDescriptorRef descriptors = fn SortDescriptorWithKey( @"count", ascendingOrder )
CFArrayRef sortedArray = fn ArraySortedArrayUsingDescriptors( wordArr, @[descriptors] )
 
CFMutableStringRef mutStr = fn MutableStringWithCapacity( 0 )
// Create an empty mutable string
CFMutableStringRef mutStr = fn MutableStringWithCapacity( 0 )
 
// Use each dictionary in sorted array to build the formatted output string
NSInteger count = 1
for dict in sortedArray
Line 2,400 ⟶ 2,424:
next
 
// Create output string from mutable
CFStringRef resultStr = fn StringWithFormat( @"%@", mutStr )
end fn = resultStr
 
// ParseTextFromWebsite
// Downloads the text at the given web address, runs fn WordFrequency on it,
// and logs the frequency table, the total word count and the elapsed time.
//
// webSite: address of a UTF-8 text document, passed as a string.
//
// The timer is started after the download, so the reported time covers the
// frequency calculation and the logging, not the network transfer.
local fn ParseTextFromWebsite( webSite as CFStringRef )
  // Convert incoming string to URL
  CFURLRef textURL = fn URLWithString( webSite )

  // Read contents of URL into a string
  CFStringRef textStr = fn StringWithContentsOfURL( textURL, NSUTF8StringEncoding, NULL )

  // Start timer
  CFAbsoluteTime startTime = fn CFAbsoluteTimeGetCurrent

  // Calculate frequency of words in text: case-insensitive (caseSensitive = NO),
  // sorted by descending occurrence (ascendingOrder = NO)
  CFStringRef frequencyStr = fn WordFrequency( textStr, NO, NO )

  // Log results
  NSLog( @"%@", frequencyStr )

  // fn WordFrequency stores the word total in the "totalWords" application property
  NSLog( @"Total words in document: %@", fn AppProperty( @"totalWords" ) )

  // Stop timer and log elapsed processing time (seconds converted to milliseconds)
  NSLog( @"Elapsed time: %f milliseconds.", ( fn CFAbsoluteTimeGetCurrent - startTime ) * 1000.0 )
end fn
 
// Pass the Project Gutenberg URL for Les Misérables to the parser
fn ParseTextFromWebsite( @"https://www.gutenberg.org/files/135/135-0.txt" )

HandleEvents
Line 2,447 ⟶ 2,479:
22910 1 isabella
 
Total words in document: 22910
Elapsed time: 595.407963 milliseconds.
</pre>
 
 
 
 
 
=={{header|Go}}==
729

edits