Word frequency: Difference between revisions
m
→{{header|FutureBasic}}
No edit summary |
|||
Line 2,359:
=={{header|FutureBasic}}==
The task says: "Feel free to explicitly state the thoughts behind the program decisions." Hence the heavy commenting.
<lang futurebasic>
include "NSLog.incl"
local fn WordFrequency( textStr as CFStringRef, caseSensitive as Boolean, ascendingOrder as Boolean ) as CFStringRef
'~'1
CFStringRef
CFDictionaryRef dict
// Count capitalized words separately during the search, or not, as determined by the caseSensitive Boolean input parameter
if caseSensitive == NO then textStr = fn StringLowercaseString( textStr )
// Trim non-alphabetic characters from string and separate individual words with a space
CFStringRef tempStr = fn ArrayComponentsJoinedByString( fn StringComponentsSeparatedByCharactersInSet( textStr, fn CharacterSetInvertedSet( fn CharacterSetLetterSet ) ), @" " )
CFMutableCharacterSetRef separators = fn CFCharacterSetCreateMutable( 0 )▼
// Prepare separators to parse string into array
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet ) // Informally, this set is the set of all non-whitespace characters used to separate linguistic units in scripts, such as periods, dashes, parentheses, and so on.
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet ) // A character set containing all the whitespace and newline characters. A character set containing characters in Unicode General Category Z*, U+000A ~ U+000D, and U+0085.
// Create array of separated words
CFArrayRef tempArr = fn StringComponentsSeparatedByCharactersInSet( tempStr, separators )
// Create a counted set with each word and its frequency
CountedSetRef freqencies
EnumeratorRef enumRef = fn CountedSetObjectEnumerator( freqencies )▼
CFArrayRef array = fn EnumeratorAllObjects( enumRef )▼
// Enumerate each word-frequency pair in the counted set...
// ...and use it to create an array of the words in the counted set
// Create an empty mutable array
CFMutableArrayRef wordArr = fn MutableArrayWithCapacity( 0 )
// Create word counter
NSInteger totalWords = 0
// Enumerate each word, get its frequency, create its own key/value pair dictionary, add each dictionary into master array
for wrd in array
totalWords++
// Create dictionary with frequency and matching word
dict = @{ @"count":fn NumberWithUnsignedInteger( fn CountedSetCountForObject( freqencies, wrd ) ), @"object":wrd }
// Add each dictionary to the master mutable array, checking for a valid word by length
if ( fn StringLength( wrd ) != 0 )
MutableArrayAddObject( wordArr, dict )
Line 2,389 ⟶ 2,407:
next
// Store the total words as a global application property
AppSetProperty( @"totalWords", fn StringWithFormat( @"%d", totalWords - 1 ) )
// Sort the array in ascending or descending order as determined by the ascendingOrder Boolean function input parameter
SortDescriptorRef descriptors = fn SortDescriptorWithKey( @"count", ascendingOrder )
CFArrayRef sortedArray
CFMutableStringRef mutStr = fn MutableStringWithCapacity( 0 )▼
// Create an empty mutable string
// Use each dictionary in sorted array to build the formatted output string
NSInteger count = 1
for dict in sortedArray
Line 2,400 ⟶ 2,424:
next
// Create output string from mutable
CFStringRef resultStr = fn StringWithFormat( @"%@", mutStr )
end fn = resultStr
local fn ParseTextFromWebsite( webSite as CFStringRef )
CFAbsoluteTime startTime▼
// Convert incoming string to URL
CFURLRef textURL = fn URLWithString( webSite )
// Read contents of URL into a string
CFStringRef textStr = fn StringWithContentsOfURL( textURL, NSUTF8StringEncoding, NULL )▼
// Start timer
textURL = fn URLWithString( @"https://www.gutenberg.org/files/135/135-0.txt" )▼
▲CFAbsoluteTime startTime = fn CFAbsoluteTimeGetCurrent
▲textStr = fn StringWithContentsOfURL( textURL, NSUTF8StringEncoding, NULL )
// Calculate frequency of words in text and sort by occurrence
CFStringRef frequencyStr = fn WordFrequency( textStr, NO, NO )▼
// Log results and report processing time
▲frequencyStr = fn WordFrequency( textStr, NO, NO )
NSLog( @"%@", frequencyStr )
NSLog( @"Total words in document: %@", fn AppProperty( @"totalWords" ) )
// Stop timer and log elapsed processing time
NSLog( @"Elapsed time: %f milliseconds.", ( fn CFAbsoluteTimeGetCurrent - startTime ) * 1000.0 )
end fn
// Pass url for Les Misérables on Project Gutenberg
HandleEvents
Line 2,447 ⟶ 2,479:
22910 1 isabella
Total words in document:
Elapsed time: 595.407963 milliseconds.
</pre>
=={{header|Go}}==
|