Word frequency: Difference between revisions
Content added Content deleted
No edit summary |
|||
Line 2,359: | Line 2,359: | ||
=={{header|FutureBasic}}== |
=={{header|FutureBasic}}== |
||
Task said: "Feel free to explicitly state the thoughts behind the program decisions." Thus the heavy comments. |
|||
<lang futurebasic> |
<lang futurebasic> |
||
include "NSLog.incl" |
include "NSLog.incl" |
||
include "Tlbx CFCharacterSet.incl" |
|||
local fn WordFrequency( textStr as CFStringRef, caseSensitive as Boolean, ascendingOrder as Boolean ) as CFStringRef |
local fn WordFrequency( textStr as CFStringRef, caseSensitive as Boolean, ascendingOrder as Boolean ) as CFStringRef |
||
'~'1 |
'~'1 |
||
CFStringRef |
CFStringRef wrd |
||
CFDictionaryRef dict |
CFDictionaryRef dict |
||
// Break out capitalized words during search or not, as determined by the caseSensitive Boolean function input parameter |
|||
if caseSensitive == NO then textStr = fn StringLowercaseString( textStr ) |
if caseSensitive == NO then textStr = fn StringLowercaseString( textStr ) |
||
// Trim non-alphabetic characters from string and separate individual words with a space |
|||
CFStringRef tempStr = fn ArrayComponentsJoinedByString( fn StringComponentsSeparatedByCharactersInSet( textStr, fn CharacterSetInvertedSet( fn CharacterSetLetterSet ) ), @" " ) |
CFStringRef tempStr = fn ArrayComponentsJoinedByString( fn StringComponentsSeparatedByCharactersInSet( textStr, fn CharacterSetInvertedSet( fn CharacterSetLetterSet ) ), @" " ) |
||
⚫ | |||
// Prepare separators to parse string into array |
|||
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet ) |
|||
⚫ | |||
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet ) |
|||
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet ) // Informally, this set is the set of all non-whitespace characters used to separate linguistic units in scripts, such as periods, dashes, parentheses, and so on. |
|||
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet ) // A character set containing all the whitespace and newline characters. A character set containing characters in Unicode General Category Z*, U+000A ~ U+000D, and U+0085. |
|||
// Create array of separated words |
|||
CFArrayRef tempArr = fn StringComponentsSeparatedByCharactersInSet( tempStr, separators ) |
CFArrayRef tempArr = fn StringComponentsSeparatedByCharactersInSet( tempStr, separators ) |
||
CFRelease( separators ) |
|||
// Create a counted set with each word and its frequency |
|||
CountedSetRef freqencies |
CountedSetRef freqencies = fn CountedSetWithArray( tempArr ) |
||
⚫ | |||
⚫ | |||
// Enumerate each word-frequency pair in the counted set... |
|||
⚫ | |||
// ...and use it to create an array of the words in the counted set |
|||
⚫ | |||
// Create an empty mutable array |
|||
CFMutableArrayRef wordArr = fn MutableArrayWithCapacity( 0 ) |
CFMutableArrayRef wordArr = fn MutableArrayWithCapacity( 0 ) |
||
// Create word counter |
|||
NSInteger totalWords = 0 |
NSInteger totalWords = 0 |
||
// Enumerate each word, get its frequency, create its own key/value pair dictionary, add each dictionary into master array |
|||
for wrd in array |
for wrd in array |
||
totalWords++ |
totalWords++ |
||
// Create dictionary with frequency and matching word |
|||
dict = @{ @"count":fn NumberWithUnsignedInteger( fn CountedSetCountForObject( freqencies, wrd ) ), @"object":wrd } |
dict = @{ @"count":fn NumberWithUnsignedInteger( fn CountedSetCountForObject( freqencies, wrd ) ), @"object":wrd } |
||
// Add each dictionary to the master mutable array, checking for a valid word by length |
|||
if ( fn StringLength( wrd ) != 0 ) |
if ( fn StringLength( wrd ) != 0 ) |
||
MutableArrayAddObject( wordArr, dict ) |
MutableArrayAddObject( wordArr, dict ) |
||
Line 2,389: | Line 2,407: | ||
next |
next |
||
// Store the total words as a global application property |
|||
AppSetProperty( @"totalWords", fn StringWithFormat( @"%d", totalWords ) ) |
AppSetProperty( @"totalWords", fn StringWithFormat( @"%d", totalWords - 1 ) ) |
||
// Sort the array in ascending or descending order as determined by the ascendingOrder Boolean function input parameter |
|||
SortDescriptorRef descriptors = fn SortDescriptorWithKey( @"count", ascendingOrder ) |
SortDescriptorRef descriptors = fn SortDescriptorWithKey( @"count", ascendingOrder ) |
||
CFArrayRef sortedArray |
CFArrayRef sortedArray = fn ArraySortedArrayUsingDescriptors( wordArr, @[descriptors] ) |
||
⚫ | |||
// Create an empty mutable string |
|||
⚫ | |||
// Use each dictionary in sorted array to build the formatted output string |
|||
NSInteger count = 1 |
NSInteger count = 1 |
||
for dict in sortedArray |
for dict in sortedArray |
||
Line 2,400: | Line 2,424: | ||
next |
next |
||
// Create output string from mutable string |
|||
resultStr = fn StringWithFormat( @"%@", mutStr ) |
CFStringRef resultStr = fn StringWithFormat( @"%@", mutStr ) |
||
end fn = resultStr |
end fn = resultStr |
||
local fn ParseTextFromWebsite( webSite as CFStringRef ) |
|||
⚫ | |||
// Convert incoming string to URL |
|||
CFURLRef textURL |
|||
CFURLRef textURL = fn URLWithString( webSite ) |
|||
CFStringRef textStr, frequencyStr |
|||
// Read contents of URL into a string |
|||
⚫ | |||
// Start timer |
|||
⚫ | |||
⚫ | |||
⚫ | |||
// Calculate frequency of words in text and sort by occurrence |
|||
⚫ | |||
startTime = fn CFAbsoluteTimeGetCurrent |
|||
// Log results and post-processing time |
|||
⚫ | |||
NSLog( @"%@", frequencyStr ) |
NSLog( @"%@", frequencyStr ) |
||
NSLog( @"Total words in document: %@", fn AppProperty( @"totalWords" ) ) |
NSLog( @"Total words in document: %@", fn AppProperty( @"totalWords" ) ) |
||
// Stop timer and log elapsed processing time |
|||
NSLog( @"Elapsed time: %f milliseconds.", ( fn CFAbsoluteTimeGetCurrent - startTime ) * 1000.0 ) |
NSLog( @"Elapsed time: %f milliseconds.", ( fn CFAbsoluteTimeGetCurrent - startTime ) * 1000.0 ) |
||
end fn |
|||
// Pass url for Les Misérables on Project Gutenberg |
|||
⚫ | |||
HandleEvents |
HandleEvents |
||
Line 2,447: | Line 2,479: | ||
22910 1 isabella |
22910 1 isabella |
||
Total words in document: |
Total words in document: 22910 |
||
Elapsed time: 595.407963 milliseconds. |
Elapsed time: 595.407963 milliseconds. |
||
</pre> |
</pre> |
||
=={{header|Go}}== |
=={{header|Go}}== |