Word frequency: Difference between revisions

Content added Content deleted
No edit summary
Line 2,359: Line 2,359:


=={{header|FutureBasic}}==
=={{header|FutureBasic}}==
Task said: "Feel free to explicitly state the thoughts behind the program decisions." Thus the heavy comments.
<lang futurebasic>
<lang futurebasic>
include "NSLog.incl"
include "NSLog.incl"
include "Tlbx CFCharacterSet.incl"


local fn WordFrequency( textStr as CFStringRef, caseSensitive as Boolean, ascendingOrder as Boolean ) as CFStringRef
local fn WordFrequency( textStr as CFStringRef, caseSensitive as Boolean, ascendingOrder as Boolean ) as CFStringRef
'~'1
'~'1
CFStringRef wrd, resultStr = NULL
CFStringRef wrd
CFDictionaryRef dict
CFDictionaryRef dict


// Treat capitalized words as distinct during the search, or not, as determined by the caseSensitive Boolean function input parameter
if caseSensitive == NO then textStr = fn StringLowercaseString( textStr )
if caseSensitive == NO then textStr = fn StringLowercaseString( textStr )

// Trim non-alphabetic characters from string and separate individual words with a space
CFStringRef tempStr = fn ArrayComponentsJoinedByString( fn StringComponentsSeparatedByCharactersInSet( textStr, fn CharacterSetInvertedSet( fn CharacterSetLetterSet ) ), @" " )
CFStringRef tempStr = fn ArrayComponentsJoinedByString( fn StringComponentsSeparatedByCharactersInSet( textStr, fn CharacterSetInvertedSet( fn CharacterSetLetterSet ) ), @" " )

CFMutableCharacterSetRef separators = fn CFCharacterSetCreateMutable( 0 )
// Prepare separators to parse string into array
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet )
CFMutableCharacterSetRef separators = fn MutableCharacterSetInit
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet )
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetPuntuationSet ) // Informally, this set is the set of all non-whitespace characters used to separate linguistic units in scripts, such as periods, dashes, parentheses, and so on.
MutableCharacterSetFormUnionWithCharacterSet( separators, fn CharacterSetWhitespaceAndNewlineSet ) // A character set containing all the whitespace and newline characters. A character set containing characters in Unicode General Category Z*, U+000A ~ U+000D, and U+0085.

// Create array of separated words
CFArrayRef tempArr = fn StringComponentsSeparatedByCharactersInSet( tempStr, separators )
CFArrayRef tempArr = fn StringComponentsSeparatedByCharactersInSet( tempStr, separators )

CFRelease( separators )
// Create a counted set with each word and its frequency
CountedSetRef freqencies = fn CountedSetWithArray( tempArr )
CountedSetRef freqencies = fn CountedSetWithArray( tempArr )
EnumeratorRef enumRef = fn CountedSetObjectEnumerator( freqencies )

CFArrayRef array = fn EnumeratorAllObjects( enumRef )
// Enumerate each word-frequency pair in the counted set...
EnumeratorRef enumRef = fn CountedSetObjectEnumerator( freqencies )

// ... and use it to create an array of the words in the counted set
CFArrayRef array = fn EnumeratorAllObjects( enumRef )

// Create an empty mutable array
CFMutableArrayRef wordArr = fn MutableArrayWithCapacity( 0 )
CFMutableArrayRef wordArr = fn MutableArrayWithCapacity( 0 )


// Create word counter
NSInteger totalWords = 0
NSInteger totalWords = 0
// Enumerate each word, get its frequency, create its own key/value pair dictionary, add each dictionary into master array
for wrd in array
for wrd in array
totalWords++
totalWords++
// Create dictionary with frequency and matching word
dict = @{ @"count":fn NumberWithUnsignedInteger( fn CountedSetCountForObject( freqencies, wrd ) ), @"object":wrd }
dict = @{ @"count":fn NumberWithUnsignedInteger( fn CountedSetCountForObject( freqencies, wrd ) ), @"object":wrd }
// Add each dictionary to the master mutable array, checking for a valid word by length
if ( fn StringLength( wrd ) != 0 )
if ( fn StringLength( wrd ) != 0 )
MutableArrayAddObject( wordArr, dict )
MutableArrayAddObject( wordArr, dict )
Line 2,389: Line 2,407:
next
next


// Store the total words as a global application property
AppSetProperty( @"totalWords", fn StringWithFormat( @"%d", totalWords ) )
AppSetProperty( @"totalWords", fn StringWithFormat( @"%d", totalWords - 1 ) )

// Sort the array in ascending or descending order as determined by the ascendingOrder Boolean function input parameter
SortDescriptorRef descriptors = fn SortDescriptorWithKey( @"count", ascendingOrder )
SortDescriptorRef descriptors = fn SortDescriptorWithKey( @"count", ascendingOrder )
CFArrayRef sortedArray = fn ArraySortedArrayUsingDescriptors( wordArr, @[descriptors] )
CFArrayRef sortedArray = fn ArraySortedArrayUsingDescriptors( wordArr, @[descriptors] )

CFMutableStringRef mutStr = fn MutableStringWithCapacity( 0 )
// Create an empty mutable string
CFMutableStringRef mutStr = fn MutableStringWithCapacity( 0 )


// Use each dictionary in sorted array to build the formatted output string
NSInteger count = 1
NSInteger count = 1
for dict in sortedArray
for dict in sortedArray
Line 2,400: Line 2,424:
next
next


// Create output string from mutable
resultStr = fn StringWithFormat( @"%@", mutStr )
CFStringRef resultStr = fn StringWithFormat( @"%@", mutStr )
end fn = resultStr
end fn = resultStr


local fn ParseTextFromWebsite( webSite as CFStringRef )
CFAbsoluteTime startTime
// Convert incoming string to URL
CFURLRef textURL
CFURLRef textURL = fn URLWithString( webSite )
CFStringRef textStr, frequencyStr
// Read contents of URL into a string
CFStringRef textStr = fn StringWithContentsOfURL( textURL, NSUTF8StringEncoding, NULL )


// Start timer
textURL = fn URLWithString( @"https://www.gutenberg.org/files/135/135-0.txt" )
CFAbsoluteTime startTime = fn CFAbsoluteTimeGetCurrent
textStr = fn StringWithContentsOfURL( textURL, NSUTF8StringEncoding, NULL )
// Calculate frequency of words in text and sort by occurrence

CFStringRef frequencyStr = fn WordFrequency( textStr, NO, NO )
startTime = fn CFAbsoluteTimeGetCurrent
// Log results and report post-processing time
frequencyStr = fn WordFrequency( textStr, NO, NO )
NSLog( @"%@", frequencyStr )
NSLog( @"%@", frequencyStr )
NSLog( @"Total words in document: %@", fn AppProperty( @"totalWords" ) )
NSLog( @"Total words in document: %@", fn AppProperty( @"totalWords" ) )
// Stop timer and log elapsed processing time
NSLog( @"Elapsed time: %f milliseconds.", ( fn CFAbsoluteTimeGetCurrent - startTime ) * 1000.0 )
NSLog( @"Elapsed time: %f milliseconds.", ( fn CFAbsoluteTimeGetCurrent - startTime ) * 1000.0 )
end fn

// Pass the URL for Les Misérables on Project Gutenberg
fn ParseTextFromWebsite( @"https://www.gutenberg.org/files/135/135-0.txt" )


HandleEvents
HandleEvents
Line 2,447: Line 2,479:
22910 1 isabella
22910 1 isabella


Total words in document: 22911
Total words in document: 22910
Elapsed time: 595.407963 milliseconds.
Elapsed time: 595.407963 milliseconds.
</pre>
</pre>






=={{header|Go}}==
=={{header|Go}}==