Jump to content

User:Klever: Difference between revisions

→‎VBA Examples: KWIC index
(→‎VBA Examples: KWIC index)
Line 227:
5 3 46 1 2
5 4 66 1 2 3
</pre>
 
==[[KWIC index]]==
 
<lang vb>
'KWIC index
'assumptions:
' - all titles can be held in an array in main memory
' - use the index in the array as the "catalog number"
' - disregard punctuation in titles
' - the KWIC index itself may be too large for main memory - do not store it in memory
' - the KWIC index consists of one line per title/keyword combination and consists of:
' - the catalog number
' - the title with the keyword centered in a line of given length (e.g. 80 or 120)
' (constant-pitch font assumed)
' note: long titles may be truncated at the beginning or the end of the line
 
'globals
Const MAXKEYS = 20 'max. number of keywords in a title
Const STOPWORDS = "a an and by for is it of on or the to with " 'that last space is needed!
Dim title() As String 'list of titles to be included in KWIC index
Dim ntitle As Integer 'number of titles
Dim index() As Integer 'holds title number and position of keyword in title
Dim nkeys As Long 'total number of keywords found
 
Sub ReadTitles()
' read or - in this case - set the titles
ntitle = 10
ReDim title(1 To ntitle)
title(1) = "Microsoft Visio 2003 User's Guide"
title(2) = "Microsoft Office Excel 2003 Inside Out"
title(3) = "Mastering Excel 2003 Programming with VBA"
title(4) = "Excel 2003 Formulas"
title(5) = "Excel for Scientists and Engineers"
title(6) = "Excel 2003 VBA Programmer's Reference"
title(7) = "Automated Data Analysis Using Excel"
title(8) = "Beginning Excel: What-if Data Analysis Tools"
title(9) = "How to do Everything with Microsoft Office Excel 2003"
title(10) = "Data Analysis Using SQL and Excel"
End Sub
 
Function IsStopword(aword) As Boolean
'search for aword in stopword list
'add an extra space to avoid ambiguity
IsStopword = InStr(STOPWORDS, LCase(aword) & " ") > 0
End Function
 
Sub ProcessTitles()
'find positions of keywords in titles, store in index array
'Note: we cannot use Split here because that function doesn't return
'the positions of the words it finds
nkeys = 0
For i = 1 To ntitle
atitle = title(i) & " " 'add extra space as sentinel
p1 = 1
Do While p1 <= Len(atitle)
'find next word:
'a) find next non-space
While Mid$(atitle, p1, 1) = " ": p1 = p1 + 1: Wend
'b) extend word
p2 = p1
While Mid$(atitle, p2, 1) <> " ": p2 = p2 + 1: Wend
aword = Mid$(atitle, p1, p2 - p1)
'for now we assume there is no punctuation, i.e. no words
'in parentheses, brackets or quotation marks
If Not IsStopword(aword) Then
'remember position of this keyword
'we probably should check for overflow (too many keywords) here!
nkeys = nkeys + 1
index(nkeys, 1) = i
index(nkeys, 2) = p1
End If
'continue searching
p1 = p2 + 1
Loop
Next i
End Sub
 
Function Shift(aString, pos)
'return shifted string (part beginning at position "pos" followed by part before it)
Shift = Mid$(aString, pos) & " " & Left$(aString, pos - 1)
End Function
 
Sub SortTitles()
' sort the index() array to represent shifted titles in alphabetical order
' more efficient sorting algorithms can be applied here...
switched = True
Do While switched
'scan array for two shifted strings in the wrong order and swap them
'use case-insensitive compare
switched = False
For i = 1 To nkeys - 1
string1 = LCase(Shift(title(index(i, 1)), index(i, 2)))
string2 = LCase(Shift(title(index(i + 1, 1)), index(i + 1, 2)))
If string2 < string1 Then 'switch
For j = 1 To 2
temp = index(i, j)
index(i, j) = index(i + 1, j)
index(i + 1, j) = temp
Next
switched = True
End If
Next i
Loop
End Sub
 
Sub PrintKWIC(linelength)
'print the KWIC index
spaces = Space(linelength / 2)
Debug.Print "Index number", "|"; Space((linelength - 10) / 2); "KWIC string"
Debug.Print String(linelength + 14, "-")
For i = 1 To nkeys
atitle = title(index(i, 1))
ltitle = Len(atitle)
pos = index(i, 2)
'create shifted string so that keyword is centered in the line
part2 = Mid$(atitle, pos)
part1 = Right$(spaces & Left$(atitle, pos - 1), linelength / 2)
kwicstring = Right$(part1, linelength / 2) & Left$(part2, linelength / 2)
Debug.Print index(i, 1), "|"; kwicstring
Next
End Sub
 
Sub KWIC()
'main program for KWIC index
ReadTitles
'set array
ReDim index(ntitle * MAXKEYS, 2)
'index(.,1) is catalog nr. (here, equal to title nr.)
'index(.,2) is keyword position in title
ProcessTitles
SortTitles
PrintKWIC (120) 'argument is the length of the KWIC titles (excluding catalog numbers)
End Sub
</lang>
 
Output:
<pre>
kwic
Index number | KWIC string
--------------------------------------------------------------------------------------------------------------------------------------
4 | Excel 2003 Formulas
9 | How to do Everything with Microsoft Office Excel 2003
2 | Microsoft Office Excel 2003 Inside Out
3 | Mastering Excel 2003 Programming with VBA
1 | Microsoft Visio 2003 User's Guide
6 | Excel 2003 VBA Programmer's Reference
8 | Beginning Excel: What-if Data Analysis Tools
7 | Automated Data Analysis Using Excel
10 | Data Analysis Using SQL and Excel
7 | Automated Data Analysis Using Excel
8 | Beginning Excel: What-if Data Analysis Tools
8 | Beginning Excel: What-if Data Analysis Tools
7 | Automated Data Analysis Using Excel
10 | Data Analysis Using SQL and Excel
9 | How to do Everything with Microsoft Office Excel 2003
5 | Excel for Scientists and Engineers
9 | How to do Everything with Microsoft Office Excel 2003
4 | Excel 2003 Formulas
9 | How to do Everything with Microsoft Office Excel 2003
2 | Microsoft Office Excel 2003 Inside Out
3 | Mastering Excel 2003 Programming with VBA
6 | Excel 2003 VBA Programmer's Reference
7 | Automated Data Analysis Using Excel
10 | Data Analysis Using SQL and Excel
5 | Excel for Scientists and Engineers
8 | Beginning Excel: What-if Data Analysis Tools
4 | Excel 2003 Formulas
1 | Microsoft Visio 2003 User's Guide
9 | How to do Everything with Microsoft Office Excel 2003
2 | Microsoft Office Excel 2003 Inside Out
3 | Mastering Excel 2003 Programming with VBA
9 | How to do Everything with Microsoft Office Excel 2003
2 | Microsoft Office Excel 2003 Inside Out
1 | Microsoft Visio 2003 User's Guide
9 | How to do Everything with Microsoft Office Excel 2003
2 | Microsoft Office Excel 2003 Inside Out
2 | Microsoft Office Excel 2003 Inside Out
6 | Excel 2003 VBA Programmer's Reference
3 | Mastering Excel 2003 Programming with VBA
6 | Excel 2003 VBA Programmer's Reference
5 | Excel for Scientists and Engineers
10 | Data Analysis Using SQL and Excel
8 | Beginning Excel: What-if Data Analysis Tools
1 | Microsoft Visio 2003 User's Guide
7 | Automated Data Analysis Using Excel
10 | Data Analysis Using SQL and Excel
3 | Mastering Excel 2003 Programming with VBA
6 | Excel 2003 VBA Programmer's Reference
1 | Microsoft Visio 2003 User's Guide
8 | Beginning Excel: What-if Data Analysis Tools
</pre>
 
Anonymous user
Cookies help us deliver our services. By using our services, you agree to our use of cookies.