Anonymous user
User:Klever: Difference between revisions
m
→KWIC index
No edit summary |
m (→KWIC index) |
||
(25 intermediate revisions by the same user not shown) | |||
Line 1:
{{mylangbegin}}
{{mylang|
{{mylang|BASIC|Somewhat Rusty}}
{{mylang|Fortran|Stuck in Fortran 77, WATFOR, WATFIV etc.}}
Line 12:
=VBA Examples=
Some nontrivial VBA Examples (to be moved).
In MS Office program (Word, Excel, Access...): open the Visual Basic window. Paste the code in a module. Execute it by typing a suitable command in the Immediate Window. Output will be directed to the Immediate Window unless stated otherwise...
==[[
<lang vb>
'Dijkstra globals
Const MaxGraph As Integer = 100 'max. number of nodes in graph
Const Infinity = 1E+308
Dim E(1 To MaxGraph, 1 To MaxGraph) As Double 'the edge costs (Infinity if no edge)
Dim A(1 To MaxGraph) As Double 'the distances calculated
Dim P(1 To MaxGraph) As Integer 'the previous/path array
Dim Q(1 To MaxGraph) As Boolean 'the queue
Public Sub Dijkstra(n, start)
'simple implementation of Dijkstra's algorithm
'n = number of nodes in graph
'start = index of start node
'init distances A
For j = 1 To n
A(j) = Infinity
Next j
A(start) = 0
'init P (path) to "no paths" and Q = set of all nodes
For j = 1 To n
Q(j) = True
P(j) = 0
Next j
Do While True 'loop will exit! (see below)
'find node u in Q with smallest distance to start
dist = Infinity
For i = 1 To n
If Q(i) Then
If A(i) < dist Then
dist = A(i)
u = i
End If
End If
Next i
If dist = Infinity Then Exit Do 'no more nodes available - done!
'remove u from Q
Q(u) = False
'loop over neighbors of u that are in Q
For j = 1 To n
If Q(j) And E(u, j) <> Infinity Then
'check if path to neighbor j via u is shorter than current estimated distance to j
alt = A(u) + E(u, j)
If alt < A(j) Then
'yes, replace with new distance and remember "previous" hop on the path
A(j) = alt
P(j) = u
End If
End If
Next j
Loop
End Sub
Public Function GetPath(source, target) As String
'reconstruct shortest path from source to target
'by working backwards from target using the P(revious) array
Dim path As String
If P(target) = 0 Then
GetPath = "No path"
Else
path = ""
u = target
Do While P(u) > 0
path = Format$(u) & " " & path
u = P(u)
Loop
GetPath = Format$(source) & " " & path
End If
End Function
Public Sub DijkstraTest()
'main function to solve Dijkstra's algorithm and return shortest path between
'a node and every other node in a digraph
' define problem:
' number of nodes
n = 5
' reset connection/cost per edge
For i = 1 To n
For j = 1 To n
E(i, j) = Infinity
Next j
P(i) = 0
Next i
' fill in the edge costs
E(1, 2) = 10
E(1, 3) = 50
E(1, 4) = 65
E(2, 3) = 30
E(2, 5) = 4
E(3, 4) = 20
E(3, 5) = 44
E(4, 2) = 70
E(4, 5) = 23
E(5, 1) = 6
'Solve it for every node
For v = 1 To n
Dijkstra n, v
'Print solution
Debug.Print "From", "To", "Cost", "Path"
For j = 1 To n
If v <> j Then Debug.Print v, j, IIf(A(j) = Infinity, "---", A(j)), GetPath(v, j)
Next j
Debug.Print
Next v
End Sub
</lang>
Output (using the same graph as in the Floyd-Warshall algorithm below):
<pre>
DijkstraTest
From To Cost Path
1 2 10 1 2
1 3 40 1 2 3
1 4 60 1 2 3 4
1 5 14 1 2 5
From To Cost Path
2 1 10 2 5 1
2 3 30 2 3
2 4 50 2 3 4
2 5 4 2 5
From To Cost Path
3 1 49 3 4 5 1
3 2 59 3 4 5 1 2
3 4 20 3 4
3 5 43 3 4 5
From To Cost Path
4 1 29 4 5 1
4 2 39 4 5 1 2
4 3 69 4 5 1 2 3
4 5 23 4 5
From To Cost Path
5 1 6 5 1
5 2 16 5 1 2
5 3 46 5 1 2 3
5 4 66 5 1 2 3 4
</pre>
==[[Floyd-Warshall algorithm]]==
[[File:FloydGraph.png|thumb|250px|Graph used in this and Dijkstra's algorithm]]
The [http://en.wikipedia.org/wiki/Floyd-Warshall_algorithm Floyd algorithm or Floyd-Warshall algorithm] finds the shortest path between all pairs of nodes in a weighted, directed graph. It is an example of dynamic programming.
Usage: fill in the number of nodes (n) and the edge distances or costs in sub Floyd or in sub FloydWithPaths.
Then run "Floyd" or "FloydWithPaths".
Floyd: this sub prints the lengths or costs of the shortest paths but not the paths themselves
FloydWithPaths: this sub prints the lengths and the nodes along the paths
<lang vb>
Option Compare Database
'Floyd globals
Const MaxGraph As Integer = 100 'max. number of vertices in graph
Const Infinity = 1E+308
Dim E(1 To MaxGraph, 1 To MaxGraph) As Double
Dim A(1 To MaxGraph, 1 To MaxGraph) As Double
Dim Nxt(1 To MaxGraph, 1 To MaxGraph) As Integer
Public Sub SolveFloyd(n)
'Floyd's algorithm: all-pairs shortest-paths cost
'returns the cost (distance) of the least-cost (shortest) path
'between all pairs in a labeled directed graph
'note: this sub returns only the costs, not the paths!
'
'inputs:
' n : number of vertices (maximum value is maxGraph)
' E(i,j) : cost (length,...) of edge from i to j or "Infinity" if no edge between i and j
'output:
' A(i,j): minimal cost for path from i to j
'constant:
' Infinity : very large number
For i = 1 To n
For j = 1 To n
If E(i, j) <> Infinity Then A(i, j) = E(i, j) Else A(i, j) = Infinity
Next j
A(i, i) = 0
Next i
For k = 1 To n
For i = 1 To n
For j = 1 To n
If A(i, k) + A(k, j) < A(i, j) Then A(i, j) = A(i, k) + A(k, j)
Next j
Next i
Next k
End Sub
Public Sub SolveFloydWithPaths(n)
'cf. SolveFloyd, but here we
'use matrix "Nxt" to store information about paths
For i = 1 To n
For j = 1 To n
If E(i, j) <> Infinity Then A(i, j) = E(i, j) Else A(i, j) = Infinity
Next j
A(i, i) = 0
Next i
For k = 1 To n
For i = 1 To n
For j = 1 To n
If A(i, k) + A(k, j) < A(i, j) Then
A(i, j) = A(i, k) + A(k, j)
Nxt(i, j) = k
End If
Next j
Next i
Next k
End Sub
Public Function GetPath(i, j) As String
'recursively reconstruct shortest path from i to j using A and Nxt
If A(i, j) = Infinity Then
GetPath = "No path!"
Else
tmp = Nxt(i, j)
If tmp = 0 Then
GetPath = " " 'there is an edge from i to j
Else
GetPath = GetPath(i, tmp) & Format$(tmp) & GetPath(tmp, j)
End If
End If
End Function
Public Sub Floyd()
'main function to apply Floyd's algorithm
'see description in wp:en:Floyd-Warshall algorithm
' define problem:
' number of vertices?
n = 5
' reset connection/cost per edge matrix
For i = 1 To n
For j = 1 To n
E(i, j) = Infinity
Next j
Next i
' fill in the edge costs
E(1, 2) = 10
E(1, 3) = 50
E(1, 4) = 65
E(2, 3) = 30
E(2, 5) = 4
E(3, 4) = 20
E(3, 5) = 44
E(4, 2) = 70
E(4, 5) = 23
E(5, 1) = 6
'Solve it
SolveFloyd n
'Print solution
'note: for large graphs the output may be too large for the Immediate panel
'in that case you could send the output to a text file
Debug.Print "From", "To", "Cost"
For i = 1 To n
For j = 1 To n
If i <> j Then Debug.Print i, j, IIf(A(i, j) = Infinity, "No path!", A(i, j))
Next j
Next i
End Sub
Public Sub FloydWithPaths()
'main function to solve Floyd's algorithm and return shortest path between
'any two vertices
' define problem:
' number of vertices?
n = 5
' reset connection/cost per edge matrix
For i = 1 To n
For j = 1 To n
E(i, j) = Infinity
Nxt(i, j) = 0
Next j
Next i
' fill in the edge costs
E(1, 2) = 10
E(1, 3) = 50
E(1, 4) = 65
E(2, 3) = 30
E(2, 5) = 4
E(3, 4) = 20
E(3, 5) = 44
E(4, 2) = 70
E(4, 5) = 23
E(5, 1) = 6
'Solve it
SolveFloydWithPaths n
'Print solution
'note: for large graphs the output may be too large for the Immediate panel
'in that case you could send the output to a text file
Debug.Print "From", "To", "Cost", "Via"
For i = 1 To n
For j = 1 To n
If i <> j Then Debug.Print i, j, IIf(A(i, j) = Infinity, "---", A(i, j)), GetPath(i, j)
Next j
Next i
End Sub
</lang>
Output:
<pre>Floyd
From To Cost
1 2 10
1 3 40
1 4 60
1 5 14
2 1 10
2 3 30
2 4 50
2 5 4
3 1 49
3 2 59
3 4 20
3 5 43
4 1 29
4 2 39
4 3 69
4 5 23
5 1 6
5 2 16
5 3 46
5 4 66
FloydWithPaths
From To Cost Via
1 2 10
1 3 40 2
1 4 60 2 3
1 5 14 2
2 1 10 5
2 3 30
2 4 50 3
2 5 4
3 1 49 4 5
3 2 59 4 5 1
3 4 20
3 5 43 4
4 1 29 5
4 2 39 5 1
4 3 69 5 1 2
4 5 23
5 1 6
5 2 16 1
5 3 46 1 2
5 4 66 1 2 3
</pre>
==[[KWIC index]]==
<lang vb>
'KWIC index
'assumptions:
' - all titles and catalog numbers can be held in an array in main memory
' - disregard punctuation in titles
' - the KWIC index itself may be too large for main memory - do not store it in memory
' - the KWIC index consists of one line per title/keyword combination and consists of:
' - the catalog number
' - the title with the keyword centered in a line of given length (e.g. 80 or 120)
' (constant-width font assumed)
' note: long titles may be truncated at the beginning or the end of the line
'globals
Const MAXKEYS = 20 'max. number of keywords in a title
Const STOPWORDS = "a an and by for is it of on or the to with " 'that last space is needed!
Dim title() As String 'list of titles to be included in KWIC index
Dim catno() As Integer 'list of catalog numbers
Dim ntitle As Integer 'number of titles
Dim index() As Integer 'holds title number and position of keyword in title
Dim nkeys As Long 'total number of keywords found
Sub ReadTitles()
' read or - in this case - set the titles and catalog numbers
ntitle = 10
ReDim title(1 To ntitle)
ReDim catno(1 To ntitle)
title(1) = "Microsoft Visio 2003 User's Guide"
title(2) = "Microsoft Office Excel 2003 Inside Out"
title(3) = "Mastering Excel 2003 Programming with VBA"
title(4) = "Excel 2003 Formulas"
title(5) = "Excel for Scientists and Engineers"
title(6) = "Excel 2003 VBA Programmer's Reference"
title(7) = "Automated Data Analysis Using Excel"
title(8) = "Beginning Excel: What-if Data Analysis Tools"
title(9) = "How to do Everything with Microsoft Office Excel 2003"
title(10) = "Data Analysis Using SQL and Excel"
catno(1) = 10
catno(2) = 13
catno(3) = 3435
catno(4) = 987
catno(5) = 1010
catno(6) = 1244
catno(7) = 709
catno(8) = 9088
catno(9) = 33
catno(10) = 7733
End Sub
Function IsStopword(aword) As Boolean
'search for aword in stopword list
'add an extra space to avoid ambiguity
IsStopword = InStr(STOPWORDS, LCase(aword) & " ") > 0
End Function
Sub ProcessTitles()
'find positions of keywords in titles, store in index array
'Note: we cannot use Split here because that function doesn't return
'the positions of the words it finds
nkeys = 0
For i = 1 To ntitle
atitle = title(i) & " " 'add extra space as sentinel
p1 = 1
Do While p1 <= Len(atitle)
'find next word:
'a) find next non-space
While Mid$(atitle, p1, 1) = " ": p1 = p1 + 1: Wend
'b) extend word
p2 = p1
While Mid$(atitle, p2, 1) <> " ": p2 = p2 + 1: Wend
aword = Mid$(atitle, p1, p2 - p1)
'for now we assume there is no punctuation, i.e. no words
'in parentheses, brackets or quotation marks
If Not IsStopword(aword) Then
'remember position of this keyword
'we probably should check for overflow (too many keywords) here!
nkeys = nkeys + 1
index(nkeys, 1) = i
index(nkeys, 2) = p1
End If
p1 = p2 + 1
Loop
Next i
End Sub
Function Shift(aString, pos)
'return shifted string (part beginning at position "pos" followed by part before it)
Shift = Mid$(aString, pos) & " " & Left$(aString, pos - 1)
End Function
Sub SortTitles()
' sort the index() array to represent shifted titles in alphabetical order
' more efficient sorting algorithms can be applied here...
switched = True
Do While switched
'scan array for two shifted strings in the wrong order and swap
'(swap the index entries, not the strings)
'use case-insensitive compare
switched = False
For i = 1 To nkeys - 1
string1 = LCase(Shift(title(index(i, 1)), index(i, 2)))
string2 = LCase(Shift(title(index(i + 1, 1)), index(i + 1, 2)))
If string2 < string1 Then 'swap
For j = 1 To 2
temp = index(i, j)
index(i, j) = index(i + 1, j)
index(i + 1, j) = temp
Next
switched = True
End If
Next i
Loop
End Sub
Sub PrintKWIC(linelength)
'print the KWIC index
spaces = Space(linelength / 2)
Debug.Print "Cat. number", "|"; Space((linelength - 10) / 2); "KWIC string"
Debug.Print String(linelength + 15, "-")
For i = 1 To nkeys
atitle = title(index(i, 1))
pos = index(i, 2)
'create shifted string so that keyword is centered in the line
part2 = Mid$(atitle, pos)
part1 = Right$(spaces & Left$(atitle, pos - 1), linelength / 2)
kwicstring = Right$(part1, linelength / 2) & Left$(part2, linelength / 2)
Debug.Print catno(index(i, 1)), "|"; kwicstring
Next
End Sub
Sub KWIC()
'main program for KWIC index
ReadTitles
'set array
ReDim index(ntitle * MAXKEYS, 2)
'index(.,1) is title nr.
'index(.,2) is keyword position in title
ProcessTitles
SortTitles
PrintKWIC 80 'argument is the length of the KWIC lines (excluding catalog numbers)
End Sub
</lang>
Output (note that some titles are truncated at the start or the end. An improvement could be to wrap these titles around if there is room on the other end):
<pre>
kwic
Cat. number | KWIC string
-----------------------------------------------------------------------------------------------
987 | Excel 2003 Formulas
33 | Everything with Microsoft Office Excel 2003
13 | Microsoft Office Excel 2003 Inside Out
3435 | Mastering Excel 2003 Programming with VBA
10 | Microsoft Visio 2003 User's Guide
1244 | Excel 2003 VBA Programmer's Reference
9088 | Beginning Excel: What-if Data Analysis Tools
709 | Automated Data Analysis Using Excel
7733 | Data Analysis Using SQL and Excel
709 | Automated Data Analysis Using Excel
9088 | Beginning Excel: What-if Data Analysis T
9088 | Beginning Excel: What-if Data Analysis Tools
709 | Automated Data Analysis Using Excel
7733 | Data Analysis Using SQL and Excel
33 | How to do Everything with Microsoft Office Exce
1010 | Excel for Scientists and Engineers
33 | How to do Everything with Microsoft Office Excel 2
987 | Excel 2003 Formulas
33 | to do Everything with Microsoft Office Excel 2003
13 | Microsoft Office Excel 2003 Inside Out
3435 | Mastering Excel 2003 Programming with VBA
1244 | Excel 2003 VBA Programmer's Reference
709 | Automated Data Analysis Using Excel
7733 | Data Analysis Using SQL and Excel
1010 | Excel for Scientists and Engineers
9088 | Beginning Excel: What-if Data Analysis Tools
987 | Excel 2003 Formulas
10 | Microsoft Visio 2003 User's Guide
33 | How to do Everything with Microsoft Offi
13 | Microsoft Office Excel 2003 Inside Out
3435 | Mastering Excel 2003 Programming with VB
33 | How to do Everything with Microsoft Office Excel 2003
13 | Microsoft Office Excel 2003 Inside Out
10 | Microsoft Visio 2003 User's Guide
33 | How to do Everything with Microsoft Office Excel 2003
13 | Microsoft Office Excel 2003 Inside Out
13 | Microsoft Office Excel 2003 Inside Out
1244 | Excel 2003 VBA Programmer's Reference
3435 | Mastering Excel 2003 Programming with VBA
1244 | Excel 2003 VBA Programmer's Reference
1010 | Excel for Scientists and Engineers
7733 | Data Analysis Using SQL and Excel
9088 | Beginning Excel: What-if Data Analysis Tools
10 | Microsoft Visio 2003 User's Guide
709 | Automated Data Analysis Using Excel
7733 | Data Analysis Using SQL and Excel
3435 | Mastering Excel 2003 Programming with VBA
1244 | Excel 2003 VBA Programmer's Reference
10 | Microsoft Visio 2003 User's Guide
9088 | Beginning Excel: What-if Data Analysis Tools
</pre>
==[[Hailstone sequence]]==
<lang>
Public Function Hailstone(aNumber As Long, Optional Printit As Boolean = False) As Long
'return length of Hailstone sequence for aNumber
'if optional argument Printit is true, print the sequence in the Immediate window
Dim nSteps As Long
Const NumbersPerLine = 10 'when printing, start a new line after this much numbers
nSteps = 1
If Printit Then Debug.Print aNumber,
While aNumber <> 1
If aNumber Mod 2 = 0 Then aNumber = aNumber / 2 Else aNumber = 3 * aNumber + 1
nSteps = nSteps + 1
If Printit Then Debug.Print aNumber,
If Printit And (nSteps Mod NumbersPerLine = 0) Then Debug.Print
Wend
If Printit Then Debug.Print "(Length:"; nSteps; ")"
Hailstone = nSteps
End Function
Public Sub HailstoneTest()
Dim theNumber As Long
Dim theSequenceLength As Long
Dim SeqLength As Long
Dim i as Long
'find and print the Hailstone sequence for 27 (note: the whole sequence, not just the first four and last four items!)
Debug.Print "Hailstone sequence for 27:"
theNumber = Hailstone(27, True)
'find the longest Hailstone sequence for numbers less than 100000.
theSequenceLength = 0
For i = 2 To 99999
SeqLength = Hailstone(i)
If SeqLength > theSequenceLength Then
theNumber = i
theSequenceLength = SeqLength
End If
Next i
Debug.Print theNumber; "has the longest sequence ("; theSequenceLength; ")."
End Sub
</lang>
Output:
<pre>
HailstoneTest
Hailstone sequence for 27:
27 82 41 124 62 31 94 47 142 71
214 107 322 161 484 242 121 364 182 91
274 137 412 206 103 310 155 466 233 700
350 175 526 263 790 395 1186 593 1780 890
445 1336 668 334 167 502 251 754 377 1132
566 283 850 425 1276 638 319 958 479 1438
719 2158 1079 3238 1619 4858 2429 7288 3644 1822
911 2734 1367 4102 2051 6154 3077 9232 4616 2308
1154 577 1732 866 433 1300 650 325 976 488
244 122 61 184 92 46 23 70 35 106
53 160 80 40 20 10 5 16 8 4
2 1 (Length: 112 )
77031 has the longest sequence ( 351 ).
</pre>
|