Jump to content

Matrix multiplication: Difference between revisions

m
Line 102:
 
Alternatively, for multicore CPUs, the following recursive algorithm can be used:
<u>int</u> default upb := 1;
SEMA spare cpus = LEVEL 2; # increase to match the number of CPU spare to allocate #
<u>mode</u> <u>field</u> = <u>long</u> <u>real</u>;
<u>mode</u> <u>vec</u> = [default upb]<u>field</u>;
<u>mode</u> <u>mat</u> = [default upb, default upb]<u>field</u>;
# crude exception handling #
# define an operator to find the midpoint in an array #
<u>proc</u> <u>void</u> raise index error := <u>void</u>: <u>goto</u> exception index error;
OP MID = (VEC v)INT: ( LWB v + UPB v ) OVER 2,
MID = (INT upb, MAT m)INT: ( upb LWB m + upb UPB m ) OVER 2;
PRIO MID = 8; # Operator priority - same as LWB & UPB #
<u>sema</u> idle cpus = <u>level</u> ( 8 - 1 ); # number of 8 CPU cores minus parent #
OP * = (VEC a, b)FIELD: ( # dot product #
IF LWB a/=LWB b OR UPB a/=UPB b THEN raise index error FI;
# define an operator to slice array into quarters #
IF LWB a = UPB a THEN
<u>op</u> <u>top</u> = (<u>mat</u> m)<u>int</u>: ( ⌊m + ⌈m ) %2,
a[UPB a] * b[UPB b]
<u>bot</u> = (<u>mat</u> m)<u>int</u>: <u>top</u> m + 1,
ELSE
<u>left</u> = (<u>mat</u> m)<u>int</u>: ( 2 ⌊m + 2 ⌈m ) %2,
FIELD begin, end;
<u>right</u> = (<u>mat</u> m)<u>int</u>: <u>left</u> m + 1,
[]PROC VOID thread list=(
<u>left</u> = (<u>vec</u> v)<u>int</u>: ( ⌊v + ⌈v ) %2,
VOID: begin:=a[:MID a] * b[:MID b],
<u>right</u> = (<u>vec</u> v)<u>int</u>: <u>left</u> v + 1;
VOID: end :=a[MID a+1:] * b[MID b+1:]
<u>prio</u> <u>top</u> = 8, <u>bot</u> = 8, <u>left</u> = 8, <u>right</u> = 8; # Operator priority - same as LWB & UPB #
<u>op</u> × = (<u>vec</u> a, b)<u>field</u>: ( # dot product #
(⌊a≠⌊b ∨⌈a≠⌈b |raise index error );
(⌊a = ⌈a |
a[⌈a] × b[⌈b]
|
<u>field</u> begin, end;
[]<u>proc</u> <u>void</u> thread schedule=(
<u>void</u>: begin:=a[:<u>left</u> a] × b[:<u>left</u> b],
<u>void</u>: end :=a[<u>right</u> a:] × b[<u>right</u> b:]
);
(<u>level</u> idle cpus = 0 |# use current CPU #
<u>for</u> thread <u>to</u> ⌈thread schedule <u>do</u> thread schedule[thread] <u>od</u>
|
<u>par</u> ( # run vector in parallel #
thread schedule[1], # assume parent CPU #
( ↓idle cpus; thread schedule[2]; ↑idle cpus)
)
);
IF LEVEL spare cpus <= 1 THEN ( # use existing CPU #
FOR thread TO UPB thread list DO thread list[thread] OD
) ELSE PAR ( # run in parallel #
( DOWN spare cpus; thread list[1]; UP spare cpus),
( DOWN spare cpus; thread list[2]; UP spare cpus)
) FI;
begin+end
FI)
);
OP<u>op</u> *× = (MAT<u>mat</u> a, b)MAT<u>mat</u>:( # matrix multiply #
[UPB(⌊a a,= 2⌈a UPB∧2 b]⌊b = 2 FIELD⌈b out;|
IF LWB a[⌊a, =] UPB× a ANDb[, 2 LWB⌈b] b# =dot 2 UPB bproduct THEN#
|
out := a[LWB a, ] * b[, 2 UPB b]
[⌈a, 2 ⌈b] <u>field</u> out;
ELSE
IF (2 LWB a/=LWB b OR 2 UPB a/=UPB⌊a≠⌊b b∨2 THEN⌈a≠⌈b |raise index error FI);
[]<u>struct</u>(<u>bool</u> required, <u>proc</u> <u>void</u> thread) schedule = (
[]PROC VOID thread list=(
( <u>true</u>, # calculate top left corner #
VOID: out[ :1 MID a, :2 MID b] := a[ :1 MID a, ] * b[, :2 MID b],
VOID: ( LWB a <u>void</= UPB a |u>: out[1 MID:<u>top</u> a+1:, :2 MID<u>left</u> b] := a[1 MID:<u>top</u> a+1:, ] *× b[, :2 MID<u>left</u> b]),
( ⌊a ≠ ⌈a, # calculate bottom left corner #
VOID: ( 2 LWB b /= 2 UPB b | out[ :1 MID a, 2 MID b+1:] := a[ :1 MID a, ] * b[, 2 MID b+1:]),
VOID: ( 2 LWB b <u>void</= 2 UPB b AND LWB a /= UPB a |u>: out[1 MID<u>bot</u> a+1:, 2 MID:<u>left</u> b+1:] := a[1 MID<u>bot</u> a+1:, ] *× b[, 2 MID:<u>left</u> b+1:]),
( 2 ⌊b ≠ 2 ⌈b, # calculate top right corner #
<u>void</u>: out[:<u>top</u> a, <u>right</u> b:] := a[:<u>top</u> a, ] × b[, <u>right</u> b:]),
( 2 ⌊b ≠ 2 ⌈b ∧⌊a ≠ ⌈a, # calculate bottom right corner #
<u>void</u>: out[<u>bot</u> a:, <u>right</u> b:] := a[<u>bot</u> a:, ] × b[, <u>right</u> b:])
);
IF(<u>level</u> LEVEL spareidle cpus <= 1 THEN (0 |# use existingcurrent CPU #
FOR<u>for</u> thread TO<u>to</u> UPB⌈schedule thread<u>do</u> list(required DO→schedule[thread] | thread list→schedule[thread] OD) <u>od</u>
|
) ELSE PAR ( # run in parallel #
<u>par</u> ( # run vector in parallel #
( DOWN spare cpus; thread list[1]; UP spare cpus),
( DOWN spare cpus; thread list→schedule[21];, UP# sparethread cpus)is always required, and assume parent CPU #
( DOWNrequired spare→schedule[4] | ↓idle cpus; thread list→schedule[34]; UP spare↑idle cpus),
# try to do opposite corners of matrix in parallel if CPUs are limited #
( DOWN spare cpus; thread list[4]; UP spare cpus)
( required →schedule[3] | ↓idle cpus; thread →schedule[3]; ↑idle cpus),
) FI
( required →schedule[2] | ↓idle cpus; thread →schedule[2]; ↑idle cpus)
FI;
out )
);
out
);
<u>format</u> real fmt = $g(-6,2)$; # width of 6, with no '+' sign, 2 decimals #
<u>proc</u> real matrix printf= (<u>format</u> real fmt, <u>mat</u> m)<u>void</u>:(
<u>format</u> vec fmt = $"("n(2 ⌈m-1)(f(real fmt)",")f(real fmt)")"$;
<u>format</u> matrix fmt = $x"("n(⌈m-1)(f(vec fmt)","lxx)f(vec fmt)");"$;
# finally print the result #
printf((matrix fmt,m))
);
# Some sample matrices to test #
<u>mat</u> a=((1, 1, 1, 1), # matrix A #
(2, 4, 8, 16),
(3, 9, 27, 81),
(4, 16, 64, 256));
<u>mat</u> b=(( 4 , -3 , 4/3, -1/4 ), # matrix B #
(-13/3, 19/4, -7/3, 11/24),
( 3/2, -2 , 7/6, -1/4 ),
( -1/6, 1/4, -1/6, 1/24));
<u>mat</u> c = a × b; # actual multiplication example of A x B #
print((" A x B =",new line));
real matrix printf(real fmt, c) ∎
exception index error:
putf(stand error, $x"Exception: index error."l$)
Note: In the ALGOL 68 Revised Report keywords, type and operators were permitted in a different typeface. Hence the underling reserved words above. Effectively this puts these tokens into a different name space.
 
=={{header|BASIC}}==
Cookies help us deliver our services. By using our services, you agree to our use of cookies.