Montgomery reduction

From Rosetta Code
This task has been flagged for clarification. Code on this page in its current state may be flagged incorrect once this task has been clarified. See this page's Talk page for discussion.
Montgomery reduction is a draft programming task. It is not yet considered ready to be promoted as a complete task, for reasons that should be found in its talk page.

Implement the Montgomery reduction algorithm, as explained in "Handbook of Applied Cryptography, Section 14.3.2, page 600. Montgomery reduction calculates , without having to divide by .

  • Let be a positive integer, and and integers such that , , and .
  • is usually chosen as , where = base (radix) in which the numbers in the calculation as represented in (so in ‘normal’ paper arithmetic, for computer implementations) and = number of digits in base
  • The numbers ( digits long), ( digits long), , , are known entities, a number (often represented as m_dash in code) = is precomputed.

See the Handbook of Applied Cryptography for brief introduction to theory and numerical example in radix 10. Individual chapters of the book can be viewed online as provided by the authors. The said algorithm can be found at [1] at page 600 (page 11 of pdf file)

Algorithm:

A ← T (temporary variable)
For i from 0 to (n-1) do the following:
   ui ← ai* m' mod b      // ai is the ith digit of A, ui is a single digit number in radix b
   A ← A + ui*m*bi
A ← A/bn
if A >= m,
   A ← A - m
Return (A)

11l

Translation of: Python
T Montgomery
   BigInt m
   Int n
   BigInt rrm

   F (m)
      .m = m
      .n = bits:length(m)
      .rrm = (BigInt(2) ^ (.n * 2)) % m

   F reduce(t)
      V a = t
      L 0 .< .n
         I (a % 2) == 1
            a += .m
         a I/= 2
      I a >= .m
         a -= .m
      R a

V m = BigInt(‘750791094644726559640638407699’)
V x1 = BigInt(‘540019781128412936473322405310’)
V x2 = BigInt(‘515692107665463680305819378593’)

V mont = Montgomery(m)
V t1 = x1 * mont.rrm
V t2 = x2 * mont.rrm

V r1 = mont.reduce(t1)
V r2 = mont.reduce(t2)
V r = BigInt(2) ^ mont.n

print(‘b : 2’)
print(‘n : ’mont.n)
print(‘r : ’r)
print(‘m : ’mont.m)
print(‘t1: ’t1)
print(‘t2: ’t2)
print(‘r1: ’r1)
print(‘r2: ’r2)
print()
print(‘Original x1       : ’x1)
print(‘Recovered from r1 : ’mont.reduce(r1))
print(‘Original x2       : ’x2)
print(‘Recovered from r2 : ’mont.reduce(r2))

print("\nMontgomery computation of x1 ^ x2 mod m:")
V prod = mont.reduce(mont.rrm)
V base = mont.reduce(x1 * mont.rrm)
V ex = x2
L bits:length(ex) > 0
   I (ex % 2) == 1
      prod = mont.reduce(prod * base)
   ex I/= 2
   base = mont.reduce(base * base)
print(mont.reduce(prod))
print("\nAlternate computation of x1 ^ x2 mod m:")
print(pow(x1, x2, m))
Output:
b : 2
n : 100
r : 1267650600228229401496703205376
m : 750791094644726559640638407699
t1: 323165824550862327179367294465482435542970161392400401329100
t2: 308607334419945011411837686695175944083084270671482464168730
r1: 440160025148131680164261562101
r2: 435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m:
151232511393500655853002423778

Alternate computation of x1 ^ x2 mod m:
151232511393500655853002423778

C

#include <stdio.h>
#include <stdlib.h>

typedef unsigned long long ulong;

const int BASE = 2;

struct Montgomery {
    ulong m;
    ulong rrm;
    int n;
};

int bitLength(ulong v) {
    int result = 0;
    while (v > 0) {
        v >>= 1;
        result++;
    }
    return result;
}

ulong modPow(ulong b, ulong e, ulong n) {
    ulong result = 1;

    if (n == 1) {
        return 0;
    }

    b = b % n;
    while (e > 0) {
        if (e % 2 == 1) {
            result = (result * b) % n;
        }
        e >>= 1;
        b = (b * b) % n;
    }

    return result;
}

struct Montgomery makeMontgomery(ulong m) {
    struct Montgomery result;

    if (m == 0 || (m & 1) == 0) {
        fprintf(stderr, "m must be greater than zero and odd");
        exit(1);
    }

    result.m = m;
    result.n = bitLength(m);
    result.rrm = (1ULL << (result.n * 2)) % m;

    return result;
}

ulong reduce(struct Montgomery mont, ulong t) {
    ulong a = t;
    int i;

    for (i = 0; i < mont.n; i++) {
        if ((a & 1) == 1) {
            a += mont.m;
        }
        a = a >> 1;
    }

    if (a >= mont.m) {
        a -= mont.m;
    }
    return a;
}

void check(ulong x1, ulong x2, ulong m) {
    struct Montgomery mont = makeMontgomery(m);
    ulong t1 = x1 * mont.rrm;
    ulong t2 = x2 * mont.rrm;

    ulong r1 = reduce(mont, t1);
    ulong r2 = reduce(mont, t2);
    ulong r = 1ULL << mont.n;

    printf("b :  %d\n", BASE);
    printf("n :  %d\n", mont.n);
    printf("r :  %llu\n", r);
    printf("m :  %llu\n", mont.m);
    printf("t1:  %llu\n", t1);
    printf("t2:  %llu\n", t2);
    printf("r1:  %llu\n", r1);
    printf("r2:  %llu\n", r2);
    printf("\n");
    printf("Original x1       : %llu\n", x1);
    printf("Recovered from r1 : %llu\n", reduce(mont, r1));
    printf("Original x2       : %llu\n", x2);
    printf("Recovered from r2 : %llu\n", reduce(mont, r2));

    printf("\nMontgomery computation of x1 ^ x2 mod m :\n");
    ulong prod = reduce(mont, mont.rrm);
    ulong base = reduce(mont, x1 * mont.rrm);
    ulong exp = x2;
    while (bitLength(exp) > 0) {
        if ((exp & 1) == 1) {
            prod = reduce(mont, prod * base);
        }
        exp >>= 1;
        base = reduce(mont, base * base);
    }
    printf("%llu\n", reduce(mont, prod));
    printf("\nAlternate computation of x1 ^ x2 mod m :\n");
    printf("%llu\n", modPow(x1, x2, m));
}

int main() {
    check(41, 11, 1549);

    return 0;
}
Output:
b :  2
n :  11
r :  2048
m :  1549
t1:  47601
t2:  12771
r1:  322
r2:  842

Original x1       : 41
Recovered from r1 : 41
Original x2       : 11
Recovered from r2 : 11

Montgomery computation of x1 ^ x2 mod m :
678

Alternate computation of x1 ^ x2 mod m :
678

C#

Translation of: D
using System;
using System.Numerics;

namespace MontgomeryReduction {
    public static class Helper {
        public static int BitLength(this BigInteger v) {
            if (v < 0) {
                v *= -1;
            }

            int result = 0;
            while (v > 0) {
                v >>= 1;
                result++;
            }

            return result;
        }
    }

    struct Montgomery {
        public static readonly int BASE = 2;

        public BigInteger m;
        public BigInteger rrm;
        public int n;

        public Montgomery(BigInteger m) {
            if (m < 0 || m.IsEven) throw new ArgumentException();

            this.m = m;
            n = m.BitLength();
            rrm = (BigInteger.One << (n * 2)) % m;
        }

        public BigInteger Reduce(BigInteger t) {
            var a = t;

            for (int i = 0; i < n; i++) {
                if (!a.IsEven) a += m;
                a = a >> 1;
            }
            if (a >= m) a -= m;
            return a;
        }
    }

    class Program {
        static void Main(string[] args) {
            var m = BigInteger.Parse("750791094644726559640638407699");
            var x1 = BigInteger.Parse("540019781128412936473322405310");
            var x2 = BigInteger.Parse("515692107665463680305819378593");

            var mont = new Montgomery(m);
            var t1 = x1 * mont.rrm;
            var t2 = x2 * mont.rrm;

            var r1 = mont.Reduce(t1);
            var r2 = mont.Reduce(t2);
            var r = BigInteger.One << mont.n;

            Console.WriteLine("b :  {0}", Montgomery.BASE);
            Console.WriteLine("n :  {0}", mont.n);
            Console.WriteLine("r :  {0}", r);
            Console.WriteLine("m :  {0}", mont.m);
            Console.WriteLine("t1:  {0}", t1);
            Console.WriteLine("t2:  {0}", t2);
            Console.WriteLine("r1:  {0}", r1);
            Console.WriteLine("r2:  {0}", r2);
            Console.WriteLine();
            Console.WriteLine("Original x1       : {0}", x1);
            Console.WriteLine("Recovered from r1 : {0}", mont.Reduce(r1));
            Console.WriteLine("Original x2       : {0}", x2);
            Console.WriteLine("Recovered from r2 : {0}", mont.Reduce(r2));

            Console.WriteLine();
            Console.WriteLine("Montgomery computation of x1 ^ x2 mod m :");
            var prod = mont.Reduce(mont.rrm);
            var @base = mont.Reduce(x1 * mont.rrm);
            var exp = x2;
            while (exp.BitLength() > 0) {
                if (!exp.IsEven) prod = mont.Reduce(prod * @base);
                exp >>= 1;
                @base = mont.Reduce(@base * @base);
            }
            Console.WriteLine(mont.Reduce(prod));
            Console.WriteLine();
            Console.WriteLine("Alternate computation of x1 ^ x2 mod m :");
            Console.WriteLine(BigInteger.ModPow(x1, x2, m));
        }
    }
}
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :
151232511393500655853002423778

Alternate computation of x1 ^ x2 mod m :
151232511393500655853002423778

C++

#include<iostream>
#include<conio.h>
using namespace std;
typedef unsigned long ulong;

int ith_digit_finder(long long n, long b, long i){
 /**
     n = number whose digits we need to extract
     b = radix in which the number if represented
     i = the ith bit (ie, index of the bit that needs to be extracted)
 **/
    while(i>0){
        n/=b;
        i--;
    }
    return (n%b);
}

long eeuclid(long m, long b, long *inverse){        /// eeuclid( modulus, num whose inv is to be found, variable to put inverse )
    /// Algorithm used from Stallings book
    long A1 = 1, A2 = 0, A3 = m,
         B1 = 0, B2 = 1, B3 = b,
         T1, T2, T3, Q;

         cout<<endl<<"eeuclid() started"<<endl;

        while(1){
            if(B3 == 0){
                *inverse = 0;
                return A3;      // A3 = gcd(m,b)
            }

            if(B3 == 1){
                *inverse = B2; // B2 = b^-1 mod m
                return B3;      // A3 = gcd(m,b)
            }

            Q = A3/B3;

            T1 = A1 - Q*B1;
            T2 = A2 - Q*B2;
            T3 = A3 - Q*B3;

            A1 = B1; A2 = B2; A3 = B3;
            B1 = T1; B2 = T2; B3 = T3;

       }
    cout<<endl<<"ending eeuclid() "<<endl;
}

long long mon_red(long m, long m_dash, long T, int n, long b = 2){
/**
    m = modulus
    m_dash = m' = -m^-1 mod b
    T = number whose modular reduction is needed, the o/p of the function is TR^-1 mod m
    n = number of bits in m (2n is the number of bits in T)
    b = radix used (for practical implementations, is equal to 2, which is the default value)
**/
    long long A,ui, temp, Ai;       // Ai is the ith bit of A, need not be llong long probably
    if( m_dash < 0 ) m_dash = m_dash + b;
    A = T;
    for(int i = 0; i<n; i++){
    ///    ui = ( (A%b)*m_dash ) % b;        // step 2.1; A%b gives ai (MISTAKE -- A%b will always give the last digit of A if A is represented in base b); hence we need the function ith_digit_finder()
        Ai = ith_digit_finder(A, b, i);
        ui = ( ( Ai % b) * m_dash ) % b;
        temp  = ui*m*power(b, i);
        A = A + temp;
    }
    A = A/power(b, n);
    if(A >= m) A = A - m;
    return A;
}

int main(){
    long a, b, c, d=0, e, inverse = 0;
    cout<<"m >> ";
    cin >> a;
    cout<<"T >> ";
    cin>>b;
    cout<<"Radix b >> ";
    cin>>c;
    eeuclid(c, a, &d);      // eeuclid( modulus, num whose inverse is to be found, address of variable which is to store inverse)
    e = mon_red(a, -d, b, length_finder(a, c), c);
    cout<<"Montgomery domain representation = "<<e;
    return 0;
}

D

Translation of: Kotlin
import std.bigint;
import std.stdio;

int bitLength(BigInt v) {
    if (v < 0) {
        v *= -1;
    }

    int result = 0;
    while (v > 0) {
        v >>= 1;
        result++;
    }

    return result;
}

/// https://en.wikipedia.org/wiki/Modular_exponentiation#Right-to-left_binary_method
BigInt modPow(BigInt b, BigInt e, BigInt n) {
    if (n == 1) return BigInt(0);
    BigInt result = 1;
    b = b % n;
    while (e > 0) {
        if (e % 2 == 1) {
            result = (result * b) % n;
        }
        e >>= 1;
        b = (b*b) % n;
    }
    return result;
}

struct Montgomery {
    BigInt m;
    int n;
    BigInt rrm;

    this(BigInt m) in {
        assert(m > 0 && (m & 1) != 0); // must be positive and odd
    } body {
        this.m = m;
        n = m.bitLength();
        rrm = (BigInt(1) << (n * 2)) % m;
    }

    BigInt reduce(BigInt t) {
        auto a = t;

        foreach(i; 0..n) {
            if ((a & 1) == 1) a += m;
            a = a >> 1;
        }
        if (a >= m) a -= m;
        return a;
    }

    enum BASE = 2;
}

void main() {
    auto m = BigInt("750791094644726559640638407699");
    auto x1 = BigInt("540019781128412936473322405310");
    auto x2 = BigInt("515692107665463680305819378593");

    auto mont = Montgomery(m);
    auto t1 = x1 * mont.rrm;
    auto t2 = x2 * mont.rrm;

    auto r1 = mont.reduce(t1);
    auto r2 = mont.reduce(t2);
    auto r = BigInt(1) << mont.n;

    writeln("b :  ", Montgomery.BASE);
    writeln("n :  ", mont.n);
    writeln("r :  ", r);
    writeln("m :  ", mont.m);
    writeln("t1:  ", t1);
    writeln("t2:  ", t2);
    writeln("r1:  ", r1);
    writeln("r2:  ", r2);
    writeln();
    writeln("Original x1       : ", x1);
    writeln("Recovered from r1 : ", mont.reduce(r1));
    writeln("Original x2       : ", x2);
    writeln("Recovered from r2 : ", mont.reduce(r2));

    writeln("\nMontgomery computation of x1 ^ x2 mod m :");
    auto prod = mont.reduce(mont.rrm);
    auto base = mont.reduce(x1 * mont.rrm);
    auto exp = x2;
    while (exp.bitLength() > 0) {
        if ((exp & 1) == 1) prod = mont.reduce(prod * base);
        exp >>= 1;
        base = mont.reduce(base * base);
    }
    writeln(mont.reduce(prod));
    writeln("\nAlternate computation of x1 ^ x2 mod m :");
    writeln(x1.modPow(x2, m));
}
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :
151232511393500655853002423778

Alternate computation of x1 ^ x2 mod m :
151232511393500655853002423778

Factor

Translation of: Sidef
Works with: Factor version 0.99 2020-08-14
USING: io kernel locals math math.bitwise math.functions
prettyprint ;

: montgomery-reduce ( m a -- n )
    over bit-length [ dup odd? [ over + ] when 2/ ] times
    swap mod ;

CONSTANT: m 750791094644726559640638407699
CONSTANT: t1 323165824550862327179367294465482435542970161392400401329100

CONSTANT: r1 440160025148131680164261562101
CONSTANT: r2 435362628198191204145287283255

CONSTANT: x1 540019781128412936473322405310
CONSTANT: x2 515692107665463680305819378593

"Original x1:       " write x1 .
"Recovered from r1: " write m r1 montgomery-reduce .
"Original x2:       " write x2 .
"Recovered from r2: " write m r2 montgomery-reduce .

nl "Montgomery computation of x1^x2 mod m:    " write

[let
    m t1 x1 / montgomery-reduce :> prod!
    m t1 montgomery-reduce :> base!
    x2 :> exponent!

    [ exponent zero? ] [
        exponent odd?
        [ m prod base * montgomery-reduce prod! ] when
        m base sq montgomery-reduce base! exponent 2/ exponent!
    ] until

    m prod montgomery-reduce .
    "Library-based computation of x1^x2 mod m: " write
    x1 x2 m ^mod .
]
Output:
Original x1:       540019781128412936473322405310
Recovered from r1: 540019781128412936473322405310
Original x2:       515692107665463680305819378593
Recovered from r2: 515692107665463680305819378593

Montgomery computation of x1^x2 mod m:    151232511393500655853002423778
Library-based computation of x1^x2 mod m: 151232511393500655853002423778

Go

package main

import (
    "fmt"
    "math/big"
    "math/rand"
    "time"
)

// mont holds numbers useful for working in Mongomery representation.
type mont struct {
    n  uint     // m.BitLen()
    m  *big.Int // modulus, must be odd
    r2 *big.Int // (1<<2n) mod m
}

// constructor
func newMont(m *big.Int) *mont {
    if m.Bit(0) != 1 {
        return nil
    }
    n := uint(m.BitLen())
    x := big.NewInt(1)
    x.Sub(x.Lsh(x, n), m)
    return &mont{n, new(big.Int).Set(m), x.Mod(x.Mul(x, x), m)}
}

// Montgomery reduction algorithm
func (m mont) reduce(t *big.Int) *big.Int {
    a := new(big.Int).Set(t)
    for i := uint(0); i < m.n; i++ {
        if a.Bit(0) == 1 {
            a.Add(a, m.m)
        }
        a.Rsh(a, 1)
    }
    if a.Cmp(m.m) >= 0 {
        a.Sub(a, m.m)
    }
    return a
}

// example use:
func main() {
    const n = 100 // bit length for numbers in example

    // generate random n-bit odd number for modulus m
    rnd := rand.New(rand.NewSource(time.Now().UnixNano()))
    one := big.NewInt(1)
    r1 := new(big.Int).Lsh(one, n-1)
    r2 := new(big.Int).Lsh(one, n-2)
    m := new(big.Int)
    m.Or(r1, m.Or(m.Lsh(m.Rand(rnd, r2), 1), one))

    // make Montgomery reduction object around m
    mr := newMont(m)

    // generate a couple more numbers in the range 0..m.
    // these are numbers we will do some computations on, mod m.
    x1 := new(big.Int).Rand(rnd, m)
    x2 := new(big.Int).Rand(rnd, m)

    // t1, t2 are examples of T, from the task description.
    // Generated this way, they will be in the range 0..m^2, and so < mR.
    t1 := new(big.Int).Mul(x1, mr.r2)
    t2 := new(big.Int).Mul(x2, mr.r2)

    // reduce.  r1 and r2 are now montgomery representations of x1 and x2.
    r1 = mr.reduce(t1)
    r2 = mr.reduce(t2)

    // this is the end of what is described in the task so far.
    fmt.Println("b:  2")
    fmt.Println("n: ", mr.n)
    fmt.Println("r: ", new(big.Int).Lsh(one, mr.n))
    fmt.Println("m: ", mr.m)
    fmt.Println("t1:", t1)
    fmt.Println("t2:", t2)
    fmt.Println("r1:", r1)
    fmt.Println("r2:", r2)

    // but now demonstrate that it works:
    fmt.Println()
    fmt.Println("Original x1:       ", x1)
    fmt.Println("Recovererd from r1:", mr.reduce(r1))
    fmt.Println("Original x2:       ", x2)
    fmt.Println("Recovererd from r2:", mr.reduce(r2))

    // and demonstrate a use:
    fmt.Println("\nMontgomery computation of x1 ^ x2 mod m:")
    // this is the modular exponentiation algorithm, except we call
    // mont.reduce instead of using a mod function.
    prod := mr.reduce(mr.r2)             // 1
    base := mr.reduce(t1.Mul(x1, mr.r2)) // x1^1
    exp := new(big.Int).Set(x2)          // not reduced
    for exp.BitLen() > 0 {
        if exp.Bit(0) == 1 {
            prod = mr.reduce(prod.Mul(prod, base))
        }
        exp.Rsh(exp, 1)
        base = mr.reduce(base.Mul(base, base))
    }
    fmt.Println(mr.reduce(prod))

    // show library-based equivalent computation as a check
    fmt.Println("\nLibrary-based computation of x1 ^ x2 mod m:")
    fmt.Println(new(big.Int).Exp(x1, x2, m))
}
Output:
b:  2
n:  100
r:  1267650600228229401496703205376
m:  750791094644726559640638407699
t1: 323165824550862327179367294465482435542970161392400401329100
t2: 308607334419945011411837686695175944083084270671482464168730
r1: 440160025148131680164261562101
r2: 435362628198191204145287283255

Original x1:        540019781128412936473322405310
Recovererd from r1: 540019781128412936473322405310
Original x2:        515692107665463680305819378593
Recovererd from r2: 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m:
151232511393500655853002423778

Library based computation of x1 ^ x2 mod m:
151232511393500655853002423778

Java

Translation of: Kotlin
import java.math.BigInteger;

public class MontgomeryReduction {
    private static final BigInteger ZERO = BigInteger.ZERO;
    private static final BigInteger ONE = BigInteger.ONE;
    private static final BigInteger TWO = BigInteger.valueOf(2);

    public static class Montgomery {
        public static final int BASE = 2;

        BigInteger m;
        BigInteger rrm;
        int n;

        public Montgomery(BigInteger m) {
            if (m.compareTo(BigInteger.ZERO) <= 0 || !m.testBit(0)) {
                throw new IllegalArgumentException();
            }
            this.m = m;
            this.n = m.bitLength();
            this.rrm = ONE.shiftLeft(n * 2).mod(m);
        }

        public BigInteger reduce(BigInteger t) {
            BigInteger a = t;
            for (int i = 0; i < n; i++) {
                if (a.testBit(0)) a = a.add(this.m);
                a = a.shiftRight(1);
            }
            if (a.compareTo(m) >= 0) a = a.subtract(this.m);
            return a;
        }
    }

    public static void main(String[] args) {
        BigInteger m  = new BigInteger("750791094644726559640638407699");
        BigInteger x1 = new BigInteger("540019781128412936473322405310");
        BigInteger x2 = new BigInteger("515692107665463680305819378593");

        Montgomery mont = new Montgomery(m);
        BigInteger t1 = x1.multiply(mont.rrm);
        BigInteger t2 = x2.multiply(mont.rrm);

        BigInteger r1 = mont.reduce(t1);
        BigInteger r2 = mont.reduce(t2);
        BigInteger r = ONE.shiftLeft(mont.n);

        System.out.printf("b :  %s\n", Montgomery.BASE);
        System.out.printf("n :  %s\n", mont.n);
        System.out.printf("r :  %s\n", r);
        System.out.printf("m :  %s\n", mont.m);
        System.out.printf("t1:  %s\n", t1);
        System.out.printf("t2:  %s\n", t2);
        System.out.printf("r1:  %s\n", r1);
        System.out.printf("r2:  %s\n", r2);
        System.out.println();
        System.out.printf("Original x1       :  %s\n", x1);
        System.out.printf("Recovered from r1 :  %s\n", mont.reduce(r1));
        System.out.printf("Original x2       :  %s\n", x2);
        System.out.printf("Recovered from r2 :  %s\n", mont.reduce(r2));

        System.out.println();
        System.out.println("Montgomery computation of x1 ^ x2 mod m :");
        BigInteger prod = mont.reduce(mont.rrm);
        BigInteger base = mont.reduce(x1.multiply(mont.rrm));
        BigInteger exp = x2;
        while (exp.bitLength()>0) {
            if (exp.testBit(0)) prod=mont.reduce(prod.multiply(base));
            exp = exp.shiftRight(1);
            base = mont.reduce(base.multiply(base));
        }
        System.out.println(mont.reduce(prod));

        System.out.println();
        System.out.println("Library-based computation of x1 ^ x2 mod m :");
        System.out.println(x1.modPow(x2, m));
    }
}
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       :  540019781128412936473322405310
Recovered from r1 :  540019781128412936473322405310
Original x2       :  515692107665463680305819378593
Recovered from r2 :  515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :
151232511393500655853002423778

Library-based computation of x1 ^ x2 mod m :
151232511393500655853002423778

Julia

Translation of: Python
""" base 2 type Montgomery numbers """
struct Montgomery2
    m::BigInt
    n::Int64
    rrm::BigInt
end

function Montgomery2(x::BigInt)
    bitlen = length(string(x, base=2))
    r = (x == 0) ? 0 : (BigInt(1) << (bitlen * 2)) % x
    Montgomery2(x, bitlen, r)
end
Montgomery2(n) = Montgomery2(BigInt(n))

function reduce(mm::Montgomery2, t)
    a = BigInt(t)
    for i in 1:mm.n
        if isodd(a)
            a += mm.m
        end
        a >>= 1
    end
    return a >= mm.m ? a - mm.m : a
end

BASE(::Montgomery2) = 2

const mmm = Montgomery2(20)

function testmontgomery2()
    m = big"750791094644726559640638407699"
    x1 = big"540019781128412936473322405310"
    x2 = big"515692107665463680305819378593"

    mont = Montgomery2(m)
    t1 = x1 * mont.rrm
    t2 = x2 * mont.rrm
    r1 = reduce(mont, t1)
    r2 = reduce(mont, t2)
    r = 1 << mont.n
    println("b : ", BASE(mont))
    println("n : ", mont.n)
    println("r : ", r)
    println("m : ", mont.m)
    println("t1: ", t1)
    println("t2: ", t2)
    println("r1: ", r1)
    println("r2: ", r2)
    println()
    println("Original x1       :", x1)
    println("Recovered from r1 :", reduce(mont, r1))
    println("Original x2       :", x2)
    println("Recovered from r2 :", reduce(mont, r2))
    println("\nMontgomery computation of x1 ^ x2 mod m:")
    prod = reduce(mont, mont.rrm)
    base = reduce(mont, x1 * mont.rrm)
    pow = x2
    while pow > 0
        if isodd(pow)
            prod = reduce(mont, prod * base)
        end
        pow >>= 1
        base = reduce(mont, base * base)
    end
    println(reduce(mont, prod))
    println("\nAlternate computation of x1 ^ x2 mod m :")
    println(powermod(x1, x2, m))
end

testmontgomery2()
Output:
b : 2
n : 100
r : 0
m : 750791094644726559640638407699
t1: 323165824550862327179367294465482435542970161392400401329100
t2: 308607334419945011411837686695175944083084270671482464168730
r1: 440160025148131680164261562101
r2: 435362628198191204145287283255

Original x1       :540019781128412936473322405310
Recovered from r1 :540019781128412936473322405310
Original x2       :515692107665463680305819378593
Recovered from r2 :515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m:
151232511393500655853002423778

Alternate computation of x1 ^ x2 mod m :
151232511393500655853002423778

Kotlin

Translation of: Go
// version 1.1.3

import java.math.BigInteger

val bigZero = BigInteger.ZERO
val bigOne  = BigInteger.ONE
val bigTwo  = BigInteger.valueOf(2L)

class Montgomery(val m: BigInteger) {
    val n:   Int
    val rrm: BigInteger

    init {
        require(m > bigZero && m.testBit(0)) // must be positive and odd
        n = m.bitLength()       
        rrm = bigOne.shiftLeft(n * 2).mod(m)
    }

    fun reduce(t: BigInteger): BigInteger {
        var a = t
        for (i in 0 until n) {
            if (a.testBit(0)) a += m
            a = a.shiftRight(1)
        }
        if (a >= m) a -= m
        return a
    }

    companion object {
        const val BASE = 2
    }
}

fun main(args: Array<String>) {
    val m  = BigInteger("750791094644726559640638407699")
    val x1 = BigInteger("540019781128412936473322405310")
    val x2 = BigInteger("515692107665463680305819378593")

    val mont = Montgomery(m)
    val t1 = x1 * mont.rrm
    val t2 = x2 * mont.rrm

    val r1 = mont.reduce(t1)
    val r2 = mont.reduce(t2)
    val r  = bigOne.shiftLeft(mont.n)

    println("b :  ${Montgomery.BASE}")
    println("n :  ${mont.n}")
    println("r :  $r")
    println("m :  ${mont.m}")   
    println("t1:  $t1")
    println("t2:  $t2")
    println("r1:  $r1")
    println("r2:  $r2")
    println()
    println("Original x1       : $x1")
    println("Recovered from r1 : ${mont.reduce(r1)}")
    println("Original x2       : $x2")
    println("Recovered from r2 : ${mont.reduce(r2)}")

    println("\nMontgomery computation of x1 ^ x2 mod m :")
    var prod = mont.reduce(mont.rrm)
    var base = mont.reduce(x1 * mont.rrm)
    var exp  = x2
    while (exp.bitLength() > 0) {
        if (exp.testBit(0)) prod = mont.reduce(prod * base)
        exp = exp.shiftRight(1)
        base = mont.reduce(base * base)
    }
    println(mont.reduce(prod)) 
    println("\nLibrary-based computation of x1 ^ x2 mod m :")
    println(x1.modPow(x2, m))
}
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :
151232511393500655853002423778

Library-based computation of x1 ^ x2 mod m :
151232511393500655853002423778

Nim

Translation of: D
Library: bignum
import bignum

# Missing functions in "bignum".

template isOdd(val: Int): bool =
  ## Needed as bignum "odd" function crashes.
  (val and 1) != 0

func exp(x, y, m: Int): Int =
  ## Missing "exp" function in "bignum".
  if m == 1: return newInt(0)
  result = newInt(1)
  var x = x mod m
  var y = y
  while y > 0:
    if y.isOdd:
      result = (result * x) mod m
    y = y shr 1
    x = (x * x) mod m


type Montgomery = object
  m: Int    # Modulus; must be odd.
  n: int    # m.bitLen().
  rrm: Int  # (1<<2n) mod m.

const Base = 2


func initMontgomery(m: Int): Montgomery =
  ## Initialize a Mongtgomery object.
  doAssert m > 0 and m.isOdd, "argument must be positive and odd."
  result.m = m
  result.n = m.bitLen
  result.rrm = newInt(1) shl culong(result.n * 2) mod m


func reduce(mont: Montgomery; t: Int): Int =
  ## Montgomery reduction algorithm.
  result = t
  for i in 0..<mont.n:
    if result.isOdd: inc result, mont.m
    result = result shr 1
  if result >= mont.m: dec result, mont.m


when isMainModule:

  let
    m = newInt("750791094644726559640638407699")
    x1 = newInt("540019781128412936473322405310")
    x2 = newInt("515692107665463680305819378593")

    mont = initMontgomery(m)
    t1 = x1 * mont.rrm
    t2 = x2 * mont.rrm

    r1 = mont.reduce(t1)
    r2 = mont.reduce(t2)
    r = newInt(1) shl culong(mont.n)

  echo "b:   ", Base
  echo "n:   ", mont.n
  echo "r:   ", r
  echo "m:   ", mont.m
  echo "t1:  ", t1
  echo "t2:  ", t2
  echo "r1:  ", r1
  echo "r2:  ", r2
  echo()
  echo "Original x1:       ", x1
  echo "Recovered from r1: ", mont.reduce(r1)
  echo "Original x2:       ", x2
  echo "Recovered from r2: ", mont.reduce(r2)

  echo "\nMontgomery computation of x1^x2 mod m:"
  var
    prod = mont.reduce(mont.rrm)
    base = mont.reduce(x1 * mont.rrm)
    e = x2
  while e > 0:
    if e.isOdd: prod = mont.reduce(prod * base)
    e = e shr 1
    base = mont.reduce(base * base)
  echo mont.reduce(prod)
  echo "\nAlternate computation of x1^x2 mod m:"
  echo x1.exp(x2, m)
Output:
b:   2
n:   100
r:   1267650600228229401496703205376
m:   750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1:       540019781128412936473322405310
Recovered from r1: 540019781128412936473322405310
Original x2:       515692107665463680305819378593
Recovered from r2: 515692107665463680305819378593

Montgomery computation of x1^x2 mod m:
151232511393500655853002423778

Alternate computation of x1^x2 mod m:
151232511393500655853002423778

Perl

Translation of: Raku
Library: ntheory
use bigint;
use ntheory qw(powmod);

sub msb {
  my ($n, $base) = (shift, 0);
  $base++ while $n >>= 1;
  $base;
}

sub montgomery_reduce {
    my($m, $a) = @_;
    for (0 .. msb($m)) {
        $a += $m if $a & 1;
        $a >>= 1
    }
    $a % $m
}

my $m  = 750791094644726559640638407699;
my $t1 = 323165824550862327179367294465482435542970161392400401329100;

my $r1 = 440160025148131680164261562101;
my $r2 = 435362628198191204145287283255;

my $x1 = 540019781128412936473322405310;
my $x2 = 515692107665463680305819378593;

printf "Original x1:       %s\n", $x1;
printf "Recovered from r1: %s\n", montgomery_reduce($m, $r1);
printf "Original x2:       %s\n", $x2;
printf "Recovered from r2: %s\n", montgomery_reduce($m, $r2);

print "\nMontgomery  computation x1**x2 mod m: ";
my $prod = montgomery_reduce($m, $t1/$x1);
my $base = montgomery_reduce($m, $t1);

for (my $exponent = $x2; $exponent >= 0; $exponent >>= 1) {
    $prod = montgomery_reduce($m, $prod * $base) if $exponent & 1;
    $base = montgomery_reduce($m, $base * $base);
    last if $exponent == 0;
}

print montgomery_reduce($m, $prod) . "\n";
printf "Built-in op computation x1**x2 mod m: %s\n", powmod($x1, $x2, $m);
Output:
Original x1:       540019781128412936473322405310
Recovered from r1: 540019781128412936473322405310
Original x2:       515692107665463680305819378593
Recovered from r2: 515692107665463680305819378593

Montgomery  computation x1**x2 mod m: 151232511393500655853002423778
Built-in op computation x1**x2 mod m: 151232511393500655853002423778

Phix

Translation of: D
Library: Phix/mpfr
with javascript_semantics 
include mpfr.e
 
enum BASE, BITLEN, MODULUS, RRM
 
function reduce(sequence mont, mpz a)
    integer n = mont[BITLEN]
    mpz m = mont[MODULUS],
        r = mpz_init_set(a)
    for i=1 to n do
        if mpz_odd(r) then
            mpz_add(r,r,m)
        end if
        {} = mpz_fdiv_q_ui(r,r,2)
    end for
    if mpz_cmp(r,m)>=0 then mpz_sub(r,r,m) end if
    return r
end function

function Montgomery(mpz m)
    if mpz_sign(m)=-1 then crash("must be positive") end if
    if not mpz_odd(m) then crash("must be odd") end if
    integer n = mpz_sizeinbase(m,2)
    mpz rrm = mpz_init(2)
    mpz_powm_ui(rrm,rrm,n*2,m)
    return {2,  -- BASE
            n,  -- BITLEN
            m,  -- MODULUS
            rrm -- 1<<(n*2) % m
           }
end function
 
mpz m = mpz_init("750791094644726559640638407699"),
    x1 = mpz_init("540019781128412936473322405310"),
    x2 = mpz_init("515692107665463680305819378593"),
    t1 = mpz_init(),
    t2 = mpz_init()
 
sequence mont = Montgomery(m)
mpz_mul(t1,x1,mont[RRM])
mpz_mul(t2,x2,mont[RRM])
mpz r1 = reduce(mont,t1),
    r2 = reduce(mont,t2),
    r = mpz_init()
mpz_ui_pow_ui(r,2,mont[BITLEN])
 
printf(1,"b :  %d\n", {mont[BASE]})
printf(1,"n :  %d\n", {mont[BITLEN]})
printf(1,"r :  %s\n", {mpz_get_str(r)})
printf(1,"m :  %s\n", {mpz_get_str(mont[MODULUS])})
printf(1,"t1:  %s\n", {mpz_get_str(t1)})
printf(1,"t2:  %s\n", {mpz_get_str(t2)})
printf(1,"r1:  %s\n", {mpz_get_str(r1)})
printf(1,"r2:  %s\n", {mpz_get_str(r2)})
printf(1,"\n")
printf(1,"Original x1       : %s\n", {mpz_get_str(x1)})
printf(1,"Recovered from r1 : %s\n", {mpz_get_str(reduce(mont,r1))})
printf(1,"Original x2       : %s\n", {mpz_get_str(x2)})
printf(1,"Recovered from r2 : %s\n", {mpz_get_str(reduce(mont,r2))})
 
printf(1,"\nMontgomery computation of x1 ^ x2 mod m :")
mpz prod = reduce(mont,mont[RRM])
mpz_mul(r,x1,mont[RRM])
mpz base = reduce(mont,r),
    expn = mpz_init_set(x2)
 
while mpz_cmp_si(expn,0)!=0 do
    if mpz_odd(expn) then
        mpz_mul(prod,prod,base)
        prod = reduce(mont,prod)
    end if
    {} = mpz_fdiv_q_ui(expn,expn,2)
    mpz_mul(base,base,base)
    base = reduce(mont,base)
end while
printf(1,"%s\n",{mpz_get_str(reduce(mont,prod))})
printf(1," alternate computation of x1 ^ x2 mod m :")
mpz_powm(r,x1,x2,m)
printf(1,"%s\n",{mpz_get_str(r)})
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :151232511393500655853002423778
 alternate computation of x1 ^ x2 mod m :151232511393500655853002423778

PicoLisp

(de **Mod (X Y N)
   (let M 1
      (loop
         (when (bit? 1 Y)
            (setq M (% (* M X) N)) )
         (T (=0 (setq Y (>> 1 Y))) M)
         (setq X (% (* X X) N)) ) ) )
(de rrm (M)
   (% (>> (- (* 2 Mbins)) 1) M) )
(de reduce (A)
   (do Mbins
      (and (bit? 1 A) (inc 'A M))
      (setq A (>> 1 A)) )
   (and (>= A M) (dec 'A M))
   A )
(let
   (M 750791094644726559640638407699
      Mbins (length (bin M))
      RRM (rrm M)
      X1 540019781128412936473322405310
      X2 515692107665463680305819378593
      T1 (* X1 RRM)
      T2 (* X2 RRM)
      R1 (reduce T1)
      R2 (reduce T2)
      R (>> (- Mbins) 1)
      Prod (reduce RRM)
      Base (reduce (* X1 RRM))
      Exp X2 )
   (println 'b ': 2)
   (println 'n ': Mbins)
   (println 'r ': R)
   (println 'm ': M)
   (println 't1 ': T1)
   (println 't2 ': T2)
   (println 'r1 ': R1)
   (println 'r2 ': R2)
   (prinl)
   (prinl "Original x1       : " X1)
   (prinl "Recovered from r1 : " (reduce R1))
   (prinl "Original x2       : " X2)
   (prinl "Recovered from r2 : " (reduce R2))
   (prinl)
   (prin "Montgomery computation of x1 \^ x2 mod m : ")
   (while (gt0 Exp)
      (and
         (bit? 1 Exp)
         (setq Prod (reduce (* Prod Base))) )
      (setq
         Exp (>> 1 Exp)
         Base (reduce (* Base Base)) ) )
   (prinl (reduce Prod))
   (prinl "Montgomery computation of x1 \^ x2 mod m : " (**Mod X1 X2 M)) )
Output:
b : 2
n : 100
r : 1267650600228229401496703205376
m : 750791094644726559640638407699
t1 : 323165824550862327179367294465482435542970161392400401329100
t2 : 308607334419945011411837686695175944083084270671482464168730
r1 : 440160025148131680164261562101
r2 : 435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m : 151232511393500655853002423778
Montgomery computation of x1 ^ x2 mod m : 151232511393500655853002423778

Python

TODO:
Update the output
Translation of: D
class Montgomery:
    BASE = 2

    def __init__(self, m):
        self.m = m
        self.n = m.bit_length()
        self.rrm = (1 << (self.n * 2)) % m

    def reduce(self, t):
        a = t
        for i in xrange(self.n):
            if (a & 1) == 1:
                a = a + self.m
            a = a >> 1
        if a >= self.m:
            a = a - self.m
        return a

# Main
m = 750791094644726559640638407699
x1 = 540019781128412936473322405310
x2 = 515692107665463680305819378593

mont = Montgomery(m)
t1 = x1 * mont.rrm
t2 = x2 * mont.rrm

r1 = mont.reduce(t1)
r2 = mont.reduce(t2)
r = 1 << mont.n

print(
    f"b: {Montgomery.BASE}\n"
    f"n: {mont.n}\n"
    f"r: {r}\n"
    f"m: {mont.m}\n"
    f"t1: {t1}\n"
    f"t2: {t2}\n"
    f"r1: {r1}\n"
    f"r2: {r2}\n"
    f"Original x1: {x1}\n"
    f"Recovered from r1: {mont.reduce(r1)}\n"
    f"Original x2: {x2}\n"
    f"Recovered from r2: {mont.reduce(r2)}\n"
)

print("Montgomery computation of x1 ^ x2 mod m:")
prod = mont.reduce(mont.rrm)
base = mont.reduce(x1 * mont.rrm)
exp = x2
while exp.bit_length() > 0:
    if (exp & 1) == 1:
        prod = mont.reduce(prod * base)
    exp = exp >> 1
    base = mont.reduce(base * base)
print(mont.reduce(prod))
print(f"\nAlternate computation of x1 ^ x2 mod m: {pow(x1, x2, m)}")
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m:
151232511393500655853002423778

Alternate computation of x1 ^ x2 mod m :
151232511393500655853002423778

Quackery

Translation of: Factor

**mod is defined at Modular exponentiation#Quackery.

  [ 0 swap [ dup while dip 1+ 1 >> again ] drop ]                  is bits   (     n --> n )

  [ 1 & ]                                                          is odd    (     n --> b )

  [ over bits times [ dup odd if [ over + ] 1 >> ] swap mod ]      is monred (   n n --> n )

  [ 750791094644726559640638407699 ]                               is m      (       --> n )
  [ 323165824550862327179367294465482435542970161392400401329100 ] is t1     (       --> n )

  [ 440160025148131680164261562101 ]                               is r1     (       --> n )
  [ 435362628198191204145287283255 ]                               is r2     (       --> n )

  [ 540019781128412936473322405310 ]                               is x1     (       --> n )
  [ 515692107665463680305819378593 ]                               is x2     (       --> n )

  [ unrot dip
      [ dip dup
        over t1 rot / monred temp put
        t1 monred ]
    [ dup 0 != while
      dup odd if
        [ over temp take *
          m swap monred
          temp put ]
      dip [ dup * m swap monred ]
      1 >> again ]
  2drop temp take monred ]                                         is **mon  ( n n n --> n )

  say "Original x1:       " x1 echo cr
  say "Recovered from r1: " m r1 monred echo cr
  cr
  say "Original x2:       " x2 echo cr
  say "Recovered from r2: " m r2 monred echo cr
  cr
  say "Montgomery computation of x1^x2 mod m: " x1 x2 m **mon echo cr
  say "Modular exponentiation of x1^x2 mod m: " x1 x2 m **mod echo cr
Output:
Original x1:       540019781128412936473322405310
Recovered from r1: 540019781128412936473322405310

Original x2:       515692107665463680305819378593
Recovered from r2: 515692107665463680305819378593

Montgomery computation of x1^x2 mod m: 151232511393500655853002423778
Modular exponentiation of x1^x2 mod m: 151232511393500655853002423778

Racket

#lang typed/racket
(require math/number-theory)

(: montgomery-reduce-fn
   (-> Positive-Integer Natural [#:m-dash Natural]
       (Nonnegative-Integer Natural -> Integer)))

(: ith-digit (Integer Nonnegative-Integer Natural -> Nonnegative-Integer))
(define (ith-digit a i b)
  (modulo (quotient a (expt b i)) b))

(: m-dash (Integer Integer -> Natural))
(define (m-dash m b) ; for if you want to precompute it yourself
  (modular-inverse (- m) b))

(define ((montgomery-reduce-fn m b #:m-dash (m′ (m-dash m b))) T n)
  (define A
    (for/fold : Nonnegative-Integer
      ((A : Nonnegative-Integer T))
      ((i : Nonnegative-Integer (in-range n)))
      (let* ((a_i (ith-digit A i b))
             (u_i (modulo (* a_i m′) b)))
        (+ A (* u_i m (expt b i))))))
  (define A/b^n (quotient A (expt b n)))
  (if (>= A/b^n m)
      (- A/b^n m)
      A/b^n))

; ---------------------------------------------------------------------------------------------------
(module+ test
  (require typed/rackunit)
  
  (check-equal? (ith-digit 1234 0 10) 4)
  (check-equal? (ith-digit 1234 3 10) 1)
  
  ;; e.g. ripped off from {{trans|Go}}  
  (let ((b  2)
        (n  100)
        (r  1267650600228229401496703205376)
        (m  750791094644726559640638407699)
        (T1 323165824550862327179367294465482435542970161392400401329100)
        (T2 308607334419945011411837686695175944083084270671482464168730)
        (R1 440160025148131680164261562101)
        (R2 435362628198191204145287283255)
        (x1 540019781128412936473322405310)
        (x2 515692107665463680305819378593))
    (define mr (montgomery-reduce-fn m b))
    (check-equal? (mr R1 n) x1)
    (check-equal? (mr R2 n) x2)))

Tests, which are courtesy of #Go implementation, all pass.

Raku

(formerly Perl 6)

Works with: Rakudo version 2018.03
Translation of: Sidef
sub montgomery-reduce($m, $a is copy) {
    for 0..$m.msb {
        $a += $m if $a +& 1;
        $a +>= 1
    }
    $a % $m
}

my $m  = 750791094644726559640638407699;
my $t1 = 323165824550862327179367294465482435542970161392400401329100;

my $r1 = 440160025148131680164261562101;
my $r2 = 435362628198191204145287283255;

my $x1 = 540019781128412936473322405310;
my $x2 = 515692107665463680305819378593;

say "Original x1:       ", $x1;
say "Recovered from r1: ", montgomery-reduce($m, $r1);
say "Original x2:       ", $x2;
say "Recovered from r2: ", montgomery-reduce($m, $r2);

print "\nMontgomery  computation x1**x2 mod m: ";
my $prod = montgomery-reduce($m, $t1/$x1);
my $base = montgomery-reduce($m, $t1);

for $x2, {$_ +> 1} ... 0 -> $exponent {
    $prod = montgomery-reduce($m, $prod * $base) if $exponent +& 1;
    $base = montgomery-reduce($m, $base * $base);
}

say montgomery-reduce($m, $prod);
say "Built-in op computation x1**x2 mod m: ", $x1.expmod($x2, $m);
Output:
Original x1:       540019781128412936473322405310
Recovered from r1: 540019781128412936473322405310
Original x2:       515692107665463680305819378593
Recovered from r2: 515692107665463680305819378593

Montgomery  computation x1**x2 mod m: 151232511393500655853002423778
Built-in op computation x1**x2 mod m: 151232511393500655853002423778

Sidef

Translation of: zkl
func montgomeryReduce(m, a) {
    {
        a += m if a.is_odd
        a >>= 1
    } * m.as_bin.len

    a % m
}

var m  = 750791094644726559640638407699
var t1 = 323165824550862327179367294465482435542970161392400401329100

var r1 = 440160025148131680164261562101
var r2 = 435362628198191204145287283255

var x1 = 540019781128412936473322405310
var x2 = 515692107665463680305819378593

say("Original x1:        ", x1)
say("Recovererd from r1: ", montgomeryReduce(m, r1))
say("Original x2:        ", x2)
say("Recovererd from r2: ", montgomeryReduce(m, r2))

print("\nMontgomery computation of x1^x2 mod m:    ")
var prod = montgomeryReduce(m, t1/x1)
var base = montgomeryReduce(m, t1)

for (var exponent = x2; exponent ; exponent >>= 1) {
    prod = montgomeryReduce(m, prod * base) if exponent.is_odd
    base = montgomeryReduce(m, base * base)
}

say(montgomeryReduce(m, prod))
say("Library-based computation of x1^x2 mod m: ", x1.powmod(x2, m))
Output:
Original x1:        540019781128412936473322405310
Recovererd from r1: 540019781128412936473322405310
Original x2:        515692107665463680305819378593
Recovererd from r2: 515692107665463680305819378593

Montgomery computation of x1^x2 mod m:    151232511393500655853002423778
Library-based computation of x1^x2 mod m: 151232511393500655853002423778

Tcl

This example is under development. It was marked thus on 25/06/2012. Please help complete the example.
package require Tcl 8.5

proc montgomeryReduction {m mDash T n {b 2}} {
    set A $T
    for {set i 0} {$i < $n} {incr i} {
	# Could be simplified for cases b==2 and b==10
	for {set j 0;set a $A} {$j < $i} {incr j} {
	    set a [expr {$a / $b}]
	}
	set ui [expr {($a % $b) * $mDash % $b}]
	incr A [expr {$ui * $m * $b**$i}]
    }
    set A [expr {$A / ($b ** $n)}]
    return [expr {$A >= $m ? $A - $m : $A}]
}

Visual Basic .NET

Translation of: C#
Imports System.Numerics
Imports System.Runtime.CompilerServices

Module Module1

    <Extension()>
    Function BitLength(v As BigInteger) As Integer
        If v < 0 Then
            v *= -1
        End If

        Dim result = 0
        While v > 0
            v >>= 1
            result += 1
        End While
        Return result
    End Function

    Structure Montgomery
        Shared ReadOnly BASE = 2
        Dim m As BigInteger
        Dim rrm As BigInteger
        Dim n As Integer

        Sub New(m As BigInteger)
            If m < 0 OrElse m.IsEven Then
                Throw New ArgumentException()
            End If

            Me.m = m
            n = m.BitLength
            rrm = (BigInteger.One << (n * 2)) Mod m
        End Sub

        Function Reduce(t As BigInteger) As BigInteger
            Dim a = t
            For i = 1 To n
                If Not a.IsEven Then
                    a += m
                End If
                a >>= 1
            Next
            If a >= m Then
                a -= m
            End If
            Return a
        End Function
    End Structure

    Sub Main()
        Dim m = BigInteger.Parse("750791094644726559640638407699")
        Dim x1 = BigInteger.Parse("540019781128412936473322405310")
        Dim x2 = BigInteger.Parse("515692107665463680305819378593")

        Dim mont As New Montgomery(m)
        Dim t1 = x1 * mont.rrm
        Dim t2 = x2 * mont.rrm

        Dim r1 = mont.Reduce(t1)
        Dim r2 = mont.Reduce(t2)
        Dim r = BigInteger.One << mont.n

        Console.WriteLine("b :  {0}", Montgomery.BASE)
        Console.WriteLine("n :  {0}", mont.n)
        Console.WriteLine("r :  {0}", r)
        Console.WriteLine("m :  {0}", mont.m)
        Console.WriteLine("t1:  {0}", t1)
        Console.WriteLine("t2:  {0}", t2)
        Console.WriteLine("r1:  {0}", r1)
        Console.WriteLine("r2:  {0}", r2)
        Console.WriteLine()
        Console.WriteLine("Original x1       : {0}", x1)
        Console.WriteLine("Recovered from r1 : {0}", mont.Reduce(r1))
        Console.WriteLine("Original x2       : {0}", x2)
        Console.WriteLine("Recovered from r2 : {0}", mont.Reduce(r2))

        Console.WriteLine()
        Console.WriteLine("Montgomery computation of x1 ^ x2 mod m :")
        Dim prod = mont.Reduce(mont.rrm)
        Dim base = mont.Reduce(x1 * mont.rrm)
        Dim exp = x2
        While exp.BitLength > 0
            If Not exp.IsEven Then
                prod = mont.Reduce(prod * base)
            End If
            exp >>= 1
            base = mont.Reduce(base * base)
        End While
        Console.WriteLine(mont.Reduce(prod))
        Console.WriteLine()
        Console.WriteLine("Alternate computation of x1 ^ x2 mod m :")
        Console.WriteLine(BigInteger.ModPow(x1, x2, m))
    End Sub

End Module
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :
151232511393500655853002423778

Alternate computation of x1 ^ x2 mod m :
151232511393500655853002423778

Wren

Translation of: Kotlin
Library: Wren-big
import "./big" for BigInt

class Montgomery {
    static base { 2 }

    construct new(m) {
        if (m <= BigInt.zero || !m.testBit(0)) {
            Fiber.abort("Argument must be a positive, odd big integer.")
        }
        _m = m
        _n = m.bitLength.toSmall
        _rrm = (BigInt.one << (n*2)) % m
    }

    m   { _m }
    n   { _n }
    rrm { _rrm }

    reduce(t) {
        var a = t.copy()
        for (i in 0..._n) {
            if (a.testBit(0)) a = a + _m
            a = a >> 1
        }
        if (a >= _m) a = a - _m
        return a
     }
}

var m  = BigInt.new("750791094644726559640638407699")
var x1 = BigInt.new("540019781128412936473322405310")
var x2 = BigInt.new("515692107665463680305819378593")

var mont = Montgomery.new(m)
var t1 = x1 * mont.rrm
var t2 = x2 * mont.rrm

var r1 = mont.reduce(t1)
var r2 = mont.reduce(t2)
var r  = BigInt.one << (mont.n)

System.print("b :  %(Montgomery.base)")
System.print("n :  %(mont.n)")
System.print("r :  %(r)")
System.print("m :  %(mont.m)")
System.print("t1:  %(t1)")
System.print("t2:  %(t2)")
System.print("r1:  %(r1)")
System.print("r2:  %(r2)")
System.print()
System.print("Original x1       : %(x1)")
System.print("Recovered from r1 : %(mont.reduce(r1))")
System.print("Original x2       : %(x2)")
System.print("Recovered from r2 : %(mont.reduce(r2))")

System.print("\nMontgomery computation of x1 ^ x2 mod m :")
var prod = mont.reduce(mont.rrm)
var base = mont.reduce(x1 * mont.rrm)
var exp  = x2
while (exp.bitLength > 0) {
    if (exp.testBit(0)) prod = mont.reduce(prod * base)
    exp = exp >> 1
    base = mont.reduce(base * base)
}
System.print(mont.reduce(prod))
System.print("\nLibrary-based computation of x1 ^ x2 mod m :")
System.print(x1.modPow(x2, m))
Output:
b :  2
n :  100
r :  1267650600228229401496703205376
m :  750791094644726559640638407699
t1:  323165824550862327179367294465482435542970161392400401329100
t2:  308607334419945011411837686695175944083084270671482464168730
r1:  440160025148131680164261562101
r2:  435362628198191204145287283255

Original x1       : 540019781128412936473322405310
Recovered from r1 : 540019781128412936473322405310
Original x2       : 515692107665463680305819378593
Recovered from r2 : 515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m :
151232511393500655853002423778

Library-based computation of x1 ^ x2 mod m :
151232511393500655853002423778

zkl

Translation of: Go

Uses GMP (GNU Multi Precision library).

var [const] BN=Import("zklBigNum");  // libGMP

fcn montgomeryReduce(modulus,T){
   _assert_(modulus.isOdd);
   a:=BN(T);	// we'll do in place math
   do(modulus.len(2)){  // bits needed to hold modulus
      if(a.isOdd) a.add(modulus);
      a.div(2);  // a>>=1
   }
   if(a>=modulus) a.sub(modulus);
   a
}
    // magic numbers from the Go solution
//b:= 2;
//n:= 100;
//r:= BN("1267650600228229401496703205376");
m:= BN("750791094644726559640638407699");

t1:=BN("323165824550862327179367294465482435542970161392400401329100");
t2:=BN("308607334419945011411837686695175944083084270671482464168730");

r1:=BN("440160025148131680164261562101");
r2:=BN("435362628198191204145287283255");

x1:=BN("540019781128412936473322405310");
x2:=BN("515692107665463680305819378593");

    // now demonstrate that it works:
println("Original x1:       ", x1);
println("Recovererd from r1:",montgomeryReduce(m,r1));
println("Original x2:       ", x2);
println("Recovererd from r2:", montgomeryReduce(m,r2));

    // and demonstrate a use:
print("\nMontgomery computation of x1 ^ x2 mod m:    ");
    // this is the modular exponentiation algorithm, except we call
    // montgomeryReduce instead of using a mod function.
prod:=montgomeryReduce(m,t1/x1);	// 1
base:=montgomeryReduce(m,t1);		// x1^1
exp :=BN(x2);			        // not reduced
while(exp){
   if(exp.isOdd) prod=montgomeryReduce(m,prod.mul(base));
   exp.div(2);  // exp>>=1
   base=montgomeryReduce(m,base.mul(base));
}
println(montgomeryReduce(m,prod));
println("Library-based computation of x1 ^ x2 mod m: ",x1.powm(x2,m));
Output:
Original x1:       540019781128412936473322405310
Recovererd from r1:540019781128412936473322405310
Original x2:       515692107665463680305819378593
Recovererd from r2:515692107665463680305819378593

Montgomery computation of x1 ^ x2 mod m:    151232511393500655853002423778
Library-based computation of x1 ^ x2 mod m: 151232511393500655853002423778