I'm working on modernizing Rosetta Code's infrastructure, starting with communications. Please accept this time-limited open invite to RC's Slack. --Michael Mol (talk) 20:59, 30 May 2020 (UTC)

SHA-256 Merkle tree

From Rosetta Code
Task
SHA-256 Merkle tree
You are encouraged to solve this task according to the task description, using any language you may know.

As described in its documentation, Amazon S3 Glacier requires that all uploaded files come with a checksum computed as a Merkle Tree using SHA-256.

Specifically, the SHA-256 hash is computed for each 1MiB block of the file. And then, starting from the beginning of the file, the raw hashes of consecutive blocks are paired up and concatenated together, and a new hash is computed from each concatenation. Then these are paired up and concatenated and hashed, and the process continues until there is only one hash left, which is the final checksum. The hexadecimal representation of this checksum is the value that must be included with the AWS API call to upload the object (or complete a multipart upload).

Implement this algorithm in your language; you can use the code from the SHA-256 task for the actual hash computations. For better manageability and portability, build the tree using a smaller block size of only 1024 bytes, and demonstrate it on the RosettaCode title image with that block size. The final result should be the hexadecimal digest value a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c.

C[edit]

Library: GLib
#include <glib.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
 
guchar* sha256_merkle_tree(FILE* in, size_t block_size) {
gchar* buffer = g_malloc(block_size);
GPtrArray* hashes = g_ptr_array_new_with_free_func(g_free);
gssize digest_length = g_checksum_type_get_length(G_CHECKSUM_SHA256);
GChecksum* checksum = g_checksum_new(G_CHECKSUM_SHA256);
size_t bytes;
while ((bytes = fread(buffer, 1, block_size, in)) > 0) {
g_checksum_reset(checksum);
g_checksum_update(checksum, (guchar*)buffer, bytes);
gsize len = digest_length;
guchar* digest = g_malloc(len);
g_checksum_get_digest(checksum, digest, &len);
g_ptr_array_add(hashes, digest);
}
g_free(buffer);
guint hashes_length = hashes->len;
if (hashes_length == 0) {
g_ptr_array_free(hashes, TRUE);
g_checksum_free(checksum);
return NULL;
}
while (hashes_length > 1) {
guint j = 0;
for (guint i = 0; i < hashes_length; i += 2, ++j) {
guchar* digest1 = g_ptr_array_index(hashes, i);
guchar* digest_out = g_ptr_array_index(hashes, j);
if (i + 1 < hashes_length) {
guchar* digest2 = g_ptr_array_index(hashes, i + 1);
g_checksum_reset(checksum);
g_checksum_update(checksum, digest1, digest_length);
g_checksum_update(checksum, digest2, digest_length);
gsize len = digest_length;
g_checksum_get_digest(checksum, digest_out, &len);
} else {
memcpy(digest_out, digest1, digest_length);
}
}
hashes_length = j;
}
guchar* result = g_ptr_array_steal_index(hashes, 0);
g_ptr_array_free(hashes, TRUE);
g_checksum_free(checksum);
return result;
}
 
/*
 * Prints the SHA-256 Merkle tree root (1024-byte blocks) of the file named by
 * the single command-line argument, as lowercase hexadecimal.
 * Exits with EXIT_FAILURE on a usage error or when the file cannot be opened.
 */
int main(int argc, char** argv) {
    if (argc != 2) {
        fprintf(stderr, "usage: %s filename\n", argv[0]);
        return EXIT_FAILURE;
    }
    FILE* in = fopen(argv[1], "rb");
    if (in == NULL) {
        perror(argv[1]);
        return EXIT_FAILURE;
    }
    guchar* digest = sha256_merkle_tree(in, 1024);
    fclose(in);
    if (digest == NULL)
        return EXIT_SUCCESS; /* nothing was read, so nothing is printed */

    const gssize length = g_checksum_type_get_length(G_CHECKSUM_SHA256);
    gssize pos = 0;
    while (pos < length) {
        printf("%02x", digest[pos]);
        ++pos;
    }
    printf("\n");
    g_free(digest);
    return EXIT_SUCCESS;
}
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

C++[edit]

Library: OpenSSL
#include <cstdlib>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>
#include <openssl/sha.h>
 
// Thrown when one of the SHA-256 library calls reports failure.
struct sha256_exception : std::exception {
    const char* what() const noexcept override { return "SHA-256 error"; }
};
 
// Non-copyable wrapper around an OpenSSL SHA256_CTX.
// Every operation throws sha256_exception when the underlying call returns 0.
class sha256 {
public:
    sha256() { reset(); }
    sha256(const sha256&) = delete;
    sha256& operator=(const sha256&) = delete;

    // Starts a fresh hash computation.
    void reset() { require(SHA256_Init(&context_)); }

    // Feeds `length` bytes starting at `data` into the current computation.
    void update(const void* data, size_t length) {
        require(SHA256_Update(&context_, data, length));
    }

    // Finishes the computation and returns the raw digest bytes.
    std::vector<unsigned char> digest() {
        std::vector<unsigned char> out(SHA256_DIGEST_LENGTH);
        require(SHA256_Final(out.data(), &context_));
        return out;
    }

private:
    // Converts a zero status code into an exception.
    static void require(int status) {
        if (status == 0)
            throw sha256_exception();
    }

    SHA256_CTX context_;
};
 
// Renders a digest as lowercase hexadecimal, two characters per byte.
std::string digest_to_string(const std::vector<unsigned char>& digest) {
    std::ostringstream hex;
    hex << std::setfill('0') << std::hex;
    for (unsigned char byte : digest)
        hex << std::setw(2) << static_cast<int>(byte);
    return hex.str();
}
 
std::vector<unsigned char> sha256_merkle_tree(std::istream& in, size_t block_size) {
std::vector<std::vector<unsigned char>> hashes;
std::vector<char> buffer(block_size);
sha256 md;
while (in) {
in.read(buffer.data(), block_size);
size_t bytes = in.gcount();
if (bytes == 0)
break;
md.reset();
md.update(buffer.data(), bytes);
hashes.push_back(md.digest());
}
if (hashes.empty())
return {};
size_t length = hashes.size();
while (length > 1) {
size_t j = 0;
for (size_t i = 0; i < length; i += 2, ++j) {
auto& digest1 = hashes[i];
auto& digest_out = hashes[j];
if (i + 1 < length) {
auto& digest2 = hashes[i + 1];
md.reset();
md.update(digest1.data(), digest1.size());
md.update(digest2.data(), digest2.size());
digest_out = md.digest();
} else {
digest_out = digest1;
}
}
length = j;
}
return hashes[0];
}
 
// Prints the SHA-256 Merkle tree root (1024-byte blocks) of the file named on
// the command line. Returns EXIT_FAILURE on a usage error, when the file
// cannot be opened, or when hashing throws.
int main(int argc, char** argv) {
    if (argc != 2) {
        std::cerr << "usage: " << argv[0] << " filename\n";
        return EXIT_FAILURE;
    }

    const char* path = argv[1];
    std::ifstream in(path, std::ios::binary);
    if (in.fail()) {
        std::cerr << "Cannot open file " << path << ".\n";
        return EXIT_FAILURE;
    }

    int status = EXIT_SUCCESS;
    try {
        const auto root = sha256_merkle_tree(in, 1024);
        std::cout << digest_to_string(root) << '\n';
    } catch (const std::exception& ex) {
        std::cerr << ex.what() << "\n";
        status = EXIT_FAILURE;
    }
    return status;
}
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Delphi[edit]

Library: DCPsha256
Translation of: Go
 
program SHA256_Merkle_tree;
 
{$APPTYPE CONSOLE}
 
uses
System.SysUtils,
System.Classes,
DCPsha256;
 
// Returns the SHA-256 digest of the first Len bytes of Input; when Len is
// negative (the default) the whole array is hashed.
function SHA256(const Input: TArray<Byte>; Len: Integer = -1): TArray<Byte>;
var
Hasher: TDCP_sha256;
l: Integer;
begin
// Resolve how many bytes to hash.
if Len < 0 then
l := length(Input)
else
l := Len;
 
Hasher := TDCP_sha256.Create(nil);
try
Hasher.Init;
// NOTE(review): Input[0] is referenced even when l is 0, so this presumably
// expects a non-empty Input - confirm against callers.
Hasher.Update(Input[0], l);
// HashSize is divided by 8 to size the result in bytes.
SetLength(Result, Hasher.HashSize div 8);
Hasher.final(Result[0]);
finally
// Release the hasher whether or not hashing raised.
Hasher.Free;
end;
end;
 
// Returns the SHA-256 Merkle tree root of FileName as a hexadecimal string.
//
// The file is hashed in blocks of blockSize bytes (the last block may be
// shorter). Neighbouring digests are then concatenated and hashed pairwise,
// level by level, until one digest remains; a digest without a partner moves
// up unchanged.
//
// Returns an empty string when the file does not exist or contains no data.
function Merkle_tree(FileName: TFileName): string;
const
  blockSize = 1024;
var
  f: TMemoryStream;
  hashes: TArray<TArray<byte>>;
  bytesRead: Cardinal;
  buffer: TArray<byte>;
  i, index: Integer;
begin
  Result := '';
  if not FileExists(FileName) then
    exit;

  SetLength(buffer, blockSize);
  FillChar(buffer[0], blockSize, 0);

  // Leaf level: one digest per block. The try/finally releases the stream
  // even if loading or reading raises.
  f := TMemoryStream.Create;
  try
    f.LoadFromFile(FileName);
    index := 0;
    repeat
      bytesRead := f.Read(buffer, blockSize);
      if 0 = bytesRead then
        Break;

      Insert(SHA256(buffer, bytesRead), hashes, index);
      inc(index);
    until false;
  finally
    f.Free;
  end;

  // An empty file produces no digests, so there is no root to format.
  if Length(hashes) = 0 then
    exit;

  while Length(hashes) > 1 do
  begin
    var hashes2: TArray<TArray<byte>>;
    index := 0;
    i := 0;
    while i < length(hashes) do
    begin
      if i < length(hashes) - 1 then
      begin
        // Hash the concatenation of the two neighbouring digests.
        buffer := concat(hashes[i], hashes[i + 1]);
        Insert(SHA256(buffer), hashes2, index);
      end
      else
      begin
        // Odd digest out: carry it to the next level unchanged.
        Insert(hashes[i], hashes2, index);
      end;
      inc(index);
      inc(i, 2);
    end;
    hashes := hashes2;
  end;

  for var b in hashes[0] do
  begin
    Result := Result + b.ToHexString(2);
  end;
end;
 
// Program body: print the Merkle root of 'title.png', then wait for a line of
// input before the program ends.
begin
writeln(Merkle_tree('title.png'));
readln;
end.
Output:
A4F902CF9D51FE51EDA156A6792E1445DFF65EDF3A217A1F3334CC9CF1495C2C

Factor[edit]

Works with: Factor version 0.99 2020-08-14
USING: checksums checksums.sha fry grouping io
io.encodings.binary io.files kernel make math math.parser
namespaces sequences ;
 
! Calls quot on each block taken from the current input-stream.
! spin reverses ( size quot stream ) into ( stream quot size ) before
! handing over to (each-stream-block).
: each-block ( ... size quot: ( ... block -- ... ) -- ... )
input-stream get spin (each-stream-block) ; inline
 
! Replaces seq with the bytes of its sha-256 checksum.
: >sha-256 ( seq -- newseq ) sha-256 checksum-bytes ;
 
! Opens path with the given encoding and, for every chunk-size block read,
! appends that block's sha-256 bytes (via , ) to the sequence being built by
! the caller's make.
: (hash-read) ( path encoding chunk-size -- )
'[ _ [ >sha-256 , ] each-block ] with-file-reader ;
 
! Read a file in chunks as a sequence of sha-256 hashes, so as
! not to store a potentially large file in memory all at once.
! The file is opened with the binary encoding; { } make collects
! the hashes that (hash-read) emits into an array.
 
: hash-read ( path chunk-size -- seq )
binary swap [ (hash-read) ] { } make ;
 
! Builds the next tree level: the hashes are taken in groups of two, a full
! pair is concatenated and hashed, and a lone trailing hash is kept as is.
: hash-combine ( seq -- newseq )
2 <groups>
[ dup length 1 > [ concat >sha-256 ] [ first ] if ] map ;
 
! Hashes the file at path in chunk-size blocks, applies hash-combine until a
! single hash remains, and returns that hash as a hex string.
: merkle-hash ( path chunk-size -- str )
hash-read [ dup length 1 = ] [ hash-combine ] until first
bytes>hex-string ;
 
! Print the Merkle hash of title.png computed over 1024-byte chunks.
"title.png" 1024 merkle-hash print
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Go[edit]

package main
 
import (
"crypto/sha256"
"fmt"
"io"
"log"
"os"
)
 
// main prints the SHA-256 Merkle tree root of "title.png", computed over
// 1024-byte blocks, as lowercase hexadecimal. It exits through log.Fatal when
// the file cannot be opened or read, or when it contains no data.
func main() {
	const blockSize = 1024
	f, err := os.Open("title.png")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Leaf level: one digest per block (the last block may be shorter).
	var hashes [][]byte
	buffer := make([]byte, blockSize)
	h := sha256.New()
	for {
		// A single Read may legally return fewer bytes than requested before
		// the end of the file, which would shift every later block boundary.
		// io.ReadFull keeps reading until the block is full or the input ends.
		bytesRead, err := io.ReadFull(f, buffer)
		if bytesRead > 0 {
			h.Reset()
			h.Write(buffer[:bytesRead])
			hashes = append(hashes, h.Sum(nil))
		}
		if err == io.EOF || err == io.ErrUnexpectedEOF {
			break
		}
		if err != nil {
			log.Fatal(err)
		}
	}
	if len(hashes) == 0 {
		log.Fatal("title.png is empty: no blocks to hash")
	}

	// Hash neighbouring digests pairwise until a single root is left; a digest
	// without a partner is carried to the next level unchanged.
	for len(hashes) > 1 {
		next := make([][]byte, 0, (len(hashes)+1)/2)
		for i := 0; i < len(hashes); i += 2 {
			if i < len(hashes)-1 {
				h.Reset()
				h.Write(hashes[i])
				h.Write(hashes[i+1])
				next = append(next, h.Sum(nil))
			} else {
				next = append(next, hashes[i])
			}
		}
		hashes = next
	}
	fmt.Printf("%x\n", hashes[0])
}
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Java[edit]

import java.io.*;
import java.security.*;
import java.util.*;
 
public class SHA256MerkleTree {
public static void main(String[] args) {
if (args.length != 1) {
System.err.println("missing file argument");
System.exit(1);
}
try (InputStream in = new BufferedInputStream(new FileInputStream(args[0]))) {
byte[] digest = sha256MerkleTree(in, 1024);
if (digest != null)
System.out.println(digestToString(digest));
} catch (Exception e) {
e.printStackTrace();
}
}
 
private static String digestToString(byte[] digest) {
StringBuilder result = new StringBuilder();
for (int i = 0; i < digest.length; ++i)
result.append(String.format("%02x", digest[i]));
return result.toString();
}
 
private static byte[] sha256MerkleTree(InputStream in, int blockSize) throws Exception {
byte[] buffer = new byte[blockSize];
int bytes;
MessageDigest md = MessageDigest.getInstance("SHA-256");
List<byte[]> digests = new ArrayList<>();
while ((bytes = in.read(buffer)) > 0) {
md.reset();
md.update(buffer, 0, bytes);
digests.add(md.digest());
}
int length = digests.size();
if (length == 0)
return null;
while (length > 1) {
int j = 0;
for (int i = 0; i < length; i += 2, ++j) {
byte[] digest1 = digests.get(i);
if (i + 1 < length) {
byte[] digest2 = digests.get(i + 1);
md.reset();
md.update(digest1);
md.update(digest2);
digests.set(j, md.digest());
} else {
digests.set(j, digest1);
}
}
length = j;
}
return digests.get(0);
}
}
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Julia[edit]

using SHA
 
"""
    merkletree(filename="title.png", blocksize=1024)

Return the SHA-256 Merkle tree root of `filename` as a lowercase hex string.

The file is hashed in blocks of `blocksize` bytes (the last block may be
shorter). Neighbouring digests are then concatenated and hashed pairwise, level
by level, until one digest remains; a digest without a partner moves up to the
next level unchanged.

An empty file produces no digests, so the final `hsh[1]` lookup fails for it.
"""
function merkletree(filename="title.png", blocksize=1024)
    bytes = codeunits(read(filename, String))
    len = length(bytes)
    # Step by `blocksize` (not a fixed 1024) so the block boundaries follow the argument.
    hsh = [sha256(view(bytes, i:min(i + blocksize - 1, len))) for i in 1:blocksize:len]
    len = length(hsh)
    while len > 1
        hsh = [i == len ? hsh[i] : sha256(vcat(hsh[i], hsh[i + 1])) for i in 1:2:len]
        len = length(hsh)
    end
    return bytes2hex(hsh[1])
end
 
# Print the Merkle root using the function's default file name and block size.
println(merkletree())
 
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Nim[edit]

Library: nimcrypto

To compute the digests of file blocks, we use the procedure “digest” which accepts the address of a byte array and a byte count. To compute the digests of pairs of digests, we use instead a SHA256 context and the procedures “update” and “finish”, which avoids a copy in an intermediate buffer.

 
import nimcrypto
 
const BlockSize = 1024

# Leaf level: one SHA-256 digest per BlockSize-byte block of the file
# (the last block may be shorter).
var hashes: seq[MDigest[256]]

let f = open("title.png")
var buffer: array[BlockSize, byte]
while true:
  let n = f.readBytes(buffer, 0, BlockSize)
  if n == 0: break
  hashes.add sha256.digest(buffer[0].addr, n.uint)
f.close()

# An empty file yields no digests. Stop here: the reduction below could never
# shrink an empty list down to a single root.
if hashes.len == 0:
  quit("title.png is empty: nothing to hash")

# Hash neighbouring digests pairwise until one root is left; a digest without
# a partner is carried to the next level unchanged.
var ctx: sha256
while hashes.len > 1:
  var newHashes: seq[MDigest[256]]
  for i in countup(0, hashes.high, 2):
    if i < hashes.high:
      ctx.init()
      ctx.update(hashes[i].data)
      ctx.update(hashes[i + 1].data)
      newHashes.add ctx.finish()
      ctx.clear()
    else:
      newHashes.add hashes[i]
  hashes = newHashes

echo hashes[0]
Output:
A4F902CF9D51FE51EDA156A6792E1445DFF65EDF3A217A1F3334CC9CF1495C2C

Perl[edit]

Translation of: Raku
# 20210222 Perl programming solution
 
use strict;
use warnings;
 
use Crypt::Digest::SHA256 'sha256' ;
 
my $path = './title.png';

# Leaf level: one raw SHA-256 digest per 1024-byte block (the last block may
# be shorter). Fail loudly if the file cannot be opened instead of carrying on
# with an unopened handle.
open my $fh, '<:raw', $path or die "Cannot open $path: $!\n";

my @blocks;

while ( read $fh, my $chunk, 1024 ) { push @blocks, sha256 $chunk }

close $fh;

# Without any block there is no root digest to print.
die "No data could be read from $path\n" unless @blocks;

# Hash neighbouring digests pairwise until one root is left; a digest without
# a partner is carried to the next level unchanged.
while ( @blocks > 1 ) {
    my @level = @blocks;
    @blocks = ();
    while ( my @pair = splice @level, 0, 2 ) {
        push @blocks, @pair == 1 ? $pair[0] : sha256 $pair[0] . $pair[1];
    }
}

print unpack( 'H*', $blocks[0] ), "\n";
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Phix[edit]

Library: Phix/libcurl
include builtins\libcurl.e
include builtins\sha256.e
 
-- 1024 * 1024 bytes; used as the default block_size of merkle().
constant ONE_MB = 1024 * 1024
 
-- Returns the SHA-256 Merkle tree root of the file `filename`, hashing it in
-- blocks of block_size bytes (ONE_MB unless the caller passes another value).
-- When the file does not exist it is first downloaded from `url`; a failed
-- download ends the program through crash().
function merkle(string filename, url, integer block_size=ONE_MB)
    if not file_exists(filename) then
        printf(1,"Downloading %s...\n",{filename})
        CURLcode res = curl_easy_get_file(url,"",filename) -- (no proxy)
        if res!=CURLE_OK then
            -- Report the numeric code, naming the one case handled specially.
            string error = sprintf("%d",res)
            if res=CURLE_COULDNT_RESOLVE_HOST then
                error &= " [CURLE_COULDNT_RESOLVE_HOST]"
            end if
            crash("Error %s downloading file\n", {error})
        end if  
    end if  
    string data = get_text(filename)
    -- Leaf level: one hash per block; min() trims the last, possibly shorter, slice.
    sequence blocks = {}
    for i=1 to length(data) by block_size do
        blocks = append(blocks,sha256(data[i..min(i+block_size-1,length(data))]))
    end for
    -- Reduce in place: slots 1..l receive the next level, built from pairs of
    -- the current one; a last hash without a partner is copied unchanged.
    while length(blocks)>1 do
        integer l = 0
        for i=1 to length(blocks) by 2 do
            l += 1
            blocks[l] = iff(i<length(blocks)?sha256(blocks[i]&blocks[i+1])
                                            :blocks[i])
        end for
        blocks = blocks[1..l]
    end while
    -- NOTE(review): blocks is still {} for an empty file, so this index
    -- presumably fails in that case - confirm.
    return blocks[1]            
end function
 
-- Returns the concatenation of every element of s formatted with "%02X".
function asHex(string s)
string res = ""
    for i=1 to length(s) do
        res &= sprintf("%02X",s[i])
    end for
    return res
end function
 
-- Print the hex form of the Merkle root of title.png, using 1024-byte blocks;
-- the URL is where merkle() fetches the file from when it is missing.
printf(1,"%s\n",asHex(merkle("title.png", "https://rosettacode.org/mw/title.png", 1024)))
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Python[edit]

This version attempts to combine blocks as soon as possible to minimize the memory footprint.

#!/usr/bin/env python
# compute the root label for a SHA256 Merkle tree built on blocks of a given
# size (default 1 MiB) taken from the given file, or from standard input when
# no file is named
import argh
import hashlib
import sys
 
@argh.arg('filename', nargs='?', default=None)
def main(filename, block_size=1024*1024):
    """Print the hex root of the SHA-256 Merkle tree of a file or of stdin.

    The input is read in blocks of ``block_size`` bytes. Blocks are combined
    as early as possible: a stack of ``(tree-level, hash)`` nodes is kept, and
    the two topmost nodes are merged whenever they sit at the same level, so
    the stack never holds more than one node per level.

    :param filename: path of the file to hash; standard input when omitted.
    :param block_size: leaf block size in bytes.

    Nothing is printed when the input is empty.
    """
    # hashlib only accepts bytes, so read the binary stream behind the
    # text-mode sys.stdin wrapper.
    fin = open(filename, 'rb') if filename else sys.stdin.buffer

    stack = []
    try:
        block = fin.read(block_size)
        while block:
            # a node is a pair: ( tree-level, hash )
            node = (0, hashlib.sha256(block).digest())
            stack.append(node)

            # concatenate adjacent pairs at the same level
            while len(stack) >= 2 and stack[-2][0] == stack[-1][0]:
                a = stack[-2]
                b = stack[-1]
                l = a[0]
                stack[-2:] = [(l+1, hashlib.sha256(a[1] + b[1]).digest())]

            block = fin.read(block_size)
    finally:
        # Close only what was opened here; stdin belongs to the interpreter.
        if filename:
            fin.close()

    if not stack:
        # Empty input: there is no root to report.
        return

    while len(stack) > 1:
        # at the end we have to concatenate even across levels
        a = stack[-2]
        b = stack[-1]
        al = a[0]
        bl = b[0]
        stack[-2:] = [(max(al, bl)+1, hashlib.sha256(a[1] + b[1]).digest())]

    print(stack[0][1].hex())
 
 
# Hand main() to argh, which runs it with the command-line arguments
# (see the sample invocation: --block-size=1024 title.png).
argh.dispatch_command(main)
 
Output:
$ sha256tree.py --block-size=1024 title.png
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Raku[edit]

use Digest::SHA256::Native;
 
# Command line: an optional named block size (-b / --block-size, default
# 1024 × 1024) followed by any number of positional arguments.
unit sub MAIN(Int :b(:$block-size) = 1024 × 1024, *@args);
 
# Read through an IO::CatHandle built from the arguments when any are given,
# otherwise from $*IN.
my $in = @args ?? IO::CatHandle.new(@args) !! $*IN;
 
# Leaf level: one sha256 result per block read; the loop stops at the first
# false (empty) read.
my @blocks = do while my $block = $in.read: $block-size { sha256 $block };
 
# Reduce until one entry remains: a batch of two is concatenated ([~]) and
# hashed, while a lone trailing entry is passed through unchanged.
while @blocks > 1 {
@blocks = @blocks.batch(2).map: { $_ > 1 ?? sha256([~] $_) !! .[0] }
}
 
# Print every element of the remaining entry as two lowercase hex digits.
say @blocks[0]».fmt('%02x').join;
Output:
$ sha256tree --block-size=1024 title.png
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Rust[edit]

extern crate crypto;
 
use crypto::digest::Digest;
use crypto::sha2::Sha256;
use std::fs::File;
use std::io::prelude::*;
use std::io::BufReader;
 
/// Computes the SHA-256 Merkle tree root of the file at `filename`.
///
/// The file is split into blocks of `block_size` bytes (the last block may be
/// shorter) and every block is hashed. Adjacent digests are then hashed
/// together pairwise, level by level, until a single digest is left; a digest
/// without a partner moves up to the next level unchanged.
///
/// Returns `Ok(None)` when no bytes could be read (an empty file, or a
/// `block_size` of zero), otherwise `Ok(Some(root))` with the raw root digest.
///
/// # Errors
///
/// Returns any I/O error raised while opening or reading the file. Reads that
/// fail with `ErrorKind::Interrupted` are retried.
fn sha256_merkle_tree(filename: &str, block_size: usize) -> std::io::Result<Option<Vec<u8>>> {
    let mut md = Sha256::new();
    let mut input = BufReader::new(File::open(filename)?);
    let mut buffer = vec![0; block_size];
    let mut digest = vec![0; md.output_bytes()];
    let mut digests = Vec::new();

    // Leaf level: one digest per block.
    loop {
        // A single `read` may return fewer bytes than requested even when more
        // data follows (`BufReader` hands out only what is left in its internal
        // buffer), so keep reading until the block is full or the file ends.
        let mut filled = 0;
        while filled < block_size {
            match input.read(&mut buffer[filled..]) {
                Ok(0) => break,
                Ok(n) => filled += n,
                Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => return Err(e),
            }
        }
        if filled == 0 {
            break;
        }
        md.reset();
        md.input(&buffer[..filled]);
        md.result(&mut digest);
        digests.push(digest.clone());
        if filled < block_size {
            // Short block: the end of the file has been reached.
            break;
        }
    }

    let mut len = digests.len();
    if len == 0 {
        return Ok(None);
    }

    // Each pass writes the next level over the front of `digests`.
    while len > 1 {
        let mut next = 0;
        for i in (0..len).step_by(2) {
            if i + 1 < len {
                md.reset();
                md.input(&digests[i]);
                md.input(&digests[i + 1]);
                md.result(&mut digests[next]);
            } else {
                // Odd digest out: move it into its slot on the next level.
                digests.swap(i, next);
            }
            next += 1;
        }
        len = next;
    }
    // Take the root out of the vector instead of cloning it.
    Ok(Some(digests.swap_remove(0)))
}
 
/// Renders `digest` as lowercase hexadecimal, two characters per byte.
fn digest_to_string(digest: &[u8]) -> String {
    digest.iter().map(|byte| format!("{:02x}", byte)).collect()
}
 
/// Prints the Merkle root of `title.png` (1024-byte blocks) as hex.
///
/// An I/O failure is reported on standard error; a file that yields no data
/// prints nothing.
fn main() {
    let root = match sha256_merkle_tree("title.png", 1024) {
        Ok(root) => root,
        Err(error) => {
            eprintln!("I/O error: {}", error);
            return;
        }
    };
    if let Some(digest) = root {
        println!("{}", digest_to_string(&digest));
    }
}
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c

Wren[edit]

Library: Wren-crypto
Library: Wren-seq
Library: Wren-str
Library: Wren-fmt
import "io" for File
import "/crypto" for Sha256, Bytes
import "/seq" for Lst
import "/str" for Str
import "/fmt" for Conv
 
// Load the whole file as a list of byte values and cut it into 1024-element chunks.
var bytes = File.read("title.png").bytes.toList
var chunks = Lst.chunks(bytes, 1024)
// Leaf level: one hash per chunk, each stored as a list of byte values.
var hashes = List.filled(chunks.count, null)
var i = 0
for (chunk in chunks) {
// The chunk's bytes are joined into a string before hashing.
var h = Sha256.digest(chunk.map { |b| String.fromByte(b) }.join())
// The digest text is split into 2-character pieces, each parsed as base 16
// (presumably because Sha256.digest returns a hex string - confirm).
hashes[i] = Str.chunks(h, 2).map { |x| Conv.atoi(x, 16) }.toList
i = i + 1
}
 
// Scratch space for two hashes placed side by side (32 + 32 entries).
var buffer = List.filled(64, 0)
// Hash neighbouring entries pairwise until one is left; an entry without a
// partner is carried to the next level unchanged.
while (hashes.count > 1) {
var hashes2 = []
// This i is local to the loop body and shadows the outer counter.
var i = 0
while (i < hashes.count) {
if (i < hashes.count - 1) {
for (j in 0..31) buffer[j] = hashes[i][j]
for (j in 0..31) buffer[j+32] = hashes[i+1][j]
var h = Sha256.digest(buffer.map { |b| String.fromByte(b) }.join())
var hb = Str.chunks(h, 2).map { |x| Conv.atoi(x, 16) }.toList
hashes2.add(hb)
} else {
hashes2.add(hashes[i])
}
i = i + 2
}
hashes = hashes2
}
// Print the remaining entry as a hex string.
System.print(Bytes.toHexString(hashes[0]))
Output:
a4f902cf9d51fe51eda156a6792e1445dff65edf3a217a1f3334cc9cf1495c2c