wiki:HashSourceCodes

Version 48 (modified by Administrator, 11 years ago) ( diff )

--

Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit chksum of the first and last 64k (even if they overlap because the file is smaller than 128k). On opensubtitles.org is movie file size limited to 9000000000 > $moviebytesize > 131072 bytes, if is there any reason to change these sizes, let us know. Licence of hashing source codes is GPL. Source codes was tested on Little Endian - DEC, Intel and compatible

Feel free to edit/add source-codes if you have faster/better implementation. Also don't forget to check, if hash is right for test. Test these 2 files please to ensure your algo is completely OK:

  • AVI file (12 909 756 bytes)
    • hash: 8e245d9679d31e12
  • DUMMY RAR file (2 565 922 bytes, 4 295 033 890 after RAR unpacking, test on UNPACKED file)
    • hash: 61f7751fc2a72bfb (for UNPACKED file)

C

#include <stdio.h>
#include <stdlib.h>

#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#ifndef uint64_t
#define uint64_t unsigned long long
#endif

uint64_t compute_hash(FILE * handle)
{
        uint64_t hash, fsize;

        fseek(handle, 0, SEEK_END);
        fsize = ftell(handle);
        fseek(handle, 0, SEEK_SET);

        hash = fsize;

        for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++);
        fseek(handle, (long)MAX(0, fsize - 65536), SEEK_SET);
        for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++);
        
        return hash;
}

int main(int argc, char *argv)
{
        FILE * handle;
        uint64_t myhash;

        handle = fopen("breakdance.avi", "rb");
        
        if (!handle) 
        {
                printf("Error openning file!");
                return 1;
        }

        myhash = compute_hash(handle);  
        printf("%I64x", myhash);

        fclose(handle);
        return 0;
}

C - Public Domain License

#include <stdio.h>
#include <stdlib.h>

unsigned long long analizefileOSHahs(char *fileName){
 /*
  * Public Domain implementation by Kamil Dziobek. turbos11(at)gmail.com
  * This code implements Gibest hash algorithm first use in Media Player Classics
  * For more implementation(various languages and authors) see:
  * http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes   
  *
  * -works only on little-endian procesor DEC, Intel and compatible
  * -sizeof(unsigned long long) must be 8
  */
 
  FILE        *file;
  int i;
  unsigned long long t1=0;
  unsigned long long buffer1[8192*2];
  file = fopen(fileName, "rb");
  fread(buffer1, 8192, 8, file);
  fseek(file, -65536, SEEK_END);
  fread(&buffer1[8192], 8192, 8, file); 
  for (i=0;i<8192*2;i++)
    t1+=buffer1[i];
  t1+= ftell(file); //add filesize
  fclose(file); 
  return  t1;
};
int main(int argc, char *argv){
  unsigned long long myhash=analizefileOSHahs("C://tomaszkokowskizoofiliamovies.avi");
  printf("hash is %16I64x",myhash);
}

C++

 #include <iostream>
 #include <fstream> 
 
 typedef unsigned __int64 uint64_t;
 using namespace std;
 
 int MAX(int x, int y)
 {  
        if((x) > (y)) 
                return x;
        else    
                return y;
 }
 
 uint64_t compute_hash(ifstream& f)
 {
        uint64_t hash, fsize;
 
        f.seekg(0, ios::end);
        fsize = f.tellg();
        f.seekg(0, ios::beg);
 
        hash = fsize;
        for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp);
        f.seekg(MAX(0, (uint64_t)fsize - 65536), ios::beg);
        for(tmp = 0, i = 0; i < 65536/sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp);
        return hash;
 } 
 
 int main(int argc, char *argv)
 {
        ifstream f;
        uint64_t myhash;
 
        f.open("c:\\test.avi", ios::in|ios::binary|ios::ate);
        if (!f.is_open()) {
           cerr << "Error opening file" << endl;
           return 1;
        }
 
        myhash = compute_hash(f);
        cout << setw(16) << setfill('0') << hex << myhash;
 
        f.close();
        return 0;
 }

About C and C++ implementation

This only work on little-endian processor: DEC, Intel and compatible

Java

/**
 * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit
 * checksum of the first and last 64k (even if they overlap because the file is smaller than
 * 128k).
 */
public class OpenSubtitlesHasher {
        
        /**
         * Size of the chunks that will be hashed in bytes (64 KB)
         */
        private static final int HASH_CHUNK_SIZE = 64 * 1024;
        
        
        public static String computeHash(File file) throws IOException {
                long size = file.length();
                long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size);
                
                FileChannel fileChannel = new FileInputStream(file).getChannel();
                
                try {
                        long head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile));
                        long tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile));
                        
                        return String.format("%016x", size + head + tail);
                } finally {
                        fileChannel.close();
                }
        }
        

        public static String computeHash(InputStream stream, long length) throws IOException {
                
                int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length);
                
                // buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks
                byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)];
                
                DataInputStream in = new DataInputStream(stream);
                
                // first chunk
                in.readFully(chunkBytes, 0, chunkSizeForFile);
                
                long position = chunkSizeForFile;
                long tailChunkPosition = length - chunkSizeForFile;
                
                // seek to position of the tail chunk, or not at all if length is smaller than two chunks
                while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0);
                
                // second chunk, or the rest of the data if length is smaller than two chunks
                in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile);
                
                long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile));
                long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile));
                
                return String.format("%016x", length + head + tail);
        }
        

        private static long computeHashForChunk(ByteBuffer buffer) {
                
                LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
                long hash = 0;
                
                while (longBuffer.hasRemaining()) {
                        hash += longBuffer.get();
                }
                
                return hash;
        }
        
}

C#

You can use GetHash.dll.

http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll

Use Example:

    private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
        {
            byte[] hash = GetHash.Main.ComputeHash(openFileDialog1.FileName);
            label1.Text =  GetHash.Main.ToHexadecimal(hash);

        }

or without using GetHash.dll:

using System;
using System.Text;
using System.IO;
   
namespace MovieHasher
{
    class Program
    {
        private static byte[] ComputeMovieHash(string filename)
        {
            byte[] result;
            using (Stream input = File.OpenRead(filename))
            {
                result = ComputeMovieHash(input);
            }
            return result;
        }
 
        private static byte[] ComputeMovieHash(Stream input)
        {
            long lhash, streamsize;
            streamsize = input.Length;
            lhash = streamsize;
 
            long i = 0;
            byte[] buffer = new byte[sizeof(long)];
            while (i < 65536 / sizeof(long) && (input.Read(buffer, 0, sizeof(long)) > 0))
            {
                i++;
                lhash += BitConverter.ToInt64(buffer, 0);
            }
 
            input.Position = Math.Max(0, streamsize - 65536);
            i = 0;
            while (i < 65536 / sizeof(long) && (input.Read(buffer, 0, sizeof(long)) > 0))
            {
                i++;
                lhash += BitConverter.ToInt64(buffer, 0);
            }
            input.Close();
            byte[] result = BitConverter.GetBytes(lhash);
            Array.Reverse(result);
            return result;
        }
 
        private static string ToHexadecimal(byte[] bytes)
        {
            StringBuilder hexBuilder = new StringBuilder();
            for(int i = 0; i < bytes.Length; i++)
            {
                hexBuilder.Append(bytes[i].ToString("x2"));
            }
            return hexBuilder.ToString();
        }
 
        static void Main(string[] args)
        {
            byte[] moviehash = ComputeMovieHash(@"C:\test.avi");
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash));
        }
    }
}

If you get overflow error read this.

VB.Net

Imports System
Imports System.Text
Imports System.IO
'Note: you must remove integer overflow checking.

Namespace MovieHasher
        Class Program
                Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte()
                        Dim result As Byte()
                        Using input As Stream = File.OpenRead(filename)
                                result = ComputeMovieHash(input)
                        End Using
                        Return result
                End Function
                
                Private Function ComputeMovieHash(ByVal input As Stream) As Byte()
                        Dim lhash As System.Int64, streamsize As Long
                        streamsize = input.Length
                        lhash = streamsize
                        
                        Dim i As Long = 0
                        Dim buffer As Byte() = New Byte(Marshal.SizeOf(GetType(Long)) - 1) {}
                        While i < 65536 / Marshal.SizeOf(GetType(Long)) AndAlso (input.Read(buffer, 0, Marshal.SizeOf(GetType(Long))) > 0)
                                i += 1
                                
                                lhash += BitConverter.ToInt64(buffer, 0)
                        End While
                        
                        input.Position = Math.Max(0, streamsize - 65536)
                        i = 0
                        While i < 65536 / Marshal.SizeOf(GetType(Long)) AndAlso (input.Read(buffer, 0, Marshal.SizeOf(GetType(Long))) > 0)
                                i += 1
                                lhash += BitConverter.ToInt64(buffer, 0)
                        End While
                        input.Close()
                        Dim result As Byte() = BitConverter.GetBytes(lhash)
                        Array.Reverse(result)
                        Return result
                End Function
                
                Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String
                        Dim hexBuilder As New StringBuilder()
                        For i As Integer = 0 To bytes.Length - 1
                                hexBuilder.Append(bytes(i).ToString("x2"))
                        Next
                        Return hexBuilder.ToString()
                End Function
                
                Private Shared Sub Main(ByVal args As String())
                        Dim moviehash As Byte() = ComputeMovieHash("C:\test.avi")
                        Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash))
                End Sub
        End Class
End Namespace

Python

import struct, os

def hashFile(name): 
      try: 
                 
                longlongformat = 'q'  # long long 
                bytesize = struct.calcsize(longlongformat) 
                    
                f = open(name, "rb") 
                    
                filesize = os.path.getsize(name) 
                hash = filesize 
                    
                if filesize < 65536 * 2: 
                       return "SizeError" 
                 
                for x in range(65536/bytesize): 
                        buffer = f.read(bytesize) 
                        (l_value,)= struct.unpack(longlongformat, buffer)  
                        hash += l_value 
                        hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number  
                         
    
                f.seek(max(0,filesize-65536),0) 
                for x in range(65536/bytesize): 
                        buffer = f.read(bytesize) 
                        (l_value,)= struct.unpack(longlongformat, buffer)  
                        hash += l_value 
                        hash = hash & 0xFFFFFFFFFFFFFFFF 
                 
                f.close() 
                returnedhash =  "%016x" % hash 
                return returnedhash 
    
      except(IOError): 
                return "IOError"

Delphi

This is just a quick conversion of Gabest's original C code. Anyone who can come up with a cleaner code, please feel free to do so and post here.

function CalcGabestHash(const fname: string): string;
var
  i : integer;
  s : array[1..8] of char;
  tmp       : Int64 absolute s;
  hash      : Int64;
  readed    : integer;

  aStream: TFileStream;
begin
  result := '';
  if not FileExists(fname) then Exit;

  aStream := TFileStream.Create(fName, fmShareDenyNone);
  hash := aStream.Size;

  i := 0; readed := 1;
  while ((i < 8192) and (readed > 0)) do begin
    readed := aStream.Read(s, sizeof(s));
    if readed > 0 then
    begin
      hash := hash + tmp;
    end;
    i := i + 1;
  end;

  aStream.Seek(-65536, soFromEnd); // 65536

  i := 0; readed:= 1;
  while ((i < 8192) and (readed > 0)) do begin
    readed := aStream.Read(s, sizeof(s));
    if readed > 0 then
      hash := hash + tmp;
    i := i + 1;
  end;
  aStream.Free;
  result := Format('%.16x',[hash]);
end;

alternate version by TRP

unction CalcGabestHash(const Stream: TStream): Int64; overload;
const HashPartSize = 1 shl 16; // 64 KiB

  procedure UpdateHashFromStream(const Stream: TStream; var Hash:
Int64); inline;
  var buffer: Array[0..HashPartSize div SizeOf(Int64) - 1] of Int64;
      i     : integer;
  begin
    Stream.ReadBuffer(buffer[0], SizeOf(buffer));
    for i := Low(buffer) to High(buffer) do
      Inc(Hash, buffer[i]);
  end;

begin
  result:= Stream.Size;

  if result < HashPartSize then
  begin
    // stream too small return invalid hash
    result:= 0;
    exit;
  end;

  // first 64 KiB
  Stream.Position:= 0;
  UpdateHashFromStream(Stream, result);

  // last 64 KiB
  Stream.Seek(-HashPartSize, soEnd);
  UpdateHashFromStream(Stream, result);

  // use "IntToHex(result, 16);" to get a string and "StrToInt64('$' +
hash);" to get your Int64 back
end;

function CalcGabestHash(const FileName: TFileName): Int64; overload;
var stream: TStream;
begin
  stream:= TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    result:= CalcGabestHash(stream);
  finally
    stream.Free;
  end;
end;

Lua

-- will produce a correct hash regardless of architecture (big vs little endian)
local function movieHash(fileName)
        local fil = io.open(fileName, "rb")
        local lo,hi=0,0
        for i=1,8192 do
                local a,b,c,d = fil:read(4):byte(1,4)
                lo = lo + a + b*256 + c*65536 + d*16777216
                a,b,c,d = fil:read(4):byte(1,4)
                hi = hi + a + b*256 + c*65536 + d*16777216
                while lo>=4294967296 do
                        lo = lo-4294967296
                        hi = hi+1
                end
                while hi>=4294967296 do
                        hi = hi-4294967296
                end
        end
        local size = fil:seek("end", -65536) + 65536
        for i=1,8192 do
                local a,b,c,d = fil:read(4):byte(1,4)
                lo = lo + a + b*256 + c*65536 + d*16777216
                a,b,c,d = fil:read(4):byte(1,4)
                hi = hi + a + b*256 + c*65536 + d*16777216
                while lo>=4294967296 do
                        lo = lo-4294967296
                        hi = hi+1
                end
                while hi>=4294967296 do
                        hi = hi-4294967296
                end
        end
        lo = lo + size
                while lo>=4294967296 do
                        lo = lo-4294967296
                        hi = hi+1
                end
                while hi>=4294967296 do
                        hi = hi-4294967296
                end
        fil:close()
        return string.format("%08x%08x", hi,lo), size
end

print("breakdance.avi:")
print(movieHash("breakdance.avi"))
print("8e245d9679d31e12 <- should be")
print("")
print("dummy.rar:")
print(movieHash("dummy.rar"))
print("61f7751fc2a72bfb <- should be according to wiki")
print("2a527d74d45f5b1b <- what other hash tools actually report")

RealBasic

Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles

    dim b as BinaryStream
    dim mb as MemoryBlock
    
    dim hash,bytesize as UINT64
    dim i, x, chunksize, filelen, difference as integer
    
    hash = 0 //Reset Hash
    difference = 0
    
    if f <> nil and f.Exists then
      b= f.OpenAsBinaryFile
      hash = b.Length
      bytesize = b.Length
      bytesizestr = str(bytesize)
      
      if bytesize >= 65536 and routine = "video" then
        chunksize = 65536
        mb = b.Read(65536)
        mb.LittleEndian = True
        
        for i= 0 to chunksize -1 step 8
          hash = hash+ mb.UINT64Value(i)
        next
        
        b.Position = max(b.Length-chunksize, 0)
        mb= b.Read(chunksize)
        mb.LittleEndian = True
        
        for i= 0 to chunksize -1 step 8
          hash = hash+ mb.UINT64Value(i)
        next

        
        myhash = Lowercase(str(hex(hash)))
        
      elseif routine = "subtitle" then
        
        dim c,result as string
        mb = md5(b.Read(b.Length))
        mb.LittleEndian = True
        
        for i = 0 to mb.size-1
          x = mb.byte( i )
          c = right( "00"+hex( x ), 2 )
          result = result + c
        next
        result = lowercase( result )
        myhash = result
        
      end

PHP 4/5

function OpenSubtitlesHash($file)
{
    $handle = fopen($file, "rb");
    $fsize = filesize($file);
    
    $hash = array(3 => 0, 
                  2 => 0, 
                  1 => ($fsize >> 16) & 0xFFFF, 
                  0 => $fsize & 0xFFFF);
        
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }
    
    $offset = $fsize - 65536;
    fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET);
    
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);         
    }
    
    fclose($handle);
        return UINT64FormatHex($hash);
}

function ReadUINT64($handle)
{
    $u = unpack("va/vb/vc/vd", fread($handle, 8));
    return array(0 => $u["a"], 1 => $u["b"], 2 => $u["c"], 3 => $u["d"]);
}

function AddUINT64($a, $b)
{
    $o = array(0 => 0, 1 => 0, 2 => 0, 3 => 0);

    $carry = 0;
    for ($i = 0; $i < 4; $i++) 
    {
        if (($a[$i] + $b[$i] + $carry) > 0xffff ) 
        {
            $o[$i] += ($a[$i] + $b[$i] + $carry) & 0xffff;
            $carry = 1;
        }
        else 
        {
            $o[$i] += ($a[$i] + $b[$i] + $carry);
            $carry = 0;
        }
    }
    
    return $o;   
}

function UINT64FormatHex($n)
{   
    return sprintf("%04x%04x%04x%04x", $n[3], $n[2], $n[1], $n[0]);
}

Perl

#!/usr/bin/perl
use strict;
use warnings;

print OpenSubtitlesHash('breakdance.avi');

sub OpenSubtitlesHash {
        my $filename = shift or die("Need video filename");

        open my $handle, "<", $filename or die $!;
        binmode $handle;

        my $fsize = -s $filename;

        my $hash = [$fsize & 0xFFFF, ($fsize >> 16) & 0xFFFF, 0, 0];

        $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

    my $offset = $fsize - 65536;
    seek($handle, $offset > 0 ? $offset : 0, 0) or die $!;

    $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

    close $handle or die $!;
    return UINT64FormatHex($hash);
}

sub ReadUINT64 {
        read($_[0], my $u, 8);
        return [unpack("vvvv", $u)];
}

sub AddUINT64 {
    my $o = [0,0,0,0];
    my $carry = 0;
    for my $i (0..3) {
        if (($_[0]->[$i] + $_[1]->[$i] + $carry) > 0xffff ) {
                        $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry) & 0xffff;
                        $carry = 1;
                } else {
                        $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry);
                        $carry = 0;
                }
        }
    return $o;
}

sub UINT64FormatHex {
    return sprintf("%04x%04x%04x%04x", $_[0]->[3], $_[0]->[2], $_[0]->[1], $_[0]->[0]);
}

Ruby

This is a quick translation/transliteration of the Perl script.

class Hasher

  def open_subtitles_hash(filename)
    raise "Need video filename" unless filename

    fh = File.open(filename)
    fsize = File.size(filename)

    hash = [fsize & 0xffff, (fsize >> 16) & 0xffff, 0, 0]

    8192.times { hash = add_unit_64(hash, read_uint_64(fh)) }

    offset = fsize - 65536
    fh.seek([0,offset].max, 0)

    8192.times { hash = add_unit_64(hash, read_uint_64(fh)) }

    fh.close

    return uint_64_format_hex(hash)
  end

  def read_uint_64(stream)
    stream.read(8).unpack("vvvv")
  end

  def add_unit_64(hash, input)
    res = [0,0,0,0]
    carry = 0

    hash.zip(input).each_with_index do |(h,i),n|
      sum = h + i + carry
      if sum > 0xffff
        res[n] += sum & 0xffff
        carry = 1
      else
        res[n] += sum
        carry = 0
      end
    end
    return res
  end

  def uint_64_format_hex(hash)
    sprintf("%04x%04x%04x%04x", *hash.reverse)
  end
end

if __FILE__ == $0
  require 'test/unit'

  class HashTester < Test::Unit::TestCase
    def setup
      @h = Hasher.new
    end

    def test_test_file_hash
      assert_equal("8e245d9679d31e12", @h.open_subtitles_hash('breakdance.avi'))
    end
  end
end


Another more "rubyesque" implementation.

module MovieHasher

  CHUNK_SIZE = 64 * 1024 # in bytes

  def self.compute_hash(filename)
    filesize = File.size(filename)
    hash = filesize

    # Read 64 kbytes, divide up into 64 bits and add each
    # to hash. Do for beginning and end of file.
    File.open(filename, 'rb') do |f|    
      # Q = unsigned long long = 64 bit
      f.read(CHUNK_SIZE).unpack("Q*").each do |n|
        hash = hash + n & 0xffffffffffffffff # to remain as 64 bit number
      end

      f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)

      # And again for the end of the file
      f.read(CHUNK_SIZE).unpack("Q*").each do |n|
        hash = hash + n & 0xffffffffffffffff
      end
    end

    sprintf("%016x", hash)
  end
end

if __FILE__ == $0
  require 'test/unit'

  class MovieHasherTest < Test::Unit::TestCase
    def test_compute_hash
      assert_equal("8e245d9679d31e12", MovieHasher::compute_hash('breakdance.avi'))
    end

    def test_compute_hash_large_file
      assert_equal("61f7751fc2a72bfb", MovieHasher::compute_hash('dummy.bin'))
    end
  end
end

Haskell

import IO(bracket)
import System.Environment(getArgs)
import System.IO(openBinaryFile,hClose,hFileSize,hSeek,IOMode(ReadMode),SeekMode(AbsoluteSeek,SeekFromEnd))
import qualified Data.ByteString.Lazy as L(hGet,unpack)
import Data.Binary.Get(runGet,getWord64le)
import Data.Binary.Put(runPut,putWord64le)
import Data.Word(Word64)
import Control.Monad(foldM)
import Data.Bits.Utils(w82s)
import Data.Hex(hex)

shortsum :: FilePath -> IO Word64
shortsum filename = bracket (openBinaryFile filename ReadMode) hClose $ \h -> do
  fs <- hFileSize h
  hSeek h AbsoluteSeek 0 ; begin <- L.hGet h chunksize
  hSeek h SeekFromEnd (-(toInteger chunksize)) ; end <- L.hGet h chunksize
  return $ (flip runGet $ begin) $ chunksum $ (flip runGet $ end) (chunksum . fromInteger $ fs)
  where
    chunksize = 0x10000
    chunksum n = foldM (\a _ -> getWord64le >>= return . (+a)) n [1..(chunksize`div`8)]

main :: IO ()
main = do
  args <- getArgs
  let fn = head $ args
  p <- shortsum fn
  putStrLn $ "The hash of file " ++ fn ++ ": " ++ (hex $ w82s $ reverse (L.unpack $ runPut $ putWord64le p))

AutoIT

Forum entry

#cs
	Hash code is based on Media Player Classic. It calculates: size + 64bit
	checksum of the first and last 64k (even if they overlap because the file is smaller than 128k).
	Authors: Authenticity & Emanuel "Datenshi" Lindgren @ AutoIT Forums.
        AutoIT v3.3.2.0
#ce
Func _Compute_Hash($sFileName)
	Local $hFile, $tRet, $tTmp, $iFileSize, $iRead, $iChunk, $iI
	$hFile = FileOpen($sFileName, 16)
	If Not $hFile Then Return SetError(1, 0, 0)
	$iFileSize = FileGetSize($sFileName)
	$iChunk = 65536
	If $iFileSize < $iChunk * 2 Then
		FileClose($hFile)
		Return SetError(2, 0, 0)
	EndIf
	$tRet = DllStructCreate("uint64")
	$tTmp = DllStructCreate("uint64")
	DllStructSetData($tRet, 1, $iFileSize)
	For $iI = 0 To ($iChunk / 8) - 1
		DllStructSetData($tTmp, 1, FileRead($hFile, 8))
		DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
	Next
	FileSetPos($hFile, $iFileSize - $iChunk, 0)
	For $iI = 0 To ($iChunk / 8) - 1
		DllStructSetData($tTmp, 1, FileRead($hFile, 8))
		DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
	Next
	FileClose($hFile)
	Return SetError(0, 0, _HEX(DllStructGetData($tRet, 1)))
EndFunc
Func _HEX($iValue)
	Return StringFormat("%#.8x%.8x", $iValue / 4294967296, $iValue)
EndFunc

FoxPro

PARAMETERS cfile
PRIVATE ALL 

*******
* enviroment setup
*******
cret=''
glTalk=(SET("TALK")="ON")

IF vartype(cfile)<>'C'
	cfile='breakdance.avi'
ENDIF



IF glTalk
	? cfile
	? cfile=''
	? LEN(cfile)
endif


nfile=FOPEN(cfile)
nsize=FSEEK(nfile,0,2)

IF gltalk
	? cfile	
	? 'size?>'
	?? nsize
endif
FSEEK(nfile,0,0)

******
* length reencode to 64 uint
*****
chash=hashsize(nsize)
cempty=chr(0)
cret=''
IF LEN(chash)<8
	FOR i=1 TO 8-LEN(chash)
		cret=cret+cempty
	ENDFOR
ENDIF
cret=cret+chash
nSum=0

*******
* first 64kb
******


	FOR i=1 TO 8192
		cpom=FREAD(nfile,8)
		cpom=reverse(cpom)
		nSum=nSum+LEN(cpom)
		IF gltalk
				do buildhex WITH cret
				?? '+'
				DO buildhex WITH cpom
				? '='
		ENDIF
		cret=adint64(cret,cpom)
	ENDFOR

*******
* last 64kb
*******

	FSEEK(nfile,-65536,2)
	FOR i=1 TO 8192
		cpom=FREAD(nfile,8)
		cpom=reverse(cpom)
		cret=adint64(cret,cpom)
		nSum=nSum+LEN(cpom)
	ENDFOR
FCLOSE(nfile)

****
* build hexa
****
IF gltalk

	DO buildhex WITH cret
	?
	? 'Spocital som'
	?? nSum
ENDIF
RETURN buildhex(cret)

FUNCTION reverse
PARAMETERS cstring
PRIVATE ALL 
cret=''
FOR i=1 TO LEN(cstring)
	cret=cret+SUBSTR(cstring,LEN(cstring)-i+1,1)
ENDFOR
RETURN cret

FUNCTION buildhex
PARAMETERS cstring,lkam
PRIVATE ALL 
gcTalk=SET("TALK")
cret=''
FOR i=1 TO LEN(cstring)
	cpom=dec2basx(ASC(SUBSTR(cstring,i,1)),16)
	IF LEN(cpom)<2
		cout='0'+cpom
		cpom=cout
	ENDIF
	
	cret=cret+cpom
	IF gcTALK="ON"
		?? cpom
		?? ':'
	ENDIF
ENDFOR
RETURN cret

FUNCTION adint64
PARAMETERS cstring1,cstring2
PRIVATE ALL 
DIMENSION car (8,1) as Character

***
* 8 bytes both
***
nincrement=0
cret=''
FOR i=8 TO 1 STEP -1
	nfir=ASC(SUBSTR(cstring1,i,1))
	nsec=ASC(SUBSTR(cstring2,i,1))
	nout=nincrement+nfir+nsec
	IF nout>255
		nincrement=INT(nout/256)
		nout=nout-(nincrement*256)
	ELSE
		nincrement=0
	ENDIF
	car(i)=CHR(nout)
ENDFOR
FOR i=1 TO 8
	cret=cret+car(i)
ENDFOR
RETURN cret


FUNCTION hashsize

PARAMETERS ncislo
PRIVATE ALL 
cret=''
creverse=''
DO WHILE .t.
	npom=INT(ncislo/256)
	npom2=ncislo-npom*256
	creverse=creverse+CHR(npom2)
	ncislo=npom
	IF ncislo=0
		EXIT
	ENDIF
ENDDO
FOR i=1 TO LEN(creverse)
	cret=cret+SUBSTR(creverse,LEN(creverse)-i+1,1)
ENDFOR 
RETURN cret


*..............................................................................
*   Function: DEC2BASX
*    Purpose:  Convert whole number 0-?, to base 2-16 
*
* Parameters: nTempNum - number to convert (0-9007199254740992)
*             base    - base to convert to i.e., 2 4 8 16...
*    returns: string
*      Usage:  cresult=Dec2BasX(nParm1, nParm2)
*              STORE Dec2BasX(255, 16) TO cMyString  &&... cMyString contains 'ff'
*..............................................................................
FUNCTION dec2basx
PARAMETERS nTempNum, nNewBase

STORE 0 TO nWorkVal,;
   remainder,;
   dividend,;
   nextnum,;
   digit

nWorkVal = nTempNum  
ret_str = ''

DO WHILE .T.
   digit = MOD(nWorkVal, nNewBase)
   dividend = nWorkVal / nNewBase
   nWorkVal = INT(dividend)

   DO CASE
      CASE digit = 10
         ret_str = 'a' + ret_str
      CASE digit = 11
         ret_str = 'b' + ret_str
      CASE digit = 12
         ret_str = 'c' + ret_str
      CASE digit = 13
         ret_str = 'd' + ret_str
      CASE digit = 14
         ret_str = 'e' + ret_str
      CASE digit = 15
         ret_str = 'f' + ret_str
      OTHERWISE
         ret_str = LTRIM(STR(digit)) + ret_str
   ENDCASE

   IF nWorkVal = 0
      EXIT
   ENDIF ( nWorkVal = 0 )
ENDDO ( .T. )
RETURN ret_str

Powershell 2.0

You can use GetHash.dll.

http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll

Use Example:

	Add-Type -Path "GetHash.dll"

	function MovieHash([string]$path) {
		$hash = [GetHash.Main]	
		$hash::ToHexadecimal($hash::ComputeHash($path))
	}

	MovieHash $filename 

or without using GetHash.dll:

$dataLength = 65536

function LongSum([UInt64]$a, [UInt64]$b) { 
	[UInt64](([Decimal]$a + $b) % ([Decimal]([UInt64]::MaxValue) + 1)) 
}

function StreamHash([IO.Stream]$stream) {
	$hashLength = 8
	[UInt64]$lhash = 0
	[byte[]]$buffer = New-Object byte[] $hashLength
	$i = 0
	while ( ($i -lt ($dataLength / $hashLength)) -and ($stream.Read($buffer,0,$hashLength) -gt 0) ) {
		$i++
		$lhash = LongSum $lhash ([BitConverter]::ToUInt64($buffer,0))
	}
	$lhash
}

function MovieHash([string]$path) {
	try { 
		$stream = [IO.File]::OpenRead($path) 
		[UInt64]$lhash = $stream.Length
		$lhash = LongSum $lhash (StreamHash $stream)
		$stream.Position = [Math]::Max(0L, $stream.Length - $dataLength)
		$lhash = LongSum $lhash (StreamHash $stream)
		"{0:X}" -f $lhash
	}
	finally { $stream.Close() }
}

MovieHash $filename 

MASM

Calc_Hash proc uses esi ebx edx pFile:dword, pBuf:dword

	LOCAL hFile:dword, fSize:dword, NBR:dword, pMem:dword
	
	invoke CreateFile,pFile,GENERIC_ALL,0,0,OPEN_EXISTING,0,0
	mov hFile,eax	
	cmp eax,INVALID_HANDLE_VALUE
	jz @Error
	
	invoke SetFilePointer,hFile,0,NULL,FILE_END
	mov fSize,eax
	push eax
	
	invoke GlobalAlloc,GPTR,131072
	mov pMem,eax
	
	invoke SetFilePointer,hFile,0,NULL,FILE_BEGIN
	invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
	
	sub fSize,65536
	add pMem,65536
	
	invoke SetFilePointer,hFile,fSize,NULL,FILE_BEGIN
	invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
	
	sub pMem,65536
	mov esi,pMem
	mov ecx,131072
	pop eax
	mov edx,eax
	push eax

	@@:
	add edx,[esi]
	adc ebx,[esi+4]
	add esi,8
	sub ecx,8
	jnz @B
	
	push edx
	push ebx
	invoke wsprintf,pBuf,addr HashFormat
	pop eax
	pop eax
	
	invoke CloseHandle,hFile
	invoke GlobalFree,pMem
	pop ecx
	
	@Error: ; If error eax returns (INVALID_HANDLE_VALUE)
	
	; Hash value is copied to pBuf
	; eax returns Movie Filesize
	
	ret
	
Calc_Hash endp

Objective-C

This is implementation of hash for Objective-C for Mac by subsmarine.com

OSHashAlgorithm.m

#import "OSHashAlgorithm.h"


@implementation OSHashAlgorithm

+(NSString*)stringForHash:(uint64_t)hash
{
        return [[NSString stringWithFormat:@"%qx", hash ] autorelease];
}
+(VideoHash)hashForPath:(NSString*)path
{
        VideoHash hash;
        hash.fileHash =0;
        hash.fileSize =0;
        
        NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
        hash = [OSHashAlgorithm hashForFile:readFile];
        [readFile closeFile];
        return hash;    
}
+(VideoHash)hashForURL:(NSURL*)url
{
        VideoHash hash;
        hash.fileHash =0;
        hash.fileSize =0;
        
        NSFileHandle *readfile = [NSFileHandle fileHandleForReadingFromURL:url error:NULL];
        hash = [OSHashAlgorithm hashForFile:readfile];
        return hash;
}

+(VideoHash)hashForFile:(NSFileHandle*)handle
{
        VideoHash retHash;
        retHash.fileHash =0;
        retHash.fileSize =0;
        
        if( handle == nil )
                return retHash;
        
        const NSUInteger CHUNK_SIZE=65536;
        NSData *fileDataBegin, *fileDataEnd;
        uint64_t hash=0;
        
        
        fileDataBegin = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE];
        [handle seekToEndOfFile];
        unsigned long long fileSize = [handle offsetInFile];
        if(fileSize < CHUNK_SIZE )
                return retHash;
        
        [handle seekToFileOffset:MAX(0,fileSize-CHUNK_SIZE) ];
        fileDataEnd = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE];
        
        //
        // Calculate hash
        //
        
        // 1st. File size
        hash += fileSize;
        // 2nd. Begining data block
        uint64_t * data_bytes= (uint64_t*)[fileDataBegin bytes]; 
        for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ )
                hash+=data_bytes[i];;
        // 3rd. Ending data block
        data_bytes= (uint64_t*)[fileDataEnd bytes]; 
        for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ )
                hash+= data_bytes[i];
        
        retHash.fileHash = hash;
        retHash.fileSize = fileSize;
        
        return retHash;
        
}

@end

OSHashAlgorithm.h

#import <Cocoa/Cocoa.h>

typedef struct 
{
        uint64_t fileHash;
        uint64_t fileSize;
} VideoHash;

@interface OSHashAlgorithm : NSObject {

}
+(VideoHash)hashForPath:(NSString*)path;
+(VideoHash)hashForURL:(NSURL*)url;
+(VideoHash)hashForFile:(NSFileHandle*)handle;
+(NSString*)stringForHash:(uint64_t)hash;

@end

Vala

public uint64 hash(File file) {
	try {
		uint64 h;
		
		//get filesize and add it to hash
		var file_info = file.query_info("*", FileQueryInfoFlags.NONE);
		h = file_info.get_size();
		
		//add first 64kB of file to hash
		var dis = new DataInputStream(file.read());
		dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		for(int i=0; i<65536/sizeof(uint64); i++) {
			h += dis.read_uint64();
		}
		//add last 64kB of file to hash
		dis = new DataInputStream(file.read());
		dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		dis.skip((size_t)(file_info.get_size() - 65536));
		for(int i=0; i<65536/sizeof(uint64); i++) {
			h += dis.read_uint64();
		}
		
		return h;
	} catch (Error e) {
        error("%s", e.message);
    }
}

int main () {
    var file = File.new_for_path ("breakdance.avi");
    if (!file.query_exists ()) {
        stderr.printf ("File '%s' doesn't exist.\n", file.get_path ());
        return 1;
    }
    stdout.printf("%016llx\n", hash(file));
    
    file = File.new_for_path ("dummy.bin");
    if (!file.query_exists ()) {
        stderr.printf ("File '%s' doesn't exist.\n", file.get_path ());
        return 1;
    }
    stdout.printf("%016llx\n", hash(file));

    return 0;
}

Build with: valac --pkg gio-2.0 hash.vala

AutoHotKey

#NoEnv
SetBatchLines, -1
; http://www.opensubti.../breakdance.avi
; OpenSubtitles Hash = 8E245D9679D31E12
FilePath := "Breakdance.avi"
MsgBox, 0, OpenSubtitlesHash, % Filepath . ":`r`n" . GetOpenSubtitlesHash(FilePath)
ExitApp


; ==================================================================================================
GetOpenSubtitlesHash(FilePath) {
   ; http://trac.opensubt...HashSourceCodes
   Static X := { 0: "0",  1: "1",  2: "2",  3: "3",  4: "4",  5: "5",  6: "6",  7: "7"
              ,  8: "8",  9: "9", 10: "A", 11: "B", 12: "C", 13: "D", 14: "E", 15: "F"}
   ; Check the file size ---------------------------------------------------------------------------
   ; 9000000000 > $moviebytesize >= 131072 bytes (changed > to  >= for the lower limit)
   FileGetSize, FileSize, %FilePath%
   If (FileSize < 131072) || (FileSize >= 9000000000)
      Return ""
   ; Read the first and last 64 KB -----------------------------------------------------------------
   VarSetCapacity(FileParts, 131072)         ; allocate sufficient memory
   File := FileOpen(FilePath, "r")           ; open the file
   File.Seek(0, 0)                           ; set the file pointer (just for balance)
   File.RawRead(FileParts, 65536)            ; read the first 64 KB
   File.Seek(-65536, 2)                      ; set the file pointer for the last 64 KB
   File.RawRead(&FileParts + 65536, 65536)   ; read the last 64 KB
   File.Close()                              ; got all we need, so the file can be closed
   ; Now calculate the hash using two UINTs for the low- and high-order parts of an UINT64 ---------
   LoUINT := FileSize & 0xFFFFFFFF           ; store low-order UINT of file size
   HiUINT := FileSize >> 32                  ; store high-order UINT of file size
   Offset := -4                              ; to allow adding 4 on first iteration
   Loop, 16384 {                             ; 131072 / 8
      LoUINT += NumGet(FileParts, Offset += 4, "UInt") ; add first UINT value to low-order UINT
      HiUINT += NumGet(FileParts, Offset += 4, "UInt") ; add second UINT value to high-order UINT
   }
   ; Adjust the probable overflow of the low-order UINT
   HiUINT += LoUINT >> 32                    ; add the overflow to the high-order UINT
   LoUINT &= 0xFFFFFFFF                      ; remove the overflow from the low-order UINT
   ; Now get the hex string, i.e. the hash ---------------------------------------------------------
   Hash := ""
   VarSetCapacity(UINT64, 8, 0)
   NumPut((HiUINT << 32) | LoUINT, UINT64, 0, "UInt64")
   Loop, 8
      Hash .= X[(Byte := NumGet(UINT64, 8 - A_Index, "UChar")) >> 4] . X[Byte & 0x0F]
   Return Hash
}
; ==================================================================================================

Lisp

; opensubtitle hash, common lisp, sbcl
; sean langton 2013

(defun get-lvalue(stream)
  (let ((c)(n 0)(m 1))
    (loop for x from 0 to 7 do
(setf c (read-byte stream))
(setf n (+ n (* c m)))
(setf m (* m 256))
) n))
  
(defun hashfile(path)
  (let ((hash '(unsigned-byte 64))(len))
    (with-open-file (in path :element-type '(unsigned-byte 8))
      (setf len (file-length in))
      (setf hash len)

      (cond ((< len (* 2 65536)) 
    (print "file too small to hash")
    (return-from hashfile nil)))

      (loop for x from 0 to 8191  do
  (setf hash (logand (+ hash (get-lvalue in)) #xFFFFFFFFFFFFFFFF )))

      (file-position in (- len 65536))

      (loop for x from 0 to 8191  do
  (setf hash (logand (+ hash (get-lvalue in)) #xFFFFFFFFFFFFFFFF )))

      (format t "~&~16,'0x" hash))))

; (hashfile #p"~/Downloads/breakdance.avi")
; (hashfile #p"~/Downloads/dummy/dummy.bin")

Pascal

procedure ComputeHash(const Stream : TStream;
                      out   Size : qword;
                      out   Hash : string);
var
  hashQ : qword;
  fsize : qword;
  i : integer;
  read : integer;
  s : array[0..7] of char;
  tmp : qword absolute s;
begin
  Stream.Seek(0, soFromBeginning);
  Size := Stream.Size;
  hashQ := size;;

  i := 0;
  read := 1;
  while ((i < 8192) and (read > 0)) do begin
    read := Stream.Read(s, sizeof(s));
    if read > 0 then begin
      hashQ := hashQ + tmp;
    end;
    i := i + 1;
  end;

  Stream.Seek(-65536, soFromEnd);

  i := 0;
  read := 1;
  while ((i < 8192) and (read > 0)) do begin
    read := Stream.Read(s, sizeof(s));
    if read > 0 then begin
      hashQ := hashQ + tmp;
    end;
    i := i + 1;
  end;

  Hash := lowercase(Format('%.16x',[hashQ]));
end;

Scala

import java.io.{FileInputStream, File}
import java.nio.{LongBuffer, ByteOrder, ByteBuffer}
import java.nio.channels.FileChannel.MapMode
import scala.math._

class OpenSubtitlesHasher {
  private val hashChunkSize = 64L * 1024L

  def computeHash(file: File) : String = {
    val fileSize = file.length
    val chunkSizeForFile = min(fileSize, hashChunkSize)

    val fileChannel = new FileInputStream(file).getChannel

    try {
      val head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile))
      val tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, max(fileSize - hashChunkSize, 0), chunkSizeForFile))

      "%016x".format(fileSize + head + tail)
    } finally {
      fileChannel.close()
    }
  }

  private def computeHashForChunk(buffer: ByteBuffer) : Long = {
    def doCompute(longBuffer: LongBuffer, hash: Long) : Long = {
      longBuffer.hasRemaining match {
        case false => hash
        case true => doCompute(longBuffer, hash + longBuffer.get)
      }
    }
    val longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer()
    doCompute(longBuffer, 0L)
  }
}

Javascript

This implementation is WRONG, for files >= 2GB it counts WRONG hash, for example for dummy avi file it is 61f7751ec2a72bfb instead 61f7751fc2a72bfb - please send us correct code. Thanks!

   	        var b = 65536;        
                fs = file.size;
				var e = Array(8);
			    for (var a = fs, f = 0; f < 8; f++) {
			        e[f] = a & 255;
			        a = a >> 8
			    }
			    a = fs;    


			
			    var c = file.slice(0, b);
			    var g = new FileReader;   

				
			    g.onloadend = function (h) {				
			        if (h.target.readyState == FileReader.DONE) {
				
			            for (var f = h.target.result, d = 0; d < f.length; d++) e[(d + 8) % 8] += f.charCodeAt(d);
			            c = file.slice(a - b);
			            var g = new FileReader;   
			
			            g.onloadend = function (c) {
			                var b = "languages";
			                if (c.target.readyState == FileReader.DONE) {
			                    f = c.target.result;
			                    for (d = 0; d < f.length; d++) e[(d + 8) % 8] += f.charCodeAt(d);  
								lang = $("#SubLanguageID").multiselect("getChecked").map(function(){return this.value}).get().join(",");
								if (lang == "") lang = "all";
								var url = "http://www.opensubtitles.org/" + $(location).attr('pathname').substr(1,2) + "/search/sublanguageid-" + lang + "/moviehash-" + binl2hex(e) + "/moviebytesize-" + fs + "/dragsearch-on";
			                    document.location = url;
			                }
			            };
			            g.readAsBinaryString(c)
			        }
			    };
			    g.readAsBinaryString(c)  

function binl2hex(a) {
    var b = 255;
    a[1] += a[0] >> 8;
    a[0] = a[0] & b;
    a[2] += a[1] >> 8;
    a[1] = a[1] & b;
    a[3] += a[2] >> 8;
    a[2] = a[2] & b;
    a[4] += a[3] >> 8;
    a[3] = a[3] & b;
    a[5] += a[4] >> 8;
    a[4] = a[4] & b;
    a[6] += a[5] >> 8;
    a[5] = a[5] & b;
    a[7] += a[6] >> 8;
    a[6] = a[6] & b;
    a[7] = a[7] & b;
    for (var d = "0123456789abcdef", e = "", c = 7; c > -1; c--) e += d.charAt(a[c] >> 4 & 15) + d.charAt(a[c] & 15);
    return e
}                 

Attachments (1)

Download all attachments as: .zip

Note: See TracWiki for help on using the wiki.