wiki:HashSourceCodes

Version 53 (modified by Administrator, 11 years ago) ( diff )

--

OpenSubtitles.org uses a special hash function to match subtitle files against movie files. The hash does not depend on the file name of the movie file. Read about the basics of hashing functions.

The hash code is based on Media Player Classic. In natural language it calculates: size + 64-bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). On opensubtitles.org the movie file size is limited to 9000000000 > $moviebytesize > 131072 bytes; if there is any reason to change these limits, let us know. The licence of the hashing source codes is GPL. The source codes were tested on little-endian processors - DEC, Intel and compatible.

Feel free to edit/add source code if you have a faster/better implementation. Also, don't forget to check that the hash is right for the test files. Please test these 2 files to ensure your algorithm is completely OK:

  • AVI file (12 909 756 bytes)
    • hash: 8e245d9679d31e12
  • DUMMY RAR file (2 565 922 bytes, 4 295 033 890 after RAR unpacking, test on UNPACKED file)
    • hash: 61f7751fc2a72bfb (for UNPACKED file)

C

#include <stdio.h>
#include <stdlib.h>

#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#ifndef uint64_t
#define uint64_t unsigned long long
#endif

/*
 * Computes the OpenSubtitles / Media Player Classic movie hash of an open file.
 *
 *   hash = file size + sum of the 64-bit words of the first 64 KiB
 *                    + sum of the 64-bit words of the last 64 KiB
 *
 * All arithmetic is modulo 2^64. Words are read in host byte order, so the
 * result is only correct on little-endian machines. A trailing fragment
 * shorter than 8 bytes is ignored. For files smaller than 64 KiB both passes
 * start at offset 0 (the chunks overlap).
 *
 * handle: stream opened in binary mode; its file position is modified.
 * Returns the hash, or 0 when the size cannot be determined or a seek fails.
 * The size comes from ftell(), i.e. a long: where long is 32 bits wide, files
 * of 2 GiB or more are not supported by this implementation.
 */
uint64_t compute_hash(FILE * handle)
{
        const long chunk_size = 65536;
        uint64_t hash, tmp;
        long fsize, tail;
        size_t i;

        if (fseek(handle, 0, SEEK_END) != 0)
                return 0;
        fsize = ftell(handle);
        if (fsize < 0 || fseek(handle, 0, SEEK_SET) != 0)
                return 0;

        hash = (uint64_t)fsize;

        /* First chunk: stop early on EOF / short read. */
        for (i = 0; i < chunk_size / sizeof(tmp) && fread(&tmp, sizeof(tmp), 1, handle) == 1; i++)
                hash += tmp;

        /* Signed arithmetic: the old unsigned "fsize - 65536" wrapped around
         * for small files and made the seek fail. */
        tail = (fsize > chunk_size) ? fsize - chunk_size : 0;
        if (fseek(handle, tail, SEEK_SET) != 0)
                return 0;

        /* Last chunk. */
        for (i = 0; i < chunk_size / sizeof(tmp) && fread(&tmp, sizeof(tmp), 1, handle) == 1; i++)
                hash += tmp;

        return hash;
}

/*
 * Example driver: prints the movie hash of the file named by the first
 * command-line argument, or of "breakdance.avi" when no argument is given.
 * Returns 1 when the file cannot be opened, 0 otherwise.
 */
int main(int argc, char *argv[])
{
        const char *path = (argc > 1) ? argv[1] : "breakdance.avi";
        FILE * handle;
        uint64_t myhash;

        handle = fopen(path, "rb");

        if (!handle)
        {
                printf("Error opening file!");
                return 1;
        }

        myhash = compute_hash(handle);
        /* Portable format (the original "%I64x" is MSVC-only); zero-padded
         * to the 16 hex digits the hash is defined to have. */
        printf("%016llx", (unsigned long long)myhash);

        fclose(handle);
        return 0;
}

C - Public Domain License

#include <stdio.h>
#include <stdlib.h>

unsigned long long analizefileOSHahs(const char *fileName){
 /*
  * Public Domain implementation by Kamil Dziobek. turbos11(at)gmail.com
  * This code implements the Gabest hash algorithm first used in Media Player Classic.
  * For more implementations (various languages and authors) see:
  * http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
  *
  * Returns: file size + sum of the 64-bit words of the first and of the last
  * 64 KiB of the file (modulo 2^64), or 0 when the file cannot be opened,
  * measured or positioned.
  *
  * -works only on little-endian processors: DEC, Intel and compatible
  * -sizeof(unsigned long long) must be 8
  * -the size comes from ftell() (a long), so files of 2 GiB or more need a
  *  platform where long is 64 bits wide
  */
  enum { CHUNK_BYTES = 65536, CHUNK_WORDS = CHUNK_BYTES / 8 };

  FILE *file;
  unsigned long long buffer1[CHUNK_WORDS];
  unsigned long long t1 = 0;
  long fileSize, tailPos;
  size_t words, i;
  int pass;

  file = fopen(fileName, "rb");
  if (file == NULL)
    return 0;

  if (fseek(file, 0, SEEK_END) != 0 || (fileSize = ftell(file)) < 0) {
    fclose(file);
    return 0;
  }

  /* Files shorter than one chunk are hashed from offset 0 in both passes. */
  tailPos = (fileSize > CHUNK_BYTES) ? fileSize - CHUNK_BYTES : 0;

  for (pass = 0; pass < 2; pass++) {
    if (fseek(file, pass == 0 ? 0L : tailPos, SEEK_SET) != 0) {
      fclose(file);
      return 0;
    }
    /* fread reports complete 8-byte words only, so nothing uninitialised and
     * no partial trailing word is ever added. */
    words = fread(buffer1, sizeof buffer1[0], CHUNK_WORDS, file);
    for (i = 0; i < words; i++)
      t1 += buffer1[i];
  }

  t1 += (unsigned long long)fileSize; /* add filesize */
  fclose(file);
  return t1;
}
/* Example driver: prints the hash of a hard-coded sample path. */
int main(int argc, char *argv[]){
  unsigned long long myhash=analizefileOSHahs("C://tomaszkokowskizoofiliamovies.avi");
  (void)argc;
  (void)argv;
  /* "%016llx" is the portable, zero-padded form of the MSVC-only "%16I64x". */
  printf("hash is %016llx",myhash);
  return 0;
}

C++

 #include <fstream>
 #include <iomanip>
 #include <iostream>
 
 typedef unsigned __int64 uint64_t;
 using namespace std;
 
 // Returns the larger of two ints (y when they are equal).
 int MAX(int x, int y)
 {
        return (x > y) ? x : y;
 }
 
 // Computes the OpenSubtitles / Media Player Classic movie hash of an open
 // binary stream: file size + sum of the 64-bit words of the first 64 KiB
 // + sum of the 64-bit words of the last 64 KiB, modulo 2^64.
 //
 // Words are read in host byte order (correct on little-endian machines only);
 // a trailing fragment shorter than 8 bytes is ignored. For files smaller than
 // 64 KiB both passes start at offset 0. The stream position and state flags
 // are modified. Returns 0 when the file size cannot be determined.
 uint64_t compute_hash(std::ifstream& f)
 {
        const uint64_t chunk_size = 65536;
        uint64_t tmp = 0;

        f.clear();
        f.seekg(0, std::ios::end);
        const std::streamoff end_pos = f.tellg();
        if (end_pos < 0)
                return 0;
        const uint64_t fsize = static_cast<uint64_t>(end_pos);
        uint64_t hash = fsize;

        f.seekg(0, std::ios::beg);
        for (uint64_t i = 0; i < chunk_size / sizeof(tmp) && f.read(reinterpret_cast<char*>(&tmp), sizeof(tmp)); ++i)
                hash += tmp;

        // A short first pass leaves failbit set, which would turn the seek
        // below into a no-op; clear it first. The offset is computed in 64-bit
        // arithmetic instead of going through the int-based MAX().
        f.clear();
        f.seekg(static_cast<std::streamoff>(fsize > chunk_size ? fsize - chunk_size : 0), std::ios::beg);
        for (uint64_t i = 0; i < chunk_size / sizeof(tmp) && f.read(reinterpret_cast<char*>(&tmp), sizeof(tmp)); ++i)
                hash += tmp;

        return hash;
 }
 
 // Example driver: prints the 16-digit hexadecimal movie hash of the file named
 // by the first command-line argument, or of "c:\test.avi" when none is given.
 // Returns 1 when the file cannot be opened, 0 otherwise.
 int main(int argc, char *argv[])
 {
        const char* path = (argc > 1) ? argv[1] : "c:\\test.avi";
        std::ifstream f;
        uint64_t myhash;

        f.open(path, std::ios::in | std::ios::binary);
        if (!f.is_open()) {
           std::cerr << "Error opening file" << std::endl;
           return 1;
        }

        myhash = compute_hash(f);
        // setw/setfill keep the leading zeros of the 64-bit value.
        std::cout << std::setw(16) << std::setfill('0') << std::hex << myhash;

        f.close();
        return 0;
 }

About C and C++ implementation

These implementations only work on little-endian processors: DEC, Intel and compatible

Java

/**
 * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit
 * checksum of the first and last 64k (even if they overlap because the file is smaller than
 * 128k).
 */
public class OpenSubtitlesHasher {
        
        /**
         * Size of the chunks that will be hashed in bytes (64 KB)
         */
        private static final int HASH_CHUNK_SIZE = 64 * 1024;
        
        
        public static String computeHash(File file) throws IOException {
                long size = file.length();
                long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size);
                
                FileChannel fileChannel = new FileInputStream(file).getChannel();
                
                try {
                        long head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile));
                        long tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile));
                        
                        return String.format("%016x", size + head + tail);
                } finally {
                        fileChannel.close();
                }
        }
        

        public static String computeHash(InputStream stream, long length) throws IOException {
                
                int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length);
                
                // buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks
                byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)];
                
                DataInputStream in = new DataInputStream(stream);
                
                // first chunk
                in.readFully(chunkBytes, 0, chunkSizeForFile);
                
                long position = chunkSizeForFile;
                long tailChunkPosition = length - chunkSizeForFile;
                
                // seek to position of the tail chunk, or not at all if length is smaller than two chunks
                while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0);
                
                // second chunk, or the rest of the data if length is smaller than two chunks
                in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile);
                
                long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile));
                long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile));
                
                return String.format("%016x", length + head + tail);
        }
        

        private static long computeHashForChunk(ByteBuffer buffer) {
                
                LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
                long hash = 0;
                
                while (longBuffer.hasRemaining()) {
                        hash += longBuffer.get();
                }
                
                return hash;
        }
        
}

C#

You can use GetHash.dll.

http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll

Use Example:

    // Handler (named after the dialog's FileOk event): hashes the file selected
    // in openFileDialog1 with GetHash.dll and shows the hexadecimal text in label1.
    private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
        {
            // ComputeHash returns the raw hash bytes for the given path.
            byte[] hash = GetHash.Main.ComputeHash(openFileDialog1.FileName);
            label1.Text =  GetHash.Main.ToHexadecimal(hash);

        }

or without using GetHash.dll:

using System;
using System.Text;
using System.IO;
   
namespace MovieHasher
{
    class Program
    {
        // Number of bytes hashed at the start and at the end of the file (64 KiB).
        private const int ChunkSize = 65536;

        /// <summary>Computes the movie hash of the file at <paramref name="filename"/>.</summary>
        /// <returns>The 8 hash bytes, most significant byte first.</returns>
        private static byte[] ComputeMovieHash(string filename)
        {
            byte[] result;
            using (Stream input = File.OpenRead(filename))
            {
                result = ComputeMovieHash(input);
            }
            return result;
        }
 
        /// <summary>
        /// Computes size + sum of the little-endian 64-bit words of the first and
        /// last 64 KiB of the stream, modulo 2^64. The stream must be seekable
        /// and is closed before returning (as in the original code).
        /// </summary>
        /// <returns>The 8 hash bytes, most significant byte first.</returns>
        private static byte[] ComputeMovieHash(Stream input)
        {
            long streamsize = input.Length;
            ulong lhash = (ulong)streamsize;

            // unchecked: the hash is defined modulo 2^64, so wrap-around is
            // intended and must not throw in builds with overflow checking.
            unchecked
            {
                lhash += SumChunk(input);
                input.Position = Math.Max(0, streamsize - ChunkSize);
                lhash += SumChunk(input);
            }
            input.Close();

            // Emit big-endian bytes independent of the machine's byte order.
            byte[] result = new byte[sizeof(ulong)];
            for (int i = result.Length - 1; i >= 0; i--)
            {
                result[i] = (byte)(lhash & 0xFF);
                lhash >>= 8;
            }
            return result;
        }

        /// <summary>
        /// Sums up to ChunkSize / 8 little-endian 64-bit words from the current
        /// stream position. Stops at the first incomplete word so stale buffer
        /// bytes are never added.
        /// </summary>
        private static ulong SumChunk(Stream input)
        {
            byte[] buffer = new byte[sizeof(long)];
            ulong sum = 0;

            for (int word = 0; word < ChunkSize / sizeof(long); word++)
            {
                // Stream.Read may return fewer bytes than requested; keep
                // reading until the word is complete or the stream ends.
                int filled = 0;
                while (filled < buffer.Length)
                {
                    int n = input.Read(buffer, filled, buffer.Length - filled);
                    if (n <= 0)
                    {
                        break;
                    }
                    filled += n;
                }
                if (filled < buffer.Length)
                {
                    break;
                }

                ulong value = 0;
                for (int b = buffer.Length - 1; b >= 0; b--)
                {
                    value = (value << 8) | buffer[b];
                }
                unchecked
                {
                    sum += value;
                }
            }
            return sum;
        }
 
        /// <summary>Formats bytes as lower-case hexadecimal, two digits per byte.</summary>
        private static string ToHexadecimal(byte[] bytes)
        {
            StringBuilder hexBuilder = new StringBuilder();
            for(int i = 0; i < bytes.Length; i++)
            {
                hexBuilder.Append(bytes[i].ToString("x2"));
            }
            return hexBuilder.ToString();
        }
 
        static void Main(string[] args)
        {
            byte[] moviehash = ComputeMovieHash(@"C:\test.avi");
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash));
        }
    }
}

If you get overflow error read this.

VB.Net

Imports System
Imports System.Text
Imports System.IO
'Note: you must remove integer overflow checking.

Namespace MovieHasher
        Class Program
                ' Number of bytes hashed at the start and at the end of the file (64 KiB).
                Private Const ChunkSize As Integer = 65536
                ' Size in bytes of one 64-bit word.
                Private Const WordSize As Integer = 8

                ' Computes the movie hash of the file at "filename".
                ' Returns the 8 hash bytes in the order produced by the stream overload.
                Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte()
                        Dim result As Byte()
                        Using input As Stream = File.OpenRead(filename)
                                result = ComputeMovieHash(input)
                        End Using
                        Return result
                End Function
                
                ' Computes size + sum of the 64-bit words of the first and last 64 KiB,
                ' modulo 2^64. Must be Shared because it is called from Shared code.
                ' The stream is closed before returning (as in the original code).
                Private Shared Function ComputeMovieHash(ByVal input As Stream) As Byte()
                        Dim streamsize As Long = input.Length
                        Dim lhash As ULong = CULng(streamsize)
                        
                        lhash = AddWrapping(lhash, SumChunk(input))
                        
                        input.Position = Math.Max(0L, streamsize - ChunkSize)
                        lhash = AddWrapping(lhash, SumChunk(input))
                        
                        input.Close()
                        Dim result As Byte() = BitConverter.GetBytes(lhash)
                        Array.Reverse(result)
                        Return result
                End Function
                
                ' Sums up to ChunkSize \ WordSize words from the current position and
                ' stops at the first incomplete word.
                Private Shared Function SumChunk(ByVal input As Stream) As ULong
                        Dim buffer As Byte() = New Byte(WordSize - 1) {}
                        Dim sum As ULong = 0UL
                        Dim i As Integer = 0
                        While i < ChunkSize \ WordSize AndAlso input.Read(buffer, 0, WordSize) = WordSize
                                sum = AddWrapping(sum, BitConverter.ToUInt64(buffer, 0))
                                i += 1
                        End While
                        Return sum
                End Function
                
                ' Adds two unsigned 64-bit values modulo 2^64 without triggering an
                ' overflow exception, so integer overflow checking can stay enabled.
                Private Shared Function AddWrapping(ByVal a As ULong, ByVal b As ULong) As ULong
                        Dim room As ULong = ULong.MaxValue - a
                        If b > room Then
                                Return b - room - 1UL
                        End If
                        Return a + b
                End Function
                
                ' Formats bytes as lower-case hexadecimal, two digits per byte.
                Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String
                        Dim hexBuilder As New StringBuilder()
                        For i As Integer = 0 To bytes.Length - 1
                                hexBuilder.Append(bytes(i).ToString("x2"))
                        Next
                        Return hexBuilder.ToString()
                End Function
                
                Private Shared Sub Main(ByVal args As String())
                        Dim moviehash As Byte() = ComputeMovieHash("C:\test.avi")
                        Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash))
                End Sub
        End Class
End Namespace

Python

import struct, os

def hashFile(name):
    """Return the OpenSubtitles movie hash of the file ``name``.

    The hash is the file size plus the sum of the little-endian 64-bit words
    of the first and of the last 64 KiB, kept modulo 2**64 and formatted as
    16 lower-case hexadecimal digits.

    Returns the string "SizeError" for files smaller than 128 KiB and
    "IOError" when the file cannot be opened or read.
    """
    try:
        longlongformat = '<Q'  # explicit little-endian unsigned 64-bit word
        bytesize = struct.calcsize(longlongformat)

        # The context manager closes the file on every path, including the
        # early "SizeError" return.
        with open(name, "rb") as f:
            filesize = os.path.getsize(name)
            hash = filesize

            if filesize < 65536 * 2:
                return "SizeError"

            for start in (0, max(0, filesize - 65536)):
                f.seek(start, 0)
                # Floor division: range() needs an int on Python 3.
                for x in range(65536 // bytesize):
                    buffer = f.read(bytesize)
                    (l_value,) = struct.unpack(longlongformat, buffer)
                    hash = (hash + l_value) & 0xFFFFFFFFFFFFFFFF  # stay a 64-bit number

        returnedhash = "%016x" % hash
        return returnedhash

    except(IOError):
        return "IOError"

Delphi

This is just a quick conversion of Gabest's original C code. Anyone who can come up with a cleaner code, please feel free to do so and post here.

// Returns the movie hash of the file "fname" as text produced by
// Format('%.16x'), or '' when the file does not exist.
// hash = file size + sum of the 64-bit words of the first and last 64 KiB.
// Note: the additions rely on wrap-around, so overflow checking must be off.
function CalcGabestHash(const fname: string): string;
const
  ChunkSize = 65536;
var
  i       : integer;
  pass    : integer;
  tmp     : Int64;   // read directly; "array of char" is 16 bytes on Unicode Delphi
  hash    : Int64;
  aStream : TFileStream;
begin
  result := '';
  if not FileExists(fname) then Exit;

  aStream := TFileStream.Create(fName, fmOpenRead or fmShareDenyNone);
  try
    hash := aStream.Size;

    for pass := 0 to 1 do
    begin
      // Pass 0 hashes the head, pass 1 the tail. Files shorter than one
      // chunk are hashed from offset 0 both times.
      if (pass = 1) and (aStream.Size > ChunkSize) then
        aStream.Position := aStream.Size - ChunkSize
      else
        aStream.Position := 0;

      i := 0;
      // Only complete 8-byte words are added.
      while (i < ChunkSize div SizeOf(tmp)) and
            (aStream.Read(tmp, SizeOf(tmp)) = SizeOf(tmp)) do
      begin
        hash := hash + tmp;
        i := i + 1;
      end;
    end;
  finally
    aStream.Free;
  end;
  result := Format('%.16x',[hash]);
end;

alternate version by TRP

// Returns the movie hash of "Stream": stream size + sum of the 64-bit words
// of the first and of the last 64 KiB. Returns 0 (an invalid hash) when the
// stream is shorter than 64 KiB.
// Use "IntToHex(result, 16);" to get a string and "StrToInt64('$' + hash);"
// to get your Int64 back.
function CalcGabestHash(const Stream: TStream): Int64; overload;
const HashPartSize = 1 shl 16; // 64 KiB

  // Reads one 64 KiB part from the current position and adds its words to Hash.
  procedure UpdateHashFromStream(const Stream: TStream; var Hash: Int64); inline;
  var buffer: Array[0..HashPartSize div SizeOf(Int64) - 1] of Int64;
      i     : integer;
  begin
    Stream.ReadBuffer(buffer[0], SizeOf(buffer));
    for i := Low(buffer) to High(buffer) do
      Inc(Hash, buffer[i]);
  end;

begin
  result:= Stream.Size;

  if result < HashPartSize then
  begin
    // stream too small return invalid hash
    result:= 0;
    exit;
  end;

  // first 64 KiB
  Stream.Position:= 0;
  UpdateHashFromStream(Stream, result);

  // last 64 KiB
  Stream.Seek(-HashPartSize, soEnd);
  UpdateHashFromStream(Stream, result);
end;

// File-name overload: opens FileName for reading (other writers denied) and
// returns the hash computed by the TStream overload.
function CalcGabestHash(const FileName: TFileName): Int64; overload;
var
  source: TStream;
begin
  source := TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    Result := CalcGabestHash(source);
  finally
    // always release the file, even if hashing raised
    source.Free;
  end;
end;

Lua

-- will produce a correct hash regardless of architecture (big vs little endian)
-- Computes the movie hash of fileName.
-- Returns the hash as 16 lower-case hex digits plus the file size,
-- or nil plus an error message when the file cannot be opened.
-- The 64-bit sum is kept as two 32-bit halves (hi, lo).
local function movieHash(fileName)
        local fil, openErr = io.open(fileName, "rb")
        if not fil then
                return nil, openErr
        end

        local lo, hi = 0, 0

        -- Adds up to 8192 little-endian 64-bit words from the current file
        -- position; stops at the first incomplete word (short files).
        local function addChunk()
                for _ = 1, 8192 do
                        local word = fil:read(8)
                        if not word or #word < 8 then
                                break
                        end
                        local a, b, c, d, e, f, g, h = word:byte(1, 8)
                        lo = lo + a + b*256 + c*65536 + d*16777216
                        hi = hi + e + f*256 + g*65536 + h*16777216
                        -- each half grows by less than 2^32 per word, so one carry step suffices
                        if lo >= 4294967296 then
                                lo = lo - 4294967296
                                hi = hi + 1
                        end
                        hi = hi % 4294967296
                end
        end

        addChunk()

        local size = fil:seek("end")
        -- files shorter than 64k are hashed from offset 0 a second time
        fil:seek("set", math.max(0, size - 65536))
        addChunk()

        -- the size may exceed 32 bits, so propagate every carry
        lo = lo + size
        hi = (hi + math.floor(lo / 4294967296)) % 4294967296
        lo = lo % 4294967296

        fil:close()
        return string.format("%08x%08x", hi, lo), size
end

-- Self-test: prints the computed hash (and size) of the two reference files
-- next to the expected values.
print("breakdance.avi:")
print(movieHash("breakdance.avi"))
print("8e245d9679d31e12 <- should be")
print("")
-- NOTE(review): the reference hash for the dummy file applies to the UNPACKED
-- file; hashing the packed dummy.rar itself presumably explains the different
-- value reported by other tools -- confirm.
print("dummy.rar:")
print(movieHash("dummy.rar"))
print("61f7751fc2a72bfb <- should be according to wiki")
print("2a527d74d45f5b1b <- what other hash tools actually report")

RealBasic/Xojo

Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles

    // Fragment: computes "myhash" for the file "f". With routine = "video" it is
    // the size + 64-bit word sum of the first and last 64 KiB; with
    // routine = "subtitle" it is the MD5 of the whole file as hex text.
    // NOTE(review): f, routine, bytesizestr and myhash are not declared here and
    // the outer "if" is not closed in this fragment - presumably done by the
    // surrounding method; confirm.
    dim b as BinaryStream
    dim mb as MemoryBlock
    
    dim hash,bytesize as UINT64
    dim i, x, chunksize, filelen, difference as integer
    
    hash = 0 //Reset Hash
    difference = 0
    
    if f <> nil and f.Exists then
      b= f.OpenAsBinaryFile
      // The hash starts out as the file length.
      hash = b.Length
      bytesize = b.Length
      bytesizestr = str(bytesize)
      
      if bytesize >= 65536 and routine = "video" then
        chunksize = 65536
        // First 64 KiB, interpreted as little-endian 64-bit values.
        mb = b.Read(65536)
        mb.LittleEndian = True
        
        for i= 0 to chunksize -1 step 8
          hash = hash+ mb.UINT64Value(i)
        next
        
        // Last 64 KiB (offset clamped at 0).
        b.Position = max(b.Length-chunksize, 0)
        mb= b.Read(chunksize)
        mb.LittleEndian = True
        
        for i= 0 to chunksize -1 step 8
          hash = hash+ mb.UINT64Value(i)
        next

        
        // NOTE(review): hex() output is not padded here, so leading zeros of the
        // 16-digit hash may be missing - confirm.
        myhash = Lowercase(str(hex(hash)))
        
      elseif routine = "subtitle" then
        
        dim c,result as string
        // MD5 over the complete file content.
        mb = md5(b.Read(b.Length))
        mb.LittleEndian = True
        
        // Render every digest byte as two hex digits.
        for i = 0 to mb.size-1
          x = mb.byte( i )
          c = right( "00"+hex( x ), 2 )
          result = result + c
        next
        result = lowercase( result )
        myhash = result
        
      end

PHP 4/5

/**
 * Computes the OpenSubtitles movie hash of a file.
 *
 * The 64-bit value is kept as four 16-bit limbs (index 0 = least significant)
 * so it also works where PHP integers are only 32 bits wide.
 *
 * @param string $file path of the movie file
 * @return string|false 16 hex digits, or false when the file cannot be opened
 */
function OpenSubtitlesHash($file)
{
    $handle = fopen($file, "rb");
    if ($handle === false)
    {
        return false;
    }
    $fsize = filesize($file);
    
    // Seed all four limbs with the size. The upper two use division rather
    // than shifts so that sizes of 4 GiB and more are not truncated.
    $hash = array(3 => ((int) floor($fsize / 281474976710656)) & 0xFFFF, 
                  2 => ((int) floor($fsize / 4294967296)) & 0xFFFF, 
                  1 => ($fsize >> 16) & 0xFFFF, 
                  0 => $fsize & 0xFFFF);
        
    // first 64 KiB = 8192 words of 8 bytes
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }
    
    // last 64 KiB (from offset 0 for short files)
    $offset = $fsize - 65536;
    fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET);
    
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);         
    }
    
    fclose($handle);
    return UINT64FormatHex($hash);
}

/**
 * Reads one little-endian 64-bit word from $handle.
 *
 * @param resource $handle open file handle
 * @return array four 16-bit limbs, index 0 = least significant; all zero when
 *               fewer than 8 bytes are left (an incomplete word adds nothing)
 */
function ReadUINT64($handle)
{
    $data = fread($handle, 8);
    if ($data === false || strlen($data) < 8)
    {
        return array(0 => 0, 1 => 0, 2 => 0, 3 => 0);
    }

    $u = unpack("va/vb/vc/vd", $data);
    return array(0 => $u["a"], 1 => $u["b"], 2 => $u["c"], 3 => $u["d"]);
}

/**
 * Adds two 64-bit numbers given as four 16-bit limbs (index 0 = least
 * significant). A carry out of the top limb is dropped.
 */
function AddUINT64($a, $b)
{
    $o = array(0 => 0, 1 => 0, 2 => 0, 3 => 0);
    $carry = 0;

    foreach (array(0, 1, 2, 3) as $i)
    {
        $sum = $a[$i] + $b[$i] + $carry;
        $overflowed = ($sum > 0xffff);

        // keep the low 16 bits and hand the overflow to the next limb
        $o[$i] += $overflowed ? ($sum & 0xffff) : $sum;
        $carry = $overflowed ? 1 : 0;
    }

    return $o;
}

/**
 * Formats four 16-bit limbs (index 0 = least significant) as 16 lower-case
 * hex digits, most significant limb first.
 */
function UINT64FormatHex($n)
{
    $digits = "";
    for ($i = 3; $i >= 0; $i--)
    {
        $digits .= sprintf("%04x", $n[$i]);
    }
    return $digits;
}

Perl

#!/usr/bin/perl
use strict;
use warnings;

# Example: print the hash of the sample file breakdance.avi.
print OpenSubtitlesHash('breakdance.avi');

# Computes the OpenSubtitles movie hash of a file and returns it as 16 hex
# digits. The 64-bit value is kept as four 16-bit limbs (index 0 = least
# significant). Dies when no file name is given or the file cannot be
# opened, positioned or closed.
sub OpenSubtitlesHash {
        my $filename = shift or die("Need video filename");

        open my $handle, "<", $filename or die $!;
        binmode $handle;

        my $fsize = -s $filename;

        # Seed all four limbs with the size; the original left the upper two
        # at 0, which is wrong for files of 4 GiB and more.
        my $hash = [
                $fsize & 0xFFFF,
                ($fsize >> 16) & 0xFFFF,
                int($fsize / 4294967296) & 0xFFFF,
                int($fsize / 281474976710656) & 0xFFFF,
        ];

        # first 64 KiB = 8192 words of 8 bytes
        $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

        # last 64 KiB (from offset 0 for short files)
        my $offset = $fsize - 65536;
        seek($handle, $offset > 0 ? $offset : 0, 0) or die $!;

        $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

        close $handle or die $!;
        return UINT64FormatHex($hash);
}

# Reads one little-endian 64-bit word from the file handle in $_[0] and
# returns it as a reference to four 16-bit limbs (index 0 = least
# significant). Returns four zeros when fewer than 8 bytes are left, so an
# incomplete word adds nothing and no undef values reach the adder.
sub ReadUINT64 {
        my $got = read($_[0], my $u, 8);
        return [0, 0, 0, 0] unless defined $got && $got == 8;
        return [unpack("vvvv", $u)];
}

# Adds two 64-bit numbers given as references to four 16-bit limbs
# (index 0 = least significant) and returns a new array reference.
# A carry out of the top limb is dropped.
sub AddUINT64 {
    my $o = [0, 0, 0, 0];
    my $carry = 0;
    foreach my $i (0 .. 3) {
        my $sum = $_[0]->[$i] + $_[1]->[$i] + $carry;
        my $overflowed = $sum > 0xffff;
        # keep the low 16 bits and hand the overflow to the next limb
        $o->[$i] += $overflowed ? ($sum & 0xffff) : $sum;
        $carry = $overflowed ? 1 : 0;
    }
    return $o;
}

# Formats four 16-bit limbs (index 0 = least significant) as 16 lower-case
# hex digits, most significant limb first.
sub UINT64FormatHex {
    my ($limbs) = @_;
    return join('', map { sprintf("%04x", $limbs->[$_]) } reverse 0 .. 3);
}

Ruby

This is a quick translation/transliteration of the Perl script.

# Computes the OpenSubtitles movie hash with 16-bit limb arithmetic
# (a transliteration of the Perl script).
class Hasher

  # Returns the hash of +filename+ as 16 lower-case hex digits.
  # Raises when no file name is given or the file cannot be opened.
  def open_subtitles_hash(filename)
    raise "Need video filename" unless filename

    fsize = File.size(filename)

    # Four 16-bit limbs, least significant first. All four are seeded with the
    # size; the original left the upper two at 0, which breaks files >= 4 GiB.
    hash = [fsize & 0xffff, (fsize >> 16) & 0xffff,
            (fsize >> 32) & 0xffff, (fsize >> 48) & 0xffff]

    # Binary mode avoids newline/encoding translation; the block form closes
    # the file even when an exception is raised.
    File.open(filename, 'rb') do |fh|
      8192.times { hash = add_unit_64(hash, read_uint_64(fh)) }

      offset = fsize - 65536
      fh.seek([0,offset].max, 0)

      8192.times { hash = add_unit_64(hash, read_uint_64(fh)) }
    end

    return uint_64_format_hex(hash)
  end

  # Reads one little-endian 64-bit word as four 16-bit limbs. Returns four
  # zeros when fewer than 8 bytes are left (read returns nil at end of file).
  def read_uint_64(stream)
    bytes = stream.read(8)
    return [0, 0, 0, 0] if bytes.nil? || bytes.length < 8
    bytes.unpack("vvvv")
  end

  # Adds two limb arrays; a carry out of the top limb is dropped.
  def add_unit_64(hash, input)
    res = [0,0,0,0]
    carry = 0

    hash.zip(input).each_with_index do |(h,i),n|
      sum = h + i + carry
      if sum > 0xffff
        res[n] += sum & 0xffff
        carry = 1
      else
        res[n] += sum
        carry = 0
      end
    end
    return res
  end

  # Formats the limbs as hex text, most significant limb first.
  def uint_64_format_hex(hash)
    sprintf("%04x%04x%04x%04x", *hash.reverse)
  end
end

# Self-test, only run when this file is executed directly: checks the hash of
# the reference file breakdance.avi (looked up relative to the working directory).
if __FILE__ == $0
  require 'test/unit'

  class HashTester < Test::Unit::TestCase
    def setup
      @h = Hasher.new
    end

    # Expected value is the reference hash of the AVI test file.
    def test_test_file_hash
      assert_equal("8e245d9679d31e12", @h.open_subtitles_hash('breakdance.avi'))
    end
  end
end


Another more "rubyesque" implementation.

# Computes the OpenSubtitles movie hash with Ruby's arbitrary-size integers.
module MovieHasher

  CHUNK_SIZE = 64 * 1024 # in bytes

  # Returns the hash of +filename+ as 16 lower-case hex digits:
  # file size + sum of the 64-bit words of the first and last 64 kbytes,
  # kept modulo 2**64.
  def self.compute_hash(filename)
    filesize = File.size(filename)
    hash = filesize

    # Read 64 kbytes, divide up into 64 bits and add each
    # to hash. Do for beginning and end of file.
    File.open(filename, 'rb') do |f|    
      # Q< = unsigned 64 bit, explicitly little-endian (plain "Q" would use the
      # byte order of the machine). read returns nil at end of file, hence "|| ''".
      (f.read(CHUNK_SIZE) || '').unpack("Q<*").each do |n|
        hash = (hash + n) & 0xffffffffffffffff # to remain as 64 bit number
      end

      f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)

      # And again for the end of the file
      (f.read(CHUNK_SIZE) || '').unpack("Q<*").each do |n|
        hash = (hash + n) & 0xffffffffffffffff
      end
    end

    sprintf("%016x", hash)
  end
end

# Self-test, only run when this file is executed directly. Both reference files
# are looked up relative to the working directory.
if __FILE__ == $0
  require 'test/unit'

  class MovieHasherTest < Test::Unit::TestCase
    # Reference hash of the AVI test file.
    def test_compute_hash
      assert_equal("8e245d9679d31e12", MovieHasher::compute_hash('breakdance.avi'))
    end

    # Reference hash of the large dummy file; 'dummy.bin' is presumably the
    # unpacked content of the dummy RAR - confirm.
    def test_compute_hash_large_file
      assert_equal("61f7751fc2a72bfb", MovieHasher::compute_hash('dummy.bin'))
    end
  end
end

Haskell

import IO(bracket)
import System.Environment(getArgs)
import System.IO(openBinaryFile,hClose,hFileSize,hSeek,IOMode(ReadMode),SeekMode(AbsoluteSeek,SeekFromEnd))
import qualified Data.ByteString.Lazy as L(hGet,unpack)
import Data.Binary.Get(runGet,getWord64le)
import Data.Binary.Put(runPut,putWord64le)
import Data.Word(Word64)
import Control.Monad(foldM)
import Data.Bits.Utils(w82s)
import Data.Hex(hex)

-- | Compute the 64-bit movie hash of a file: starting from the file size,
-- add every little-endian 64-bit word of the last and of the first
-- 'chunksize' bytes. The sum is a 'Word64'; the handle is closed by 'bracket'.
shortsum :: FilePath -> IO Word64
shortsum filename = bracket (openBinaryFile filename ReadMode) hClose $ \h -> do
  fs <- hFileSize h
  -- Read the first chunk from offset 0.
  hSeek h AbsoluteSeek 0 ; begin <- L.hGet h chunksize
  -- Read the last chunk by seeking chunksize bytes back from the end.
  -- NOTE(review): presumably fails for files shorter than chunksize - confirm.
  hSeek h SeekFromEnd (-(toInteger chunksize)) ; end <- L.hGet h chunksize
  -- Evaluation order: size -> folded over the end chunk -> folded over the begin chunk.
  return $ (flip runGet $ begin) $ chunksum $ (flip runGet $ end) (chunksum . fromInteger $ fs)
  where
    -- 64 KiB
    chunksize = 0x10000
    -- Get-action that reads chunksize/8 words and adds each to the accumulator n.
    chunksum n = foldM (\a _ -> getWord64le >>= return . (+a)) n [1..(chunksize`div`8)]

-- | Hash the file named by the first command-line argument and print the
-- result as hexadecimal text. 'head' fails when no argument is given.
main :: IO ()
main = do
  args <- getArgs
  let fn = head $ args
  p <- shortsum fn
  -- The hash is serialised little-endian and the bytes reversed, so the most
  -- significant byte is printed first.
  putStrLn $ "The hash of file " ++ fn ++ ": " ++ (hex $ w82s $ reverse (L.unpack $ runPut $ putWord64le p))

AutoIT

Forum entry

#cs
	Hash code is based on Media Player Classic. It calculates: size + 64bit
	checksum of the first and last 64k (even if they overlap because the file is smaller than 128k).
	Authors: Authenticity & Emanuel "Datenshi" Lindgren @ AutoIT Forums.
        AutoIT v3.3.2.0
#ce
; Computes the movie hash of $sFileName: file size + sum of the 64-bit words
; of the first and of the last 64k.
; Returns the hash text produced by _HEX with @error = 0.
; Returns 0 with @error = 1 when the file cannot be opened and
; 0 with @error = 2 when the file is smaller than 128k.
Func _Compute_Hash($sFileName)
	Local $hFile, $tRet, $tTmp, $iFileSize, $iChunk, $iI
	$hFile = FileOpen($sFileName, 16)
	; FileOpen reports failure with -1, which "If Not $hFile" does not catch.
	If $hFile = -1 Then Return SetError(1, 0, 0)
	$iFileSize = FileGetSize($sFileName)
	$iChunk = 65536
	If $iFileSize < $iChunk * 2 Then
		FileClose($hFile)
		Return SetError(2, 0, 0)
	EndIf
	; $tRet holds the running hash, $tTmp the word read last.
	$tRet = DllStructCreate("uint64")
	$tTmp = DllStructCreate("uint64")
	DllStructSetData($tRet, 1, $iFileSize)
	; first 64k
	For $iI = 0 To ($iChunk / 8) - 1
		DllStructSetData($tTmp, 1, FileRead($hFile, 8))
		DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
	Next
	; last 64k
	FileSetPos($hFile, $iFileSize - $iChunk, 0)
	For $iI = 0 To ($iChunk / 8) - 1
		DllStructSetData($tTmp, 1, FileRead($hFile, 8))
		DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
	Next
	FileClose($hFile)
	Return SetError(0, 0, _HEX(DllStructGetData($tRet, 1)))
EndFunc
; Formats $iValue as hexadecimal text using two StringFormat fields: the value
; divided by 4294967296 (2^32), followed by the value itself.
; NOTE(review): the "#" flag in "%#.8x" presumably adds a "0x" prefix to the
; result - confirm that callers expect it.
Func _HEX($iValue)
	Return StringFormat("%#.8x%.8x", $iValue / 4294967296, $iValue)
EndFunc

FoxPro

* Main program: computes the movie hash of cfile and returns it as hex text.
* The running hash lives in cret as an 8-character string, most significant
* byte first; adint64 adds two such strings with carry.
* NOTE(review): the FOPEN result is not checked and FSEEK(nfile,-65536,2)
* presumably fails for files shorter than 64k - confirm.
PARAMETERS cfile
PRIVATE ALL 

*******
* environment setup
*******
cret=''
glTalk=(SET("TALK")="ON")

* Use the sample file when no character parameter was passed.
IF vartype(cfile)<>'C'
	cfile='breakdance.avi'
ENDIF



* Debug output, only when SET TALK is ON.
IF glTalk
	? cfile
	? cfile=''
	? LEN(cfile)
endif


* Seeking to the end yields the file size.
nfile=FOPEN(cfile)
nsize=FSEEK(nfile,0,2)

IF gltalk
	? cfile	
	? 'size?>'
	?? nsize
endif
FSEEK(nfile,0,0)

******
* length reencode to 64 uint
*****
* hashsize returns the size as a byte string without leading zeros;
* pad it on the left with chr(0) to 8 bytes.
chash=hashsize(nsize)
cempty=chr(0)
cret=''
IF LEN(chash)<8
	FOR i=1 TO 8-LEN(chash)
		cret=cret+cempty
	ENDFOR
ENDIF
cret=cret+chash
* nSum counts the bytes actually read (debug output only).
nSum=0

*******
* first 64kb
******


	FOR i=1 TO 8192
		* The file stores each word least significant byte first; reverse it
		* to match the representation of cret.
		cpom=FREAD(nfile,8)
		cpom=reverse(cpom)
		nSum=nSum+LEN(cpom)
		IF gltalk
				do buildhex WITH cret
				?? '+'
				DO buildhex WITH cpom
				? '='
		ENDIF
		cret=adint64(cret,cpom)
	ENDFOR

*******
* last 64kb
*******

	FSEEK(nfile,-65536,2)
	FOR i=1 TO 8192
		cpom=FREAD(nfile,8)
		cpom=reverse(cpom)
		cret=adint64(cret,cpom)
		nSum=nSum+LEN(cpom)
	ENDFOR
FCLOSE(nfile)

****
* build hexa
****
IF gltalk

	DO buildhex WITH cret
	?
	? 'Spocital som'
	?? nSum
ENDIF
RETURN buildhex(cret)

* Returns cstring with its characters in reverse order.
FUNCTION reverse
PARAMETERS cstring
PRIVATE ALL 
cret=''
* Walk the string from its last character to its first.
FOR i=LEN(cstring) TO 1 STEP -1
	cret=cret+SUBSTR(cstring,i,1)
ENDFOR
RETURN cret

* Returns the characters of cstring as hex text, two digits per character.
* When SET TALK is ON every pair is also printed, followed by ':'.
* The second parameter lkam is not used in this function.
FUNCTION buildhex
PARAMETERS cstring,lkam
PRIVATE ALL 
gcTalk=SET("TALK")
cret=''
FOR i=1 TO LEN(cstring)
	cpom=dec2basx(ASC(SUBSTR(cstring,i,1)),16)
	* Pad single-digit values with a leading zero.
	IF LEN(cpom)<2
		cout='0'+cpom
		cpom=cout
	ENDIF
	
	cret=cret+cpom
	IF gcTALK="ON"
		?? cpom
		?? ':'
	ENDIF
ENDFOR
RETURN cret

* Adds two 8-character strings as 64-bit numbers (character 1 = most
* significant byte) and returns the 8-character sum. A carry out of the
* most significant byte is dropped.
FUNCTION adint64
PARAMETERS cstring1,cstring2
PRIVATE ALL 
DIMENSION car (8,1) as Character

***
* 8 bytes both
***
nincrement=0
cret=''
* Add from the least significant byte (position 8) upwards; nincrement
* carries the overflow into the next byte.
FOR i=8 TO 1 STEP -1
	nfir=ASC(SUBSTR(cstring1,i,1))
	nsec=ASC(SUBSTR(cstring2,i,1))
	nout=nincrement+nfir+nsec
	IF nout>255
		nincrement=INT(nout/256)
		nout=nout-(nincrement*256)
	ELSE
		nincrement=0
	ENDIF
	car(i)=CHR(nout)
ENDFOR
* Join the result bytes in order.
FOR i=1 TO 8
	cret=cret+car(i)
ENDFOR
RETURN cret


* Converts the number ncislo into a string of bytes in base 256, most
* significant byte first and without leading zero bytes (0 yields one CHR(0)).
FUNCTION hashsize

PARAMETERS ncislo
PRIVATE ALL 
cret=''
creverse=''
* Split off base-256 digits, least significant first.
DO WHILE .t.
	npom=INT(ncislo/256)
	npom2=ncislo-npom*256
	creverse=creverse+CHR(npom2)
	ncislo=npom
	IF ncislo=0
		EXIT
	ENDIF
ENDDO
* Reverse the digits so the most significant byte comes first.
FOR i=1 TO LEN(creverse)
	cret=cret+SUBSTR(creverse,LEN(creverse)-i+1,1)
ENDFOR 
RETURN cret


*..............................................................................
*   Function: DEC2BASX
*    Purpose:  Convert whole number 0-?, to base 2-16 
*
* Parameters: nTempNum - number to convert (0-9007199254740992)
*             base    - base to convert to i.e., 2 4 8 16...
*    returns: string
*      Usage:  cresult=Dec2BasX(nParm1, nParm2)
*              STORE Dec2BasX(255, 16) TO cMyString  &&... cMyString contains 'ff'
*..............................................................................
* Converts nTempNum to its text representation in base nNewBase.
* Digit values 10..15 become 'a'..'f'; 0 yields '0'.
FUNCTION dec2basx
PARAMETERS nTempNum, nNewBase

STORE 0 TO nWorkVal,;
   remainder,;
   dividend,;
   nextnum,;
   digit

nWorkVal = nTempNum  
ret_str = ''

* Repeated division: every remainder is one digit, prepended so the most
* significant digit ends up first.
DO WHILE .T.
   digit = MOD(nWorkVal, nNewBase)
   dividend = nWorkVal / nNewBase
   nWorkVal = INT(dividend)

   DO CASE
      CASE digit = 10
         ret_str = 'a' + ret_str
      CASE digit = 11
         ret_str = 'b' + ret_str
      CASE digit = 12
         ret_str = 'c' + ret_str
      CASE digit = 13
         ret_str = 'd' + ret_str
      CASE digit = 14
         ret_str = 'e' + ret_str
      CASE digit = 15
         ret_str = 'f' + ret_str
      OTHERWISE
         ret_str = LTRIM(STR(digit)) + ret_str
   ENDCASE

   * Stop once nothing is left to divide.
   IF nWorkVal = 0
      EXIT
   ENDIF ( nWorkVal = 0 )
ENDDO ( .T. )
RETURN ret_str

Powershell 2.0

You can use GetHash.dll.

http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll

Use Example:

	Add-Type -Path "GetHash.dll"

	# Returns the hexadecimal movie hash of $path using the GetHash.dll
	# assembly (must already be loaded, e.g. with Add-Type).
	function MovieHash([string]$path) {
		$rawHash = [GetHash.Main]::ComputeHash($path)
		[GetHash.Main]::ToHexadecimal($rawHash)
	}

	MovieHash $filename 

or without using GetHash.dll:

$dataLength = 65536

# Adds two UInt64 values modulo 2^64. The addition is carried out in Decimal
# so that it cannot overflow before the modulo is applied.
function LongSum([UInt64]$a, [UInt64]$b) { 
	$modulus = [Decimal]([UInt64]::MaxValue) + 1
	$total = [Decimal]$a + $b
	[UInt64]($total % $modulus)
}

# Sums up to $dataLength / 8 64-bit words read from the current position of
# $stream (modulo 2^64) and returns the sum as UInt64.
# Reading stops at the first incomplete word, so stale bytes left in the
# buffer by the previous read are never added.
function StreamHash([IO.Stream]$stream) {
	$hashLength = 8
	[UInt64]$lhash = 0
	[byte[]]$buffer = New-Object byte[] $hashLength
	$i = 0
	while ( ($i -lt ($dataLength / $hashLength)) -and ($stream.Read($buffer,0,$hashLength) -eq $hashLength) ) {
		$i++
		$lhash = LongSum $lhash ([BitConverter]::ToUInt64($buffer,0))
	}
	$lhash
}

# Returns the movie hash of the file at $path as 16 lower-case hex digits:
# file size + word sum of the first and last $dataLength bytes (modulo 2^64).
# Errors from opening or reading the file are passed on to the caller.
function MovieHash([string]$path) {
	$stream = $null
	try { 
		$stream = [IO.File]::OpenRead($path) 
		[UInt64]$lhash = $stream.Length
		$lhash = LongSum $lhash (StreamHash $stream)
		$stream.Position = [Math]::Max(0L, $stream.Length - $dataLength)
		$lhash = LongSum $lhash (StreamHash $stream)
		# x16: lower case and zero-padded, like the reference hashes
		"{0:x16}" -f $lhash
	}
	# $stream is still $null when OpenRead failed
	finally { if ($null -ne $stream) { $stream.Close() } }
}

MovieHash $filename 

MASM

; Calc_Hash: computes the movie hash of the file named by pFile and writes it
; as text (format string HashFormat) into the buffer pBuf.
; The 64-bit sum is kept in ebx:edx (ebx = high dword, edx = low dword).
; Returns the low 32 bits of the file size in eax, or INVALID_HANDLE_VALUE
; when the file cannot be opened.
; NOTE(review): only a 32-bit file size is used, and files shorter than 64 KiB
; make the second SetFilePointer call use a negative offset - confirm limits.
Calc_Hash proc uses esi ebx edx pFile:dword, pBuf:dword

	LOCAL hFile:dword, fSize:dword, NBR:dword, pMem:dword
	
	; Read access is sufficient; allow other readers while hashing.
	invoke CreateFile,pFile,GENERIC_READ,FILE_SHARE_READ,0,OPEN_EXISTING,0,0
	mov hFile,eax	
	cmp eax,INVALID_HANDLE_VALUE
	jz @Error
	
	; Seeking to the end yields the file size; keep a copy on the stack.
	invoke SetFilePointer,hFile,0,NULL,FILE_END
	mov fSize,eax
	push eax
	
	; GPTR memory is zero-initialised: 64 KiB for the head + 64 KiB for the tail.
	invoke GlobalAlloc,GPTR,131072
	mov pMem,eax
	
	invoke SetFilePointer,hFile,0,NULL,FILE_BEGIN
	invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
	
	sub fSize,65536
	add pMem,65536
	
	invoke SetFilePointer,hFile,fSize,NULL,FILE_BEGIN
	invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
	
	sub pMem,65536
	mov esi,pMem
	mov ecx,131072
	pop eax
	mov edx,eax		; low dword of the sum starts as the file size
	push eax
	xor ebx,ebx		; high dword must start at zero (was left uninitialised)

	@@:
	add edx,[esi]		; low dwords; carry goes into the high dword
	adc ebx,[esi+4]
	add esi,8
	sub ecx,8
	jnz @B
	
	; The two pushed dwords become the variable arguments of wsprintf
	; (high dword first); they are removed again by the two pops.
	push edx
	push ebx
	invoke wsprintf,pBuf,addr HashFormat
	pop eax
	pop eax
	
	invoke CloseHandle,hFile
	invoke GlobalFree,pMem
	pop eax			; saved file size becomes the return value
	
	@Error: ; If error eax returns (INVALID_HANDLE_VALUE)
	
	; Hash value is copied to pBuf
	; eax returns Movie Filesize
	
	ret
	
Calc_Hash endp

Objective-C

This is an implementation of the hash for Objective-C for Mac by subsmarine.com

OSHashAlgorithm.m

#import "OSHashAlgorithm.h"


@implementation OSHashAlgorithm

// Formats a hash value as a 16-digit, zero-padded, lowercase hexadecimal string.
+(NSString*)stringForHash:(uint64_t)hash
{
        // stringWithFormat: already returns an autoreleased string; sending it
        // another autorelease would over-release it.
        return [NSString stringWithFormat:@"%016llx", hash];
}

// Hashes the file at the given path. Returns a zeroed VideoHash when the file
// cannot be opened or is rejected by hashForFile:.
+(VideoHash)hashForPath:(NSString*)path
{
        NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
        VideoHash hash = [OSHashAlgorithm hashForFile:readFile];
        [readFile closeFile];
        return hash;
}

// Hashes the file at the given URL. Returns a zeroed VideoHash when the file
// cannot be opened or is rejected by hashForFile:.
+(VideoHash)hashForURL:(NSURL*)url
{
        NSFileHandle *readFile = [NSFileHandle fileHandleForReadingFromURL:url error:NULL];
        VideoHash hash = [OSHashAlgorithm hashForFile:readFile];
        // Close the handle here as well, mirroring hashForPath:.
        [readFile closeFile];
        return hash;
}

// Computes the hash from an open file handle: file size, plus the sum of the
// 64-bit words in the first 64 KB read from the handle's current offset, plus
// the sum of the 64-bit words in the last 64 KB. Words are summed in host
// byte order. Returns a zeroed VideoHash when the handle is nil, the file is
// shorter than 64 KB, or either chunk could not be read completely.
+(VideoHash)hashForFile:(NSFileHandle*)handle
{
        VideoHash retHash;
        retHash.fileHash =0;
        retHash.fileSize =0;

        if( handle == nil )
                return retHash;

        const NSUInteger CHUNK_SIZE=65536;
        const NSUInteger WORD_COUNT=CHUNK_SIZE/sizeof(uint64_t);

        NSData *fileDataBegin = [handle readDataOfLength:CHUNK_SIZE];
        [handle seekToEndOfFile];
        unsigned long long fileSize = [handle offsetInFile];
        if( fileSize < CHUNK_SIZE )
                return retHash;

        // fileSize >= CHUNK_SIZE here, so the unsigned subtraction cannot wrap.
        [handle seekToFileOffset:fileSize-CHUNK_SIZE];
        NSData *fileDataEnd = [handle readDataOfLength:CHUNK_SIZE];

        // The loops below read exactly CHUNK_SIZE bytes from each buffer, so
        // refuse short reads instead of running past the end of the data.
        if( [fileDataBegin length] < CHUNK_SIZE || [fileDataEnd length] < CHUNK_SIZE )
                return retHash;

        //
        // Calculate hash
        //

        // 1st. File size
        uint64_t hash = fileSize;
        // 2nd. Beginning data block
        const uint64_t *words = (const uint64_t*)[fileDataBegin bytes];
        for( NSUInteger i=0; i<WORD_COUNT; i++ )
                hash += words[i];
        // 3rd. Ending data block
        words = (const uint64_t*)[fileDataEnd bytes];
        for( NSUInteger i=0; i<WORD_COUNT; i++ )
                hash += words[i];

        retHash.fileHash = hash;
        retHash.fileSize = fileSize;

        return retHash;
}

@end

OSHashAlgorithm.h

#import <Cocoa/Cocoa.h>

// Result of hashing a file: the computed hash together with the file size.
// Both fields are zero when OSHashAlgorithm could not hash the file.
typedef struct 
{
        uint64_t fileHash;      // size + sum of the 64-bit words in the first and last 64 KB
        uint64_t fileSize;      // file size in bytes
} VideoHash;

// Class-method-only helper that computes the OpenSubtitles hash of a file.
@interface OSHashAlgorithm : NSObject {

}
// Hash the file at a filesystem path; a zeroed VideoHash is returned on failure.
+(VideoHash)hashForPath:(NSString*)path;
// Hash the file at a URL; a zeroed VideoHash is returned on failure.
+(VideoHash)hashForURL:(NSURL*)url;
// Hash an already opened file handle; a nil handle or a file shorter than 64 KB yields a zeroed VideoHash.
+(VideoHash)hashForFile:(NSFileHandle*)handle;
// Format a hash value as a hexadecimal string.
+(NSString*)stringForHash:(uint64_t)hash;

@end

Vala

/**
 * Computes the OpenSubtitles hash of a file: the file size plus the sum of
 * the little-endian 64-bit words in the first 64 kB and in the last 64 kB.
 *
 * Any I/O error (including a file too short to supply 64 kB) is reported
 * through error(), as before.
 *
 * @param file the file to hash
 * @return the 64-bit hash value
 */
public uint64 hash(File file) {
	try {
		//get filesize and add it to hash (only the size attribute is needed)
		var file_info = file.query_info(FileAttribute.STANDARD_SIZE, FileQueryInfoFlags.NONE);
		int64 size = file_info.get_size();
		uint64 h = (uint64) size;

		//add first 64kB of file to hash
		var dis = new DataInputStream(file.read());
		dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		for(int i=0; i<65536/sizeof(uint64); i++) {
			h += dis.read_uint64();
		}

		//add last 64kB of file to hash
		//seek on the raw stream instead of skip(): the offset stays 64-bit,
		//is clamped at 0 for short files, and cannot be skipped only partially
		var tail = file.read();
		tail.seek(int64.max(0, size - 65536), SeekType.SET);
		dis = new DataInputStream(tail);
		dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		for(int i=0; i<65536/sizeof(uint64); i++) {
			h += dis.read_uint64();
		}

		return h;
	} catch (Error e) {
        error("%s", e.message);
    }
}

/**
 * Prints the hash of each reference file as 16 hexadecimal digits.
 * Stops with exit code 1 at the first file that does not exist.
 */
int main () {
    string[] samples = { "breakdance.avi", "dummy.bin" };

    foreach (unowned string sample in samples) {
        var candidate = File.new_for_path (sample);
        if (!candidate.query_exists ()) {
            stderr.printf ("File '%s' doesn't exist.\n", candidate.get_path ());
            return 1;
        }
        stdout.printf("%016llx\n", hash(candidate));
    }

    return 0;
}

Build with: valac --pkg gio-2.0 hash.vala

AutoHotKey

; Demo script: shows the OpenSubtitles hash of one file in a message box, then exits.
#NoEnv
SetBatchLines, -1
; http://www.opensubti.../breakdance.avi
; OpenSubtitles Hash = 8E245D9679D31E12
; File to hash
FilePath := "Breakdance.avi"
; Display "<path>:" followed by the hash on the next line
MsgBox, 0, OpenSubtitlesHash, % Filepath . ":`r`n" . GetOpenSubtitlesHash(FilePath)
ExitApp


; ==================================================================================================
; Computes the OpenSubtitles hash of the file at FilePath.
; Returns the hash as 16 upper-case hexadecimal digits, or an empty string if the
; file size is outside 131072 <= size < 9000000000 or the file cannot be opened.
GetOpenSubtitlesHash(FilePath) {
   ; http://trac.opensubt...HashSourceCodes
   ; Lookup table: nibble value -> hexadecimal digit
   Static X := { 0: "0",  1: "1",  2: "2",  3: "3",  4: "4",  5: "5",  6: "6",  7: "7"
              ,  8: "8",  9: "9", 10: "A", 11: "B", 12: "C", 13: "D", 14: "E", 15: "F"}
   ; Check the file size ---------------------------------------------------------------------------
   ; 9000000000 > $moviebytesize >= 131072 bytes (changed > to  >= for the lower limit)
   FileGetSize, FileSize, %FilePath%
   If (FileSize < 131072) || (FileSize >= 9000000000)
      Return ""
   ; Read the first and last 64 KB -----------------------------------------------------------------
   VarSetCapacity(FileParts, 131072)         ; allocate sufficient memory
   File := FileOpen(FilePath, "r")           ; open the file
   If !IsObject(File)                        ; open failed: do not hash an unfilled buffer
      Return ""
   File.Seek(0, 0)                           ; set the file pointer (just for balance)
   File.RawRead(FileParts, 65536)            ; read the first 64 KB
   File.Seek(-65536, 2)                      ; set the file pointer for the last 64 KB
   File.RawRead(&FileParts + 65536, 65536)   ; read the last 64 KB
   File.Close()                              ; got all we need, so the file can be closed
   ; Now calculate the hash using two UINTs for the low- and high-order parts of an UINT64 ---------
   LoUINT := FileSize & 0xFFFFFFFF           ; store low-order UINT of file size
   HiUINT := FileSize >> 32                  ; store high-order UINT of file size
   Offset := -4                              ; to allow adding 4 on first iteration
   Loop, 16384 {                             ; 131072 / 8
      LoUINT += NumGet(FileParts, Offset += 4, "UInt") ; add first UINT value to low-order UINT
      HiUINT += NumGet(FileParts, Offset += 4, "UInt") ; add second UINT value to high-order UINT
   }
   ; Adjust the probable overflow of the low-order UINT
   HiUINT += LoUINT >> 32                    ; add the overflow to the high-order UINT
   LoUINT &= 0xFFFFFFFF                      ; remove the overflow from the low-order UINT
   ; Now get the hex string, i.e. the hash ---------------------------------------------------------
   Hash := ""
   VarSetCapacity(UINT64, 8, 0)
   NumPut((HiUINT << 32) | LoUINT, UINT64, 0, "UInt64")
   Loop, 8                                   ; walk the 8 bytes from most to least significant
      Hash .= X[(Byte := NumGet(UINT64, 8 - A_Index, "UChar")) >> 4] . X[Byte & 0x0F]
   Return Hash
}
; ==================================================================================================

Lisp

; opensubtitle hash, common lisp, sbcl
; sean langton 2013

(defun get-lvalue (stream)
  "Read eight bytes from STREAM and return them combined as one
little-endian unsigned integer (the first byte read is the least
significant)."
  (loop for shift from 0 below 64 by 8
        sum (ash (read-byte stream) shift)))
  
(defun hashfile (path)
  "Print the OpenSubtitles hash of the file at PATH as 16 hexadecimal digits.
The hash is the file length plus the 64-bit words of the first and of the
last 64 KiB, all added modulo 2^64. Files shorter than 128 KiB are
rejected with a printed message. Always returns NIL."
  (with-open-file (in path :element-type '(unsigned-byte 8))
    (let* ((len (file-length in))
           (hash len))
      (when (< len 131072)
        (print "file too small to hash")
        (return-from hashfile nil))
      (flet ((add-chunk ()
               ;; Fold 8192 eight-byte words (64 KiB) into HASH, keeping 64 bits.
               (loop repeat 8192
                     do (setf hash (ldb (byte 64 0)
                                        (+ hash (get-lvalue in)))))))
        (add-chunk)
        (file-position in (- len 65536))
        (add-chunk))
      (format t "~&~16,'0x" hash))))

; (hashfile #p"~/Downloads/breakdance.avi")
; (hashfile #p"~/Downloads/dummy/dummy.bin")

Pascal

{$push}
{$Q-}{$R-} // the hash is a sum modulo 2^64, so qword overflow must wrap silently
{ Computes the OpenSubtitles hash of Stream.

  Size receives the stream size in bytes.
  Hash receives the hash as 16 lowercase hexadecimal digits: the size plus
  the 64-bit words of the first 64 KB and of the last 64 KB of the stream.
  Streams shorter than 64 KB are hashed from the start for both chunks.
  Only complete 8-byte words are added; a shorter trailing read ends the chunk. }
procedure ComputeHash(const Stream : TStream;
                      out   Size : qword;
                      out   Hash : string);
const
  ChunkSize = 65536;
var
  hashQ : qword;

  { Adds up to ChunkSize bytes, read from the current position, to hashQ. }
  procedure AddChunk;
  var
    i : integer;
    word64 : qword;
  begin
    for i := 1 to ChunkSize div sizeof(word64) do begin
      // A short read would leave stale bytes in word64, so stop instead
      if Stream.Read(word64, sizeof(word64)) <> sizeof(word64) then
        break;
      hashQ := hashQ + word64;
    end;
  end;

begin
  Stream.Seek(0, soFromBeginning);
  Size := Stream.Size;
  hashQ := Size;

  AddChunk;

  // Seeking back 64 KB from the end is only valid when the stream is that long
  if Stream.Size >= ChunkSize then
    Stream.Seek(-ChunkSize, soFromEnd)
  else
    Stream.Seek(0, soFromBeginning);

  AddChunk;

  Hash := lowercase(Format('%.16x',[hashQ]));
end;
{$pop}

Scala

import java.io.{FileInputStream, File}
import java.nio.{LongBuffer, ByteOrder, ByteBuffer}
import java.nio.channels.FileChannel.MapMode
import scala.math._

/** Computes the OpenSubtitles hash: the file size plus the 64-bit
  * little-endian word sums of the first and the last 64 KiB of the file.
  */
class OpenSubtitlesHasher {
  private val hashChunkSize = 64L * 1024L

  /** Returns the hash of `file` as 16 lowercase, zero-padded hex digits.
    * For files shorter than 64 KiB both chunks cover the whole file.
    */
  def computeHash(file: File) : String = {
    val length = file.length
    val span = min(length, hashChunkSize)
    val tailOffset = max(length - hashChunkSize, 0)

    val channel = new FileInputStream(file).getChannel

    try {
      // Head chunk first, then tail chunk, each mapped read-only.
      val headSum = computeHashForChunk(channel.map(MapMode.READ_ONLY, 0, span))
      val tailSum = computeHashForChunk(channel.map(MapMode.READ_ONLY, tailOffset, span))

      "%016x".format(length + headSum + tailSum)
    } finally {
      channel.close()
    }
  }

  /** Sums every complete little-endian 64-bit word in `buffer`; Long
    * arithmetic wraps, which gives the modulo-2^64 sum the hash needs.
    */
  private def computeHashForChunk(buffer: ByteBuffer) : Long = {
    val words = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer()
    var total = 0L
    while (words.hasRemaining) {
      total += words.get()
    }
    total
  }
}

Javascript

Download example. The key is to use the JavaScript Long library (dcodeIO Long.js) for 64-bit arithmetic. Thanks to Oscar Brito for fixing the code. See also https://github.com/ka2er/node-opensubtitles-api/blob/master/lib/opensubtitles.js#L36

//be sure Long.min.js is already included
/*
 * Calculate OpenSubtitles hash
 * (Oscar Brito - aetheon@gmail.com)
 *
 * @param {File} file - a File obj contained on a DataTransfer
 * @param {Function} onComplete - the result callback
 */
var OpenSubtitlesHash = function(file, onComplete){
    // Computes: file size + sum of the 64-bit little-endian words of the
    // first 64 KiB + sum of those of the last 64 KiB, modulo 2^64.
    // onComplete receives the hash as 16 lowercase, zero-padded hex digits.

    var HASH_CHUNK_SIZE = 64 * 1024;
    if(file.size<HASH_CHUNK_SIZE)
        HASH_CHUNK_SIZE = file.size;


    // sum chunk long values
    var sumChunk = function(arrayBuffer){

        var view = new DataView(arrayBuffer);
        // Unsigned accumulator: a signed Long would print a negative
        // number whenever the top bit of the hash is set.
        var hNumber = new dcodeIO.Long(0, 0, true);

        // Only complete 8-byte words are summed; reading past a trailing
        // fragment would make getUint32 throw a RangeError.
        for(var i=0; i+8<=arrayBuffer.byteLength; i+=8){

            var low = view.getUint32(i, true);
            var high = view.getUint32(i+4, true);

            var n = new dcodeIO.Long(low, high, true);
            hNumber = hNumber.add(n);
        }

        return hNumber;

    };


    // read chunk
    var readChunk = function(start, end, callback){

        var reader = new FileReader();
        reader.onload = function(e){ 
            
            // sum all long values on the chunk
            var number = sumChunk(e.currentTarget.result);
            
            if(callback)
                callback(number);

        }

        var blob = file.slice(start, end);
        reader.readAsArrayBuffer(blob);
    };


    // read the first chunk
    readChunk(0, HASH_CHUNK_SIZE, function(head){

        // read the tail chunk
        var start = file.size-HASH_CHUNK_SIZE;
        if(start < 0)
            start = 0;

        readChunk(start, file.size, function(tail){

            // sum all values
            // fromNumber keeps sizes above 4 GiB intact; the Long
            // constructor would keep only the low 32 bits of file.size.
            var sum = head.add(tail).add(dcodeIO.Long.fromNumber(file.size, true));
            // convert to hex, left-padded to the full 16 digits
            var sumHex = sum.toString(16);
            while(sumHex.length < 16)
                sumHex = "0" + sumHex;

            if(onComplete) 
                onComplete(sumHex);

        });

    });
    
};
                  

// TODO
// Once the document is ready, hash every file dropped onto #search_field
// and write each resulting hash into the document.
$(document).ready(function() {

    $('#search_field').bind('drop', function(dropEvent, ev) {
        dropEvent.preventDefault();

        var droppedFiles = dropEvent.originalEvent.dataTransfer.files;

        $.each(droppedFiles, function(position, droppedFile) {

            OpenSubtitlesHash(droppedFile, function(hashHex){

                // TODO
                document.write(hashHex);

            });

        });
    });

});

Attachments (1)

Download all attachments as: .zip

Note: See TracWiki for help on using the wiki.