wiki:HashSourceCodes

Version 35 (modified by guest, 13 years ago) (diff)

Edited the VB.Net example to use System.Decimal this is a 96bit number and will avoid issues with buffer overflow.

Hash code is based on Media Player Classic. In natural language it calculates: size + 64-bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). On opensubtitles.org the movie file size is limited to 9000000000 > $moviebytesize > 131072 bytes; if there is any reason to change these limits, let us know. The licence of the hashing source code is GPL. The source code was tested on little-endian machines: DEC, Intel and compatible.

Feel free to edit/add source code if you have a faster/better implementation. Also, don't forget to check that the hash is correct for the test files. Please test these 2 files to ensure your algorithm is completely OK:

  • AVI file (12 909 756 bytes)
    • hash: 8e245d9679d31e12
  • DUMMY RAR file (2 565 922 bytes, 4 295 033 890 after RAR unpacking)
    • hash: 61f7751fc2a72bfb

C

#include <stdio.h>
#include <stdlib.h>

#define MAX(x,y) (((x) > (y)) ? (x) : (y))
#ifndef uint64_t
#define uint64_t unsigned long long
#endif

/*
 * Compute the OpenSubtitles / Media Player Classic movie hash of an open file.
 *
 * The hash is the file size plus the sum of the 64-bit words found in the
 * first 64 KiB and in the last 64 KiB of the file; all arithmetic wraps
 * modulo 2^64. Only complete 8-byte words are summed. Words are read in host
 * byte order, so the result is only correct on little-endian machines.
 *
 * handle: stream opened in binary mode ("rb"); its position is changed.
 * Returns the 64-bit hash. File sizes are limited to what ftell()/fseek()
 * can represent in a long on the target platform.
 */
uint64_t compute_hash(FILE * handle)
{
        uint64_t hash, fsize, tail_offset;

        fseek(handle, 0, SEEK_END);
        fsize = ftell(handle);
        fseek(handle, 0, SEEK_SET);

        hash = fsize;

        /* first 64 KiB (or the whole file when it is shorter) */
        for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++);

        /* fsize is unsigned: subtracting 65536 from a smaller size would wrap
           around instead of going negative, so clamp explicitly to offset 0 */
        tail_offset = (fsize > 65536) ? fsize - 65536 : 0;
        fseek(handle, (long)tail_offset, SEEK_SET);

        /* last 64 KiB (overlaps the first chunk for files under 128 KiB) */
        for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++);

        return hash;
}

/*
 * Demo driver: prints the movie hash of the file named by the first
 * command-line argument, or of "breakdance.avi" when no argument is given.
 * Exits with status 1 when the file cannot be opened.
 */
int main(int argc, char *argv[])
{
        const char *path = (argc > 1) ? argv[1] : "breakdance.avi";
        FILE * handle;
        uint64_t myhash;

        handle = fopen(path, "rb");

        if (!handle)
        {
                fprintf(stderr, "Error opening file '%s'\n", path);
                return 1;
        }

        myhash = compute_hash(handle);
        /* the hash is always written as 16 zero-padded hex digits;
           %llx is portable, unlike the MSVC-only %I64x */
        printf("%016llx\n", (unsigned long long)myhash);

        fclose(handle);
        return 0;
}

C - Public Domain License

#include <stdio.h>
#include <stdlib.h>

unsigned long long analizefileOSHahs(char *fileName){
 /*
  * Public Domain implementation by Kamil Dziobek. turbos11(at)gmail.com
  * This code implements Gibest hash algorithm first use in Media Player Classics
  * For more implementation(various languages and authors) see:
  * http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
  *
  * -works only on little-endian procesor DEC, Intel and compatible
  * -sizeof(unsigned long long) must be 8
  *
  * Returns: file size + sum of the 64-bit words of the first and of the last
  * 64 KiB of the file (modulo 2^64). Only complete 8-byte words are summed.
  * Returns 0 when fileName is NULL, the file cannot be opened, or its size
  * cannot be determined.
  */

  FILE *file;
  unsigned long long words[8192];   /* one 64 KiB chunk, reused for both passes */
  unsigned long long sum;
  long size, start;
  size_t got, i;
  int pass;

  if (fileName == NULL)
    return 0;
  file = fopen(fileName, "rb");
  if (file == NULL)
    return 0;

  /* take the size up front instead of relying on where the last read stopped */
  if (fseek(file, 0, SEEK_END) != 0 || (size = ftell(file)) < 0) {
    fclose(file);
    return 0;
  }
  sum = (unsigned long long)size;

  for (pass = 0; pass < 2; pass++) {
    /* pass 0: head chunk; pass 1: tail chunk, clamped to offset 0 so files
       shorter than 64 KiB are simply read twice from the start */
    start = (pass == 0 || size < 65536) ? 0 : size - 65536;
    if (fseek(file, start, SEEK_SET) != 0)
      break;
    /* fread returns the number of COMPLETE 8-byte items, so only initialised
       words are ever added */
    got = fread(words, sizeof words[0], 8192, file);
    for (i = 0; i < got; i++)
      sum += words[i];
  }

  fclose(file);
  return sum;
}
/*
 * Demo driver: prints the hash of the file given as the first command-line
 * argument as 16 zero-padded hex digits. Returns 1 when no file is given.
 */
int main(int argc, char *argv[]){
  unsigned long long myhash;
  if (argc < 2) {
    fprintf(stderr, "usage: %s <video-file>\n", (argc > 0) ? argv[0] : "oshash");
    return 1;
  }
  myhash = analizefileOSHahs(argv[1]);
  /* %llx is portable; the original %I64x only works with Microsoft's runtime */
  printf("hash is %016llx\n", myhash);
  return 0;
}

C++

 #include <fstream>
 #include <iomanip>
 #include <iostream>
 
 typedef unsigned __int64 uint64_t;
 using namespace std;
 
 // Returns the larger of the two int arguments (y when they are equal).
 int MAX(int x, int y)
 {
        return (x > y) ? x : y;
 }
 
 // Computes the OpenSubtitles / Media Player Classic movie hash of an open
 // binary stream: file size + sum of the 64-bit words of the first 64 KiB and
 // of the last 64 KiB, all modulo 2^64. Only complete 8-byte words are summed;
 // words are read in host byte order (little-endian machines only).
 //
 // f: stream opened with ios::binary; its position and state flags are changed.
 // Returns the hash, or 0 when the stream size cannot be determined.
 uint64_t compute_hash(ifstream& f)
 {
        const uint64_t CHUNK_BYTES = 65536;
        const size_t CHUNK_WORDS = CHUNK_BYTES / sizeof(uint64_t);

        f.clear();
        f.seekg(0, ios::end);
        const streamoff end = f.tellg();
        if (end < 0)
                return 0;
        const uint64_t fsize = static_cast<uint64_t>(end);
        f.seekg(0, ios::beg);

        uint64_t hash = fsize;
        uint64_t tmp = 0;

        // first 64 KiB (or the whole file when it is shorter)
        for(size_t i = 0; i < CHUNK_WORDS && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp);

        // A short file leaves eofbit/failbit set, which would turn the seek
        // below into a no-op, so reset the state first.
        f.clear();

        // Clamp in 64-bit arithmetic: the size is unsigned (no negative
        // result to rely on) and must not be squeezed through an int.
        const uint64_t tail = (fsize > CHUNK_BYTES) ? fsize - CHUNK_BYTES : 0;
        f.seekg(static_cast<streamoff>(tail), ios::beg);

        // last 64 KiB (overlaps the first chunk for files under 128 KiB)
        for(size_t i = 0; i < CHUNK_WORDS && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp);
        return hash;
 }
 
 // Demo driver: prints the movie hash of the file named by the first
 // command-line argument (default "c:\test.avi") as 16 zero-padded hex digits.
 // Returns 1 when the file cannot be opened.
 int main(int argc, char *argv[])
 {
        const char *path = (argc > 1) ? argv[1] : "c:\\test.avi";
        ifstream f;
        uint64_t myhash;

        f.open(path, ios::in|ios::binary);
        if (!f.is_open()) {
           cerr << "Error opening file" << endl;
           return 1;
        }

        myhash = compute_hash(f);
        // setw/setfill come from <iomanip>
        cout << setw(16) << setfill('0') << hex << myhash << endl;

        f.close();
        return 0;
 }

About C and C++ implementation

These implementations only work on little-endian processors: DEC, Intel and compatible.

Java

/**
 * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit
 * checksum of the first and last 64k (even if they overlap because the file is smaller than
 * 128k).
 */
public class OpenSubtitlesHasher {
        
        /**
         * Size of the chunks that will be hashed in bytes (64 KB)
         */
        private static final int HASH_CHUNK_SIZE = 64 * 1024;
        
        
        public static String computeHash(File file) throws IOException {
                long size = file.length();
                long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size);
                
                FileChannel fileChannel = new FileInputStream(file).getChannel();
                
                try {
                        long head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile));
                        long tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile));
                        
                        return String.format("%016x", size + head + tail);
                } finally {
                        fileChannel.close();
                }
        }
        

        public static String computeHash(InputStream stream, long length) throws IOException {
                
                int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length);
                
                // buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks
                byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)];
                
                DataInputStream in = new DataInputStream(stream);
                
                // first chunk
                in.readFully(chunkBytes, 0, chunkSizeForFile);
                
                long position = chunkSizeForFile;
                long tailChunkPosition = length - chunkSizeForFile;
                
                // seek to position of the tail chunk, or not at all if length is smaller than two chunks
                while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0);
                
                // second chunk, or the rest of the data if length is smaller than two chunks
                in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile);
                
                long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile));
                long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile));
                
                return String.format("%016x", length + head + tail);
        }
        

        private static long computeHashForChunk(ByteBuffer buffer) {
                
                LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer();
                long hash = 0;
                
                while (longBuffer.hasRemaining()) {
                        hash += longBuffer.get();
                }
                
                return hash;
        }
        
}

C#

You can use GetHash?.dll.

http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll

Use Example:

    // Runs when the open-file dialog is confirmed: hashes the chosen file with
    // GetHash.dll and shows the hexadecimal form of the hash in label1.
    private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
    {
        string selectedPath = openFileDialog1.FileName;
        label1.Text = GetHash.Main.ToHexadecimal(GetHash.Main.ComputeHash(selectedPath));
    }

or without using GetHash?.dll:

using System;
using System.Text;
using System.IO;
   
namespace MovieHasher
{
    class Program
    {
        // Number of bytes hashed at each end of the file.
        private const int ChunkSize = 65536;

        /// <summary>Computes the movie hash of the file at <paramref name="filename"/>.</summary>
        /// <returns>The 8 bytes of the hash, most significant byte first.</returns>
        private static byte[] ComputeMovieHash(string filename)
        {
            byte[] result;
            using (Stream input = File.OpenRead(filename))
            {
                result = ComputeMovieHash(input);
            }
            return result;
        }

        /// <summary>
        /// Computes the movie hash of a seekable stream: length + sum of the
        /// 64-bit words of the first and of the last 64 KiB, modulo 2^64.
        /// The stream is closed before returning, as it always has been.
        /// </summary>
        /// <returns>The 8 bytes of the hash, most significant byte first.</returns>
        private static byte[] ComputeMovieHash(Stream input)
        {
            long streamsize = input.Length;
            long lhash = streamsize;
            byte[] buffer = new byte[sizeof(long)];

            // The hash is defined modulo 2^64, so wrap-around is expected;
            // unchecked keeps builds compiled with /checked from throwing.
            lhash = unchecked(lhash + SumChunk(input, buffer));

            input.Position = Math.Max(0, streamsize - ChunkSize);
            lhash = unchecked(lhash + SumChunk(input, buffer));

            input.Close();
            byte[] result = BitConverter.GetBytes(lhash);
            Array.Reverse(result);
            return result;
        }

        // Sums up to ChunkSize bytes from the current position as 64-bit words,
        // stopping early at end of stream.
        private static long SumChunk(Stream input, byte[] buffer)
        {
            long sum = 0;
            for (int i = 0; i < ChunkSize / sizeof(long) && ReadWord(input, buffer); i++)
            {
                sum = unchecked(sum + BitConverter.ToInt64(buffer, 0));
            }
            return sum;
        }

        // Fills the whole buffer from the stream. Stream.Read may deliver fewer
        // bytes than requested, so loop until the word is complete; returns
        // false when the stream ends first, so no stale bytes are ever summed.
        private static bool ReadWord(Stream input, byte[] buffer)
        {
            int filled = 0;
            while (filled < buffer.Length)
            {
                int got = input.Read(buffer, filled, buffer.Length - filled);
                if (got <= 0)
                {
                    return false;
                }
                filled += got;
            }
            return true;
        }

        /// <summary>Formats bytes as lower-case hex, two digits per byte.</summary>
        private static string ToHexadecimal(byte[] bytes)
        {
            StringBuilder hexBuilder = new StringBuilder();
            for(int i = 0; i < bytes.Length; i++)
            {
                hexBuilder.Append(bytes[i].ToString("x2"));
            }
            return hexBuilder.ToString();
        }

        // Hashes the file named by the first argument (default C:\test.avi).
        static void Main(string[] args)
        {
            string path = args.Length > 0 ? args[0] : @"C:\test.avi";
            byte[] moviehash = ComputeMovieHash(path);
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash));
        }
    }
}

If you get overflow error read this.

VB.Net

Imports System
Imports System.Text
Imports System.IO
Imports System.Runtime.InteropServices

Namespace MovieHasher
    Class Program
        ' Number of bytes hashed at each end of the file, and bytes per word.
        Private Const ChunkSize As Integer = 65536
        Private Const WordSize As Integer = 8

        ' Computes the movie hash of the file at the given path.
        ' Returns the 8 bytes of the hash, most significant byte first.
        Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte()
            Dim result As Byte()
            Using input As Stream = File.OpenRead(filename)
                result = ComputeMovieHash(input)
            End Using
            Return result
        End Function

        ' Computes the movie hash of a seekable stream: length + sum of the
        ' 64-bit words of the first and of the last 64 KiB, modulo 2^64.
        ' The stream is closed before returning, as it always has been.
        Private Shared Function ComputeMovieHash(ByVal input As Stream) As Byte()
            Dim streamsize As Long = input.Length
            ' Decimal holds 96 bits, so the running total cannot overflow.
            Dim lhash As System.Decimal = streamsize

            Dim buffer As Byte() = New Byte(WordSize - 1) {}
            lhash += SumChunk(input, buffer)

            input.Position = Math.Max(0, streamsize - ChunkSize)
            lhash += SumChunk(input, buffer)
            input.Close()

            ' GetBits exposes the magnitude of the Decimal; elements 0 and 1 are
            ' its low 64 bits, i.e. the total modulo 2^64. This is only valid
            ' because every addend is unsigned, so the total is never negative.
            Dim lhashArray() As Integer = Decimal.GetBits(lhash)

            Dim result(7) As Byte

            Array.Copy(BitConverter.GetBytes(lhashArray(0)), 0, result, 0, 4)
            Array.Copy(BitConverter.GetBytes(lhashArray(1)), 0, result, 4, 4)

            Array.Reverse(result)
            Return result
        End Function

        ' Sums up to ChunkSize bytes from the current position as unsigned
        ' 64-bit words, stopping early at end of stream.
        Private Shared Function SumChunk(ByVal input As Stream, ByVal buffer As Byte()) As System.Decimal
            Dim total As System.Decimal = 0
            Dim i As Integer = 0
            While i < ChunkSize \ WordSize AndAlso ReadWord(input, buffer)
                total += BitConverter.ToUInt64(buffer, 0)
                i += 1
            End While
            Return total
        End Function

        ' Fills the whole buffer from the stream. Stream.Read may deliver fewer
        ' bytes than requested, so loop until the word is complete; returns
        ' False when the stream ends first, so no stale bytes are ever summed.
        Private Shared Function ReadWord(ByVal input As Stream, ByVal buffer As Byte()) As Boolean
            Dim filled As Integer = 0
            While filled < buffer.Length
                Dim got As Integer = input.Read(buffer, filled, buffer.Length - filled)
                If got <= 0 Then
                    Return False
                End If
                filled += got
            End While
            Return True
        End Function

        ' Formats bytes as lower-case hex, two digits per byte.
        Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String
            Dim hexBuilder As New StringBuilder()
            For i As Integer = 0 To bytes.Length - 1
                hexBuilder.Append(bytes(i).ToString("x2"))
            Next
            Return hexBuilder.ToString()
        End Function

        ' Hashes the file at Path and writes the result to the console.
        Public Sub New(ByVal Path As String)
            Dim moviehash As Byte() = ComputeMovieHash(Path)
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash))
        End Sub
    End Class
End Namespace

Python

import struct, os

def hashFile(name):
    """Return the OpenSubtitles movie hash of the file `name`.

    The hash is the file size plus the sum of the little-endian 64-bit words
    of the first and of the last 64 KiB of the file, modulo 2**64, formatted
    as 16 lower-case hex digits.

    Returns the string "SizeError" for files smaller than 128 KiB and the
    string "IOError" when the file cannot be opened or read.
    """
    try:
        # '<' pins little-endian, standard-size decoding so the result does
        # not depend on the byte order of the machine running the code.
        longlongformat = '<q'  # long long
        bytesize = struct.calcsize(longlongformat)

        # 'with' closes the file on every path, including the early return.
        with open(name, "rb") as f:
            filesize = os.path.getsize(name)
            hash = filesize

            if filesize < 65536 * 2:
                return "SizeError"

            # First 64 KiB, then last 64 KiB.
            for offset in (0, max(0, filesize - 65536)):
                f.seek(offset, 0)
                # '//' keeps the count an int on both Python 2 and Python 3.
                for x in range(65536 // bytesize):
                    buffer = f.read(bytesize)
                    (l_value,) = struct.unpack(longlongformat, buffer)
                    hash += l_value
                    hash = hash & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number

        returnedhash = "%016x" % hash
        return returnedhash

    except(IOError):
        return "IOError"

Delphi

This is just a quick conversion of Gabest's original C code. Anyone who can come up with a cleaner code, please feel free to do so and post here.

{ Returns the Gabest / OpenSubtitles hash of the file fname as 16 hex digits:
  file size + sum of the 64-bit words of the first and of the last 64 KiB,
  modulo 2^64. Only complete 8-byte words are summed. Returns '' when the
  file does not exist. }
{$IFOPT Q+}{$DEFINE GABEST_RESTORE_OVERFLOW_CHECKS}{$Q-}{$ENDIF}
function CalcGabestHash(const fname: string): string;
const
  ChunkBytes = 65536;
  ChunkWords = ChunkBytes div 8;
var
  i         : integer;
  tmp       : Int64;   // read directly; independent of SizeOf(Char)
  hash      : Int64;
  tailStart : Int64;

  aStream: TFileStream;
begin
  result := '';
  if not FileExists(fname) then Exit;

  aStream := TFileStream.Create(fName, fmOpenRead or fmShareDenyNone);
  try
    hash := aStream.Size;

    // first 64 KiB; stop at the first incomplete word
    i := 0;
    while (i < ChunkWords) and (aStream.Read(tmp, SizeOf(tmp)) = SizeOf(tmp)) do
    begin
      hash := hash + tmp;   // wraps modulo 2^64 (overflow checks are off here)
      i := i + 1;
    end;

    // last 64 KiB; clamp to the start for files shorter than one chunk
    tailStart := aStream.Size - ChunkBytes;
    if tailStart < 0 then tailStart := 0;
    aStream.Position := tailStart;

    i := 0;
    while (i < ChunkWords) and (aStream.Read(tmp, SizeOf(tmp)) = SizeOf(tmp)) do
    begin
      hash := hash + tmp;
      i := i + 1;
    end;
  finally
    // release the file handle even when a read raises
    aStream.Free;
  end;
  result := Format('%.16x',[hash]);
end;
{$IFDEF GABEST_RESTORE_OVERFLOW_CHECKS}{$Q+}{$UNDEF GABEST_RESTORE_OVERFLOW_CHECKS}{$ENDIF}

RealBasic

Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles

    // Fragment of a larger routine: computes either the fast video hash or an
    // MD5 digest, depending on `routine`, and stores the result in `myhash`.
    // NOTE(review): f, routine, bytesizestr and myhash are not declared here;
    // presumably they belong to the enclosing method or window - confirm.
    dim b as BinaryStream
    dim mb as MemoryBlock
    
    dim hash,bytesize as UINT64
    dim i, x, chunksize, filelen, difference as integer
    
    hash = 0 //Reset Hash
    difference = 0
    
    if f <> nil and f.Exists then
      b= f.OpenAsBinaryFile
      // the hash starts out as the file length
      hash = b.Length
      bytesize = b.Length
      bytesizestr = str(bytesize)
      
      // video hash is only computed for files of at least one 64 KiB chunk
      if bytesize >= 65536 and routine = "video" then
        chunksize = 65536
        // first 64 KiB, summed as little-endian 64-bit words
        mb = b.Read(65536)
        mb.LittleEndian = True
        
        for i= 0 to chunksize -1 step 8
          hash = hash+ mb.UINT64Value(i)
        next
        
        // last 64 KiB, summed the same way
        b.Position = max(b.Length-chunksize, 0)
        mb= b.Read(chunksize)
        mb.LittleEndian = True
        
        for i= 0 to chunksize -1 step 8
          hash = hash+ mb.UINT64Value(i)
        next

        
        // NOTE(review): hex() output does not appear to be padded to 16
        // digits here - confirm whether callers need leading zeros
        myhash = Lowercase(str(hex(hash)))
        
      elseif routine = "subtitle" then
        
        // subtitles: MD5 of the whole file, rendered as lower-case hex
        dim c,result as string
        mb = md5(b.Read(b.Length))
        mb.LittleEndian = True
        
        for i = 0 to mb.size-1
          x = mb.byte( i )
          c = right( "00"+hex( x ), 2 )
          result = result + c
        next
        result = lowercase( result )
        myhash = result
        
      end
      // NOTE(review): the outer "if f <> nil" is not closed within this fragment

PHP 4/5

/**
 * Computes the OpenSubtitles movie hash of a file.
 *
 * The 64-bit hash is kept as four 16-bit limbs (index 0 = least significant)
 * so that it also works where PHP integers are only 32 bits wide.
 *
 * @param string $file path of the video file
 * @return string|false 16 hex digits, or false when the file cannot be opened
 */
function OpenSubtitlesHash($file)
{
    $handle = fopen($file, "rb");
    if ($handle === false)
    {
        return false;
    }
    $fsize = filesize($file);
    
    $hash = array(3 => 0, 
                  2 => 0, 
                  1 => ($fsize >> 16) & 0xFFFF, 
                  0 => $fsize & 0xFFFF);
    
    // Files of 4 GiB and more have size bits above bit 31; they can only be
    // represented (and shifted) where PHP integers are 64 bits wide.
    if (PHP_INT_SIZE >= 8)
    {
        $hash[2] = ($fsize >> 32) & 0xFFFF;
        $hash[3] = ($fsize >> 48) & 0xFFFF;
    }
        
    // first 64 KiB = 8192 words of 8 bytes
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }
    
    // last 64 KiB, clamped to the start of the file
    $offset = $fsize - 65536;
    fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET);
    
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);         
    }
    
    fclose($handle);
    return UINT64FormatHex($hash);
}

/**
 * Reads one little-endian 64-bit word from $handle.
 *
 * @param resource $handle open file handle
 * @return array four 16-bit limbs, index 0 = least significant; all zero when
 *               fewer than 8 bytes are left, so an incomplete trailing word
 *               contributes nothing to the hash
 */
function ReadUINT64($handle)
{
    $raw = fread($handle, 8);
    if ($raw === false || strlen($raw) < 8)
    {
        // unpack() would fail on a short string
        return array(0 => 0, 1 => 0, 2 => 0, 3 => 0);
    }
    $u = unpack("va/vb/vc/vd", $raw);
    return array(0 => $u["a"], 1 => $u["b"], 2 => $u["c"], 3 => $u["d"]);
}

/**
 * Adds two 64-bit numbers given as four 16-bit limbs each (index 0 = least
 * significant). The carry out of the top limb is dropped, so the sum wraps
 * modulo 2^64.
 *
 * @param array $a first operand
 * @param array $b second operand
 * @return array the sum, in the same limb layout
 */
function AddUINT64($a, $b)
{
    $sum = array();
    $carry = 0;

    foreach (array(0, 1, 2, 3) as $limb)
    {
        $total = $a[$limb] + $b[$limb] + $carry;
        $overflowed = $total > 0xffff;

        // keep the low 16 bits and pass the overflow on to the next limb
        $sum[$limb] = $overflowed ? ($total & 0xffff) : $total;
        $carry = $overflowed ? 1 : 0;
    }

    return $sum;
}

/**
 * Formats a four-limb 64-bit number (index 0 = least significant limb) as 16
 * lower-case hex digits, most significant limb first.
 *
 * @param array $n the number as four 16-bit limbs
 * @return string 16 hex digits
 */
function UINT64FormatHex($n)
{
    $hex = "";
    for ($limb = 3; $limb >= 0; $limb--)
    {
        $hex .= sprintf("%04x", $n[$limb]);
    }
    return $hex;
}

Perl

#!/usr/bin/perl
use strict;
use warnings;

# Demo: print the hash of the reference test file.
print OpenSubtitlesHash('breakdance.avi');

# Computes the OpenSubtitles movie hash of a file.
# The 64-bit hash is kept as four 16-bit limbs (index 0 = least significant)
# so it also works on perls without 64-bit integers.
# Argument: path of the video file. Returns: 16 hex digits.
# Dies when the file name is missing or the file cannot be opened/seeked.
sub OpenSubtitlesHash {
        my $filename = shift or die("Need video filename");

        open my $handle, "<", $filename or die $!;
        binmode $handle;

        my $fsize = -s $filename;

        # Split the size into 32-bit halves arithmetically: files of 4 GiB and
        # more have bits above bit 31, and plain division keeps them intact
        # even where the size is held as a floating-point value.
        my $high = int($fsize / 4294967296);
        my $low  = $fsize - $high * 4294967296;

        my $hash = [$low & 0xFFFF, ($low >> 16) & 0xFFFF,
                    $high & 0xFFFF, ($high >> 16) & 0xFFFF];

        # first 64 KiB = 8192 words of 8 bytes
        $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

        # last 64 KiB, clamped to the start of the file
        my $offset = $fsize - 65536;
        seek($handle, $offset > 0 ? $offset : 0, 0) or die $!;

        $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

        close $handle or die $!;
        return UINT64FormatHex($hash);
}

# Reads one little-endian 64-bit word from the file handle in $_[0].
# Returns a reference to four 16-bit limbs (index 0 = least significant).
# When fewer than 8 bytes are left the result is all zeros, so an incomplete
# trailing word adds nothing to the hash and no undef values reach AddUINT64.
sub ReadUINT64 {
        my $got = read($_[0], my $u, 8);
        return [0, 0, 0, 0] unless defined $got && $got == 8;
        return [unpack("vvvv", $u)];
}

# Adds two 64-bit numbers, each passed as a reference to four 16-bit limbs
# (index 0 = least significant). The carry out of the top limb is dropped, so
# the sum wraps modulo 2^64. Returns a reference to the four limbs of the sum.
sub AddUINT64 {
    my ($left, $right) = @_;
    my @sum;
    my $carry = 0;

    foreach my $limb (0 .. 3) {
        my $total = $left->[$limb] + $right->[$limb] + $carry;
        $carry = $total > 0xffff ? 1 : 0;
        # keep only the low 16 bits once the limb has overflowed
        push @sum, $carry ? ($total & 0xffff) : $total;
    }

    return \@sum;
}

# Formats a four-limb 64-bit number (array reference, index 0 = least
# significant limb) as 16 lower-case hex digits, most significant limb first.
sub UINT64FormatHex {
    my ($limbs) = @_;
    return join '', map { sprintf("%04x", $limbs->[$_]) } (3, 2, 1, 0);
}

Ruby

This is a quick translation/transliteration of the Perl script.

# Computes the OpenSubtitles movie hash. The 64-bit hash is handled as four
# 16-bit limbs (index 0 = least significant), mirroring the Perl version.
class Hasher

  # Returns the hash of +filename+ as 16 lower-case hex digits.
  # Raises when no filename is given or the file cannot be read.
  def open_subtitles_hash(filename)
    raise "Need video filename" unless filename

    fsize = File.size(filename)

    # all four limbs of the size, so files of 4 GiB and more hash correctly
    hash = [0, 16, 32, 48].map { |shift| (fsize >> shift) & 0xffff }

    # binary mode avoids newline translation; the block form closes the file
    # even when reading raises
    File.open(filename, 'rb') do |fh|
      # first 64 KiB = 8192 words of 8 bytes
      8192.times { hash = add_unit_64(hash, read_uint_64(fh)) }

      # last 64 KiB, clamped to the start of the file
      offset = fsize - 65536
      fh.seek([0,offset].max, 0)

      8192.times { hash = add_unit_64(hash, read_uint_64(fh)) }
    end

    return uint_64_format_hex(hash)
  end

  # Reads one little-endian 64-bit word as four 16-bit limbs. Returns zeros
  # when fewer than 8 bytes are left (IO#read gives nil at end of file), so
  # an incomplete trailing word adds nothing to the hash.
  def read_uint_64(stream)
    word = stream.read(8)
    return [0, 0, 0, 0] if word.nil? || word.size < 8
    word.unpack("vvvv")
  end

  # Adds two four-limb numbers; the carry out of the top limb is dropped, so
  # the sum wraps modulo 2**64.
  def add_unit_64(hash, input)
    res = [0,0,0,0]
    carry = 0

    hash.zip(input).each_with_index do |(h,i),n|
      sum = h + i + carry
      if sum > 0xffff
        res[n] += sum & 0xffff
        carry = 1
      else
        res[n] += sum
        carry = 0
      end
    end
    return res
  end

  # Formats four limbs as 16 hex digits, most significant limb first.
  def uint_64_format_hex(hash)
    sprintf("%04x%04x%04x%04x", *hash.reverse)
  end
end

# Self-test, run only when this file is executed directly: checks Hasher
# against the published hash of the reference file breakdance.avi.
if $0 == __FILE__
  require 'test/unit'

  class HashTester < Test::Unit::TestCase
    def setup
      @h = Hasher.new
    end

    def test_test_file_hash
      expected = "8e245d9679d31e12"
      actual = @h.open_subtitles_hash('breakdance.avi')
      assert_equal(expected, actual)
    end
  end
end


Another more "rubyesque" implementation.

# Computes the OpenSubtitles movie hash with native 64-bit unpacking.
module MovieHasher

  CHUNK_SIZE = 64 * 1024 # in bytes

  # Returns the hash of +filename+ as 16 lower-case hex digits: file size plus
  # the sum of the little-endian 64-bit words of the first and of the last
  # 64 KiB, modulo 2**64. Trailing bytes that do not fill a word are ignored.
  def self.compute_hash(filename)
    filesize = File.size(filename)
    hash = filesize

    # Read 64 kbytes, divide up into 64 bits and add each
    # to hash. Do for beginning and end of file.
    File.open(filename, 'rb') do |f|    
      # Q< = unsigned 64 bit, explicitly little-endian so the result does not
      # depend on the host; read returns nil at end of file, hence the || ''
      (f.read(CHUNK_SIZE) || '').unpack("Q<*").each do |n|
        hash = hash + n & 0xffffffffffffffff # to remain as 64 bit number
      end

      f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)

      # And again for the end of the file
      (f.read(CHUNK_SIZE) || '').unpack("Q<*").each do |n|
        hash = hash + n & 0xffffffffffffffff
      end
    end

    sprintf("%016x", hash)
  end
end

# Self-test, run only when this file is executed directly: checks
# MovieHasher.compute_hash against the two published reference hashes.
if $0 == __FILE__
  require 'test/unit'

  class MovieHasherTest < Test::Unit::TestCase
    def test_compute_hash
      actual = MovieHasher::compute_hash('breakdance.avi')
      assert_equal("8e245d9679d31e12", actual)
    end

    def test_compute_hash_large_file
      actual = MovieHasher::compute_hash('dummy.bin')
      assert_equal("61f7751fc2a72bfb", actual)
    end
  end
end

Haskell

import IO(bracket)
import System.Environment(getArgs)
import System.IO(openBinaryFile,hClose,hFileSize,hSeek,IOMode(ReadMode),SeekMode(AbsoluteSeek,SeekFromEnd))
import qualified Data.ByteString.Lazy as L(hGet,unpack)
import Data.Binary.Get(runGet,getWord64le)
import Data.Binary.Put(runPut,putWord64le)
import Data.Word(Word64)
import Control.Monad(foldM)
import Data.Bits.Utils(w82s)
import Data.Hex(hex)

-- | Movie hash of a file: file size plus the little-endian 64-bit words of the
-- first and of the last 64 KiB, added up in 'Word64' (so the sum wraps).
-- The handle is closed by 'bracket' on every exit path.
shortsum :: FilePath -> IO Word64
shortsum filename = bracket (openBinaryFile filename ReadMode) hClose $ \h -> do
  fs <- hFileSize h
  hSeek h AbsoluteSeek 0
  begin <- L.hGet h chunksize
  hSeek h SeekFromEnd (negate (toInteger chunksize))
  end <- L.hGet h chunksize
  -- fold the tail chunk onto the file size first, then the head chunk onto that
  let sizePlusTail = runGet (chunksum (fromInteger fs)) end
  return (runGet (chunksum sizePlusTail) begin)
  where
    chunksize = 0x10000
    -- adds chunksize/8 consecutive words to the starting value n
    chunksum n = foldM (\a _ -> fmap (+ a) getWord64le) n [1 .. (chunksize `div` 8)]

-- | Prints the hash of the file named by the first command-line argument.
-- Fails with a usage error (instead of crashing inside 'head') when no
-- argument is given.
main :: IO ()
main = do
  args <- getArgs
  case args of
    [] -> ioError (userError "usage: pass the path of the video file as the first argument")
    (fn:_) -> do
      p <- shortsum fn
      putStrLn $ "The hash of file " ++ fn ++ ": " ++ (hex $ w82s $ reverse (L.unpack $ runPut $ putWord64le p))

AutoIT

Forum entry

#cs
	Hash code is based on Media Player Classic. It calculates: size + 64bit
	checksum of the first and last 64k (even if they overlap because the file is smaller than 128k).
	Authors: Authenticity & Emanuel "Datenshi" Lindgren @ AutoIT Forums.
        AutoIT v3.3.2.0
#ce
; Computes the movie hash of $sFileName: file size plus the 64-bit words of the
; first and of the last 64 KiB.
; Success: returns the hash formatted by _HEX, @error = 0.
; Failure: returns 0 and sets @error to 1 (file could not be opened) or
;          2 (file smaller than two chunks, i.e. 128 KiB).
Func _Compute_Hash($sFileName)
	Local $hFile, $tRet, $tTmp, $iFileSize, $iRead, $iChunk, $iI
	; mode 16 = binary read
	$hFile = FileOpen($sFileName, 16)
	; FileOpen signals failure with -1, which "Not $hFile" does not detect
	If $hFile = -1 Then Return SetError(1, 0, 0)
	$iFileSize = FileGetSize($sFileName)
	$iChunk = 65536
	If $iFileSize < $iChunk * 2 Then
		FileClose($hFile)
		Return SetError(2, 0, 0)
	EndIf
	; $tRet accumulates the hash, $tTmp holds the word just read
	$tRet = DllStructCreate("uint64")
	$tTmp = DllStructCreate("uint64")
	DllStructSetData($tRet, 1, $iFileSize)
	; first 64 KiB
	For $iI = 0 To ($iChunk / 8) - 1
		DllStructSetData($tTmp, 1, FileRead($hFile, 8))
		DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
	Next
	; last 64 KiB
	FileSetPos($hFile, $iFileSize - $iChunk, 0)
	For $iI = 0 To ($iChunk / 8) - 1
		DllStructSetData($tTmp, 1, FileRead($hFile, 8))
		DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1))
	Next
	FileClose($hFile)
	Return SetError(0, 0, _HEX(DllStructGetData($tRet, 1)))
EndFunc
; Formats $iValue as two 8-digit hex groups: the value divided by 2^32
; (4294967296) followed by the value itself.
; NOTE(review): the "#" flag in the first conversion presumably adds a "0x"
; prefix to the output - confirm that callers expect it.
Func _HEX($iValue)
	Return StringFormat("%#.8x%.8x", $iValue / 4294967296, $iValue)
EndFunc

FoxPro

* Main program: computes the movie hash of the file passed in cfile and
* returns it as a hex string built by buildhex.
* The running hash is kept in cret as an 8-character string, most significant
* byte first; each word read from the file is byte-reversed to that order
* before adint64 adds it.
* When SET TALK is ON, intermediate values are echoed for debugging.
PARAMETERS cfile
PRIVATE ALL 

*******
* enviroment setup
*******
cret=''
glTalk=(SET("TALK")="ON")

* fall back to the reference test file when no file name was passed
IF vartype(cfile)<>'C'
	cfile='breakdance.avi'
ENDIF



IF glTalk
	? cfile
	? cfile=''
	? LEN(cfile)
endif


* open the file and take its size by seeking to the end (origin 2)
* NOTE(review): the FOPEN result is not checked before it is used
nfile=FOPEN(cfile)
nsize=FSEEK(nfile,0,2)

IF gltalk
	? cfile	
	? 'size?>'
	?? nsize
endif
* back to the start of the file (origin 0)
FSEEK(nfile,0,0)

******
* length reencode to 64 uint
*****
* hashsize yields the size as a byte string; left-pad it with CHR(0) to 8 bytes
chash=hashsize(nsize)
cempty=chr(0)
cret=''
IF LEN(chash)<8
	FOR i=1 TO 8-LEN(chash)
		cret=cret+cempty
	ENDFOR
ENDIF
cret=cret+chash
* nSum counts the bytes read; it is only reported in the TALK output
nSum=0

*******
* first 64kb
******


	* 8192 words of 8 bytes each
	FOR i=1 TO 8192
		cpom=FREAD(nfile,8)
		cpom=reverse(cpom)
		nSum=nSum+LEN(cpom)
		IF gltalk
				do buildhex WITH cret
				?? '+'
				DO buildhex WITH cpom
				? '='
		ENDIF
		cret=adint64(cret,cpom)
	ENDFOR

*******
* last 64kb
*******

	* seek to 65536 bytes before the end of the file (origin 2)
	* NOTE(review): presumably not valid for files shorter than 64 KiB - confirm
	FSEEK(nfile,-65536,2)
	FOR i=1 TO 8192
		cpom=FREAD(nfile,8)
		cpom=reverse(cpom)
		cret=adint64(cret,cpom)
		nSum=nSum+LEN(cpom)
	ENDFOR
FCLOSE(nfile)

****
* build hexa
****
IF gltalk

	DO buildhex WITH cret
	?
	? 'Spocital som'
	?? nSum
ENDIF
RETURN buildhex(cret)

* Returns cstring with its characters in reverse order.
FUNCTION reverse
PARAMETERS cstring
PRIVATE ALL 
cret=''
* walk the input left to right and prepend each character
FOR i=1 TO LEN(cstring)
	cret=SUBSTR(cstring,i,1)+cret
ENDFOR
RETURN cret

* Returns the bytes of cstring as lower-case hex, two digits per byte.
* When SET TALK is ON each byte is also echoed, followed by ':'.
* The second parameter (lkam) is not used.
FUNCTION buildhex
PARAMETERS cstring,lkam
PRIVATE ALL 
gcTalk=SET("TALK")
cret=''
FOR i=1 TO LEN(cstring)
	* left-pad single-digit values with '0'
	cpom=PADL(dec2basx(ASC(SUBSTR(cstring,i,1)),16),2,'0')
	cret=cret+cpom
	IF gcTALK="ON"
		?? cpom
		?? ':'
	ENDIF
ENDFOR
RETURN cret

* Adds two 64-bit numbers given as 8-character strings, most significant byte
* first, and returns the sum in the same form. The carry out of the first
* (most significant) byte is dropped.
FUNCTION adint64
PARAMETERS cstring1,cstring2
PRIVATE ALL 
ncarry=0
cret=''
* work from the least significant byte (position 8) up to the most significant
FOR i=8 TO 1 STEP -1
	nout=ncarry+ASC(SUBSTR(cstring1,i,1))+ASC(SUBSTR(cstring2,i,1))
	ncarry=INT(nout/256)
	nout=MOD(nout,256)
	* prepend, so the finished string is in most-significant-first order
	cret=CHR(nout)+cret
ENDFOR
RETURN cret


* Converts the non-negative whole number ncislo into a byte string in base
* 256, most significant byte first, without leading zero bytes (0 gives a
* single CHR(0)).
FUNCTION hashsize

PARAMETERS ncislo
PRIVATE ALL 
cret=''
DO WHILE .t.
	npom=INT(ncislo/256)
	* the remainder is the next byte; prepend it so the order comes out right
	cret=CHR(ncislo-npom*256)+cret
	ncislo=npom
	IF ncislo=0
		EXIT
	ENDIF
ENDDO
RETURN cret


*..............................................................................
*   Function: DEC2BASX
*    Purpose:  Convert whole number 0-?, to base 2-16 
*
* Parameters: nTempNum - number to convert (0-9007199254740992)
*             base    - base to convert to i.e., 2 4 8 16...
*    returns: string
*      Usage:  cresult=Dec2BasX(nParm1, nParm2)
*              STORE Dec2BasX(255, 16) TO cMyString  &&... cMyString contains 'ff'
*..............................................................................
* Converts the whole number nTempNum to a string in base nNewBase (2-16),
* using lower-case letters a-f for the digits 10-15.
FUNCTION dec2basx
PARAMETERS nTempNum, nNewBase

STORE 0 TO nWorkVal,;
   remainder,;
   dividend,;
   nextnum,;
   digit

nWorkVal = nTempNum  
ret_str = ''

* peel off the least significant digit on every pass and prepend it
DO WHILE .T.
   digit = MOD(nWorkVal, nNewBase)
   dividend = nWorkVal / nNewBase
   nWorkVal = INT(dividend)

   IF INLIST(digit, 10, 11, 12, 13, 14, 15)
      * 10..15 map to 'a'..'f'
      ret_str = SUBSTR('abcdef', digit - 9, 1) + ret_str
   ELSE
      ret_str = LTRIM(STR(digit)) + ret_str
   ENDIF

   IF nWorkVal = 0
      EXIT
   ENDIF
ENDDO
RETURN ret_str

Powershell 2.0

You can use GetHash?.dll.

http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll

Use Example:

	Add-Type -Path "GetHash.dll"

	# Returns the movie hash of the file at $path as a hexadecimal string,
	# using the ComputeHash and ToHexadecimal helpers of GetHash.dll.
	function MovieHash([string]$path) {
		[GetHash.Main]::ToHexadecimal([GetHash.Main]::ComputeHash($path))
	}

	MovieHash $filename 

or without using GetHash?.dll:

$dataLength = 65536

# Adds two unsigned 64-bit numbers modulo 2^64. The addition is carried out in
# Decimal and then reduced, so it cannot overflow.
function LongSum([UInt64]$a, [UInt64]$b) { 
	$modulus = [Decimal]([UInt64]::MaxValue) + 1
	$total = [Decimal]$a + $b
	[UInt64]($total % $modulus)
}

# Sums up to $dataLength bytes from the current position of $stream as
# unsigned 64-bit words (modulo 2^64) and returns the sum. Stops at end of
# stream; an incomplete trailing word is ignored.
function StreamHash([IO.Stream]$stream) {
	$hashLength = 8
	[UInt64]$lhash = 0
	[byte[]]$buffer = New-Object byte[] $hashLength
	$wordCount = $dataLength / $hashLength
	for ($i = 0; $i -lt $wordCount; $i++) {
		# Stream.Read may deliver fewer bytes than requested, so keep reading
		# until the word is complete; otherwise stale bytes from the previous
		# word would be summed.
		$filled = 0
		while ($filled -lt $hashLength) {
			$got = $stream.Read($buffer, $filled, $hashLength - $filled)
			if ($got -le 0) { break }
			$filled += $got
		}
		if ($filled -lt $hashLength) { break }
		$lhash = LongSum $lhash ([BitConverter]::ToUInt64($buffer,0))
	}
	$lhash
}

# Returns the movie hash of the file at $path as 16 zero-padded, lower-case
# hex digits: file length plus the word sums of the first and of the last
# $dataLength bytes, modulo 2^64.
function MovieHash([string]$path) {
	$stream = $null
	try { 
		$stream = [IO.File]::OpenRead($path) 
		[UInt64]$lhash = $stream.Length
		$lhash = LongSum $lhash (StreamHash $stream)
		$stream.Position = [Math]::Max(0L, $stream.Length - $dataLength)
		$lhash = LongSum $lhash (StreamHash $stream)
		# x16 keeps leading zeros, so the hash is always 16 digits long
		"{0:x16}" -f $lhash
	}
	finally {
		# $stream is still null when OpenRead itself failed
		if ($stream -ne $null) { $stream.Close() }
	}
}

MovieHash $filename 

MASM

; Calc_Hash - computes the movie hash of a file.
;   pFile : pointer to the zero-terminated file name
;   pBuf  : buffer that receives the hash text (formatted with HashFormat)
; Returns: eax = file size on success, INVALID_HANDLE_VALUE when the file
;          cannot be opened.
; The 64-bit hash is accumulated in ebx:edx (high:low).
; Limitation: files are expected to be at least 64 KiB long.
Calc_Hash proc uses esi ebx edx pFile:dword, pBuf:dword

	LOCAL hFile:dword, fSize:dword, NBR:dword, pMem:dword
	
	; read access is all that hashing needs
	invoke CreateFile,pFile,GENERIC_READ,0,0,OPEN_EXISTING,0,0
	mov hFile,eax	
	cmp eax,INVALID_HANDLE_VALUE
	jz @Error
	
	; seeking to the end yields the file size in eax
	invoke SetFilePointer,hFile,0,NULL,FILE_END
	mov fSize,eax
	push eax			; keep the file size on the stack
	
	; zero-initialised buffer for both 64 KiB chunks
	invoke GlobalAlloc,GPTR,131072
	mov pMem,eax
	
	; first chunk -> pMem[0..65535]
	invoke SetFilePointer,hFile,0,NULL,FILE_BEGIN
	invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
	
	sub fSize,65536
	add pMem,65536
	
	; last chunk -> pMem[65536..131071]
	invoke SetFilePointer,hFile,fSize,NULL,FILE_BEGIN
	invoke ReadFile,hFile,pMem,65536,addr NBR,NULL
	
	sub pMem,65536
	mov esi,pMem
	mov ecx,131072
	pop eax
	mov edx,eax			; low dword of the hash starts as the file size
	xor ebx,ebx			; high dword must start at zero, not at whatever ebx held
	push eax

	; add every 64-bit word of the buffer to ebx:edx
	@@:
	add edx,[esi]
	adc ebx,[esi+4]
	add esi,8
	sub ecx,8
	jnz @B
	
	; extra wsprintf arguments are pushed by hand: high dword first in the
	; argument list, then the low dword
	push edx
	push ebx
	invoke wsprintf,pBuf,addr HashFormat
	pop eax
	pop eax
	
	invoke CloseHandle,hFile
	invoke GlobalFree,pMem
	pop eax				; file size is the return value
	
	@Error: ; If error eax returns (INVALID_HANDLE_VALUE)
	
	; Hash value is copied to pBuf
	; eax returns Movie Filesize
	
	ret
	
Calc_Hash endp

Objective-C

This is an implementation of the hash in Objective-C for Mac, by subsmarine.com

OSHashAlgorithm.m

#import "OSHashAlgorithm.h"


@implementation OSHashAlgorithm

// Sums the complete 64-bit words stored in `data`, interpreting them as
// little-endian regardless of the host byte order. Trailing bytes that do not
// fill a whole word are ignored; nil data sums to 0.
static uint64_t OSHashSumOfWords(NSData *data)
{
	const uint64_t *words = (const uint64_t *)[data bytes];
	NSUInteger wordCount = [data length] / sizeof(uint64_t);
	uint64_t sum = 0;
	for( NSUInteger i=0; i< wordCount; i++ )
		sum += CFSwapInt64LittleToHost(words[i]);
	return sum;
}

// Returns `hash` as 16 zero-padded, lower-case hex digits.
+(NSString*)stringForHash:(uint64_t)hash
{
	// stringWithFormat: already returns an autoreleased object; sending it
	// another autorelease would over-release it.
	return [NSString stringWithFormat:@"%016llx", (unsigned long long)hash];
}
// Hashes the file at `path`. Both fields of the result are 0 when the file
// cannot be opened or is smaller than 64 KiB.
+(VideoHash)hashForPath:(NSString*)path
{
	NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
	VideoHash hash = [OSHashAlgorithm hashForFile:readFile];
	[readFile closeFile];
	return hash;	
}
// Hashes the file at `url`. Both fields of the result are 0 when the file
// cannot be opened or is smaller than 64 KiB.
+(VideoHash)hashForURL:(NSURL*)url
{
	NSFileHandle *readFile = [NSFileHandle fileHandleForReadingFromURL:url error:NULL];
	VideoHash hash = [OSHashAlgorithm hashForFile:readFile];
	// close the handle here as well, matching hashForPath:
	[readFile closeFile];
	return hash;
}

// Hashes an already opened file: file size plus the 64-bit words of the first
// and of the last 64 KiB, modulo 2^64. The handle's offset is changed and the
// handle is left open. Both fields of the result are 0 when `handle` is nil
// or the file is smaller than 64 KiB.
+(VideoHash)hashForFile:(NSFileHandle*)handle
{
	VideoHash retHash;
	retHash.fileHash =0;
	retHash.fileSize =0;
	
	if( handle == nil )
		return retHash;
	
	const NSUInteger CHUNK_SIZE=65536;
	
	[handle seekToEndOfFile];
	unsigned long long fileSize = [handle offsetInFile];
	if(fileSize < CHUNK_SIZE )
		return retHash;
	
	// Start from offset 0 explicitly so the result does not depend on where
	// the caller left the handle.
	[handle seekToFileOffset:0];
	NSData *fileDataBegin = [handle readDataOfLength:CHUNK_SIZE];
	
	// fileSize >= CHUNK_SIZE is guaranteed above, so this cannot underflow
	[handle seekToFileOffset:fileSize - CHUNK_SIZE];
	NSData *fileDataEnd = [handle readDataOfLength:CHUNK_SIZE];
	
	//
	// Calculate hash: file size + beginning block + ending block.
	// The word count comes from the data actually read, so a short read can
	// never make the loop run past the end of the buffer.
	//
	uint64_t hash = fileSize;
	hash += OSHashSumOfWords(fileDataBegin);
	hash += OSHashSumOfWords(fileDataEnd);
	
	retHash.fileHash = hash;
	retHash.fileSize = fileSize;
	
	return retHash;
	
}

@end

OSHashAlgorithm.h

#import <Cocoa/Cocoa.h>

/// Result of hashing a video file.
typedef struct 
{
	/// The 64-bit movie hash.
	uint64_t fileHash;
	/// Size of the hashed file, in bytes.
	uint64_t fileSize;
} VideoHash;

/// Class methods that compute the OpenSubtitles movie hash of a file.
@interface OSHashAlgorithm : NSObject {

}
/// Hashes the file at the given path.
+(VideoHash)hashForPath:(NSString*)path;
/// Hashes the file at the given URL.
+(VideoHash)hashForURL:(NSURL*)url;
/// Hashes an already opened file handle.
+(VideoHash)hashForFile:(NSFileHandle*)handle;
/// Returns the hexadecimal string form of a hash value.
+(NSString*)stringForHash:(uint64_t)hash;

@end

Vala

/**
 * Computes the OpenSubtitles movie hash of a file: file size plus the
 * little-endian 64-bit words of the first and of the last 64 kB, with
 * wrap-around arithmetic. Only complete 8-byte words are summed.
 *
 * Aborts via error() when the file cannot be queried or read.
 */
public uint64 hash(File file) {
	try {
		//get filesize and add it to hash
		var file_info = file.query_info("*", FileQueryInfoFlags.NONE);
		int64 size = file_info.get_size();
		uint64 h = (uint64) size;
		
		//files shorter than 64kB are hashed over their whole length
		int64 chunk = size < 65536 ? size : 65536;
		int64 words = chunk / 8;
		
		//add first 64kB of file to hash
		var dis = new DataInputStream(file.read());
		dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		for(int64 i=0; i<words; i++) {
			h += dis.read_uint64();
		}
		dis.close();
		
		//add last 64kB of file to hash
		dis = new DataInputStream(file.read());
		dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
		//size - chunk is never negative, so the skip count is always valid
		dis.skip((size_t)(size - chunk));
		for(int64 i=0; i<words; i++) {
			h += dis.read_uint64();
		}
		dis.close();
		
		return h;
	} catch (Error e) {
        error("%s", e.message);
    }
}

/**
 * Prints the hashes of the two reference files, one per line.
 * Returns 1 as soon as one of them is missing, 0 otherwise.
 */
int main () {
    string[] paths = { "breakdance.avi", "dummy.bin" };

    foreach (unowned string path in paths) {
        var file = File.new_for_path (path);
        if (!file.query_exists ()) {
            stderr.printf ("File '%s' doesn't exist.\n", file.get_path ());
            return 1;
        }
        stdout.printf ("%016llx\n", hash (file));
    }

    return 0;
}

Build with: valac --pkg gio-2.0 hash.vala

Attachments (1)

Download all attachments as: .zip