[[PageOutline(2, Programming Languages)]] !OpenSubtitles.org uses a special hash function to match subtitle files against movie files. The hash does not depend on the file name of the movie file. Read about the basics of [http://en.wikipedia.org/wiki/Hash_function hashing functions]. The hash code is based on [http://sourceforge.net/projects/guliverkli/ Media Player Classic]. In natural language it calculates: file size + 64-bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). On opensubtitles.org the movie file size is limited to '''9000000000 > $moviebytesize > 131072 bytes'''; if there is any reason to change these limits, let us know. The licence of the [http://guliverkli.svn.sourceforge.net/viewvc/guliverkli/ hashing source codes] is [http://sourceforge.net/projects/guliverkli/ GPL]. The source codes were tested on little-endian processors - DEC, Intel and compatible. Important: there might be cases when your calculated hash is shorter than 16 characters, so make sure you add leading-zero padding - some of the source codes don't implement this ('101eae5380a4769' => '0101eae5380a4769'). Feel free to edit/add source codes if you have a faster/better implementation. Also don't forget to check that your hash is right for the test files. '''Please test these 2 files to ensure your algorithm is completely OK''' (otherwise you can poison the database, and nobody wants that): * [http://www.opensubtitles.org/addons/avi/breakdance.avi AVI file] (12 909 756 bytes) * hash: '''8e245d9679d31e12''' * [http://www.opensubtitles.org/addons/avi/dummy.rar DUMMY RAR file] (2 565 922 bytes, 4 295 033 890 after RAR unpacking, test on UNPACKED file) * hash: '''61f7751fc2a72bfb''' (for UNPACKED file) == C == {{{ #!c #include #include #define MAX(x,y) (((x) > (y)) ? 
(x) : (y)) #ifndef uint64_t #define uint64_t unsigned long long #endif uint64_t compute_hash(FILE * handle) { uint64_t hash, fsize; fseek(handle, 0, SEEK_END); fsize = ftell(handle); fseek(handle, 0, SEEK_SET); hash = fsize; for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++); fseek(handle, (long)MAX(0, fsize - 65536), SEEK_SET); for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++); return hash; } int main(int argc, char *argv) { FILE * handle; uint64_t myhash; handle = fopen("breakdance.avi", "rb"); if (!handle) { printf("Error openning file!"); return 1; } myhash = compute_hash(handle); printf("%I64x", myhash); fclose(handle); return 0; } }}} == C - Public Domain License == {{{ #!c #include #include unsigned long long analizefileOSHahs(char *fileName){ /* * Public Domain implementation by Kamil Dziobek. turbos11(at)gmail.com * This code implements Gibest hash algorithm first use in Media Player Classics * For more implementation(various languages and authors) see: * http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes * * -works only on little-endian procesor DEC, Intel and compatible * -sizeof(unsigned long long) must be 8 */ FILE *file; int i; unsigned long long t1=0; unsigned long long buffer1[8192*2]; file = fopen(fileName, "rb"); fread(buffer1, 8192, 8, file); fseek(file, -65536, SEEK_END); fread(&buffer1[8192], 8192, 8, file); for (i=0;i<8192*2;i++) t1+=buffer1[i]; t1+= ftell(file); //add filesize fclose(file); return t1; }; int main(int argc, char *argv){ unsigned long long myhash=analizefileOSHahs("C://tomaszkokowskizoofiliamovies.avi"); printf("hash is %16I64x",myhash); } }}} == C++ == {{{ #!cpp #include #include typedef unsigned __int64 uint64_t; using namespace std; int MAX(int x, int y) { if((x) > (y)) return x; else return y; } uint64_t compute_hash(ifstream& f) { uint64_t hash, fsize; f.seekg(0, 
ios::end); fsize = f.tellg(); f.seekg(0, ios::beg); hash = fsize; for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp); f.seekg(MAX(0, (uint64_t)fsize - 65536), ios::beg); for(tmp = 0, i = 0; i < 65536/sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp); return hash; } int main(int argc, char *argv) { ifstream f; uint64_t myhash; f.open("c:\\test.avi", ios::in|ios::binary|ios::ate); if (!f.is_open()) { cerr << "Error opening file" << endl; return 1; } myhash = compute_hash(f); cout << setw(16) << setfill('0') << hex << myhash; f.close(); return 0; } }}} == About C and C++ implementation == The C and C++ implementations above only work on little-endian processors: DEC, Intel and compatible == Java == {{{ #!java /** * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit * checksum of the first and last 64k (even if they overlap because the file is smaller than * 128k). */ public class OpenSubtitlesHasher { /** * Size of the chunks that will be hashed in bytes (64 KB) */ private static final int HASH_CHUNK_SIZE = 64 * 1024; public static String computeHash(File file) throws IOException { long size = file.length(); long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size); FileChannel fileChannel = new FileInputStream(file).getChannel(); try { long head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile)); long tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile)); return String.format("%016x", size + head + tail); } finally { fileChannel.close(); } } public static String computeHash(InputStream stream, long length) throws IOException { int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length); // buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)]; DataInputStream in = new 
DataInputStream(stream); // first chunk in.readFully(chunkBytes, 0, chunkSizeForFile); long position = chunkSizeForFile; long tailChunkPosition = length - chunkSizeForFile; // seek to position of the tail chunk, or not at all if length is smaller than two chunks while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0); // second chunk, or the rest of the data if length is smaller than two chunks in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile); long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile)); long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile)); return String.format("%016x", length + head + tail); } private static long computeHashForChunk(ByteBuffer buffer) { LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer(); long hash = 0; while (longBuffer.hasRemaining()) { hash += longBuffer.get(); } return hash; } } }}} == C# == You can use GetHash.dll.[[BR]] [http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll][[BR]] Use Example:[[BR]] {{{ private void openFileDialog1_FileOk(object sender, CancelEventArgs e) { byte[] hash = GetHash.Main.ComputeHash(openFileDialog1.FileName); label1.Text = GetHash.Main.ToHexadecimal(hash); } }}} or without using GetHash.dll: {{{ #!c# using System; using System.Text; using System.IO; namespace MovieHasher { class Program { private static byte[] ComputeMovieHash(string filename) { byte[] result; using (Stream input = File.OpenRead(filename)) { result = ComputeMovieHash(input); } return result; } private static byte[] ComputeMovieHash(Stream input) { long lhash, streamsize; streamsize = input.Length; lhash = streamsize; long i = 0; byte[] buffer = new byte[sizeof(long)]; while (i < 65536 / sizeof(long) && (input.Read(buffer, 0, sizeof(long)) > 0)) { i++; lhash += BitConverter.ToInt64(buffer, 0); } 
input.Position = Math.Max(0, streamsize - 65536); i = 0; while (i < 65536 / sizeof(long) && (input.Read(buffer, 0, sizeof(long)) > 0)) { i++; lhash += BitConverter.ToInt64(buffer, 0); } input.Close(); byte[] result = BitConverter.GetBytes(lhash); Array.Reverse(result); return result; } private static string ToHexadecimal(byte[] bytes) { StringBuilder hexBuilder = new StringBuilder(); for(int i = 0; i < bytes.Length; i++) { hexBuilder.Append(bytes[i].ToString("x2")); } return hexBuilder.ToString(); } static void Main(string[] args) { byte[] moviehash = ComputeMovieHash(@"C:\test.avi"); Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash)); } } } }}} If you get overflow error read [http://forum.opensubtitles.org/viewtopic.php?t=1562 this]. == VB.Net == {{{ #!vba Imports System Imports System.Text Imports System.IO 'Note: you must remove integer overflow checking. Namespace MovieHasher Class Program Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte() Dim result As Byte() Using input As Stream = File.OpenRead(filename) result = ComputeMovieHash(input) End Using Return result End Function Private Function ComputeMovieHash(ByVal input As Stream) As Byte() Dim lhash As System.Int64, streamsize As Long streamsize = input.Length lhash = streamsize Dim i As Long = 0 Dim buffer As Byte() = New Byte(Marshal.SizeOf(GetType(Long)) - 1) {} While i < 65536 / Marshal.SizeOf(GetType(Long)) AndAlso (input.Read(buffer, 0, Marshal.SizeOf(GetType(Long))) > 0) i += 1 lhash += BitConverter.ToInt64(buffer, 0) End While input.Position = Math.Max(0, streamsize - 65536) i = 0 While i < 65536 / Marshal.SizeOf(GetType(Long)) AndAlso (input.Read(buffer, 0, Marshal.SizeOf(GetType(Long))) > 0) i += 1 lhash += BitConverter.ToInt64(buffer, 0) End While input.Close() Dim result As Byte() = BitConverter.GetBytes(lhash) Array.Reverse(result) Return result End Function Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String Dim 
hexBuilder As New StringBuilder() For i As Integer = 0 To bytes.Length - 1 hexBuilder.Append(bytes(i).ToString("x2")) Next Return hexBuilder.ToString() End Function Private Shared Sub Main(ByVal args As String()) Dim moviehash As Byte() = ComputeMovieHash("C:\test.avi") Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash)) End Sub End Class End Namespace }}} == Python == {{{ #!python import struct, os def hashFile(name): try: longlongformat = ' 0)) do begin readed := aStream.Read(s, sizeof(s)); if readed > 0 then begin hash := hash + tmp; end; i := i + 1; end; aStream.Seek(-65536, soFromEnd); // 65536 i := 0; readed:= 1; while ((i < 8192) and (readed > 0)) do begin readed := aStream.Read(s, sizeof(s)); if readed > 0 then hash := hash + tmp; i := i + 1; end; aStream.Free; result := Format('%.16x',[hash]); end; }}} alternate version by TRP {{{ unction CalcGabestHash(const Stream: TStream): Int64; overload; const HashPartSize = 1 shl 16; // 64 KiB procedure UpdateHashFromStream(const Stream: TStream; var Hash: Int64); inline; var buffer: Array[0..HashPartSize div SizeOf(Int64) - 1] of Int64; i : integer; begin Stream.ReadBuffer(buffer[0], SizeOf(buffer)); for i := Low(buffer) to High(buffer) do Inc(Hash, buffer[i]); end; begin result:= Stream.Size; if result < HashPartSize then begin // stream too small return invalid hash result:= 0; exit; end; // first 64 KiB Stream.Position:= 0; UpdateHashFromStream(Stream, result); // last 64 KiB Stream.Seek(-HashPartSize, soEnd); UpdateHashFromStream(Stream, result); // use "IntToHex(result, 16);" to get a string and "StrToInt64('$' + hash);" to get your Int64 back end; function CalcGabestHash(const FileName: TFileName): Int64; overload; var stream: TStream; begin stream:= TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite); try result:= CalcGabestHash(stream); finally stream.Free; end; end; }}} == Lua == {{{ #!lua -- will produce a correct hash regardless of architecture (big vs little 
endian) local function movieHash(fileName) local fil = io.open(fileName, "rb") local lo,hi=0,0 for i=1,8192 do local a,b,c,d = fil:read(4):byte(1,4) lo = lo + a + b*256 + c*65536 + d*16777216 a,b,c,d = fil:read(4):byte(1,4) hi = hi + a + b*256 + c*65536 + d*16777216 while lo>=4294967296 do lo = lo-4294967296 hi = hi+1 end while hi>=4294967296 do hi = hi-4294967296 end end local size = fil:seek("end", -65536) + 65536 for i=1,8192 do local a,b,c,d = fil:read(4):byte(1,4) lo = lo + a + b*256 + c*65536 + d*16777216 a,b,c,d = fil:read(4):byte(1,4) hi = hi + a + b*256 + c*65536 + d*16777216 while lo>=4294967296 do lo = lo-4294967296 hi = hi+1 end while hi>=4294967296 do hi = hi-4294967296 end end lo = lo + size while lo>=4294967296 do lo = lo-4294967296 hi = hi+1 end while hi>=4294967296 do hi = hi-4294967296 end fil:close() return string.format("%08x%08x", hi,lo), size end print("breakdance.avi:") print(movieHash("breakdance.avi")) print("8e245d9679d31e12 <- should be") print("") print("dummy.rar:") print(movieHash("dummy.rar")) print("61f7751fc2a72bfb <- should be according to wiki") print("2a527d74d45f5b1b <- what other hash tools actually report") }}} == !RealBasic/Xojo == Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles {{{ dim b as BinaryStream dim mb as MemoryBlock dim hash,bytesize as UINT64 dim i, x, chunksize, filelen, difference as integer hash = 0 //Reset Hash difference = 0 if f <> nil and f.Exists then b= f.OpenAsBinaryFile hash = b.Length bytesize = b.Length bytesizestr = str(bytesize) if bytesize >= 65536 and routine = "video" then chunksize = 65536 mb = b.Read(65536) mb.LittleEndian = True for i= 0 to chunksize -1 step 8 hash = hash+ mb.UINT64Value(i) next b.Position = max(b.Length-chunksize, 0) mb= b.Read(chunksize) mb.LittleEndian = True for i= 0 to chunksize -1 step 8 hash = hash+ mb.UINT64Value(i) next myhash = Lowercase(str(hex(hash))) elseif routine = "subtitle" then dim c,result as string mb = 
md5(b.Read(b.Length)) mb.LittleEndian = True for i = 0 to mb.size-1 x = mb.byte( i ) c = right( "00"+hex( x ), 2 ) result = result + c next result = lowercase( result ) myhash = result end }}} == PHP 4/5 == {{{ #!php function OpenSubtitlesHash($file) { $handle = fopen($file, "rb"); $fsize = filesize($file); $hash = array(3 => 0, 2 => 0, 1 => ($fsize >> 16) & 0xFFFF, 0 => $fsize & 0xFFFF); for ($i = 0; $i < 8192; $i++) { $tmp = ReadUINT64($handle); $hash = AddUINT64($hash, $tmp); } $offset = $fsize - 65536; fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET); for ($i = 0; $i < 8192; $i++) { $tmp = ReadUINT64($handle); $hash = AddUINT64($hash, $tmp); } fclose($handle); return UINT64FormatHex($hash); } function ReadUINT64($handle) { $u = unpack("va/vb/vc/vd", fread($handle, 8)); return array(0 => $u["a"], 1 => $u["b"], 2 => $u["c"], 3 => $u["d"]); } function AddUINT64($a, $b) { $o = array(0 => 0, 1 => 0, 2 => 0, 3 => 0); $carry = 0; for ($i = 0; $i < 4; $i++) { if (($a[$i] + $b[$i] + $carry) > 0xffff ) { $o[$i] += ($a[$i] + $b[$i] + $carry) & 0xffff; $carry = 1; } else { $o[$i] += ($a[$i] + $b[$i] + $carry); $carry = 0; } } return $o; } function UINT64FormatHex($n) { return sprintf("%04x%04x%04x%04x", $n[3], $n[2], $n[1], $n[0]); } }}} == Perl == {{{ #!perl #!/usr/bin/perl use strict; use warnings; print OpenSubtitlesHash('breakdance.avi'); sub OpenSubtitlesHash { my $filename = shift or die("Need video filename"); open my $handle, "<", $filename or die $!; binmode $handle; my $fsize = -s $filename; my $hash = [$fsize & 0xFFFF, ($fsize >> 16) & 0xFFFF, 0, 0]; $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192); my $offset = $fsize - 65536; seek($handle, $offset > 0 ? 
$offset : 0, 0) or die $!; $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192); close $handle or die $!; return UINT64FormatHex($hash); } sub ReadUINT64 { read($_[0], my $u, 8); return [unpack("vvvv", $u)]; } sub AddUINT64 { my $o = [0,0,0,0]; my $carry = 0; for my $i (0..3) { if (($_[0]->[$i] + $_[1]->[$i] + $carry) > 0xffff ) { $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry) & 0xffff; $carry = 1; } else { $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry); $carry = 0; } } return $o; } sub UINT64FormatHex { return sprintf("%04x%04x%04x%04x", $_[0]->[3], $_[0]->[2], $_[0]->[1], $_[0]->[0]); } }}} == Ruby == This is a quick translation/transliteration of the Perl script. {{{ #!ruby class Hasher def open_subtitles_hash(filename) raise "Need video filename" unless filename fh = File.open(filename) fsize = File.size(filename) hash = [fsize & 0xffff, (fsize >> 16) & 0xffff, 0, 0] 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } offset = fsize - 65536 fh.seek([0,offset].max, 0) 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } fh.close return uint_64_format_hex(hash) end def read_uint_64(stream) stream.read(8).unpack("vvvv") end def add_unit_64(hash, input) res = [0,0,0,0] carry = 0 hash.zip(input).each_with_index do |(h,i),n| sum = h + i + carry if sum > 0xffff res[n] += sum & 0xffff carry = 1 else res[n] += sum carry = 0 end end return res end def uint_64_format_hex(hash) sprintf("%04x%04x%04x%04x", *hash.reverse) end end if __FILE__ == $0 require 'test/unit' class HashTester < Test::Unit::TestCase def setup @h = Hasher.new end def test_test_file_hash assert_equal("8e245d9679d31e12", @h.open_subtitles_hash('breakdance.avi')) end end end }}} Another more "rubyesque" implementation. {{{ #!ruby module MovieHasher CHUNK_SIZE = 64 * 1024 # in bytes def self.compute_hash(filename) filesize = File.size(filename) hash = filesize # Read 64 kbytes, divide up into 64 bits and add each # to hash. Do for beginning and end of file. 
File.open(filename, 'rb') do |f| # Q = unsigned long long = 64 bit f.read(CHUNK_SIZE).unpack("Q*").each do |n| hash = hash + n & 0xffffffffffffffff # to remain as 64 bit number end f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET) # And again for the end of the file f.read(CHUNK_SIZE).unpack("Q*").each do |n| hash = hash + n & 0xffffffffffffffff end end sprintf("%016x", hash) end end if __FILE__ == $0 require 'test/unit' class MovieHasherTest < Test::Unit::TestCase def test_compute_hash assert_equal("8e245d9679d31e12", MovieHasher::compute_hash('breakdance.avi')) end def test_compute_hash_large_file assert_equal("61f7751fc2a72bfb", MovieHasher::compute_hash('dummy.bin')) end end end }}} == Haskell == {{{ #!haskell import IO(bracket) import System.Environment(getArgs) import System.IO(openBinaryFile,hClose,hFileSize,hSeek,IOMode(ReadMode),SeekMode(AbsoluteSeek,SeekFromEnd)) import qualified Data.ByteString.Lazy as L(hGet,unpack) import Data.Binary.Get(runGet,getWord64le) import Data.Binary.Put(runPut,putWord64le) import Data.Word(Word64) import Control.Monad(foldM) import Data.Bits.Utils(w82s) import Data.Hex(hex) shortsum :: FilePath -> IO Word64 shortsum filename = bracket (openBinaryFile filename ReadMode) hClose $ \h -> do fs <- hFileSize h hSeek h AbsoluteSeek 0 ; begin <- L.hGet h chunksize hSeek h SeekFromEnd (-(toInteger chunksize)) ; end <- L.hGet h chunksize return $ (flip runGet $ begin) $ chunksum $ (flip runGet $ end) (chunksum . fromInteger $ fs) where chunksize = 0x10000 chunksum n = foldM (\a _ -> getWord64le >>= return . 
(+a)) n [1..(chunksize`div`8)] main :: IO () main = do args <- getArgs let fn = head $ args p <- shortsum fn putStrLn $ "The hash of file " ++ fn ++ ": " ++ (hex $ w82s $ reverse (L.unpack $ runPut $ putWord64le p)) }}} == AutoIT == [http://www.autoitscript.com/forum/topic/107155-opensubtitles-org-hashing-func/page__p__755889__hl__opensubtitles__fromsearch__1#entry755889 Forum entry] {{{ #cs Hash code is based on Media Player Classic. It calculates: size + 64bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). Authors: Authenticity & Emanuel "Datenshi" Lindgren @ AutoIT Forums. AutoIT v3.3.2.0 #ce Func _Compute_Hash($sFileName) Local $hFile, $tRet, $tTmp, $iFileSize, $iRead, $iChunk, $iI $hFile = FileOpen($sFileName, 16) If Not $hFile Then Return SetError(1, 0, 0) $iFileSize = FileGetSize($sFileName) $iChunk = 65536 If $iFileSize < $iChunk * 2 Then FileClose($hFile) Return SetError(2, 0, 0) EndIf $tRet = DllStructCreate("uint64") $tTmp = DllStructCreate("uint64") DllStructSetData($tRet, 1, $iFileSize) For $iI = 0 To ($iChunk / 8) - 1 DllStructSetData($tTmp, 1, FileRead($hFile, 8)) DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1)) Next FileSetPos($hFile, $iFileSize - $iChunk, 0) For $iI = 0 To ($iChunk / 8) - 1 DllStructSetData($tTmp, 1, FileRead($hFile, 8)) DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1)) Next FileClose($hFile) Return SetError(0, 0, _HEX(DllStructGetData($tRet, 1))) EndFunc Func _HEX($iValue) Return StringFormat("%#.8x%.8x", $iValue / 4294967296, $iValue) EndFunc }}} == !FoxPro == {{{ PARAMETERS cfile PRIVATE ALL ******* * enviroment setup ******* cret='' glTalk=(SET("TALK")="ON") IF vartype(cfile)<>'C' cfile='breakdance.avi' ENDIF IF glTalk ? cfile ? cfile='' ? LEN(cfile) endif nfile=FOPEN(cfile) nsize=FSEEK(nfile,0,2) IF gltalk ? cfile ? 'size?>' ?? 
nsize endif FSEEK(nfile,0,0) ****** * length reencode to 64 uint ***** chash=hashsize(nsize) cempty=chr(0) cret='' IF LEN(chash)<8 FOR i=1 TO 8-LEN(chash) cret=cret+cempty ENDFOR ENDIF cret=cret+chash nSum=0 ******* * first 64kb ****** FOR i=1 TO 8192 cpom=FREAD(nfile,8) cpom=reverse(cpom) nSum=nSum+LEN(cpom) IF gltalk do buildhex WITH cret ?? '+' DO buildhex WITH cpom ? '=' ENDIF cret=adint64(cret,cpom) ENDFOR ******* * last 64kb ******* FSEEK(nfile,-65536,2) FOR i=1 TO 8192 cpom=FREAD(nfile,8) cpom=reverse(cpom) cret=adint64(cret,cpom) nSum=nSum+LEN(cpom) ENDFOR FCLOSE(nfile) **** * build hexa **** IF gltalk DO buildhex WITH cret ? ? 'Spocital som' ?? nSum ENDIF RETURN buildhex(cret) FUNCTION reverse PARAMETERS cstring PRIVATE ALL cret='' FOR i=1 TO LEN(cstring) cret=cret+SUBSTR(cstring,LEN(cstring)-i+1,1) ENDFOR RETURN cret FUNCTION buildhex PARAMETERS cstring,lkam PRIVATE ALL gcTalk=SET("TALK") cret='' FOR i=1 TO LEN(cstring) cpom=dec2basx(ASC(SUBSTR(cstring,i,1)),16) IF LEN(cpom)<2 cout='0'+cpom cpom=cout ENDIF cret=cret+cpom IF gcTALK="ON" ?? cpom ?? ':' ENDIF ENDFOR RETURN cret FUNCTION adint64 PARAMETERS cstring1,cstring2 PRIVATE ALL DIMENSION car (8,1) as Character *** * 8 bytes both *** nincrement=0 cret='' FOR i=8 TO 1 STEP -1 nfir=ASC(SUBSTR(cstring1,i,1)) nsec=ASC(SUBSTR(cstring2,i,1)) nout=nincrement+nfir+nsec IF nout>255 nincrement=INT(nout/256) nout=nout-(nincrement*256) ELSE nincrement=0 ENDIF car(i)=CHR(nout) ENDFOR FOR i=1 TO 8 cret=cret+car(i) ENDFOR RETURN cret FUNCTION hashsize PARAMETERS ncislo PRIVATE ALL cret='' creverse='' DO WHILE .t. npom=INT(ncislo/256) npom2=ncislo-npom*256 creverse=creverse+CHR(npom2) ncislo=npom IF ncislo=0 EXIT ENDIF ENDDO FOR i=1 TO LEN(creverse) cret=cret+SUBSTR(creverse,LEN(creverse)-i+1,1) ENDFOR RETURN cret *.............................................................................. 
* Function: DEC2BASX * Purpose: Convert whole number 0-?, to base 2-16 * * Parameters: nTempNum - number to convert (0-9007199254740992) * base - base to convert to i.e., 2 4 8 16... * returns: string * Usage: cresult=Dec2BasX(nParm1, nParm2) * STORE Dec2BasX(255, 16) TO cMyString &&... cMyString contains 'ff' *.............................................................................. FUNCTION dec2basx PARAMETERS nTempNum, nNewBase STORE 0 TO nWorkVal,; remainder,; dividend,; nextnum,; digit nWorkVal = nTempNum ret_str = '' DO WHILE .T. digit = MOD(nWorkVal, nNewBase) dividend = nWorkVal / nNewBase nWorkVal = INT(dividend) DO CASE CASE digit = 10 ret_str = 'a' + ret_str CASE digit = 11 ret_str = 'b' + ret_str CASE digit = 12 ret_str = 'c' + ret_str CASE digit = 13 ret_str = 'd' + ret_str CASE digit = 14 ret_str = 'e' + ret_str CASE digit = 15 ret_str = 'f' + ret_str OTHERWISE ret_str = LTRIM(STR(digit)) + ret_str ENDCASE IF nWorkVal = 0 EXIT ENDIF ( nWorkVal = 0 ) ENDDO ( .T. ) RETURN ret_str }}} == Powershell 2.0 == You can use GetHash.dll.[[BR]] [http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll][[BR]] Use Example:[[BR]] {{{ Add-Type -Path "GetHash.dll" function MovieHash([string]$path) { $hash = [GetHash.Main] $hash::ToHexadecimal($hash::ComputeHash($path)) } MovieHash $filename }}} or without using GetHash.dll: {{{ $dataLength = 65536 function LongSum([UInt64]$a, [UInt64]$b) { [UInt64](([Decimal]$a + $b) % ([Decimal]([UInt64]::MaxValue) + 1)) } function StreamHash([IO.Stream]$stream) { $hashLength = 8 [UInt64]$lhash = 0 [byte[]]$buffer = New-Object byte[] $hashLength $i = 0 while ( ($i -lt ($dataLength / $hashLength)) -and ($stream.Read($buffer,0,$hashLength) -gt 0) ) { $i++ $lhash = LongSum $lhash ([BitConverter]::ToUInt64($buffer,0)) } $lhash } function MovieHash([string]$path) { try { $stream = [IO.File]::OpenRead($path) [UInt64]$lhash = $stream.Length $lhash = LongSum $lhash (StreamHash $stream) 
$stream.Position = [Math]::Max(0L, $stream.Length - $dataLength) $lhash = LongSum $lhash (StreamHash $stream) "{0:X}" -f $lhash } finally { $stream.Close() } } MovieHash $filename }}} == MASM == {{{ Calc_Hash proc uses esi ebx edx pFile:dword, pBuf:dword LOCAL hFile:dword, fSize:dword, NBR:dword, pMem:dword invoke CreateFile,pFile,GENERIC_ALL,0,0,OPEN_EXISTING,0,0 mov hFile,eax cmp eax,INVALID_HANDLE_VALUE jz @Error invoke SetFilePointer,hFile,0,NULL,FILE_END mov fSize,eax push eax invoke GlobalAlloc,GPTR,131072 mov pMem,eax invoke SetFilePointer,hFile,0,NULL,FILE_BEGIN invoke ReadFile,hFile,pMem,65536,addr NBR,NULL sub fSize,65536 add pMem,65536 invoke SetFilePointer,hFile,fSize,NULL,FILE_BEGIN invoke ReadFile,hFile,pMem,65536,addr NBR,NULL sub pMem,65536 mov esi,pMem mov ecx,131072 pop eax mov edx,eax push eax @@: add edx,[esi] adc ebx,[esi+4] add esi,8 sub ecx,8 jnz @B push edx push ebx invoke wsprintf,pBuf,addr HashFormat pop eax pop eax invoke CloseHandle,hFile invoke GlobalFree,pMem pop ecx @Error: ; If error eax returns (INVALID_HANDLE_VALUE) ; Hash value is copied to pBuf ; eax returns Movie Filesize ret Calc_Hash endp }}} == Objective-C == This is implementation of hash for Objective-C for Mac by subsmarine.com '''OSHashAlgorithm.m''' {{{ #!c++ #import "OSHashAlgorithm.h" @implementation OSHashAlgorithm +(NSString*)stringForHash:(uint64_t)hash { return [[NSString stringWithFormat:@"%qx", hash ] autorelease]; } +(VideoHash)hashForPath:(NSString*)path { VideoHash hash; hash.fileHash =0; hash.fileSize =0; NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path]; hash = [OSHashAlgorithm hashForFile:readFile]; [readFile closeFile]; return hash; } +(VideoHash)hashForURL:(NSURL*)url { VideoHash hash; hash.fileHash =0; hash.fileSize =0; NSFileHandle *readfile = [NSFileHandle fileHandleForReadingFromURL:url error:NULL]; hash = [OSHashAlgorithm hashForFile:readfile]; return hash; } +(VideoHash)hashForFile:(NSFileHandle*)handle { VideoHash retHash; 
retHash.fileHash =0; retHash.fileSize =0; if( handle == nil ) return retHash; const NSUInteger CHUNK_SIZE=65536; NSData *fileDataBegin, *fileDataEnd; uint64_t hash=0; fileDataBegin = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE]; [handle seekToEndOfFile]; unsigned long long fileSize = [handle offsetInFile]; if(fileSize < CHUNK_SIZE ) return retHash; [handle seekToFileOffset:MAX(0,fileSize-CHUNK_SIZE) ]; fileDataEnd = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE]; // // Calculate hash // // 1st. File size hash += fileSize; // 2nd. Begining data block uint64_t * data_bytes= (uint64_t*)[fileDataBegin bytes]; for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ ) hash+=data_bytes[i];; // 3rd. Ending data block data_bytes= (uint64_t*)[fileDataEnd bytes]; for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ ) hash+= data_bytes[i]; retHash.fileHash = hash; retHash.fileSize = fileSize; return retHash; } @end }}} '''OSHashAlgorithm.h''' {{{ #!c++ #import typedef struct { uint64_t fileHash; uint64_t fileSize; } VideoHash; @interface OSHashAlgorithm : NSObject { } +(VideoHash)hashForPath:(NSString*)path; +(VideoHash)hashForURL:(NSURL*)url; +(VideoHash)hashForFile:(NSFileHandle*)handle; +(NSString*)stringForHash:(uint64_t)hash; @end }}} == Vala == {{{ public uint64 hash(File file) { try { uint64 h; //get filesize and add it to hash var file_info = file.query_info("*", FileQueryInfoFlags.NONE); h = file_info.get_size(); //add first 64kB of file to hash var dis = new DataInputStream(file.read()); dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN); for(int i=0; i<65536/sizeof(uint64); i++) { h += dis.read_uint64(); } //add last 64kB of file to hash dis = new DataInputStream(file.read()); dis.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN); dis.skip((size_t)(file_info.get_size() - 65536)); for(int i=0; i<65536/sizeof(uint64); i++) { h += dis.read_uint64(); } return h; } catch (Error e) { error("%s", e.message); } } int main () { var file = File.new_for_path 
("breakdance.avi"); if (!file.query_exists ()) { stderr.printf ("File '%s' doesn't exist.\n", file.get_path ()); return 1; } stdout.printf("%016llx\n", hash(file)); file = File.new_for_path ("dummy.bin"); if (!file.query_exists ()) { stderr.printf ("File '%s' doesn't exist.\n", file.get_path ()); return 1; } stdout.printf("%016llx\n", hash(file)); return 0; } }}} Build with: valac --pkg gio-2.0 hash.vala == !AutoHotKey == {{{ #NoEnv SetBatchLines, -1 ; http://www.opensubti.../breakdance.avi ; OpenSubtitles Hash = 8E245D9679D31E12 FilePath := "Breakdance.avi" MsgBox, 0, OpenSubtitlesHash, % Filepath . ":`r`n" . GetOpenSubtitlesHash(FilePath) ExitApp ; ================================================================================================== GetOpenSubtitlesHash(FilePath) { ; http://trac.opensubt...HashSourceCodes Static X := { 0: "0", 1: "1", 2: "2", 3: "3", 4: "4", 5: "5", 6: "6", 7: "7" , 8: "8", 9: "9", 10: "A", 11: "B", 12: "C", 13: "D", 14: "E", 15: "F"} ; Check the file size --------------------------------------------------------------------------- ; 9000000000 > $moviebytesize >= 131072 bytes (changed > to >= for the lower limit) FileGetSize, FileSize, %FilePath% If (FileSize < 131072) || (FileSize >= 9000000000) Return "" ; Read the first and last 64 KB ----------------------------------------------------------------- VarSetCapacity(FileParts, 131072) ; allocate sufficient memory File := FileOpen(FilePath, "r") ; open the file File.Seek(0, 0) ; set the file pointer (just for balance) File.RawRead(FileParts, 65536) ; read the first 64 KB File.Seek(-65536, 2) ; set the file pointer for the last 64 KB File.RawRead(&FileParts + 65536, 65536) ; read the last 64 KB File.Close() ; got all we need, so the file can be closed ; Now calculate the hash using two UINTs for the low- and high-order parts of an UINT64 --------- LoUINT := FileSize & 0xFFFFFFFF ; store low-order UINT of file size HiUINT := FileSize >> 32 ; store high-order UINT of file size Offset := 
-4 ; to allow adding 4 on first iteration Loop, 16384 { ; 131072 / 8 LoUINT += NumGet(FileParts, Offset += 4, "UInt") ; add first UINT value to low-order UINT HiUINT += NumGet(FileParts, Offset += 4, "UInt") ; add second UINT value to high-order UINT } ; Adjust the probable overflow of the low-order UINT HiUINT += LoUINT >> 32 ; add the overflow to the high-order UINT LoUINT &= 0xFFFFFFFF ; remove the overflow from the low-order UINT ; Now get the hex string, i.e. the hash --------------------------------------------------------- Hash := "" VarSetCapacity(UINT64, 8, 0) NumPut((HiUINT << 32) | LoUINT, UINT64, 0, "UInt64") Loop, 8 Hash .= X[(Byte := NumGet(UINT64, 8 - A_Index, "UChar")) >> 4] . X[Byte & 0x0F] Return Hash } ; ================================================================================================== }}} == Lisp == {{{ ; opensubtitle hash, common lisp, sbcl ; sean langton 2013 (defun get-lvalue(stream) (let ((c)(n 0)(m 1)) (loop for x from 0 to 7 do (setf c (read-byte stream)) (setf n (+ n (* c m))) (setf m (* m 256)) ) n)) (defun hashfile(path) (let ((hash '(unsigned-byte 64))(len)) (with-open-file (in path :element-type '(unsigned-byte 8)) (setf len (file-length in)) (setf hash len) (cond ((< len (* 2 65536)) (print "file too small to hash") (return-from hashfile nil))) (loop for x from 0 to 8191 do (setf hash (logand (+ hash (get-lvalue in)) #xFFFFFFFFFFFFFFFF ))) (file-position in (- len 65536)) (loop for x from 0 to 8191 do (setf hash (logand (+ hash (get-lvalue in)) #xFFFFFFFFFFFFFFFF ))) (format t "~&~16,'0x" hash)))) ; (hashfile #p"~/Downloads/breakdance.avi") ; (hashfile #p"~/Downloads/dummy/dummy.bin") }}} == Pascal == {{{ procedure ComputeHash(const Stream : TStream; out Size : qword; out Hash : string); var hashQ : qword; fsize : qword; i : integer; read : integer; s : array[0..7] of char; tmp : qword absolute s; begin Stream.Seek(0, soFromBeginning); Size := Stream.Size; hashQ := size;; i := 0; read := 1; while ((i < 8192) and (read 
> 0)) do begin read := Stream.Read(s, sizeof(s)); if read > 0 then begin hashQ := hashQ + tmp; end; i := i + 1; end; Stream.Seek(-65536, soFromEnd); i := 0; read := 1; while ((i < 8192) and (read > 0)) do begin read := Stream.Read(s, sizeof(s)); if read > 0 then begin hashQ := hashQ + tmp; end; i := i + 1; end; Hash := lowercase(Format('%.16x',[hashQ])); end; }}} == Scala == {{{ import java.io.{FileInputStream, File} import java.nio.{LongBuffer, ByteOrder, ByteBuffer} import java.nio.channels.FileChannel.MapMode import scala.math._ class OpenSubtitlesHasher { private val hashChunkSize = 64L * 1024L def computeHash(file: File) : String = { val fileSize = file.length val chunkSizeForFile = min(fileSize, hashChunkSize) val fileChannel = new FileInputStream(file).getChannel try { val head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile)) val tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, max(fileSize - hashChunkSize, 0), chunkSizeForFile)) "%016x".format(fileSize + head + tail) } finally { fileChannel.close() } } private def computeHashForChunk(buffer: ByteBuffer) : Long = { def doCompute(longBuffer: LongBuffer, hash: Long) : Long = { longBuffer.hasRemaining match { case false => hash case true => doCompute(longBuffer, hash + longBuffer.get) } } val longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer() doCompute(longBuffer, 0L) } } }}} == Javascript == There are some WRONG implementations floating around, so always check your hash codes against the test files at the start of this document. This implementation works fine; credits go to Rasmus - THANKS!
{{{ function(file, callback) { var HASH_CHUNK_SIZE = 65536, //64 * 1024 longs = [], temp = file.size; function read(start, end, callback) { var reader = new FileReader(); reader.onload = function(e) { callback.call(reader, process(e.target.result)); }; if (end === undefined) { reader.readAsBinaryString(file.slice(start)); } else { reader.readAsBinaryString(file.slice(start, end)); } } function process(chunk) { for (var i = 0; i < chunk.length; i++) { longs[(i + 8) % 8] += chunk.charCodeAt(i); } } function binl2hex(a) { var b = 255, d = '0123456789abcdef', e = '', c = 7; a[1] += a[0] >> 8; a[0] = a[0] & b; a[2] += a[1] >> 8; a[1] = a[1] & b; a[3] += a[2] >> 8; a[2] = a[2] & b; a[4] += a[3] >> 8; a[3] = a[3] & b; a[5] += a[4] >> 8; a[4] = a[4] & b; a[6] += a[5] >> 8; a[5] = a[5] & b; a[7] += a[6] >> 8; a[6] = a[6] & b; a[7] = a[7] & b; for (d, e, c; c > -1; c--) { e += d.charAt(a[c] >> 4 & 15) + d.charAt(a[c] & 15); } return e; } for (var i = 0; i < 8; i++) { longs[i] = temp & 255; temp = temp >> 8; } read(0, HASH_CHUNK_SIZE, function() { read(file.size - HASH_CHUNK_SIZE, undefined, function() { callback.call(null, file, binl2hex(longs)); }); }); } }}} == Groovy == {{{ import java.nio.ByteBuffer import java.nio.ByteOrder import java.nio.channels.FileChannel import java.nio.channels.FileChannel.MapMode class OpenSubtitlesHasher { def static HASH_CHUNK_SIZE = 64 * 1024 def static computeHash(file) { def size = file.length() def chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size) def fileChannel = new FileInputStream(file).getChannel() try { def head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile)) def tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile)) return String.format("%016x", size + head + tail) } finally { fileChannel.close() } } def static computeHash(stream, length){ def chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length) def chunkBytes = new byte[(int) 
Math.min(2 * HASH_CHUNK_SIZE, length)] def dis = new DataInputStream(stream) dis.readFully(chunkBytes, 0, chunkSizeForFile) def position = chunkSizeForFile def tailChunkPosition = length - chunkSizeForFile while (position < tailChunkPosition && (position += dis.skip(tailChunkPosition - position)) >= 0) dis.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile) def head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile)) def tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile)) return String.format("%016x", length + head + tail) } def static computeHashForChunk(buffer) { def longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer() def hash = 0 while (longBuffer.hasRemaining()) { hash += longBuffer.get() } return hash } } }}} == Bash == {{{ #!/bin/bash # Copyright (C) # 2014 - Tomasz Wisniewski dagon666 # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 
correct_64bit() { local pow32=$(( 1 << 32 )) while [ "$g_lo" -ge $pow32 ]; do g_lo=$(( g_lo - pow32 )) g_hi=$(( g_hi + 1 )) done while [ "$g_hi" -ge $pow32 ]; do g_hi=$(( g_hi - pow32 )) done } hash_part() { local file="$1" local curr=0 local dsize=$((8192*8)) local bytes_at_once=2048 local groups=$(( (bytes_at_once / 8) - 1 )) local k=0 local i=0 local offset=0 declare -a num=() while [ "$curr" -lt "$dsize" ]; do num=( $(od -t u1 -An -N "$bytes_at_once" -w$bytes_at_once -j "$curr" "$file") ) for k in $(seq 0 $groups); do offset=$(( k * 8 )) g_lo=$(( g_lo + \ num[$(( offset + 0 ))] + \ (num[$(( offset + 1 ))] << 8) + \ (num[$(( offset + 2 ))] << 16) + \ (num[$(( offset + 3 ))] << 24) )) g_hi=$(( g_hi + \ num[$(( offset + 4 ))] + \ (num[$(( offset + 5 ))] << 8) + \ (num[$(( offset + 6 ))] << 16) + \ (num[$(( offset + 7 ))] << 24) )) correct_64bit done curr=$(( curr + bytes_at_once )) done } hash_file() { g_lo=0 g_hi=0 local file="$1" local size=$(stat -c%s "$file") local offset=$(( size - 65536 )) local part1=$(mktemp part1.XXXXXXXX) local part2=$(mktemp part2.XXXXXXXX) dd if="$file" bs=8192 count=8 of="$part1" 2> /dev/null dd if="$file" skip="$offset" bs=1 of="$part2" 2> /dev/null hash_part "$part1" hash_part "$part2" g_lo=$(( g_lo + size )) correct_64bit unlink "$part1" unlink "$part2" printf "%08x%08x\n" $g_hi $g_lo } hash_file "breakdance.avi" echo "8e245d9679d31e12 <- should be" hash_file "dummy.bin" echo "61f7751fc2a72bfb <- should be" }}} == GO == https://github.com/oz/osdb/blob/6a89d7f831a6a3874260fe4677e546d551cad79d/osdb.go#L42 {{{ import ( "bytes" "encoding/binary" "fmt" "os" ) const ( ChunkSize = 65536 // 64k ) // Generate an OSDB hash for an *os.File. func HashFile(file *os.File) (hash uint64, err error) { fi, err := file.Stat() if err != nil { return } if fi.Size() < ChunkSize { return 0, fmt.Errorf("File is too small") } // Read head and tail blocks. 
buf := make([]byte, ChunkSize*2) err = readChunk(file, 0, buf[:ChunkSize]) if err != nil { return } err = readChunk(file, fi.Size()-ChunkSize, buf[ChunkSize:]) if err != nil { return } // Convert to uint64, and sum. var nums [(ChunkSize * 2) / 8]uint64 reader := bytes.NewReader(buf) err = binary.Read(reader, binary.LittleEndian, &nums) if err != nil { return 0, err } for _, num := range nums { hash += num } return hash + uint64(fi.Size()), nil } // Read a chunk of a file at `offset` so as to fill `buf`. func readChunk(file *os.File, offset int64, buf []byte) (err error) { n, err := file.ReadAt(buf, offset) if err != nil { return } if n != ChunkSize { return fmt.Errorf("Invalid read %v", n) } return } }}} == SWIFT 2 == {{{ // OSHash.swift // Originally implemented from Objective-C version for Swift by omerucel 18/04/2015 // http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes#Objective-C // Updated for Swift 2 by eduo on 15/06/15. // Copyright © 2015 Eduardo Gutierrez. All rights reserved. // import Foundation class OSHashAlgorithm: NSObject { let chunkSize: Int = 65536; struct VideoHash { var fileHash: String var fileSize: UInt64 } func hashForPath (path: String) -> VideoHash { var fileHash = VideoHash(fileHash: "", fileSize: 0) let fileHandler = NSFileHandle(forReadingAtPath: path)! 
let fileDataBegin: NSData = fileHandler.readDataOfLength(chunkSize) fileHandler.seekToEndOfFile() let fileSize: UInt64 = fileHandler.offsetInFile if (UInt64(chunkSize) > fileSize) { return fileHash } fileHandler.seekToFileOffset(max(0, fileSize - UInt64(chunkSize))) let fileDataEnd: NSData = fileHandler.readDataOfLength(chunkSize) var hash: UInt64 = fileSize var data_bytes = UnsafeBufferPointer( start: UnsafePointer(fileDataBegin.bytes), count: fileDataBegin.length/sizeof(UInt64) ) hash = data_bytes.reduce(hash,combine: &+) data_bytes = UnsafeBufferPointer( start: UnsafePointer(fileDataEnd.bytes), count: fileDataEnd.length/sizeof(UInt64) ) hash = data_bytes.reduce(hash,combine: &+) fileHash.fileHash = String(format:"%qx", arguments: [hash]) fileHash.fileSize = fileSize fileHandler.closeFile() return fileHash } } ///var osha = OSHashAlgorithm() ///var result = osha.hashForPath(fileName) ///println(result.fileHash) ///println(result.fileSize) }}} == SWIFT 3 == Source codes: https://github.com/niklasberglund/OpenSubtitlesHash.swift {{{ // // This Swift 3 version is based on Swift 2 version by eduo: // https://gist.github.com/eduo/7188bb0029f3bcbf03d4 // // Created by Niklas Berglund on 2017-01-01. // import Foundation class OpenSubtitlesHash: NSObject { static let chunkSize: Int = 65536 struct VideoHash { var fileHash: String var fileSize: UInt64 } public class func hashFor(_ url: URL) -> VideoHash { return self.hashFor(url.path) } public class func hashFor(_ path: String) -> VideoHash { var fileHash = VideoHash(fileHash: "", fileSize: 0) let fileHandler = FileHandle(forReadingAtPath: path)! 
let fileDataBegin: NSData = fileHandler.readData(ofLength: chunkSize) as NSData fileHandler.seekToEndOfFile() let fileSize: UInt64 = fileHandler.offsetInFile if (UInt64(chunkSize) > fileSize) { return fileHash } fileHandler.seek(toFileOffset: max(0, fileSize - UInt64(chunkSize))) let fileDataEnd: NSData = fileHandler.readData(ofLength: chunkSize) as NSData var hash: UInt64 = fileSize var data_bytes = UnsafeBufferPointer( start: UnsafePointer(fileDataBegin.bytes.assumingMemoryBound(to: UInt64.self)), count: fileDataBegin.length/MemoryLayout.size ) hash = data_bytes.reduce(hash,&+) data_bytes = UnsafeBufferPointer( start: UnsafePointer(fileDataEnd.bytes.assumingMemoryBound(to: UInt64.self)), count: fileDataEnd.length/MemoryLayout.size ) hash = data_bytes.reduce(hash,&+) fileHash.fileHash = String(format:"%016qx", arguments: [hash]) fileHash.fileSize = fileSize fileHandler.closeFile() return fileHash } } // Usage example: // let videoUrl = Bundle.main.url(forResource: "dummy5", withExtension: "rar") // let videoHash = OpenSubtitlesHash.hashFor(videoUrl!) 
// debugPrint("File hash: \(videoHash.fileHash)\nFile size: \(videoHash.fileSize)") }}} == RUST == {{{
use std::fs;
use std::fs::File;
use std::io::{Read, Seek, SeekFrom, BufReader};
use std::mem;

/// Size in bytes of each hashed block: the first and the last 64 KiB of the file.
const HASH_BLK_SIZE: u64 = 65536;

/// Computes the OpenSubtitles hash of `file` and returns it as 16 lowercase hex digits.
///
/// The hash is `fsize` plus the wrapping 64-bit sum of every little-endian `u64` word in
/// the first and in the last `HASH_BLK_SIZE` bytes. The two blocks may overlap when the
/// input is shorter than two blocks.
///
/// `fsize` must be the total length of `file` in bytes.
///
/// # Errors
///
/// Returns `ErrorKind::InvalidInput` when `fsize` is smaller than one hash block, and
/// propagates any I/O error from seeking or reading (including `UnexpectedEof` when the
/// stream is shorter than `fsize` claims).
fn create_hash<R: Read + Seek>(file: R, fsize: u64) -> Result<String, std::io::Error> {
    const WORD_SIZE: usize = mem::size_of::<u64>();

    // Reject inputs that cannot supply a full block instead of underflowing below.
    let tail_start = fsize.checked_sub(HASH_BLK_SIZE).ok_or_else(|| {
        std::io::Error::new(
            std::io::ErrorKind::InvalidInput,
            "file is smaller than one 64 KiB hash block",
        )
    })?;

    let words_per_block = HASH_BLK_SIZE / WORD_SIZE as u64;
    let mut reader = BufReader::with_capacity(HASH_BLK_SIZE as usize, file);
    let mut word = [0u8; WORD_SIZE];
    let mut hash_val: u64 = fsize; // seed hash with file size

    for &start in &[0, tail_start] {
        reader.seek(SeekFrom::Start(start))?;
        for _ in 0..words_per_block {
            // `read_exact` guarantees a full word; a plain `read` may return fewer bytes
            // and leave stale data in the buffer.
            reader.read_exact(&mut word)?;
            // Decode explicitly as little-endian so the result does not depend on the host.
            hash_val = hash_val.wrapping_add(u64::from_le_bytes(word));
        }
    }

    // Zero-pad to 16 digits; hashes with leading zeros must keep them.
    Ok(format!("{:016x}", hash_val))
}

/// Hashes `breakdance.avi` from the current directory and prints the result.
fn main() -> Result<(), std::io::Error> {
    let fname = "breakdance.avi";
    let fsize = fs::metadata(fname)?.len();
    if fsize > HASH_BLK_SIZE {
        let file = File::open(fname)?;
        let fhash = create_hash(file, fsize)?;
        println!("Hash for {} is {}", fname, fhash);
    } else {
        eprintln!("{} is too small to hash ({} bytes)", fname, fsize);
    }
    Ok(())
}
// }}}