Version 63 (modified by os, 10 years ago) (diff) |
---|
Programming Languages
OpenSubtitles.org uses a special hash function to match subtitle files against movie files. The hash does not depend on the file name of the movie file. Read about the basics of hashing functions.
The hash code is based on Media Player Classic. In natural language it calculates: size + 64-bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). On opensubtitles.org the movie file size is limited to 9000000000 > $moviebytesize > 131072 bytes; if there is any reason to change these limits, let us know. The licence of the hashing source code is GPL. The source code was tested on little-endian processors: DEC, Intel and compatible.
Important: there may be cases where your calculated hash is shorter than 16 characters, so make sure you pad it with leading zeros - some of the source codes below do not implement this.
Feel free to edit or add source code if you have a faster or better implementation. Also, don't forget to check that your hash is correct for the test files. Please test these 2 files to ensure your algorithm is completely OK (otherwise you can poison the database, and nobody wants that):
- AVI file (12 909 756 bytes)
- hash: 8e245d9679d31e12
- DUMMY RAR file (2 565 922 bytes, 4 295 033 890 after RAR unpacking, test on UNPACKED file)
- hash: 61f7751fc2a72bfb (for UNPACKED file)
C
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Number of bytes hashed at the start and at the end of the file. */
#define HASH_CHUNK_SIZE 65536

/*
 * Sums consecutive 64-bit words from the current position of `handle`,
 * stopping after HASH_CHUNK_SIZE bytes or at the first incomplete word.
 * Words are read in host byte order, so this is only correct on
 * little-endian machines.
 */
static uint64_t sum_chunk(FILE *handle)
{
    uint64_t sum = 0, word = 0;
    size_t i;

    for (i = 0; i < HASH_CHUNK_SIZE / sizeof(word)
                && fread(&word, sizeof(word), 1, handle) == 1; i++)
        sum += word;
    return sum;
}

/*
 * Computes the OpenSubtitles hash of an open binary stream:
 * file size + sum of the first 64 KiB + sum of the last 64 KiB,
 * all as wrapping 64-bit arithmetic.
 *
 * The size is obtained with ftell(), whose `long` result limits this
 * implementation to files that fit in a `long` on the target platform.
 */
uint64_t compute_hash(FILE * handle)
{
    uint64_t hash, fsize;
    long end;

    fseek(handle, 0, SEEK_END);
    end = ftell(handle);
    fsize = (end < 0) ? 0 : (uint64_t)end;

    hash = fsize;
    fseek(handle, 0, SEEK_SET);
    hash += sum_chunk(handle);

    /* fsize is unsigned: compare before subtracting so that files shorter
       than one chunk seek to offset 0 instead of wrapping around. */
    fseek(handle, fsize > HASH_CHUNK_SIZE ? (long)(fsize - HASH_CHUNK_SIZE) : 0L, SEEK_SET);
    hash += sum_chunk(handle);

    return hash;
}

int main(void)
{
    FILE * handle;
    uint64_t myhash;

    handle = fopen("breakdance.avi", "rb");
    if (!handle) {
        printf("Error openning file!");
        return 1;
    }

    myhash = compute_hash(handle);
    /* Always 16 hex digits: the hash must be zero-padded on the left. */
    printf("%016" PRIx64, myhash);

    fclose(handle);
    return 0;
}
C - Public Domain License
#include <stdio.h>
#include <stdlib.h>

unsigned long long analizefileOSHahs(char *fileName){
    /*
     * Public Domain implementation by Kamil Dziobek. turbos11(at)gmail.com
     * This code implements Gibest hash algorithm first use in Media Player Classics
     * For more implementation(various languages and authors) see:
     * http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
     *
     * -works only on little-endian procesor DEC, Intel and compatible
     * -sizeof(unsigned long long) must be 8
     *
     * Returns file size + sum of the 64-bit words of the first 64 KiB
     * + sum of the 64-bit words of the last 64 KiB (wrapping arithmetic).
     * Returns 0 when the file cannot be opened or its size cannot be read.
     */
    enum { CHUNK_WORDS = 8192 };            /* 65536 bytes / 8 bytes per word */
    FILE *file;
    size_t i, got;
    long size;
    unsigned long long t1;
    unsigned long long buffer1[CHUNK_WORDS];

    file = fopen(fileName, "rb");
    if (file == NULL)
        return 0;

    if (fseek(file, 0, SEEK_END) != 0 || (size = ftell(file)) < 0) {
        fclose(file);
        return 0;
    }
    t1 = (unsigned long long)size;          /* add filesize */

    /* First chunk: only the words actually read are summed, so a short
       file never contributes uninitialised buffer contents. */
    fseek(file, 0, SEEK_SET);
    got = fread(buffer1, sizeof buffer1[0], CHUNK_WORDS, file);
    for (i = 0; i < got; i++)
        t1 += buffer1[i];

    /* Last chunk: clamp to offset 0 for files shorter than one chunk. */
    fseek(file, size > 65536 ? size - 65536 : 0L, SEEK_SET);
    got = fread(buffer1, sizeof buffer1[0], CHUNK_WORDS, file);
    for (i = 0; i < got; i++)
        t1 += buffer1[i];

    fclose(file);
    return t1;
}

int main(void){
    unsigned long long myhash=analizefileOSHahs("breakdance.avi");
    /* zero-padded to 16 hex digits, as the hash format requires */
    printf("hash is %016llx",myhash);
    return 0;
}
C++
#include <cstdint>
#include <fstream>
#include <iomanip>
#include <iostream>

using namespace std;

/// Returns the larger of two signed 64-bit values.
/// Widened from int so that file offsets beyond 2 GiB are not truncated.
int64_t MAX(int64_t x, int64_t y)
{
    if((x) > (y))
        return x;
    else
        return y;
}

/// Sums consecutive 64-bit words from the current read position of `f`,
/// stopping after 64 KiB or at the first incomplete word. Words are read in
/// host byte order, so the result is only correct on little-endian machines.
static uint64_t sum_chunk(ifstream& f)
{
    const size_t kWords = 65536 / sizeof(uint64_t);
    uint64_t sum = 0;
    uint64_t word = 0;
    for (size_t i = 0; i < kWords && f.read(reinterpret_cast<char*>(&word), sizeof(word)); ++i)
        sum += word;
    return sum;
}

/// Computes the OpenSubtitles hash of an open binary stream:
/// file size + sum of the first 64 KiB + sum of the last 64 KiB (mod 2^64).
uint64_t compute_hash(ifstream& f)
{
    f.clear();
    f.seekg(0, ios::end);
    const streamoff end = f.tellg();
    const uint64_t fsize = end < 0 ? 0 : static_cast<uint64_t>(end);

    uint64_t hash = fsize;

    f.seekg(0, ios::beg);
    hash += sum_chunk(f);

    // A short file leaves failbit set after the first pass; without clear()
    // the following seekg would be ignored.
    f.clear();
    f.seekg(static_cast<streamoff>(MAX(0, static_cast<int64_t>(fsize) - 65536)), ios::beg);
    hash += sum_chunk(f);

    return hash;
}

int main()
{
    ifstream f;
    uint64_t myhash;

    f.open("c:\\test.avi", ios::in|ios::binary|ios::ate);
    if (!f.is_open()) {
        cerr << "Error opening file" << endl;
        return 1;
    }

    myhash = compute_hash(f);
    cout << setw(16) << setfill('0') << hex << myhash;

    f.close();
    return 0;
}
About C and C++ implementation
These implementations only work on little-endian processors: DEC, Intel and compatible.
Java
/** * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit * checksum of the first and last 64k (even if they overlap because the file is smaller than * 128k). */ public class OpenSubtitlesHasher { /** * Size of the chunks that will be hashed in bytes (64 KB) */ private static final int HASH_CHUNK_SIZE = 64 * 1024; public static String computeHash(File file) throws IOException { long size = file.length(); long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size); FileChannel fileChannel = new FileInputStream(file).getChannel(); try { long head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile)); long tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile)); return String.format("%016x", size + head + tail); } finally { fileChannel.close(); } } public static String computeHash(InputStream stream, long length) throws IOException { int chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length); // buffer that will contain the head and the tail chunk, chunks will overlap if length is smaller than two chunks byte[] chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)]; DataInputStream in = new DataInputStream(stream); // first chunk in.readFully(chunkBytes, 0, chunkSizeForFile); long position = chunkSizeForFile; long tailChunkPosition = length - chunkSizeForFile; // seek to position of the tail chunk, or not at all if length is smaller than two chunks while (position < tailChunkPosition && (position += in.skip(tailChunkPosition - position)) >= 0); // second chunk, or the rest of the data if length is smaller than two chunks in.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile); long head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile)); long tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile)); return String.format("%016x", 
length + head + tail); } private static long computeHashForChunk(ByteBuffer buffer) { LongBuffer longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer(); long hash = 0; while (longBuffer.hasRemaining()) { hash += longBuffer.get(); } return hash; } }
C#
You can use GetHash.dll.
http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll
Use Example:
// Handler for the file dialog's FileOk event: hashes the selected file with
// GetHash.dll and shows the hexadecimal hash in label1.
private void openFileDialog1_FileOk(object sender, CancelEventArgs e)
{
    string selectedPath = openFileDialog1.FileName;
    label1.Text = GetHash.Main.ToHexadecimal(GetHash.Main.ComputeHash(selectedPath));
}
or without using GetHash.dll:
using System;
using System.Text;
using System.IO;

namespace MovieHasher
{
    class Program
    {
        // Number of bytes hashed at the start and at the end of the file.
        private const int ChunkSize = 65536;

        /// <summary>Opens <paramref name="filename"/> and returns its hash as 8 big-endian bytes.</summary>
        private static byte[] ComputeMovieHash(string filename)
        {
            byte[] result;
            using (Stream input = File.OpenRead(filename))
            {
                result = ComputeMovieHash(input);
            }
            return result;
        }

        /// <summary>
        /// Computes size + sum of the first 64 KiB + sum of the last 64 KiB as a
        /// wrapping 64-bit value. The stream must be seekable; it is closed on return.
        /// </summary>
        private static byte[] ComputeMovieHash(Stream input)
        {
            long streamsize = input.Length;
            long lhash = streamsize;

            // The sum is meant to wrap around; 'unchecked' keeps that true even
            // when the project is compiled with overflow checking enabled.
            unchecked
            {
                lhash += SumChunk(input);
                input.Position = Math.Max(0, streamsize - ChunkSize);
                lhash += SumChunk(input);
            }

            input.Close();
            byte[] result = BitConverter.GetBytes(lhash);
            // BitConverter is host-endian; on little-endian hosts the reversal
            // yields most-significant-byte-first order for printing.
            Array.Reverse(result);
            return result;
        }

        /// <summary>
        /// Sums up to 64 KiB of 64-bit words from the stream's current position.
        /// Stops at the first word that cannot be read completely.
        /// </summary>
        private static long SumChunk(Stream input)
        {
            byte[] buffer = new byte[sizeof(long)];
            long sum = 0;
            for (int i = 0; i < ChunkSize / sizeof(long); i++)
            {
                // Stream.Read may deliver fewer bytes than requested, so keep
                // reading until the word is complete or the stream ends.
                int filled = 0;
                while (filled < buffer.Length)
                {
                    int n = input.Read(buffer, filled, buffer.Length - filled);
                    if (n <= 0) break;
                    filled += n;
                }
                if (filled < buffer.Length) break;
                unchecked { sum += BitConverter.ToInt64(buffer, 0); }
            }
            return sum;
        }

        /// <summary>Formats bytes as lower-case hex, two digits per byte.</summary>
        private static string ToHexadecimal(byte[] bytes)
        {
            StringBuilder hexBuilder = new StringBuilder();
            for (int i = 0; i < bytes.Length; i++)
            {
                hexBuilder.Append(bytes[i].ToString("x2"));
            }
            return hexBuilder.ToString();
        }

        static void Main(string[] args)
        {
            byte[] moviehash = ComputeMovieHash(@"C:\test.avi");
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash));
        }
    }
}
If you get overflow error read this.
VB.Net
Imports System
Imports System.Text
Imports System.IO

'Note: you must remove integer overflow checking.
Namespace MovieHasher
    Class Program
        ' Size of one hashed word and of each hashed chunk, in bytes.
        Private Const WordSize As Integer = 8
        Private Const ChunkSize As Integer = 65536

        ' Opens the file and returns its hash as 8 bytes, most significant first.
        Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte()
            Dim result As Byte()
            Using input As Stream = File.OpenRead(filename)
                result = ComputeMovieHash(input)
            End Using
            Return result
        End Function

        ' Computes size + sum of first 64 KiB + sum of last 64 KiB.
        ' Declared Shared so the Shared overload above can call it.
        ' The stream is closed before returning.
        Private Shared Function ComputeMovieHash(ByVal input As Stream) As Byte()
            Dim streamsize As Long = input.Length
            Dim lhash As System.Int64 = streamsize

            lhash += SumChunk(input)
            input.Position = Math.Max(0L, streamsize - ChunkSize)
            lhash += SumChunk(input)

            input.Close()
            Dim result As Byte() = BitConverter.GetBytes(lhash)
            Array.Reverse(result)
            Return result
        End Function

        ' Sums up to ChunkSize bytes of 64-bit words from the current stream position.
        Private Shared Function SumChunk(ByVal input As Stream) As Long
            Dim buffer As Byte() = New Byte(WordSize - 1) {}
            Dim total As Long = 0
            Dim i As Integer = 0
            While i < ChunkSize \ WordSize AndAlso (input.Read(buffer, 0, WordSize) > 0)
                i += 1
                total += BitConverter.ToInt64(buffer, 0)
            End While
            Return total
        End Function

        ' Formats bytes as lower-case hex, two digits per byte.
        Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String
            Dim hexBuilder As New StringBuilder()
            For i As Integer = 0 To bytes.Length - 1
                hexBuilder.Append(bytes(i).ToString("x2"))
            Next
            Return hexBuilder.ToString()
        End Function

        Private Shared Sub Main(ByVal args As String())
            Dim moviehash As Byte() = ComputeMovieHash("C:\test.avi")
            Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash))
        End Sub
    End Class
End Namespace
Python
import struct, os

def hashFile(name):
    """Return the OpenSubtitles hash of the file `name` as 16 hex digits.

    The hash is the file size plus the sum of the little-endian 64-bit words
    of the first and of the last 64 KiB, truncated to 64 bits.

    Returns the string "SizeError" when the file is smaller than 128 KiB and
    "IOError" when the file cannot be opened or read.
    """
    longlongformat = '<q'  # little-endian long long
    bytesize = struct.calcsize(longlongformat)
    chunksize = 65536
    # integer division so this also works on Python 3
    chunkformat = '<%dq' % (chunksize // bytesize)

    try:
        # 'with' closes the file on every path, including the early return
        with open(name, "rb") as f:
            filesize = os.path.getsize(name)
            if filesize < chunksize * 2:
                return "SizeError"

            filehash = filesize
            for offset in (0, max(0, filesize - chunksize)):
                f.seek(offset, 0)
                for l_value in struct.unpack(chunkformat, f.read(chunksize)):
                    filehash += l_value
                filehash = filehash & 0xFFFFFFFFFFFFFFFF  # to remain as 64bit number

        return "%016x" % filehash

    except(IOError):
        return "IOError"
Delphi
This is just a quick conversion of Gabest's original C code. Anyone who can come up with a cleaner code, please feel free to do so and post here.
// Returns the hash of the file as 16 hex digits, or '' when the file does not exist.
// The hash is: file size + sum of the 64-bit words of the first 64 KiB
// + sum of the 64-bit words of the last 64 KiB.
// NOTE(review): the additions are expected to wrap; presumably this needs
// overflow checking ({$Q+}) to be off in the enclosing unit - confirm.
function CalcGabestHash(const fname: string): string;
const
  ChunkBytes = 65536;
  ChunkWords = ChunkBytes div SizeOf(Int64);
var
  word64 : Int64;   // read buffer; an Int64 is 8 bytes regardless of the Char size
  hash   : Int64;
  aStream: TFileStream;

  // Adds up to ChunkWords complete 64-bit words from the current position.
  procedure AddChunk;
  var
    i: integer;
  begin
    i := 0;
    while (i < ChunkWords) and (aStream.Read(word64, SizeOf(word64)) = SizeOf(word64)) do
    begin
      hash := hash + word64;
      i := i + 1;
    end;
  end;

begin
  result := '';
  if not FileExists(fname) then Exit;

  aStream := TFileStream.Create(fName, fmOpenRead or fmShareDenyNone);
  try
    hash := aStream.Size;

    AddChunk;

    // Files shorter than one chunk are read again from the start instead of
    // seeking before the beginning of the file.
    if aStream.Size > ChunkBytes then
      aStream.Position := aStream.Size - ChunkBytes
    else
      aStream.Position := 0;
    AddChunk;
  finally
    aStream.Free;
  end;

  result := Format('%.16x',[hash]);
end;
alternate version by TRP
// Computes the hash of a stream: its size plus the sum of the 64-bit words of
// the first and of the last 64 KiB. Returns 0 when the stream is shorter than
// one 64 KiB part.
function CalcGabestHash(const Stream: TStream): Int64; overload;
const
  HashPartSize = 1 shl 16; // 64 KiB

  // Reads one HashPartSize block from the current position and adds every
  // Int64 it contains to Hash. ReadBuffer raises if the block cannot be read completely.
  procedure UpdateHashFromStream(const Stream: TStream; var Hash: Int64); inline;
  var
    buffer: Array[0..HashPartSize div SizeOf(Int64) - 1] of Int64;
    i     : integer;
  begin
    Stream.ReadBuffer(buffer[0], SizeOf(buffer));
    for i := Low(buffer) to High(buffer) do
      Inc(Hash, buffer[i]);
  end;

begin
  result:= Stream.Size;

  if result < HashPartSize then
  begin
    // stream too small return invalid hash
    result:= 0;
    exit;
  end;

  // first 64 KiB
  Stream.Position:= 0;
  UpdateHashFromStream(Stream, result);

  // last 64 KiB
  Stream.Seek(-HashPartSize, soEnd);
  UpdateHashFromStream(Stream, result);

  // use "IntToHex(result, 16);" to get a string and "StrToInt64('$' + hash);" to get your Int64 back
end;

// Opens FileName for reading and returns the hash of its content.
function CalcGabestHash(const FileName: TFileName): Int64; overload;
var
  stream: TStream;
begin
  stream:= TFileStream.Create(FileName, fmOpenRead or fmShareDenyWrite);
  try
    result:= CalcGabestHash(stream);
  finally
    stream.Free;
  end;
end;
Lua
-- will produce a correct hash regardless of architecture (big vs little endian)
--
-- movieHash(fileName) returns two values: the hash as 16 hex digits and the
-- file size. The 64-bit sum is kept as two 32-bit halves (lo, hi) so it stays
-- exact with Lua numbers. Raises an error if the file cannot be opened.
local function movieHash(fileName)
  local fil = assert(io.open(fileName, "rb"))
  local lo, hi = 0, 0

  -- propagate the carry from lo into hi and drop anything above 64 bits
  local function normalize()
    while lo >= 4294967296 do
      lo = lo - 4294967296
      hi = hi + 1
    end
    while hi >= 4294967296 do
      hi = hi - 4294967296
    end
  end

  -- add up to 8192 little-endian 64-bit words from the current position;
  -- stops at the first incomplete word instead of indexing a nil result
  local function addChunk()
    for i = 1, 8192 do
      local word = fil:read(8)
      if not word or #word < 8 then break end
      local a, b, c, d, e, f, g, h = word:byte(1, 8)
      lo = lo + a + b*256 + c*65536 + d*16777216
      hi = hi + e + f*256 + g*65536 + h*16777216
      normalize()
    end
  end

  addChunk()

  -- determine the size first, then clamp the tail offset so files shorter
  -- than 64 KiB do not cause a failing negative seek
  local size = fil:seek("end")
  fil:seek("set", math.max(0, size - 65536))
  addChunk()

  lo = lo + size
  normalize()

  fil:close()
  return string.format("%08x%08x", hi, lo), size
end

print("breakdance.avi:")
print(movieHash("breakdance.avi"))
print("8e245d9679d31e12 <- should be")
print("")
print("dummy.rar:")
print(movieHash("dummy.rar"))
print("61f7751fc2a72bfb <- should be according to wiki")
print("2a527d74d45f5b1b <- what other hash tools actually report")
RealBasic/Xojo
Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles
// Fragment of a larger routine: f, routine, myhash and bytesizestr are not declared here.
// When routine = "video" and the file is at least 65536 bytes: hash = file length plus the
// little-endian UINT64 values of the first and the last 65536 bytes, stored in myhash as lower-case hex.
// When routine = "subtitle": myhash is the lower-case hex MD5 of the whole file.
// NOTE(review): the video branch does not appear to zero-pad the hex string to 16 digits - confirm.
dim b as BinaryStream dim mb as MemoryBlock dim hash,bytesize as UINT64 dim i, x, chunksize, filelen, difference as integer hash = 0 //Reset Hash difference = 0 if f <> nil and f.Exists then b= f.OpenAsBinaryFile hash = b.Length bytesize = b.Length bytesizestr = str(bytesize) if bytesize >= 65536 and routine = "video" then chunksize = 65536 mb = b.Read(65536) mb.LittleEndian = True for i= 0 to chunksize -1 step 8 hash = hash+ mb.UINT64Value(i) next b.Position = max(b.Length-chunksize, 0) mb= b.Read(chunksize) mb.LittleEndian = True for i= 0 to chunksize -1 step 8 hash = hash+ mb.UINT64Value(i) next myhash = Lowercase(str(hex(hash))) elseif routine = "subtitle" then dim c,result as string mb = md5(b.Read(b.Length)) mb.LittleEndian = True for i = 0 to mb.size-1 x = mb.byte( i ) c = right( "00"+hex( x ), 2 ) result = result + c next result = lowercase( result ) myhash = result end
PHP 4/5
/**
 * Computes the OpenSubtitles hash of a file.
 *
 * The 64-bit value is kept as four 16-bit words (index 0 = least significant)
 * so it can be added without native 64-bit unsigned arithmetic.
 *
 * @param string $file path of the file to hash
 * @return string|false 16 hex digits, or false when the file cannot be opened
 */
function OpenSubtitlesHash($file)
{
    $handle = fopen($file, "rb");
    if ($handle === false)
    {
        return false;
    }
    $fsize = filesize($file);

    // Bits 32..63 of the size only exist (and can only be shifted safely)
    // when PHP integers are 64 bits wide; they matter for files >= 4 GiB.
    $wide = PHP_INT_SIZE >= 8;
    $hash = array(3 => $wide ? ($fsize >> 48) & 0xFFFF : 0,
                  2 => $wide ? ($fsize >> 32) & 0xFFFF : 0,
                  1 => ($fsize >> 16) & 0xFFFF,
                  0 => $fsize & 0xFFFF);

    // first 64 KiB: 8192 words of 8 bytes
    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }

    // last 64 KiB (from offset 0 when the file is shorter than that)
    $offset = $fsize - 65536;
    fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET);

    for ($i = 0; $i < 8192; $i++)
    {
        $tmp = ReadUINT64($handle);
        $hash = AddUINT64($hash, $tmp);
    }

    fclose($handle);
    return UINT64FormatHex($hash);
}
/**
 * Reads 8 bytes from $handle and returns them as four little-endian
 * 16-bit words, index 0 being the least significant word.
 */
function ReadUINT64($handle)
{
    $words = unpack("va/vb/vc/vd", fread($handle, 8));
    return array($words["a"], $words["b"], $words["c"], $words["d"]);
}
/**
 * Adds two 64-bit values given as arrays of four 16-bit words
 * (index 0 = least significant). A carry out of the top word is discarded.
 */
function AddUINT64($a, $b)
{
    $sum = array(0 => 0, 1 => 0, 2 => 0, 3 => 0);
    $carry = 0;

    foreach (array(0, 1, 2, 3) as $i)
    {
        $total = $a[$i] + $b[$i] + $carry;
        $overflowed = $total > 0xffff;

        $sum[$i] += $overflowed ? ($total & 0xffff) : $total;
        $carry = $overflowed ? 1 : 0;
    }

    return $sum;
}
/**
 * Formats a 64-bit value stored as four 16-bit words (index 0 = least
 * significant) as 16 lower-case hex digits, most significant word first.
 */
function UINT64FormatHex($n)
{
    $hex = "";
    for ($i = 3; $i >= 0; $i--)
    {
        $hex .= sprintf("%04x", $n[$i]);
    }
    return $hex;
}
Perl
#!/usr/bin/perl
use strict;
use warnings;

print OpenSubtitlesHash('breakdance.avi');

# Computes the OpenSubtitles hash of a file and returns it as 16 hex digits.
# The 64-bit value is handled as four 16-bit words, least significant first.
sub OpenSubtitlesHash {
    my $filename = shift or die("Need video filename");

    open my $handle, "<", $filename or die $!;
    binmode $handle;

    my $fsize = -s $filename;

    # Split the size into four 16-bit words using plain arithmetic, so that
    # sizes of 4 GiB and more keep their upper bits (a 32-bit shift would
    # lose them on perls without 64-bit integers).
    my @size_words;
    my $rest = $fsize;
    for (0 .. 3) {
        my $quotient = int($rest / 65536);
        push @size_words, $rest - $quotient * 65536;
        $rest = $quotient;
    }
    my $hash = [@size_words];

    # first 64 KiB
    $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

    # last 64 KiB (from offset 0 when the file is shorter than that)
    my $offset = $fsize - 65536;
    seek($handle, $offset > 0 ? $offset : 0, 0) or die $!;

    $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192);

    close $handle or die $!;
    return UINT64FormatHex($hash);
}

# Reads 8 bytes and returns them as four little-endian 16-bit words.
sub ReadUINT64 {
    read($_[0], my $u, 8);
    return [unpack("vvvv", $u)];
}

# Adds two values given as four 16-bit words each; the final carry is dropped.
sub AddUINT64 {
    my $o = [0,0,0,0];

    my $carry = 0;
    for my $i (0..3) {
        if (($_[0]->[$i] + $_[1]->[$i] + $carry) > 0xffff ) {
            $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry) & 0xffff;
            $carry = 1;
        } else {
            $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry);
            $carry = 0;
        }
    }
    return $o;
}

# Formats the four words as 16 hex digits, most significant word first.
sub UINT64FormatHex {
    return sprintf("%04x%04x%04x%04x", $_[0]->[3], $_[0]->[2], $_[0]->[1], $_[0]->[0]);
}
Ruby
This is a quick translation/transliteration of the Perl script.
class Hasher def open_subtitles_hash(filename) raise "Need video filename" unless filename fh = File.open(filename) fsize = File.size(filename) hash = [fsize & 0xffff, (fsize >> 16) & 0xffff, 0, 0] 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } offset = fsize - 65536 fh.seek([0,offset].max, 0) 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } fh.close return uint_64_format_hex(hash) end def read_uint_64(stream) stream.read(8).unpack("vvvv") end def add_unit_64(hash, input) res = [0,0,0,0] carry = 0 hash.zip(input).each_with_index do |(h,i),n| sum = h + i + carry if sum > 0xffff res[n] += sum & 0xffff carry = 1 else res[n] += sum carry = 0 end end return res end def uint_64_format_hex(hash) sprintf("%04x%04x%04x%04x", *hash.reverse) end end if __FILE__ == $0 require 'test/unit' class HashTester < Test::Unit::TestCase def setup @h = Hasher.new end def test_test_file_hash assert_equal("8e245d9679d31e12", @h.open_subtitles_hash('breakdance.avi')) end end end
Another more "rubyesque" implementation.
module MovieHasher CHUNK_SIZE = 64 * 1024 # in bytes def self.compute_hash(filename) filesize = File.size(filename) hash = filesize # Read 64 kbytes, divide up into 64 bits and add each # to hash. Do for beginning and end of file. File.open(filename, 'rb') do |f| # Q = unsigned long long = 64 bit f.read(CHUNK_SIZE).unpack("Q*").each do |n| hash = hash + n & 0xffffffffffffffff # to remain as 64 bit number end f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET) # And again for the end of the file f.read(CHUNK_SIZE).unpack("Q*").each do |n| hash = hash + n & 0xffffffffffffffff end end sprintf("%016x", hash) end end if __FILE__ == $0 require 'test/unit' class MovieHasherTest < Test::Unit::TestCase def test_compute_hash assert_equal("8e245d9679d31e12", MovieHasher::compute_hash('breakdance.avi')) end def test_compute_hash_large_file assert_equal("61f7751fc2a72bfb", MovieHasher::compute_hash('dummy.bin')) end end end
Haskell
-- shortsum opens the file in binary mode, reads chunksize (0x10000) bytes from the start and
-- chunksize bytes from chunksize before the end, and folds little-endian Word64 values from both
-- chunks onto the file size. main hashes the file named by the first command-line argument and
-- prints the result through hex, after reversing the little-endian bytes of the Word64.
-- NOTE(review): Data.Bits.Utils and Data.Hex are not base modules; presumably extra packages are needed - confirm.
import IO(bracket) import System.Environment(getArgs) import System.IO(openBinaryFile,hClose,hFileSize,hSeek,IOMode(ReadMode),SeekMode(AbsoluteSeek,SeekFromEnd)) import qualified Data.ByteString.Lazy as L(hGet,unpack) import Data.Binary.Get(runGet,getWord64le) import Data.Binary.Put(runPut,putWord64le) import Data.Word(Word64) import Control.Monad(foldM) import Data.Bits.Utils(w82s) import Data.Hex(hex) shortsum :: FilePath -> IO Word64 shortsum filename = bracket (openBinaryFile filename ReadMode) hClose $ \h -> do fs <- hFileSize h hSeek h AbsoluteSeek 0 ; begin <- L.hGet h chunksize hSeek h SeekFromEnd (-(toInteger chunksize)) ; end <- L.hGet h chunksize return $ (flip runGet $ begin) $ chunksum $ (flip runGet $ end) (chunksum . fromInteger $ fs) where chunksize = 0x10000 chunksum n = foldM (\a _ -> getWord64le >>= return . (+a)) n [1..(chunksize`div`8)] main :: IO () main = do args <- getArgs let fn = head $ args p <- shortsum fn putStrLn $ "The hash of file " ++ fn ++ ": " ++ (hex $ w82s $ reverse (L.unpack $ runPut $ putWord64le p))
AutoIT
; _Compute_Hash($sFileName): opens the file with FileOpen mode 16 and accumulates, in a "uint64"
; DllStruct, the file size plus 8-byte reads from the first and the last 65536 bytes.
; Sets @error to 1 when the file cannot be opened and to 2 when it is smaller than 2 * 65536 bytes
; (returning 0 in both cases); otherwise returns the string produced by _HEX.
; _HEX($iValue) formats the value as two 8-digit hex fields (upper part = $iValue / 4294967296).
; NOTE(review): the "%#.8x" conversion presumably prepends "0x" to the result - confirm the
; output is the plain 16-character hash.
#cs Hash code is based on Media Player Classic. It calculates: size + 64bit checksum of the first and last 64k (even if they overlap because the file is smaller than 128k). Authors: Authenticity & Emanuel "Datenshi" Lindgren @ AutoIT Forums. AutoIT v3.3.2.0 #ce Func _Compute_Hash($sFileName) Local $hFile, $tRet, $tTmp, $iFileSize, $iRead, $iChunk, $iI $hFile = FileOpen($sFileName, 16) If Not $hFile Then Return SetError(1, 0, 0) $iFileSize = FileGetSize($sFileName) $iChunk = 65536 If $iFileSize < $iChunk * 2 Then FileClose($hFile) Return SetError(2, 0, 0) EndIf $tRet = DllStructCreate("uint64") $tTmp = DllStructCreate("uint64") DllStructSetData($tRet, 1, $iFileSize) For $iI = 0 To ($iChunk / 8) - 1 DllStructSetData($tTmp, 1, FileRead($hFile, 8)) DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1)) Next FileSetPos($hFile, $iFileSize - $iChunk, 0) For $iI = 0 To ($iChunk / 8) - 1 DllStructSetData($tTmp, 1, FileRead($hFile, 8)) DllStructSetData($tRet, 1, DllStructGetData($tRet, 1) + DllStructGetData($tTmp, 1)) Next FileClose($hFile) Return SetError(0, 0, _HEX(DllStructGetData($tRet, 1))) EndFunc Func _HEX($iValue) Return StringFormat("%#.8x%.8x", $iValue / 4294967296, $iValue) EndFunc
FoxPro
* Computes the hash of the file named by parameter cfile ('breakdance.avi' is used when cfile
* is not a character value) and returns it as a hex string built by buildhex.
* The running sum is held as an 8-character string, most significant byte first: hashsize
* encodes the file size, each 8-byte word read from the file is byte-reversed with reverse,
* and adint64 adds two such strings byte by byte with carry.
* 8192 words are read from the start of the file and 8192 from 65536 bytes before its end.
* The "IF glTalk" sections only print diagnostics when SET TALK is ON.
* dec2basx converts a number to a string in the given base (used here with base 16).
PARAMETERS cfile PRIVATE ALL ******* * enviroment setup ******* cret='' glTalk=(SET("TALK")="ON") IF vartype(cfile)<>'C' cfile='breakdance.avi' ENDIF IF glTalk ? cfile ? cfile='' ? LEN(cfile) endif nfile=FOPEN(cfile) nsize=FSEEK(nfile,0,2) IF gltalk ? cfile ? 'size?>' ?? nsize endif FSEEK(nfile,0,0) ****** * length reencode to 64 uint ***** chash=hashsize(nsize) cempty=chr(0) cret='' IF LEN(chash)<8 FOR i=1 TO 8-LEN(chash) cret=cret+cempty ENDFOR ENDIF cret=cret+chash nSum=0 ******* * first 64kb ****** FOR i=1 TO 8192 cpom=FREAD(nfile,8) cpom=reverse(cpom) nSum=nSum+LEN(cpom) IF gltalk do buildhex WITH cret ?? '+' DO buildhex WITH cpom ? '=' ENDIF cret=adint64(cret,cpom) ENDFOR ******* * last 64kb ******* FSEEK(nfile,-65536,2) FOR i=1 TO 8192 cpom=FREAD(nfile,8) cpom=reverse(cpom) cret=adint64(cret,cpom) nSum=nSum+LEN(cpom) ENDFOR FCLOSE(nfile) **** * build hexa **** IF gltalk DO buildhex WITH cret ? ? 'Spocital som' ?? nSum ENDIF RETURN buildhex(cret) FUNCTION reverse PARAMETERS cstring PRIVATE ALL cret='' FOR i=1 TO LEN(cstring) cret=cret+SUBSTR(cstring,LEN(cstring)-i+1,1) ENDFOR RETURN cret FUNCTION buildhex PARAMETERS cstring,lkam PRIVATE ALL gcTalk=SET("TALK") cret='' FOR i=1 TO LEN(cstring) cpom=dec2basx(ASC(SUBSTR(cstring,i,1)),16) IF LEN(cpom)<2 cout='0'+cpom cpom=cout ENDIF cret=cret+cpom IF gcTALK="ON" ?? cpom ?? ':' ENDIF ENDFOR RETURN cret FUNCTION adint64 PARAMETERS cstring1,cstring2 PRIVATE ALL DIMENSION car (8,1) as Character *** * 8 bytes both *** nincrement=0 cret='' FOR i=8 TO 1 STEP -1 nfir=ASC(SUBSTR(cstring1,i,1)) nsec=ASC(SUBSTR(cstring2,i,1)) nout=nincrement+nfir+nsec IF nout>255 nincrement=INT(nout/256) nout=nout-(nincrement*256) ELSE nincrement=0 ENDIF car(i)=CHR(nout) ENDFOR FOR i=1 TO 8 cret=cret+car(i) ENDFOR RETURN cret FUNCTION hashsize PARAMETERS ncislo PRIVATE ALL cret='' creverse='' DO WHILE .t. 
npom=INT(ncislo/256) npom2=ncislo-npom*256 creverse=creverse+CHR(npom2) ncislo=npom IF ncislo=0 EXIT ENDIF ENDDO FOR i=1 TO LEN(creverse) cret=cret+SUBSTR(creverse,LEN(creverse)-i+1,1) ENDFOR RETURN cret *.............................................................................. * Function: DEC2BASX * Purpose: Convert whole number 0-?, to base 2-16 * * Parameters: nTempNum - number to convert (0-9007199254740992) * base - base to convert to i.e., 2 4 8 16... * returns: string * Usage: cresult=Dec2BasX(nParm1, nParm2) * STORE Dec2BasX(255, 16) TO cMyString &&... cMyString contains 'ff' *.............................................................................. FUNCTION dec2basx PARAMETERS nTempNum, nNewBase STORE 0 TO nWorkVal,; remainder,; dividend,; nextnum,; digit nWorkVal = nTempNum ret_str = '' DO WHILE .T. digit = MOD(nWorkVal, nNewBase) dividend = nWorkVal / nNewBase nWorkVal = INT(dividend) DO CASE CASE digit = 10 ret_str = 'a' + ret_str CASE digit = 11 ret_str = 'b' + ret_str CASE digit = 12 ret_str = 'c' + ret_str CASE digit = 13 ret_str = 'd' + ret_str CASE digit = 14 ret_str = 'e' + ret_str CASE digit = 15 ret_str = 'f' + ret_str OTHERWISE ret_str = LTRIM(STR(digit)) + ret_str ENDCASE IF nWorkVal = 0 EXIT ENDIF ( nWorkVal = 0 ) ENDDO ( .T. ) RETURN ret_str
Powershell 2.0
You can use GetHash.dll.
http://trac.opensubtitles.org/projects/opensubtitles/attachment/wiki/HashSourceCodes/GetHash.dll
Use Example:
Add-Type -Path "GetHash.dll"

# Returns the hexadecimal hash of the file at $path, computed by GetHash.dll.
function MovieHash([string]$path) {
    $rawHash = [GetHash.Main]::ComputeHash($path)
    [GetHash.Main]::ToHexadecimal($rawHash)
}

MovieHash $filename
or without using GetHash.dll:
# Number of bytes hashed at the start and at the end of the file.
$dataLength = 65536

# Adds two UInt64 values modulo 2^64. The addition is done in Decimal so that
# it cannot overflow before the modulo is applied.
function LongSum([UInt64]$a, [UInt64]$b) {
    [UInt64](([Decimal]$a + $b) % ([Decimal]([UInt64]::MaxValue) + 1))
}

# Sums up to $dataLength bytes of 64-bit words from the stream's current position.
function StreamHash([IO.Stream]$stream) {
    $hashLength = 8
    [UInt64]$lhash = 0
    [byte[]]$buffer = New-Object byte[] $hashLength
    $i = 0
    while ( ($i -lt ($dataLength / $hashLength)) -and ($stream.Read($buffer,0,$hashLength) -gt 0) ) {
        $i++
        $lhash = LongSum $lhash ([BitConverter]::ToUInt64($buffer,0))
    }
    $lhash
}

# Returns the hash of the file at $path as 16 upper-case hex digits.
function MovieHash([string]$path) {
    $stream = $null
    try {
        $stream = [IO.File]::OpenRead($path)
        [UInt64]$lhash = $stream.Length
        $lhash = LongSum $lhash (StreamHash $stream)
        $stream.Position = [Math]::Max(0L, $stream.Length - $dataLength)
        $lhash = LongSum $lhash (StreamHash $stream)
        # X16: always 16 digits, zero-padded on the left
        "{0:X16}" -f $lhash
    }
    finally {
        # $stream is still $null when OpenRead failed
        if ($stream -ne $null) { $stream.Close() }
    }
}

MovieHash $filename
MASM
; Calc_Hash: computes the movie hash of the file named by pFile and writes it,
; formatted with HashFormat, into the buffer at pBuf.
; The 64-bit sum is kept in ebx:edx (high:low) and starts at the 32-bit file size.
; Returns: eax = INVALID_HANDLE_VALUE if the file cannot be opened,
;          otherwise eax = movie file size (low 32 bits).
Calc_Hash proc uses esi ebx edx pFile:dword, pBuf:dword
    LOCAL hFile:dword, fSize:dword, NBR:dword, pMem:dword

    invoke CreateFile,pFile,GENERIC_ALL,0,0,OPEN_EXISTING,0,0
    mov hFile,eax
    cmp eax,INVALID_HANDLE_VALUE
    jz @Error

    invoke SetFilePointer,hFile,0,NULL,FILE_END     ; eax = file size
    mov fSize,eax
    push eax                                        ; keep the size on the stack

    invoke GlobalAlloc,GPTR,131072                  ; room for both 64 KB parts
    mov pMem,eax

    invoke SetFilePointer,hFile,0,NULL,FILE_BEGIN
    invoke ReadFile,hFile,pMem,65536,addr NBR,NULL  ; first 64 KB

    sub fSize,65536
    add pMem,65536
    invoke SetFilePointer,hFile,fSize,NULL,FILE_BEGIN
    invoke ReadFile,hFile,pMem,65536,addr NBR,NULL  ; last 64 KB
    sub pMem,65536

    mov esi,pMem
    mov ecx,131072
    pop eax
    mov edx,eax                                     ; low dword starts at the file size
    xor ebx,ebx                                     ; high dword must start at zero
    push eax
@@:
    add edx,[esi]
    adc ebx,[esi+4]
    add esi,8
    sub ecx,8
    jnz @B

    push edx                                        ; extra arguments for wsprintf
    push ebx
    invoke wsprintf,pBuf,addr HashFormat
    pop eax                                         ; discard the two pushed arguments
    pop eax

    invoke CloseHandle,hFile
    invoke GlobalFree,pMem
    pop eax                                         ; return the file size in eax
@Error:
    ; If error eax returns (INVALID_HANDLE_VALUE)
    ; Hash value is copied to pBuf
    ; eax returns Movie Filesize
    ret
Calc_Hash endp
Objective-C
This is implementation of hash for Objective-C for Mac by subsmarine.com
OSHashAlgorithm.m
#import "OSHashAlgorithm.h"

@implementation OSHashAlgorithm

// Formats a hash as 16 zero-padded hex digits.
// stringWithFormat: already returns an autoreleased object, so it must not
// be autoreleased a second time.
+(NSString*)stringForHash:(uint64_t)hash
{
    return [NSString stringWithFormat:@"%016qx", hash];
}

// Hashes the file at the given path. Both fields are 0 if it cannot be opened
// or is smaller than one 64 KiB chunk.
+(VideoHash)hashForPath:(NSString*)path
{
    VideoHash hash;
    hash.fileHash =0;
    hash.fileSize =0;

    NSFileHandle *readFile = [NSFileHandle fileHandleForReadingAtPath:path];
    hash = [OSHashAlgorithm hashForFile:readFile];
    [readFile closeFile];
    return hash;
}

// Hashes the file at the given URL; same result convention as hashForPath:.
+(VideoHash)hashForURL:(NSURL*)url
{
    VideoHash hash;
    hash.fileHash =0;
    hash.fileSize =0;

    NSFileHandle *readfile = [NSFileHandle fileHandleForReadingFromURL:url error:NULL];
    hash = [OSHashAlgorithm hashForFile:readfile];
    [readfile closeFile];   // release the descriptor, as hashForPath: does
    return hash;
}

// Computes size + sum of the first 64 KiB + sum of the last 64 KiB.
// The data is summed as host-order uint64_t values.
+(VideoHash)hashForFile:(NSFileHandle*)handle
{
    VideoHash retHash;
    retHash.fileHash =0;
    retHash.fileSize =0;

    if( handle == nil )
        return retHash;

    const NSUInteger CHUNK_SIZE=65536;
    NSData *fileDataBegin, *fileDataEnd;
    uint64_t hash=0;

    fileDataBegin = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE];
    [handle seekToEndOfFile];
    unsigned long long fileSize = [handle offsetInFile];
    // Files shorter than one chunk are rejected, which also guarantees that
    // both NSData objects below hold a full chunk.
    if(fileSize < CHUNK_SIZE )
        return retHash;

    [handle seekToFileOffset:fileSize-CHUNK_SIZE ];
    fileDataEnd = [handle readDataOfLength:(NSUInteger)CHUNK_SIZE];

    //
    // Calculate hash
    //

    // 1st. File size
    hash += fileSize;

    // 2nd. Begining data block
    uint64_t * data_bytes= (uint64_t*)[fileDataBegin bytes];
    for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ )
        hash+=data_bytes[i];

    // 3rd. Ending data block
    data_bytes= (uint64_t*)[fileDataEnd bytes];
    for( int i=0; i< CHUNK_SIZE/sizeof(uint64_t); i++ )
        hash+= data_bytes[i];

    retHash.fileHash = hash;
    retHash.fileSize = fileSize;

    return retHash;
}

@end
OSHashAlgorithm.h
/* VideoHash pairs a 64-bit file hash with the file size it was computed from.
   OSHashAlgorithm declares class methods that produce a VideoHash from a path,
   a URL or an open NSFileHandle, and one that turns a hash into an NSString. */
#import <Cocoa/Cocoa.h> typedef struct { uint64_t fileHash; uint64_t fileSize; } VideoHash; @interface OSHashAlgorithm : NSObject { } +(VideoHash)hashForPath:(NSString*)path; +(VideoHash)hashForURL:(NSURL*)url; +(VideoHash)hashForFile:(NSFileHandle*)handle; +(NSString*)stringForHash:(uint64_t)hash; @end
Vala
// Returns the file size plus the little-endian 64-bit words of the first and
// of the last 64 kB of the file. Any error aborts through error().
public uint64 hash(File file) {
    try {
        uint64 total;
        int words_per_chunk = (int) (65536 / sizeof(uint64));

        // start from the file size
        var info = file.query_info("*", FileQueryInfoFlags.NONE);
        total = info.get_size();

        // first 64kB
        var stream = new DataInputStream(file.read());
        stream.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
        for (int left = words_per_chunk; left > 0; left--) {
            total += stream.read_uint64();
        }

        // last 64kB, read through a second stream opened on the same file
        stream = new DataInputStream(file.read());
        stream.set_byte_order(DataStreamByteOrder.LITTLE_ENDIAN);
        stream.skip((size_t)(info.get_size() - 65536));
        for (int left = words_per_chunk; left > 0; left--) {
            total += stream.read_uint64();
        }

        return total;
    }
    catch (Error e) {
        error("%s", e.message);
    }
}

// Prints the hash of the two test files, stopping at the first missing one.
int main () {
    string[] names = { "breakdance.avi", "dummy.bin" };

    foreach (string name in names) {
        var file = File.new_for_path (name);

        if (!file.query_exists ()) {
            stderr.printf ("File '%s' doesn't exist.\n", file.get_path ());
            return 1;
        }

        stdout.printf("%016llx\n", hash(file));
    }

    return 0;
}
Build with: valac --pkg gio-2.0 hash.vala
AutoHotKey
; GetOpenSubtitlesHash(FilePath) returns "" when the file is smaller than 131072 bytes or at least
; 9000000000 bytes. Otherwise it reads the first and the last 64 KB into one 131072-byte buffer,
; adds the low and high 32-bit halves of every 8-byte word to separate accumulators that start at
; the two halves of the file size, moves the overflow of the low accumulator into the high one,
; and returns the combined 64-bit value as 16 upper-case hex digits.
; The script itself hashes "Breakdance.avi" and shows the result in a message box.
#NoEnv SetBatchLines, -1 ; http://www.opensubti.../breakdance.avi ; OpenSubtitles Hash = 8E245D9679D31E12 FilePath := "Breakdance.avi" MsgBox, 0, OpenSubtitlesHash, % Filepath . ":`r`n" . GetOpenSubtitlesHash(FilePath) ExitApp ; ================================================================================================== GetOpenSubtitlesHash(FilePath) { ; http://trac.opensubt...HashSourceCodes Static X := { 0: "0", 1: "1", 2: "2", 3: "3", 4: "4", 5: "5", 6: "6", 7: "7" , 8: "8", 9: "9", 10: "A", 11: "B", 12: "C", 13: "D", 14: "E", 15: "F"} ; Check the file size --------------------------------------------------------------------------- ; 9000000000 > $moviebytesize >= 131072 bytes (changed > to >= for the lower limit) FileGetSize, FileSize, %FilePath% If (FileSize < 131072) || (FileSize >= 9000000000) Return "" ; Read the first and last 64 KB ----------------------------------------------------------------- VarSetCapacity(FileParts, 131072) ; allocate sufficient memory File := FileOpen(FilePath, "r") ; open the file File.Seek(0, 0) ; set the file pointer (just for balance) File.RawRead(FileParts, 65536) ; read the first 64 KB File.Seek(-65536, 2) ; set the file pointer for the last 64 KB File.RawRead(&FileParts + 65536, 65536) ; read the last 64 KB File.Close() ; got all we need, so the file can be closed ; Now calculate the hash using two UINTs for the low- and high-order parts of an UINT64 --------- LoUINT := FileSize & 0xFFFFFFFF ; store low-order UINT of file size HiUINT := FileSize >> 32 ; store high-order UINT of file size Offset := -4 ; to allow adding 4 on first iteration Loop, 16384 { ; 131072 / 8 LoUINT += NumGet(FileParts, Offset += 4, "UInt") ; add first UINT value to low-order UINT HiUINT += NumGet(FileParts, Offset += 4, "UInt") ; add second UINT value to high-order UINT } ; Adjust the probable overflow of the low-order UINT HiUINT += LoUINT >> 32 ; add the overflow to the high-order UINT LoUINT &= 0xFFFFFFFF ; remove the overflow from the 
low-order UINT ; Now get the hex string, i.e. the hash --------------------------------------------------------- Hash := "" VarSetCapacity(UINT64, 8, 0) NumPut((HiUINT << 32) | LoUINT, UINT64, 0, "UInt64") Loop, 8 Hash .= X[(Byte := NumGet(UINT64, 8 - A_Index, "UChar")) >> 4] . X[Byte & 0x0F] Return Hash } ; ==================================================================================================
Lisp
; opensubtitle hash, common lisp, sbcl
; sean langton 2013

(defun get-lvalue (stream)
  "Read eight bytes from STREAM and return them as one little-endian unsigned integer."
  (let ((value 0))
    (dotimes (index 8 value)
      (setf value (+ value (ash (read-byte stream) (* 8 index)))))))

(defun hashfile (path)
  "Print the 16-digit hex hash of the file at PATH to standard output.
Prints a message and returns NIL when the file is smaller than 128 KiB."
  (with-open-file (in path :element-type '(unsigned-byte 8))
    (let* ((len (file-length in))
           (hash len))
      (when (< len (* 2 65536))
        (print "file too small to hash")
        (return-from hashfile nil))
      (flet ((add-chunk ()
               ;; add 8192 words, keeping the sum within 64 bits
               (dotimes (n 8192)
                 (setf hash (logand (+ hash (get-lvalue in)) #xFFFFFFFFFFFFFFFF)))))
        (add-chunk)
        (file-position in (- len 65536))
        (add-chunk))
      (format t "~&~16,'0x" hash))))

; (hashfile #p"~/Downloads/breakdance.avi")
; (hashfile #p"~/Downloads/dummy/dummy.bin")
Pascal
// Computes the hash of Stream: its size plus the 64-bit words of its first
// and of its last 64 KiB. Size receives the stream size, Hash the result as
// 16 lower-case hex digits.
procedure ComputeHash(const Stream : TStream; out Size : qword; out Hash : string);
var
  total : qword;
  chunk : qword;    // receives 8 bytes per read
  pass  : integer;
  count : integer;
  got   : integer;
begin
  Stream.Seek(0, soFromBeginning);
  Size := Stream.Size;
  total := Size;

  for pass := 1 to 2 do
  begin
    // the second pass covers the last 64 KiB
    if pass = 2 then
      Stream.Seek(-65536, soFromEnd);

    count := 0;
    repeat
      got := Stream.Read(chunk, sizeof(chunk));
      if got > 0 then
        total := total + chunk;
      count := count + 1;
    until (count >= 8192) or (got <= 0);
  end;

  Hash := lowercase(Format('%.16x',[total]));
end;
Scala
import java.io.{FileInputStream, File}
import java.nio.{LongBuffer, ByteOrder, ByteBuffer}
import java.nio.channels.FileChannel.MapMode
import scala.math._

/** Computes the OpenSubtitles hash of a file: its length plus the sums of the
  * first and last 64 KiB read as little-endian 64-bit words (with Long
  * wrap-around), rendered as 16 lower-case hex digits.
  */
class OpenSubtitlesHasher {
  private val hashChunkSize = 64L * 1024L

  /** Returns the hash of `file`. Files shorter than one chunk are mapped
    * whole for both the head and the tail window.
    */
  def computeHash(file: File) : String = {
    val length = file.length
    val window = min(length, hashChunkSize)
    val tailStart = max(length - hashChunkSize, 0)
    val channel = new FileInputStream(file).getChannel
    try {
      val headSum = computeHashForChunk(channel.map(MapMode.READ_ONLY, 0, window))
      val tailSum = computeHashForChunk(channel.map(MapMode.READ_ONLY, tailStart, window))
      "%016x".format(length + headSum + tailSum)
    } finally {
      channel.close()
    }
  }

  /** Sums every complete little-endian 64-bit word remaining in `buffer`. */
  private def computeHashForChunk(buffer: ByteBuffer) : Long = {
    val words: LongBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer()
    var sum = 0L
    while (words.hasRemaining()) {
      sum += words.get()
    }
    sum
  }
}
Javascript
There are some WRONG implementations floating around, so please always check your hash codes against the test files listed at the start of this document. This implementation works fine, credits go to Rasmus - THANKS!
function(file, callback) { var HASH_CHUNK_SIZE = 65536, //64 * 1024 longs = [], temp = file.size; function read(start, end, callback) { var reader = new FileReader(); reader.onload = function(e) { callback.call(reader, process(e.target.result)); }; if (end === undefined) { reader.readAsBinaryString(file.slice(start)); } else { reader.readAsBinaryString(file.slice(start, end)); } } function process(chunk) { for (var i = 0; i < chunk.length; i++) { longs[(i + 8) % 8] += chunk.charCodeAt(i); } } function binl2hex(a) { var b = 255, d = '0123456789abcdef', e = '', c = 7; a[1] += a[0] >> 8; a[0] = a[0] & b; a[2] += a[1] >> 8; a[1] = a[1] & b; a[3] += a[2] >> 8; a[2] = a[2] & b; a[4] += a[3] >> 8; a[3] = a[3] & b; a[5] += a[4] >> 8; a[4] = a[4] & b; a[6] += a[5] >> 8; a[5] = a[5] & b; a[7] += a[6] >> 8; a[6] = a[6] & b; a[7] = a[7] & b; for (d, e, c; c > -1; c--) { e += d.charAt(a[c] >> 4 & 15) + d.charAt(a[c] & 15); } return e; } for (var i = 0; i < 8; i++) { longs[i] = temp & 255; temp = temp >> 8; } read(0, HASH_CHUNK_SIZE, function() { read(file.size - HASH_CHUNK_SIZE, undefined, function() { callback.call(null, file, binl2hex(longs)); }); }); }
Groovy
import java.nio.ByteBuffer
import java.nio.ByteOrder
import java.nio.channels.FileChannel
import java.nio.channels.FileChannel.MapMode

/**
 * Computes OpenSubtitles hashes: the data length plus the sums of the first
 * and last 64 KiB read as little-endian 64-bit words (with long wrap-around),
 * formatted as 16 lower-case hex digits.
 */
class OpenSubtitlesHasher {

    def static HASH_CHUNK_SIZE = 64 * 1024

    /**
     * Hashes a file by memory-mapping its head and tail windows.
     *
     * @param file the java.io.File to hash
     * @return the hash as a 16-character hex string
     */
    def static computeHash(file) {
        def size = file.length()
        def chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size)

        def fileChannel = new FileInputStream(file).getChannel()
        try {
            def head = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, 0, chunkSizeForFile))
            def tail = computeHashForChunk(fileChannel.map(MapMode.READ_ONLY, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile))
            return String.format("%016x", size + head + tail)
        } finally {
            fileChannel.close()
        }
    }

    /**
     * Hashes a stream whose total length is known up front.
     *
     * @param stream the input stream, positioned at the first byte
     * @param length the total number of bytes the stream will deliver
     * @return the hash as a 16-character hex string
     * @throws EOFException if the stream ends before {@code length} bytes
     */
    def static computeHash(stream, length) {
        def chunkSizeForFile = (int) Math.min(HASH_CHUNK_SIZE, length)

        // Head and tail windows share one array; for inputs up to 128 KiB the
        // array holds the whole input and the windows overlap.
        def chunkBytes = new byte[(int) Math.min(2 * HASH_CHUNK_SIZE, length)]

        def dis = new DataInputStream(stream)

        // first chunk
        dis.readFully(chunkBytes, 0, chunkSizeForFile)

        // Skip forward to the start of the tail data. This loop must complete
        // BEFORE the tail is read: skip() may skip fewer bytes than requested,
        // and no skipping at all is needed for inputs up to 128 KiB.
        long position = chunkSizeForFile
        long tailChunkPosition = length - chunkSizeForFile
        while (position < tailChunkPosition) {
            long skipped = dis.skip(tailChunkPosition - position)
            if (skipped <= 0) {
                // skip() returning 0 is not EOF; force progress with a read
                if (dis.read() < 0) {
                    throw new EOFException()
                }
                skipped = 1
            }
            position += skipped
        }

        // second chunk, or the remainder of the data if length <= 128 KiB
        dis.readFully(chunkBytes, chunkSizeForFile, chunkBytes.length - chunkSizeForFile)

        def head = computeHashForChunk(ByteBuffer.wrap(chunkBytes, 0, chunkSizeForFile))
        def tail = computeHashForChunk(ByteBuffer.wrap(chunkBytes, chunkBytes.length - chunkSizeForFile, chunkSizeForFile))

        return String.format("%016x", length + head + tail)
    }

    /**
     * Sums every complete little-endian 64-bit word remaining in the buffer.
     *
     * @param buffer the bytes to sum
     * @return the sum as a long (wraps on overflow)
     */
    def static computeHashForChunk(buffer) {
        def longBuffer = buffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer()
        long hash = 0L

        while (longBuffer.hasRemaining()) {
            hash += longBuffer.get()
        }

        return hash
    }
}
Bash
#!/bin/bash

# Copyright (C)
# 2014 - Tomasz Wisniewski dagon666
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

# Normalises the global 64-bit accumulator held as two 32-bit halves:
# carries the overflow of g_lo into g_hi and drops the overflow of g_hi.
correct_64bit() {
    g_hi=$(( (g_hi + (g_lo >> 32)) & 0xFFFFFFFF ))
    g_lo=$(( g_lo & 0xFFFFFFFF ))
}

# Adds 64 KiB of a file, read as little-endian 64-bit words, to g_hi/g_lo.
#   $1 - file to read
#   $2 - byte offset at which the 64 KiB window starts (optional, default 0)
hash_part() {
    local file="$1"
    local base="${2:-0}"
    local dsize=$(( 8192 * 8 ))
    local bytes_at_once=2048
    local groups=$(( bytes_at_once / 8 ))
    local curr=0
    local k=0
    local offset=0
    local -a num=()

    while [ "$curr" -lt "$dsize" ]; do
        # One od call yields the next 2048 bytes as decimal numbers; word
        # splitting of the output into array elements is intentional.
        num=( $(od -t u1 -An -N "$bytes_at_once" -w"$bytes_at_once" -j "$(( base + curr ))" "$file") )

        for (( k = 0; k < groups; k++ )); do
            offset=$(( k * 8 ))

            # Missing elements (short read) evaluate to 0 in arithmetic.
            g_lo=$(( g_lo +
                num[offset + 0] +
                (num[offset + 1] << 8) +
                (num[offset + 2] << 16) +
                (num[offset + 3] << 24) ))

            g_hi=$(( g_hi +
                num[offset + 4] +
                (num[offset + 5] << 8) +
                (num[offset + 6] << 16) +
                (num[offset + 7] << 24) ))

            correct_64bit
        done

        curr=$(( curr + bytes_at_once ))
    done
}

# Prints the OpenSubtitles hash (16 lower-case hex digits) of file $1.
# Returns 1 without printing a hash if the file size cannot be determined.
hash_file() {
    g_lo=0
    g_hi=0

    local file="$1"
    local size
    size=$(stat -c%s "$file") || return 1
    local offset=$(( size - 65536 ))

    # Both windows are read straight from the file via od's -j offset, so no
    # temporary copies are created (and none can be left behind on interrupt).
    hash_part "$file" 0
    if [ "$offset" -ge 0 ]; then
        hash_part "$file" "$offset"
    fi

    g_lo=$(( g_lo + size ))
    correct_64bit

    printf "%08x%08x\n" $g_hi $g_lo
}

hash_file "breakdance.avi"
echo "8e245d9679d31e12 <- should be"

hash_file "dummy.bin"
echo "61f7751fc2a72bfb <- should be"
GO
https://github.com/oz/osdb/blob/6a89d7f831a6a3874260fe4677e546d551cad79d/osdb.go#L42
import ( "bytes" "encoding/binary" "fmt" "os" ) const ( ChunkSize = 65536 // 64k ) // Generate an OSDB hash for an *os.File. func HashFile(file *os.File) (hash uint64, err error) { fi, err := file.Stat() if err != nil { return } if fi.Size() < ChunkSize { return 0, fmt.Errorf("File is too small") } // Read head and tail blocks. buf := make([]byte, ChunkSize*2) err = readChunk(file, 0, buf[:ChunkSize]) if err != nil { return } err = readChunk(file, fi.Size()-ChunkSize, buf[ChunkSize:]) if err != nil { return } // Convert to uint64, and sum. var nums [(ChunkSize * 2) / 8]uint64 reader := bytes.NewReader(buf) err = binary.Read(reader, binary.LittleEndian, &nums) if err != nil { return 0, err } for _, num := range nums { hash += num } return hash + uint64(fi.Size()), nil } // Read a chunk of a file at `offset` so as to fill `buf`. func readChunk(file *os.File, offset int64, buf []byte) (err error) { n, err := file.ReadAt(buf, offset) if err != nil { return } if n != ChunkSize { return fmt.Errorf("Invalid read %v", n) } return }
Attachments (1)
-
GetHash.dll
(4.5 KB) -
added by guest 15 years ago.
GetHash?.dll
Download all attachments as: .zip