Version 1 (modified by os, 16 years ago) (diff) |
---|
Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit chksum of the first and last 64k (even if they overlap because the file is smaller than 128k).
Feel free to edit/add source-codes if you have faster/better implementation. Also don't forget to check, if hash is right for test. Test these 2 files please to ensure your algo is completely OK:
- AVI file (12 909 756 bytes)
- hash: 8e245d9679d31e12
- DUMMY RAR file (2 565 922 bytes, 4 295 033 890 after RAR unpacking)
- hash: 61f7751fc2a72bfb
C
#include <stdio.h> #include <stdlib.h> #define MAX(x,y) (((x) > (y)) ? (x) : (y)) #ifndef uint64_t #define uint64_t unsigned long long #endif uint64_t compute_hash(FILE * handle) { uint64_t hash, fsize; fseek(handle, 0, SEEK_END); fsize = ftell(handle); fseek(handle, 0, SEEK_SET); hash = fsize; for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++); fseek(handle, (long)MAX(0, fsize - 65536), SEEK_SET); for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && fread((char*)&tmp, sizeof(tmp), 1, handle); hash += tmp, i++); return hash; } int main(int argc, char *argv) { FILE * handle; uint64_t myhash; handle = fopen("breakdance.avi", "rb"); if (!handle) { printf("Error openning file!"); return 1; } myhash = compute_hash(handle); printf("%I64x", myhash); fclose(handle); return 0; }
C++
#include <iostream> #include <fstream> typedef unsigned __int64 uint64_t; using namespace std; int MAX(int x, int y) { if((x) > (y)) return x; else return y; } uint64_t compute_hash(ifstream& f) { uint64_t hash, fsize; f.seekg(0, ios::end); fsize = f.tellg(); f.seekg(0, ios::beg); hash = fsize; for(uint64_t tmp = 0, i = 0; i < 65536/sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp); f.seekg(MAX(0, (uint64_t)fsize - 65536), ios::beg); for(tmp = 0, i = 0; i < 65536/sizeof(tmp) && f.read((char*)&tmp, sizeof(tmp)); i++, hash += tmp); return hash; } int main(int argc, char *argv) { ifstream f; uint64_t myhash; f.open("c:\\test.avi", ios::in|ios::binary|ios::ate); if (!f.is_open()) { cerr << "Error opening file" << endl; return 1; } myhash = compute_hash(f); cout << setw(16) << setfill('0') << hex << myhash; f.close(); return 0; }
Java
import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.nio.ByteOrder; import java.nio.LongBuffer; import java.nio.MappedByteBuffer; import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; /** * Hash code is based on Media Player Classic. In natural language it calculates: size + 64bit * checksum of the first and last 64k (even if they overlap because the file is smaller than * 128k). */ public class OpenSubtitlesHasher { /** * Size of the chunks that will be hashed in bytes (64 KB) */ private static final int HASH_CHUNK_SIZE = 64 * 1024; public static String computeHash(File file) throws IOException { long size = file.length(); long chunkSizeForFile = Math.min(HASH_CHUNK_SIZE, size); FileChannel fileChannel = new FileInputStream(file).getChannel(); long head = computeHashForChunk(fileChannel, 0, chunkSizeForFile); long tail = computeHashForChunk(fileChannel, Math.max(size - HASH_CHUNK_SIZE, 0), chunkSizeForFile); fileChannel.close(); return String.format("%016x", size + head + tail); } private static long computeHashForChunk(FileChannel fileChannel, long start, long size) throws IOException { MappedByteBuffer byteBuffer = fileChannel.map(MapMode.READ_ONLY, start, size); LongBuffer longBuffer = byteBuffer.order(ByteOrder.LITTLE_ENDIAN).asLongBuffer(); long hash = 0; while (longBuffer.hasRemaining()) { hash += longBuffer.get(); } return hash; } }
C#
using System; using System.Text; using System.IO; namespace MovieHasher { class Program { private static byte[] ComputeMovieHash(string filename) { byte[] result; using (Stream input = File.OpenRead(filename)) { result = ComputeMovieHash(input); } return result; } private static byte[] ComputeMovieHash(Stream input) { long lhash, streamsize; streamsize = input.Length; lhash = streamsize; long i = 0; byte[] buffer = new byte[sizeof(long)]; while (i < 65536 / sizeof(long) && (input.Read(buffer, 0, sizeof(long)) > 0)) { i++; lhash += BitConverter.ToInt64(buffer, 0); } input.Position = Math.Max(0, streamsize - 65536); i = 0; while (i < 65536 / sizeof(long) && (input.Read(buffer, 0, sizeof(long)) > 0)) { i++; lhash += BitConverter.ToInt64(buffer, 0); } input.Close(); byte[] result = BitConverter.GetBytes(lhash); Array.Reverse(result); return result; } private static string ToHexadecimal(byte[] bytes) { StringBuilder hexBuilder = new StringBuilder(); for(int i = 0; i < bytes.Length; i++) { hexBuilder.Append(bytes[i].ToString("x2")); } return hexBuilder.ToString(); } static void Main(string[] args) { byte[] moviehash = ComputeMovieHash(@"C:\test.avi"); Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash)); } } }
VB.Net
Imports System Imports System.Text Imports System.IO 'Note: you must remove integer overflow checking. Namespace MovieHasher Class Program Private Shared Function ComputeMovieHash(ByVal filename As String) As Byte() Dim result As Byte() Using input As Stream = File.OpenRead(filename) result = ComputeMovieHash(input) End Using Return result End Function Private Function ComputeMovieHash(ByVal input As Stream) As Byte() Dim lhash As System.Int64, streamsize As Long streamsize = input.Length lhash = streamsize Dim i As Long = 0 Dim buffer As Byte() = New Byte(Marshal.SizeOf(GetType(Long)) - 1) {} While i < 65536 / Marshal.SizeOf(GetType(Long)) AndAlso (input.Read(buffer, 0, Marshal.SizeOf(GetType(Long))) > 0) i += 1 lhash += BitConverter.ToInt64(buffer, 0) End While input.Position = Math.Max(0, streamsize - 65536) i = 0 While i < 65536 / Marshal.SizeOf(GetType(Long)) AndAlso (input.Read(buffer, 0, Marshal.SizeOf(GetType(Long))) > 0) i += 1 lhash += BitConverter.ToInt64(buffer, 0) End While input.Close() Dim result As Byte() = BitConverter.GetBytes(lhash) Array.Reverse(result) Return result End Function Private Shared Function ToHexadecimal(ByVal bytes As Byte()) As String Dim hexBuilder As New StringBuilder() For i As Integer = 0 To bytes.Length - 1 hexBuilder.Append(bytes(i).ToString("x2")) Next Return hexBuilder.ToString() End Function Private Shared Sub Main(ByVal args As String()) Dim moviehash As Byte() = ComputeMovieHash("C:\test.avi") Console.WriteLine("The hash of the movie-file is: {0}", ToHexadecimal(moviehash)) End Sub End Class End Namespace
Python
def hashFile(name): try: longlongformat = 'q' # long long bytesize = struct.calcsize(longlongformat) f = file(name, "rb") filesize = os.path.getsize(name) hash = filesize if filesize < 65536 * 2: return "SizeError" for x in range(65536/bytesize): buffer = f.read(bytesize) (l_value,)= struct.unpack(longlongformat, buffer) hash += l_value hash = hash & 0xFFFFFFFFFFFFFFFF #to remain as 64bit number f.seek(max(0,filesize-65536),0) for x in range(65536/bytesize): buffer = f.read(bytesize) (l_value,)= struct.unpack(longlongformat, buffer) hash += l_value hash = hash & 0xFFFFFFFFFFFFFFFF f.close() returnedhash = "%016x" % hash return returnedhash except(IOError): return "IOError"
Delphi
This is just a quick conversion of Gabest's original C code. Anyone who can come up with a cleaner code, please feel free to do so and post here.
function CalcGabestHash(const Filename: string): string; var i: Integer; f: File; s: Array[1..8] of char; tmp: Int64 absolute s; hash: Int64; readed: Integer; OldFM: Byte; begin Result := ''; if FileExists(Filename) then begin { Open file } OldFM := FileMode; try FileMode := fmShareDenyNone; AssignFile(f,Filename); Reset(f,1); try { Start hashing } hash := filesize(f); { Read from begining } for i := 0 to 8191 do begin blockread(f,s,sizeof(s),readed); if readed > 0 then hash := hash + tmp else break; end; { Read from the end } seek(f,Max(0,filesize(f)-65536)); for i := 0 to 8191 do begin blockread(f,s,sizeof(s),readed); if readed > 0 then hash := hash + tmp else break; end; { Finished } Result := Format('%.16x',[hash]); finally CloseFile(f); end; finally FileMode := OldFM; end; end; end;
RealBasic
Combined routine that will calculate a fast hash for videofiles over 65K and a normal md5 for subtitles
dim b as BinaryStream dim mb as MemoryBlock dim hash,bytesize as UINT64 dim i, x, chunksize, filelen, difference as integer hash = 0 //Reset Hash difference = 0 if f <> nil and f.Exists then b= f.OpenAsBinaryFile hash = b.Length bytesize = b.Length bytesizestr = str(bytesize) if bytesize >= 65536 and routine = "video" then chunksize = 65536 mb = b.Read(65536) mb.LittleEndian = True for i= 0 to chunksize -1 step 8 hash = hash+ mb.UINT64Value(i) next b.Position = max(b.Length-chunksize, 0) mb= b.Read(chunksize) mb.LittleEndian = True for i= 0 to chunksize -1 step 8 hash = hash+ mb.UINT64Value(i) next myhash = Lowercase(str(hex(hash))) elseif routine = "subtitle" then dim c,result as string mb = md5(b.Read(b.Length)) mb.LittleEndian = True for i = 0 to mb.size-1 x = mb.byte( i ) c = right( "00"+hex( x ), 2 ) result = result + c next result = lowercase( result ) myhash = result end
PHP 4/5
function OpenSubtitlesHash($file) { $handle = fopen($file, "rb"); $fsize = filesize($file); $hash = array(3 => 0, 2 => 0, 1 => ($fsize >> 16) & 0xFFFF, 0 => $fsize & 0xFFFF); for ($i = 0; $i < 8192; $i++) { $tmp = ReadUINT64($handle); $hash = AddUINT64($hash, $tmp); } $offset = $fsize - 65536; fseek($handle, $offset > 0 ? $offset : 0, SEEK_SET); for ($i = 0; $i < 8192; $i++) { $tmp = ReadUINT64($handle); $hash = AddUINT64($hash, $tmp); } fclose($handle); return UINT64FormatHex($hash); } function ReadUINT64($handle) { $u = unpack("va/vb/vc/vd", fread($handle, 8)); return array(0 => $u["a"], 1 => $u["b"], 2 => $u["c"], 3 => $u["d"]); } function AddUINT64($a, $b) { $o = array(0 => 0, 1 => 0, 2 => 0, 3 => 0); $carry = 0; for ($i = 0; $i < 4; $i++) { if (($a[$i] + $b[$i] + $carry) > 0xffff ) { $o[$i] += ($a[$i] + $b[$i] + $carry) & 0xffff; $carry = 1; } else { $o[$i] += ($a[$i] + $b[$i] + $carry); $carry = 0; } } return $o; } function UINT64FormatHex($n) { return sprintf("%04x%04x%04x%04x", $n[3], $n[2], $n[1], $n[0]); }
Perl
#!/usr/bin/perl use strict; use warnings; print OpenSubtitlesHash('breakdance.avi'); sub OpenSubtitlesHash { my $filename = shift or die("Need video filename"); open my $handle, "<", $filename or die $!; my $fsize = -s $filename; my $hash = [$fsize & 0xFFFF, ($fsize >> 16) & 0xFFFF, 0, 0]; $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192); my $offset = $fsize - 65536; seek($handle, $offset > 0 ? $offset : 0, 0) or die $!; $hash = AddUINT64($hash, ReadUINT64($handle)) for (1..8192); close $handle or die $!; return UINT64FormatHex($hash); } sub ReadUINT64 { read($_[0], my $u, 8); return [unpack("vvvv", $u)]; } sub AddUINT64 { my $o = [0,0,0,0]; my $carry = 0; for my $i (0..3) { if (($_[0]->[$i] + $_[1]->[$i] + $carry) > 0xffff ) { $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry) & 0xffff; $carry = 1; } else { $o->[$i] += ($_[0]->[$i] + $_[1]->[$i] + $carry); $carry = 0; } } return $o; } sub UINT64FormatHex { return sprintf("%04x%04x%04x%04x", $_[0]->[3], $_[0]->[2], $_[0]->[1], $_[0]->[0]); }
Ruby
This is a quick translation/transliteration of the Perl script.
class Hasher def open_subtitles_hash(filename) raise "Need video filename" unless filename fh = File.open(filename) fsize = File.size(filename) hash = [fsize & 0xffff, (fsize >> 16) & 0xffff, 0, 0] 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } offset = fsize - 65536 fh.seek([0,offset].max, 0) 8192.times { hash = add_unit_64(hash, read_uint_64(fh)) } fh.close return uint_64_format_hex(hash) end def read_uint_64(stream) stream.read(8).unpack("vvvv") end def add_unit_64(hash, input) res = [0,0,0,0] carry = 0 hash.zip(input).each_with_index do |(h,i),n| sum = h + i + carry if sum > 0xffff res[n] += sum & 0xffff carry = 1 else res[n] += sum carry = 0 end end return res end def uint_64_format_hex(hash) sprintf("%04x%04x%04x%04x", *hash.reverse) end end if __FILE__ == $0 require 'test/unit' class HashTester < Test::Unit::TestCase def setup @h = Hasher.new end def test_test_file_hash assert_equal("8e245d9679d31e12", @h.open_subtitles_hash('breakdance.avi')) end end end
Another more "rubyesque" implementation.
class MovieHasher HASH_CHUNK_SIZE = 64 * 1024 # in bytes HASH_SIZE = 8 # in bytes def self.compute_hash(filename) file = File.open(filename, "rb") filesize = File.size(filename) hash = filesize # Read 64 kbytes, divide up into 64 bits and add each # to hash. Do for beginning and end of file. # Q = unsigned long long (64 bit = HASH_SIZE) file.read(HASH_CHUNK_SIZE).unpack("Q*").each { |n| hash += n } file.seek([0, filesize - HASH_CHUNK_SIZE].max, IO::SEEK_SET) file.read(HASH_CHUNK_SIZE).unpack("Q*").each { |n| hash += n } file.close hash &= 0xffffffffffffffff # keep the first 8 bytes / 64 bits sprintf("%0" + (HASH_SIZE * 2).to_s + "x", hash) end end if __FILE__ == $0 require 'test/unit' class MovieHasherTest < Test::Unit::TestCase def test_compute_hash assert_equal("8e245d9679d31e12", MovieHasher::compute_hash('breakdance.avi')) end end end
Attachments (1)
-
GetHash.dll
(4.5 KB) -
added by guest 15 years ago.
GetHash?.dll
Download all attachments as: .zip