383 | | |
def hashFile(name):
    """Return the OpenSubtitles hash of the local file *name*.

    The hash is the file size plus the sum of the first and the last 64 KiB
    of the file, both read as little-endian signed 64-bit integers.  The
    total is truncated to 64 bits and formatted as 16 lowercase hex digits.

    Args:
        name: Path of the file to hash.

    Returns:
        The 16-character hex digest, ``"SizeError"`` when the file is
        smaller than 128 KiB, or ``"IOError"`` when it cannot be read.
    """
    chunk_size = 65536
    longlongformat = '<q'  # little-endian long long
    bytesize = struct.calcsize(longlongformat)
    # Integer division: a plain "/" yields a float on Python 3, which
    # range()/struct formats reject.
    chunk_format = '<%dq' % (chunk_size // bytesize)

    try:
        # The context manager closes the handle on every exit path,
        # including the early "SizeError" return.
        with open(name, "rb") as f:
            filesize = os.path.getsize(name)
            if filesize < chunk_size * 2:
                return "SizeError"

            head = f.read(chunk_size)
            f.seek(filesize - chunk_size, 0)
            tail = f.read(chunk_size)
    except IOError:
        return "IOError"

    if len(head) != chunk_size or len(tail) != chunk_size:
        # Short read: the file shrank between getsize() and read().
        return "IOError"

    digest = filesize
    for chunk in (head, tail):
        digest += sum(struct.unpack(chunk_format, chunk))

    # Masking once at the end is equivalent to masking after each addition,
    # because both are arithmetic modulo 2**64.
    return "%016x" % (digest & 0xFFFFFFFFFFFFFFFF)
| 387 | import urllib |
# Size in bytes of each chunk (head and tail of the file) fed into the hash.
__64k = 64 * 1024
# struct format character for a signed 64-bit integer ("long long").
__longlong_format_char = "q"
# Number of bytes occupied by one integer of that format.
__byte_size = struct.calcsize(__longlong_format_char)
| 391 | |
def temp_file():
    """Create an empty temporary file and return its path.

    The file is left on disk so the returned name stays reserved for the
    caller, who is responsible for deleting it (e.g. with ``os.remove``).

    Returns:
        Absolute path of the newly created file.
    """
    import os
    import tempfile

    # NamedTemporaryFile() removes its file as soon as the object is garbage
    # collected, so returning only its name handed out a path that no longer
    # existed and could be claimed by another process.  mkstemp() creates the
    # file securely and keeps it until it is removed explicitly.
    handle, filename = tempfile.mkstemp()
    os.close(handle)
    return filename
| 397 | |
def is_local(_str):
    """Tell whether *_str* refers to something on the local filesystem.

    An existing path counts as local.  Otherwise the string is local when,
    parsed as a URL, it has no scheme at all or uses the ``file`` scheme.

    Returns:
        True for local paths, False for anything with another URL scheme.
    """
    from urllib.parse import urlparse

    return os.path.exists(_str) or urlparse(_str).scheme in ('', 'file')
| 405 | |
| 406 | |
def _sum_le_int64(chunk):
    """Return the sum of *chunk* read as little-endian signed 64-bit integers."""
    return sum(struct.unpack('<%dq' % (len(chunk) // 8), chunk))


def _read_local_ends(path, chunk_size):
    """Return ``(filesize, head, tail)`` for the local file *path*.

    *head* and *tail* are the first and last *chunk_size* bytes; both are
    ``None`` when the file is smaller than two chunks.
    """
    with open(path, 'rb') as f:
        filesize = os.path.getsize(path)
        if filesize < chunk_size * 2:
            return filesize, None, None
        head = f.read(chunk_size)
        f.seek(filesize - chunk_size, 0)
        tail = f.read(chunk_size)
    return filesize, head, tail


def _fetch_byte_range(url, first, last):
    """Download bytes *first*..*last* (both inclusive) of *url*.

    Returns ``(data, total_size)`` where *total_size* is the full length of
    the resource as announced in the ``Content-Range`` response header.
    Raises IOError when the server ignores the Range header or does not
    report the total size.
    """
    from urllib import request

    # The Range header is attached to this request only; installing a global
    # opener would add it to every later urlopen() call in the process.
    ranged = request.Request(
        url, headers={'Range': 'bytes=%d-%d' % (first, last)})
    with request.urlopen(ranged) as response:
        if getattr(response, 'status', None) != 206:
            # A non-partial reply would be the start of the whole file, which
            # would silently produce a wrong hash for the tail chunk.
            raise IOError('server did not honour the Range request')
        content_range = response.headers.get('Content-Range') or ''
        data = response.read(last - first + 1)

    try:
        # Header looks like "bytes 0-65535/1234567".
        total_size = int(content_range.rpartition('/')[2])
    except ValueError:
        raise IOError('server did not report the total size')
    return data, total_size


def hashFile_url(filepath):
    """Return the OpenSubtitles hash of a local file or of a remote URL.

    Local paths (as decided by ``is_local``) are read from disk.  Anything
    else is fetched with two HTTP Range requests, so only the first and the
    last 64 KiB are downloaded; nothing is written to disk.

    Args:
        filepath: Path of a local file, or URL of a remote one.

    Returns:
        The 16-character lowercase hex digest, ``"SizeError"`` when the file
        is smaller than 128 KiB, or ``'IOError'`` when the data cannot be
        read (missing file, network failure, server without Range support).
    """
    # https://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    # filehash = filesize + 64bit sum of the first and last 64k of the file
    try:
        if is_local(filepath):
            filesize, head, tail = _read_local_ends(filepath, __64k)
        else:
            # Byte ranges are inclusive, hence the "- 1" on the upper bound.
            head, filesize = _fetch_byte_range(filepath, 0, __64k - 1)
            tail = None
            if filesize >= __64k * 2:
                tail, _ = _fetch_byte_range(
                    filepath, filesize - __64k, filesize - 1)
    except IOError:
        # urllib's URLError/HTTPError are IOError subclasses as well.
        return 'IOError'

    if filesize < __64k * 2:
        return "SizeError"

    if len(head) != __64k or len(tail) != __64k:
        # Truncated read or download.
        return 'IOError'

    digest = filesize + _sum_le_int64(head) + _sum_le_int64(tail)
    # Keep the result a 64-bit number.
    return "%016x" % (digest & 0xFFFFFFFFFFFFFFFF)
| 509 | |
'''
#example
import subs_file_hash
print(subs_file_hash.hashFile_url('https://static.opensubtitles.org/addons/avi/breakdance.avi'))
8e245d9679d31e12
'''
| 516 | |