PK!ܔGwwdiff_pdf_visually/__init__.pyfrom typing import List __all__ = [] # type: List[str] from .diff import pdfdiff from .diff import pdftopng, imgdiff PK!Ddiff_pdf_visually/__main__.pyimport argparse, sys from . import pdfdiff from .constants import DEFAULT_THRESHOLD, DEFAULT_VERBOSITY, DEFAULT_DPI from .constants import MAX_VERBOSITY def main(): description = """ Compare two PDFs visually. The exit code is 0 if they are the same, and 2 if there are significant differences. """.strip() parser = argparse.ArgumentParser(description=description) verbosity = DEFAULT_VERBOSITY def more_silent(): assert verbosity <= DEFAULT_VERBOSITY, "cannot be both silent and verbose" verbosity -= 1 def more_verbose(): assert verbosity >= DEFAULT_VERBOSITY, "cannot be both silent and verbose" verbosity += 1 parser.add_argument('a', metavar='a.pdf') parser.add_argument('b', metavar='b.pdf') parser.add_argument('--silent', '-q', action='count', default=0, help="silence output (can be used only once)") assert DEFAULT_VERBOSITY==1 parser.add_argument('--verbose', '-v', action='count', default=0, help="show more information (can be used {} times)".format( MAX_VERBOSITY - DEFAULT_VERBOSITY)) parser.add_argument('--threshold', default=DEFAULT_THRESHOLD, type=float, help="PSNR threshold to consider a change significant, " "higher is more sensitive (default: %(default)s)") parser.add_argument('--dpi', default=DEFAULT_DPI, type=int, help="resolution for the rasterised files (default: %(default)s)") parser.add_argument('--time', default=0, type=int, help="number of seconds to wait before discarding temporary files, " "or 0 to immediately discard (hint: use -v)") args = parser.parse_args() assert args.silent == 0 or args.verbose == 0, "cannot be silent and verbose" assert 1 <= args.dpi verbosity = DEFAULT_VERBOSITY + args.verbose - args.silent if pdfdiff(args.a, args.b, verbosity=verbosity, threshold=args.threshold, dpi=args.dpi, time_to_inspect=args.time): sys.exit(0) else: sys.exit(2) if __name__ == '__main__': main() PK!s?,,diff_pdf_visually/constants.pyINFINITY = float('inf') # Default threshold: lower means to ignore more. DEFAULT_THRESHOLD = 100 # Default verbosity. Zero means quiet. DEFAULT_VERBOSITY = 1 MAX_VERBOSITY = 3 # Resolution (in dots per inch) in which to render pages DEFAULT_DPI = 50 # Minimum verbosity for printing what the result is, and why VERB_PRINT_REASON=1 # Minimum verbosity for printing what the temporary directory is VERB_PRINT_TMPDIR=2 # Minimum verbosity for printing the significance for each page VERB_PERPAGE=2 # Minimum verbosity for printing commands VERB_PRINT_CMD=3 PK!1~~diff_pdf_visually/diff.py#!/usr/bin/env python3 """ Test if there is a significant difference between two PDFs using ImageMagick and pdftocairo. """ INFINITY = float('inf') import os.path, pathlib, subprocess, sys, tempfile, time from .constants import DEFAULT_THRESHOLD, DEFAULT_VERBOSITY, DEFAULT_DPI from .constants import VERB_PRINT_REASON, VERB_PRINT_TMPDIR from .constants import VERB_PERPAGE, VERB_PRINT_CMD def pdftopng(sourcepath, destdir, basename, verbosity, dpi): """ Invoke pdftocairo to convert the given PDF path to a PNG per page. Return a list of page numbers (as strings). """ if [] != list(destdir.glob(basename + '*')): raise ValueError("destdir not clean: " + repr(destdir)) verbose_run((verbosity > VERB_PRINT_CMD), [ 'pdftocairo', '-png', '-r', str(dpi), str(sourcepath), str(destdir / basename) ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, ) # list of strings with decimals numbers = sorted(path.name for path in destdir.glob(basename + '*' + '.png')) return [s[len(basename)+1:-4] for s in numbers] # returns a float, which can be inf def imgdiff(a, b, diff, log, print_cmds): assert a.is_file() assert b.is_file() assert not diff.exists() assert not log.exists() with log.open('wb') as f: cmdresult = verbose_run(print_cmds, [ 'compare', '-verbose', '-metric', 'PSNR', str(a), str(b), str(diff), ], stdout=f, stderr=subprocess.STDOUT, ) if cmdresult.returncode > 1: raise ValueError("compare crashed, status="+str(cmdresult.returncode)) with log.open('r') as f: lines = f.readlines() if any('image widths or heights differ' in l for l in lines): raise ValueError("image widths or heights differ") PREF=' all: ' all_line = [l for l in lines if l.startswith(PREF)] assert len(all_line) == 1 all_str = all_line[0][len(PREF):].strip() all_num = INFINITY if all_str == '0' else float(all_str) return all_num def pdfdiff(a, b, threshold=DEFAULT_THRESHOLD, verbosity=DEFAULT_VERBOSITY, dpi=DEFAULT_DPI, time_to_inspect=0): """Given two filenames, return whether the PDFs are sufficiently similar.""" assert os.path.isfile(a), "file {} must exist".format(a) assert os.path.isfile(b), "file {} must exist".format(b) with tempfile.TemporaryDirectory(prefix="diffpdf") as d: p = pathlib.Path(d) if verbosity >= VERB_PRINT_TMPDIR: print(" Temporary directory: {}".format(p)) # expand a a_i = pdftopng(a, p, "a", verbosity=verbosity, dpi=dpi) b_i = pdftopng(b, p, "b", verbosity=verbosity, dpi=dpi) if a_i != b_i: if verbosity >= VERB_PRINT_REASON: print("Different number of pages: {} vs {}", a_i, b_i) return False assert len(a_i) > 0 significances = [] for pageno in a_i: # remember pageno is a string pageapath = p / "a-{}.png".format(pageno) pagebpath = p / "b-{}.png".format(pageno) diffpath = p / "diff-{}.png".format(pageno) logpath = p / "log-{}.txt".format(pageno) s = imgdiff(pageapath, pagebpath, diffpath, logpath, (verbosity > VERB_PRINT_CMD)) if verbosity >= VERB_PERPAGE: print("- Page {}: significance={}".format(pageno, s)) significances.append(s) min_significance = min(significances, default=INFINITY) significant = (min_significance <= threshold) if verbosity >= VERB_PRINT_REASON: freetext = "different" if significant else "the same" print("Min sig = {}, significant?={}. The PDFs are {}.".format( min_significance, significant, freetext )) if time_to_inspect > 0: print( "Waiting for {} seconds before removing temporary directory..." .format(time_to_inspect), end='', flush=True ) time.sleep(time_to_inspect) print(" done.") return not significant def verbose_run(print_cmd, args, *restargs, **kw): if print_cmd: print(" Running: {}".format(' '.join(args)), file=sys.stderr) return subprocess.run(args, *restargs, **kw) PK!H+;E2diff_pdf_visually-1.2.1.dist-info/entry_points.txtN+I/N.,()JLK-HI-,.MɩEa"V񹉙yz PK!HW"TT'diff_pdf_visually-1.2.1.dist-info/WHEEL A н#J."jm)Afb~ ڡ5 G7hiޅF4+-3ڦ/̖?XPK!H*i=*diff_pdf_visually-1.2.1.dist-info/METADATAXrF}WL)[eYE$U$^+Ee*nm!0&0\H1دȏv֕Z>P/O^?*i'"{B/iW,ҕvA& =HlFD<N7]˭FxXWW RB;!!Xzsz> m\Rk#n9nKqRΗB5R[%MQy߹x\j_yftȉWw~|Z$9sd6l"i)C?}~eB';\U]Lqxu[g-)Y̌-{ ޓA^|v\0T% [!dbj&I*A8̗5\FXUe7 s"+IխW5Ɗ6P^~PNtQ(UP *$WUUVzct,0yh,CY GX_&FԘڍ(By1+sJK筂&xL]#THl>yT(,޸ d Gsx_DK<iJ92E{#ؖs˚Xt{$#fݜ3Q8Q /1L%w($B Xp*РiK=@ $rqѺhc0;OmElCr|7zjw] 2굹=ZsP,G$%+S`=ƾb&T"_5\Nr 2!δ) De[G4D pG -KRGa6{Ha=pvھ%h{jB&-+1kqLz0/. 1'u(hIQii _5"x-8 ;~W@oGTM|;}Ϭl*:-nԄ~l;(oz4w7lG+AuP$gϩpP)kqx0`bbY?s{lCZ@uQﬣ*^+r6Iw{{;)hΜG=5X}U':VskF.%Gz3}f/=:]0 6"^l#rZ 5G;c{%)|f'|O{v-)8Vp3YKMD  ri }><۞.4ҫz3naFz Ǔ]b'ìIu,vspeCA$#b5RjSH=GL1Ya)Ip6Lt0f%jf8 -$5`lp'UNnj3 Ir4{wqcoB3|c7ij\}|A/mjE%Gp37-ktE4ҁ4I\Ĝǐr>1N\O&F$PK!Hz?,(diff_pdf_visually-1.2.1.dist-info/RECORD9o@>epؗ"f31lhaLa=0rxORx欎SRY} +ۉ,Z_e gKx PK!ܔGwwdiff_pdf_visually/__init__.pyPK!Ddiff_pdf_visually/__main__.pyPK!s?,, diff_pdf_visually/constants.pyPK!1~~ diff_pdf_visually/diff.pyPK!H+;E2diff_pdf_visually-1.2.1.dist-info/entry_points.txtPK!HW"TT'+diff_pdf_visually-1.2.1.dist-info/WHEELPK!H*i=*diff_pdf_visually-1.2.1.dist-info/METADATAPK!Hz?,('diff_pdf_visually-1.2.1.dist-info/RECORDPK(