PK!ܔGwwdiff_pdf_visually/__init__.pyfrom typing import List __all__ = [] # type: List[str] from .diff import pdfdiff from .diff import pdftopng, imgdiff PK!zdiff_pdf_visually/__main__.pyimport argparse, sys from . import pdfdiff from .constants import DEFAULT_VERBOSITY, MAX_VERBOSITY def main(): description = """ Compare two PDFs visually. The exit code is 0 if they are the same, and 2 if there are significant differences. """.strip() parser = argparse.ArgumentParser(description=description) verbosity = DEFAULT_VERBOSITY def more_silent(): assert verbosity <= DEFAULT_VERBOSITY, "cannot be both silent and verbose" verbosity -= 1 def more_verbose(): assert verbosity >= DEFAULT_VERBOSITY, "cannot be both silent and verbose" verbosity += 1 parser.add_argument('a', metavar='a.pdf') parser.add_argument('b', metavar='b.pdf') parser.add_argument('--silent', '-q', action='count', default=0, help="silence output (can be used only once)") assert DEFAULT_VERBOSITY==1 parser.add_argument('--verbose', '-v', action='count', default=0, help="show more information (can be used {} times)".format( MAX_VERBOSITY - DEFAULT_VERBOSITY)) args = parser.parse_args() assert args.silent == 0 or args.verbose == 0, "cannot be silent and verbose" verbosity = DEFAULT_VERBOSITY + args.verbose - args.silent if pdfdiff(args.a, args.b, verbosity=verbosity): sys.exit(0) else: sys.exit(2) if __name__ == '__main__': main() PK!s?,,diff_pdf_visually/constants.pyINFINITY = float('inf') # Default threshold: lower means to ignore more. DEFAULT_THRESHOLD = 100 # Default verbosity. Zero means quiet. DEFAULT_VERBOSITY = 1 MAX_VERBOSITY = 3 # Resolution (in dots per inch) in which to render pages DEFAULT_DPI = 50 # Minimum verbosity for printing what the result is, and why VERB_PRINT_REASON=1 # Minimum verbosity for printing what the temporary directory is VERB_PRINT_TMPDIR=2 # Minimum verbosity for printing the significance for each page VERB_PERPAGE=2 # Minimum verbosity for printing commands VERB_PRINT_CMD=3 PK!1~~diff_pdf_visually/diff.py#!/usr/bin/env python3 """ Test if there is a significant difference between two PDFs using ImageMagick and pdftocairo. """ INFINITY = float('inf') import os.path, pathlib, subprocess, sys, tempfile, time from .constants import DEFAULT_THRESHOLD, DEFAULT_VERBOSITY, DEFAULT_DPI from .constants import VERB_PRINT_REASON, VERB_PRINT_TMPDIR from .constants import VERB_PERPAGE, VERB_PRINT_CMD def pdftopng(sourcepath, destdir, basename, verbosity, dpi): """ Invoke pdftocairo to convert the given PDF path to a PNG per page. Return a list of page numbers (as strings). """ if [] != list(destdir.glob(basename + '*')): raise ValueError("destdir not clean: " + repr(destdir)) verbose_run((verbosity > VERB_PRINT_CMD), [ 'pdftocairo', '-png', '-r', str(dpi), str(sourcepath), str(destdir / basename) ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, ) # list of strings with decimals numbers = sorted(path.name for path in destdir.glob(basename + '*' + '.png')) return [s[len(basename)+1:-4] for s in numbers] # returns a float, which can be inf def imgdiff(a, b, diff, log, print_cmds): assert a.is_file() assert b.is_file() assert not diff.exists() assert not log.exists() with log.open('wb') as f: cmdresult = verbose_run(print_cmds, [ 'compare', '-verbose', '-metric', 'PSNR', str(a), str(b), str(diff), ], stdout=f, stderr=subprocess.STDOUT, ) if cmdresult.returncode > 1: raise ValueError("compare crashed, status="+str(cmdresult.returncode)) with log.open('r') as f: lines = f.readlines() if any('image widths or heights differ' in l for l in lines): raise ValueError("image widths or heights differ") PREF=' all: ' all_line = [l for l in lines if l.startswith(PREF)] assert len(all_line) == 1 all_str = all_line[0][len(PREF):].strip() all_num = INFINITY if all_str == '0' else float(all_str) return all_num def pdfdiff(a, b, threshold=DEFAULT_THRESHOLD, verbosity=DEFAULT_VERBOSITY, dpi=DEFAULT_DPI, time_to_inspect=0): """Given two filenames, return whether the PDFs are sufficiently similar.""" assert os.path.isfile(a), "file {} must exist".format(a) assert os.path.isfile(b), "file {} must exist".format(b) with tempfile.TemporaryDirectory(prefix="diffpdf") as d: p = pathlib.Path(d) if verbosity >= VERB_PRINT_TMPDIR: print(" Temporary directory: {}".format(p)) # expand a a_i = pdftopng(a, p, "a", verbosity=verbosity, dpi=dpi) b_i = pdftopng(b, p, "b", verbosity=verbosity, dpi=dpi) if a_i != b_i: if verbosity >= VERB_PRINT_REASON: print("Different number of pages: {} vs {}", a_i, b_i) return False assert len(a_i) > 0 significances = [] for pageno in a_i: # remember pageno is a string pageapath = p / "a-{}.png".format(pageno) pagebpath = p / "b-{}.png".format(pageno) diffpath = p / "diff-{}.png".format(pageno) logpath = p / "log-{}.txt".format(pageno) s = imgdiff(pageapath, pagebpath, diffpath, logpath, (verbosity > VERB_PRINT_CMD)) if verbosity >= VERB_PERPAGE: print("- Page {}: significance={}".format(pageno, s)) significances.append(s) min_significance = min(significances, default=INFINITY) significant = (min_significance <= threshold) if verbosity >= VERB_PRINT_REASON: freetext = "different" if significant else "the same" print("Min sig = {}, significant?={}. The PDFs are {}.".format( min_significance, significant, freetext )) if time_to_inspect > 0: print( "Waiting for {} seconds before removing temporary directory..." .format(time_to_inspect), end='', flush=True ) time.sleep(time_to_inspect) print(" done.") return not significant def verbose_run(print_cmd, args, *restargs, **kw): if print_cmd: print(" Running: {}".format(' '.join(args)), file=sys.stderr) return subprocess.run(args, *restargs, **kw) PK!H+;E2diff_pdf_visually-1.1.0.dist-info/entry_points.txtN+I/N.,()JLK-HI-,.MɩEa"V񹉙yz PK!HW"TT'diff_pdf_visually-1.1.0.dist-info/WHEEL A н#J."jm)Afb~ ڡ5 G7hiޅF4+-3ڦ/̖?XPK!H% _ *diff_pdf_visually-1.1.0.dist-info/METADATAWrF}TcB6@8ŅHЬ4Z +i\^&rzF ^RbZMtӧ/2{Ӧ|WQ::;.ȶ]s|7'CI>9 "WZ=xrV_# Ek]GXKEsWJW9 N :B ].I!oJSـ5n6.oKu| e*R0-qDuzRg>H:a$YFź89[i"z+Zp1Kn07+"hyzPVGY]!(aoKOS:1ǞBl1oy]/5(#_SgDSC9₂` Qxq@8(P›ǀnicV7Q|٥ƆkQj-4g0+r ")2tѳx mpHع*p1{Zݫģh[Ul ΦTlޚr =2 AtsUcc=>^Kؠ#F;d[5wxmc'__BkT&?Р=qqQqaá^q+cN<]$ Ŵ/ N':Zz%v;zْH41WDciƮs&(C !Z 3Гumש .YAL YHE{ 3.tj<]U9EJAFzcjl4Wb#&I/WEntay&$Q~qۮ7HCl &rgHH1L( 9F *F3^vr:΀c?Y[v(.N韒x_cÁaH ?ZO8D۞bb@PK!HEg(diff_pdf_visually-1.1.0.dist-info/RECORD9o@>0-fY 6_(.Ҟy?2&,1*.7By?kAlo ;@*R4LbY%yVQQ@Od4dKt"Qb)ǍԢXTz迱f1^;#A60k\8anuw଀yD|5~ro̢<~19CFܤ"n I~:k^z6a0[U|"̪lHuO˥ ]v.QZgجYq/>.ƍtKsdUvGm#:'rʤ0ծͽ.K\@5m^PK!ܔGwwdiff_pdf_visually/__init__.pyPK!zdiff_pdf_visually/__main__.pyPK!s?,,xdiff_pdf_visually/constants.pyPK!1~~diff_pdf_visually/diff.pyPK!H+;E2diff_pdf_visually-1.1.0.dist-info/entry_points.txtPK!HW"TT' diff_pdf_visually-1.1.0.dist-info/WHEELPK!H% _ *diff_pdf_visually-1.1.0.dist-info/METADATAPK!HEg( "diff_pdf_visually-1.1.0.dist-info/RECORDPK#