PK!ܔGwwdiff_pdf_visually/__init__.pyfrom typing import List __all__ = [] # type: List[str] from .diff import pdfdiff from .diff import pdftopng, imgdiff PK!Ddiff_pdf_visually/__main__.pyimport argparse, sys from . import pdfdiff from .constants import DEFAULT_THRESHOLD, DEFAULT_VERBOSITY, DEFAULT_DPI from .constants import MAX_VERBOSITY def main(): description = """ Compare two PDFs visually. The exit code is 0 if they are the same, and 2 if there are significant differences. """.strip() parser = argparse.ArgumentParser(description=description) verbosity = DEFAULT_VERBOSITY def more_silent(): assert verbosity <= DEFAULT_VERBOSITY, "cannot be both silent and verbose" verbosity -= 1 def more_verbose(): assert verbosity >= DEFAULT_VERBOSITY, "cannot be both silent and verbose" verbosity += 1 parser.add_argument('a', metavar='a.pdf') parser.add_argument('b', metavar='b.pdf') parser.add_argument('--silent', '-q', action='count', default=0, help="silence output (can be used only once)") assert DEFAULT_VERBOSITY==1 parser.add_argument('--verbose', '-v', action='count', default=0, help="show more information (can be used {} times)".format( MAX_VERBOSITY - DEFAULT_VERBOSITY)) parser.add_argument('--threshold', default=DEFAULT_THRESHOLD, type=float, help="PSNR threshold to consider a change significant, " "higher is more sensitive (default: %(default)s)") parser.add_argument('--dpi', default=DEFAULT_DPI, type=int, help="resolution for the rasterised files (default: %(default)s)") parser.add_argument('--time', default=0, type=int, help="number of seconds to wait before discarding temporary files, " "or 0 to immediately discard (hint: use -v)") args = parser.parse_args() assert args.silent == 0 or args.verbose == 0, "cannot be silent and verbose" assert 1 <= args.dpi verbosity = DEFAULT_VERBOSITY + args.verbose - args.silent if pdfdiff(args.a, args.b, verbosity=verbosity, threshold=args.threshold, dpi=args.dpi, time_to_inspect=args.time): sys.exit(0) else: sys.exit(2) if __name__ == '__main__': main() PK!s?,,diff_pdf_visually/constants.pyINFINITY = float('inf') # Default threshold: lower means to ignore more. DEFAULT_THRESHOLD = 100 # Default verbosity. Zero means quiet. DEFAULT_VERBOSITY = 1 MAX_VERBOSITY = 3 # Resolution (in dots per inch) in which to render pages DEFAULT_DPI = 50 # Minimum verbosity for printing what the result is, and why VERB_PRINT_REASON=1 # Minimum verbosity for printing what the temporary directory is VERB_PRINT_TMPDIR=2 # Minimum verbosity for printing the significance for each page VERB_PERPAGE=2 # Minimum verbosity for printing commands VERB_PRINT_CMD=3 PK!1~~diff_pdf_visually/diff.py#!/usr/bin/env python3 """ Test if there is a significant difference between two PDFs using ImageMagick and pdftocairo. """ INFINITY = float('inf') import os.path, pathlib, subprocess, sys, tempfile, time from .constants import DEFAULT_THRESHOLD, DEFAULT_VERBOSITY, DEFAULT_DPI from .constants import VERB_PRINT_REASON, VERB_PRINT_TMPDIR from .constants import VERB_PERPAGE, VERB_PRINT_CMD def pdftopng(sourcepath, destdir, basename, verbosity, dpi): """ Invoke pdftocairo to convert the given PDF path to a PNG per page. Return a list of page numbers (as strings). """ if [] != list(destdir.glob(basename + '*')): raise ValueError("destdir not clean: " + repr(destdir)) verbose_run((verbosity > VERB_PRINT_CMD), [ 'pdftocairo', '-png', '-r', str(dpi), str(sourcepath), str(destdir / basename) ], stdout=subprocess.PIPE, stderr=subprocess.PIPE, check=True, ) # list of strings with decimals numbers = sorted(path.name for path in destdir.glob(basename + '*' + '.png')) return [s[len(basename)+1:-4] for s in numbers] # returns a float, which can be inf def imgdiff(a, b, diff, log, print_cmds): assert a.is_file() assert b.is_file() assert not diff.exists() assert not log.exists() with log.open('wb') as f: cmdresult = verbose_run(print_cmds, [ 'compare', '-verbose', '-metric', 'PSNR', str(a), str(b), str(diff), ], stdout=f, stderr=subprocess.STDOUT, ) if cmdresult.returncode > 1: raise ValueError("compare crashed, status="+str(cmdresult.returncode)) with log.open('r') as f: lines = f.readlines() if any('image widths or heights differ' in l for l in lines): raise ValueError("image widths or heights differ") PREF=' all: ' all_line = [l for l in lines if l.startswith(PREF)] assert len(all_line) == 1 all_str = all_line[0][len(PREF):].strip() all_num = INFINITY if all_str == '0' else float(all_str) return all_num def pdfdiff(a, b, threshold=DEFAULT_THRESHOLD, verbosity=DEFAULT_VERBOSITY, dpi=DEFAULT_DPI, time_to_inspect=0): """Given two filenames, return whether the PDFs are sufficiently similar.""" assert os.path.isfile(a), "file {} must exist".format(a) assert os.path.isfile(b), "file {} must exist".format(b) with tempfile.TemporaryDirectory(prefix="diffpdf") as d: p = pathlib.Path(d) if verbosity >= VERB_PRINT_TMPDIR: print(" Temporary directory: {}".format(p)) # expand a a_i = pdftopng(a, p, "a", verbosity=verbosity, dpi=dpi) b_i = pdftopng(b, p, "b", verbosity=verbosity, dpi=dpi) if a_i != b_i: if verbosity >= VERB_PRINT_REASON: print("Different number of pages: {} vs {}", a_i, b_i) return False assert len(a_i) > 0 significances = [] for pageno in a_i: # remember pageno is a string pageapath = p / "a-{}.png".format(pageno) pagebpath = p / "b-{}.png".format(pageno) diffpath = p / "diff-{}.png".format(pageno) logpath = p / "log-{}.txt".format(pageno) s = imgdiff(pageapath, pagebpath, diffpath, logpath, (verbosity > VERB_PRINT_CMD)) if verbosity >= VERB_PERPAGE: print("- Page {}: significance={}".format(pageno, s)) significances.append(s) min_significance = min(significances, default=INFINITY) significant = (min_significance <= threshold) if verbosity >= VERB_PRINT_REASON: freetext = "different" if significant else "the same" print("Min sig = {}, significant?={}. The PDFs are {}.".format( min_significance, significant, freetext )) if time_to_inspect > 0: print( "Waiting for {} seconds before removing temporary directory..." .format(time_to_inspect), end='', flush=True ) time.sleep(time_to_inspect) print(" done.") return not significant def verbose_run(print_cmd, args, *restargs, **kw): if print_cmd: print(" Running: {}".format(' '.join(args)), file=sys.stderr) return subprocess.run(args, *restargs, **kw) PK!H+;E2diff_pdf_visually-1.2.0.dist-info/entry_points.txtN+I/N.,()JLK-HI-,.MɩEa"V񹉙yz PK!HW"TT'diff_pdf_visually-1.2.0.dist-info/WHEEL A н#J."jm)Afb~ ڡ5 G7hiޅF4+-3ڦ/̖?XPK!H6 _ *diff_pdf_visually-1.2.0.dist-info/METADATAWrF}TlB6@8ŅHЬ4Z +i\^&rzF ^RbZMtӧ/2{Ӧn~_Q::;.ȶ]sn~Ov=}rZEz y<eiG^׺Зʯɯ =&BJol{_ 4 QL:ѠctQe:&eԘM8CeD C#J6T*֥`REG[tcxx}t"Iu)qsީUF-EW66[c`SoVEa!5TBP0Tt.c=Dc,oa^k,PF0N/ψ: Rr1\ tqP<Յ7+mÏC{{Ƭ2o2, h K עԺ[h `VDR d*&gdYn3$ñsUcWGVIfKM:$?ϱRټ5z8De"#z}L7;:Aq?=-F wn&tk O%>רM>A{<,l%$k)C10V.x|#ĻH*i_Nt>dB'K\)펋w9%G%hbN(Bӌ]LPC-Cf'S\$:\)7@`Es7j#g5]x$rȃjƴ$)بiXAKGL^4߿MHУ,|.]oFxreG]b <8tǖأlK3]$ qW 8. ۇe@0@O!|q̦:eԕTtYcq Ko4P~LϮ&tx42JQ<XJKJQK'uk\N_͠ Qtnc"y5T8ݨD51)g'QuN.UҺdʦ9mYWdaKI/}b?Mtcڙ* 9QstUdg@|uN͝~@糖|.Q\?%6>Ɔ‘D~O8D۞bb@PK!H Z(diff_pdf_visually-1.2.0.dist-info/RECORDйr@2K @$]XHmh~ʁ<5NonZ9j}NwPY#BPCpGi^HbƋ,Һ[ $)m0LTۆbY%]AIY֦ LH`Gq#!m=-}Ty] `8VW[ScR,{o1ǽPȐG(s[aގ(AWޭJ}j, b*8e[aRH+;ivmMZ#]ycOP۔y|b{̣%5>tN5% 4!mW_mWt^KA&a>U(8ϗs$/b|P2nË$G;'3Y\/gήnM"(/PK!ܔGwwdiff_pdf_visually/__init__.pyPK!Ddiff_pdf_visually/__main__.pyPK!s?,, diff_pdf_visually/constants.pyPK!1~~ diff_pdf_visually/diff.pyPK!H+;E2diff_pdf_visually-1.2.0.dist-info/entry_points.txtPK!HW"TT'+diff_pdf_visually-1.2.0.dist-info/WHEELPK!H6 _ *diff_pdf_visually-1.2.0.dist-info/METADATAPK!H Z(%diff_pdf_visually-1.2.0.dist-info/RECORDPK&