#!/usr/bin/env python3.6
# Origin: commit e9b1188d55451a0272e47b157d21bd48b3e144dd ("Initial parsing
# script", Merlijn Wajer, Sat Apr 25 13:05:35 2020 +0200), new file
# parse_scandata.py.
"""Print the crop-box polygon of every 'Normal' page in an item's scandata.

Usage:
    parse_scandata.py BASE_PATH ITEM_ID [FILTER_PAGE_NR|ANY] [OUTPATH]

Reads ``BASE_PATH/ITEM_ID/ITEM_ID_scandata.xml`` and prints one CSV row per
selected page: the image name followed by the x,y coordinates of the
top-left, top-right, bottom-right and bottom-left crop corners.

When OUTPATH is given, two quarter-size JPEG previews per page are also
written to ``OUTPATH/ITEM_ID``: the rotated original (``*.orig.jpg``) and a
mask with the crop polygon filled in (``*.coloured.jpg``).
"""

import math
import os
import sys

from os.path import join

# The '.jp2.ppm' names printed in the CSV rows presumably refer to files
# produced with:
#   find -iname \*.jp2 -exec opj_decompress -i {} -o {}.ppm \;

# Preview images are saved at 1/THUMBNAIL_DIVISOR of the original size.
THUMBNAIL_DIVISOR = 4


def parse_args(argv):
    """Return ``(base_path, item_id, filter_page_nr, outpath)`` from *argv*.

    The two optional trailing arguments default to ``None``. Exits with a
    usage message when the two mandatory arguments are missing.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else 'parse_scandata.py'
        raise SystemExit(
            'usage: {} BASE_PATH ITEM_ID [FILTER_PAGE_NR|ANY] [OUTPATH]'
            .format(prog))
    base_path, item_id = argv[1], argv[2]
    # ex: 0020, or ANY
    filter_page_nr = argv[3] if len(argv) > 3 else None
    outpath = argv[4] if len(argv) > 4 else None
    return base_path, item_id, filter_page_nr, outpath


def is_true(value):
    """Interpret an XML boolean text value such as ``'true'`` / ``'false'``.

    xmltodict hands element text back as strings, so plain truthiness would
    treat ``'false'`` as true.
    """
    return str(value).strip().lower() in ('true', '1')


def page_selected(orig_file_name, filter_page_nr):
    """Return True when the page passes the optional page-number filter.

    No filter (``None``/empty) or the literal ``'ANY'`` selects every page;
    otherwise the first four characters of the file name must match.
    """
    if not filter_page_nr or filter_page_nr == 'ANY':
        return True
    return orig_file_name[0:4] == filter_page_nr


def crop_polygon(crop_x, crop_y, crop_w, crop_h, skew, apply_skew):
    """Return the crop corners as ``[top_l, top_r, bottom_r, bottom_l]``.

    *skew* is in degrees and is only used when *apply_skew* is true;
    otherwise the axis-aligned crop box is returned.
    """
    top_l = (crop_x, crop_y)
    if apply_skew:
        skew_r = math.radians(-skew)
        sin_s, cos_s = math.sin(skew_r), math.cos(skew_r)
        top_r = (crop_x + crop_w * cos_s, crop_y + crop_w * sin_s)
        bottom_l = (crop_x + crop_h * sin_s, crop_y + crop_h * cos_s)
        # NOTE(review): bottom_r reuses top_r's x instead of adding
        # bottom_l's x offset, so this is not an exact rotated rectangle.
        # Kept exactly as originally written - confirm the intended geometry.
        bottom_r = (top_r[0], bottom_l[1] + crop_w * sin_s)
    else:
        top_r = (crop_x + crop_w, crop_y)
        bottom_l = (crop_x, crop_y + crop_h)
        bottom_r = (top_r[0], bottom_l[1])
    return [top_l, top_r, bottom_r, bottom_l]


def format_row(item_id, orig_file_name, polygon):
    """Return the CSV row: image name, then the integer corner coordinates."""
    name = item_id + '_orig_' + orig_file_name.replace('.jpg', '.jp2.ppm')
    coords = [str(int(value)) for corner in polygon for value in corner]
    return ','.join([name] + coords)


def save_thumbnail(image, size, path):
    """Save a copy of *image*, shrunk to fit within *size*, to *path*."""
    from PIL import Image

    thumb = image.copy()
    # Image.ANTIALIAS was an alias of LANCZOS and was removed in Pillow 10.
    thumb.thumbnail(size, Image.LANCZOS)
    thumb.save(path)


def save_previews(base_path, item_id, outpath, orig_file_name, rotation,
                  polygon):
    """Write the '.orig.jpg' and '.coloured.jpg' previews for one page.

    The source JP2 is rotated by ``-rotation`` degrees first. The second
    preview is the same canvas painted with fill 0 and the crop *polygon*
    drawn on top with fill 255.
    """
    # Imported here so the pure helpers above stay importable without Pillow.
    from PIL import Image, ImageDraw

    outbase = join(outpath, item_id)
    os.makedirs(outbase, exist_ok=True)

    source = join(base_path, item_id, item_id + '_orig_jp2',
                  item_id + '_orig_' + orig_file_name.replace('.jpg', '.jp2'))
    with Image.open(source) as original:
        # rotate() returns a new, fully loaded image, so the file can close.
        image = original.rotate(-rotation, expand=True)
    iw, ih = image.size
    thumb_size = (max(1, iw // THUMBNAIL_DIVISOR),
                  max(1, ih // THUMBNAIL_DIVISOR))

    # Only the file name is rewritten, never the directory part of the path.
    orig_path = join(outbase, orig_file_name.replace('.jpg', '.orig.jpg'))
    print(orig_path)
    save_thumbnail(image, thumb_size, orig_path)

    draw = ImageDraw.Draw(image)
    draw.rectangle([0, 0, iw, ih], outline=0, fill=0)
    draw.polygon(polygon, outline=255, fill=255)

    mask_path = join(outbase, orig_file_name.replace('.jpg', '.coloured.jpg'))
    print(mask_path)
    save_thumbnail(image, thumb_size, mask_path)


def main(argv):
    """Parse the item's scandata and emit one CSV row per selected page."""
    base_path, item_id, filter_page_nr, outpath = parse_args(argv)

    # Imported here so the pure helpers above stay importable without it.
    import xmltodict

    scandata_path = join(base_path, item_id, item_id + '_scandata.xml')
    with open(scandata_path, 'rb') as scandata:
        document = xmltodict.parse(scandata)

    pages = document['book']['pageData']['page']
    if isinstance(pages, dict):
        # xmltodict yields a single mapping, not a list, for a lone <page>.
        pages = [pages]

    for page in pages:
        # Only want 'Normal' pagetype, for now
        if page['pageType'] != 'Normal':
            continue

        orig_file_name = page['origFileName']
        if not page_selected(orig_file_name, filter_page_nr):
            continue

        rotation = int(page['rotateDegree'])
        skew = float(page['skewAngle'])
        apply_skew = is_true(page['skewActive'])
        crop = page['cropBox']
        polygon = crop_polygon(int(crop['x']), int(crop['y']),
                               int(crop['w']), int(crop['h']),
                               skew, apply_skew)

        print(format_row(item_id, orig_file_name, polygon))

        if outpath:
            save_previews(base_path, item_id, outpath, orig_file_name,
                          rotation, polygon)


if __name__ == '__main__':
    main(sys.argv)