Browse Source

Initial parsing script

master
Merlijn Wajer 5 years ago
commit
e9b1188d55
1 changed files with 98 additions and 0 deletions
  1. +98
    -0
      parse_scandata.py

+ 98
- 0
parse_scandata.py View File

@@ -0,0 +1,98 @@
#!/usr/bin/env python3.6

import math
import sys
import os
import xmltodict

#from pprint import pprint

from os.path import join, exists

from PIL import Image, ImageDraw

# find -iname \*.jp2 -exec opj_decompress -i {} -o {}.ppm \;

# Command line:
#   parse_scandata.py BASE_PATH ITEM_ID [FILTER_PAGE_NR [OUTPATH]]
if len(sys.argv) < 3:
    sys.exit('usage: %s BASE_PATH ITEM_ID [FILTER_PAGE_NR [OUTPATH]]'
             % sys.argv[0])

# Directory that contains one sub-directory per item, and the item to read.
BASE_PATH = sys.argv[1]
ITEM_ID = sys.argv[2]

# Optional page filter, compared against the first four characters of each
# page's origFileName.  ex: 0020, or ANY to keep every page.
FILTER_PAGE_NR = sys.argv[3] if len(sys.argv) > 3 else None

# Optional output directory for the debug images.  Always bound (None when
# omitted) so the later `if OUTPATH:` check cannot raise NameError.
OUTPATH = sys.argv[4] if len(sys.argv) > 4 else None

# xmltodict.parse accepts a binary file object; the with-block makes sure the
# handle is closed once the document has been parsed.
with open(join(BASE_PATH, ITEM_ID, ITEM_ID + '_scandata.xml'), 'rb') as scandata_file:
    scandata = xmltodict.parse(scandata_file)

relevant = scandata['book']['pageData']['page']

# xmltodict only produces a list for repeated elements; a document with a
# single <page> yields one mapping.  Normalise so callers can always iterate
# over page records.
if not isinstance(relevant, list):
    relevant = [relevant]

#pprint(relevant)
#PIL.ImageDraw.Draw.polygon(xy, fill=None, outline=None)

def save_quarter_preview(image, path):
    """Save a copy of *image*, shrunk to fit a quarter of its size, at *path*.

    The image passed in is left untouched; only the copy is resized.
    """
    width, height = image.size
    preview = image.copy()
    # Image.LANCZOS is the same filter as the old Image.ANTIALIAS alias, which
    # no longer exists in Pillow >= 10.
    preview.thumbnail((width // 4, height // 4), Image.LANCZOS)
    preview.save(path)


# Walk every page record, print its crop polygon as one CSV line
# (file name, then x,y for top-left, top-right, bottom-right, bottom-left),
# and optionally write debug images when OUTPATH is set.
for page in relevant:

    # Only want 'Normal' pagetype, for now
    if page['pageType'] != 'Normal':
        continue

    orig_file_name = page['origFileName']
    rotation = int(page['rotateDegree'])
    skew = float(page['skewAngle'])
    # xmltodict hands back element text as str, and the string 'false' is
    # truthy, so the flag has to be compared explicitly.
    apply_skew = str(page['skewActive']).strip().lower() == 'true'
    crop = page['cropBox']
    crop_x, crop_y, crop_w, crop_h = int(crop['x']), int(crop['y']), int(crop['w']), int(crop['h'])

    if FILTER_PAGE_NR and FILTER_PAGE_NR != 'ANY' and orig_file_name[0:4] != FILTER_PAGE_NR:
        continue

    skew_r = math.radians(-skew)

    if apply_skew:
        # Tilt the crop box edges by the (negated) skew angle.
        # NOTE(review): bottom_r reuses top_r's x coordinate instead of adding
        # the crop_h * sin term, so this is not an exact rotated rectangle --
        # confirm whether that is intended before relying on it.
        top_l = (crop_x, crop_y)
        top_r = (crop_x + crop_w * math.cos(skew_r), crop_y + crop_w * math.sin(skew_r))
        bottom_l = (crop_x + crop_h * math.sin(skew_r), crop_y + crop_h * math.cos(skew_r))
        bottom_r = (top_r[0], bottom_l[1] + crop_w * math.sin(skew_r))
    else:
        # Plain axis-aligned crop box.
        top_l = (crop_x, crop_y)
        top_r = (crop_x + crop_w, crop_y)
        bottom_l = (crop_x, crop_y + crop_h)
        bottom_r = (top_r[0], bottom_l[1])

    # Coordinates are truncated to integers for the CSV output.
    coords = [str(int(x)) for x in top_l + top_r + bottom_r + bottom_l]

    data = [ITEM_ID + '_orig_' + orig_file_name.replace('.jpg', '.jp2.ppm')]
    data.extend(coords)
    print(','.join(data))

    if OUTPATH:
        outbase = join(OUTPATH, ITEM_ID)
        os.makedirs(outbase, exist_ok=True)

        # Join BASE_PATH and ITEM_ID as separate components (as done for the
        # scandata file) so BASE_PATH does not need a trailing separator.
        jp2_name = ITEM_ID + '_orig_' + orig_file_name.replace('.jpg', '.jp2')
        jp2_path = join(BASE_PATH, ITEM_ID, ITEM_ID + '_orig_jp2', jp2_name)
        # rotate() returns a new, fully loaded image, so the source file can
        # be closed as soon as the rotation is done.
        with Image.open(jp2_path) as source:
            i = source.rotate(-rotation, expand=True)
        iw, ih = i.size

        # Preview of the rotated original.
        p = join(outbase, orig_file_name).replace('.jpg', '.orig.jpg')
        print(p)
        save_quarter_preview(i, p)

        # Blank the whole image, then fill the crop polygon, producing a mask
        # style preview of the selected region.
        idraw = ImageDraw.Draw(i)
        idraw.rectangle([0, 0, iw, ih], outline=0, fill=0)
        idraw.polygon([top_l, top_r, bottom_r, bottom_l], outline=255, fill=255)

        p = join(outbase, orig_file_name).replace('.jpg', '.coloured.jpg')
        print(p)
        save_quarter_preview(i, p)

Loading…
Cancel
Save