You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

99 lines
3.0KB

  1. #!/usr/bin/env python3.6
  2. import math
  3. import sys
  4. import os
  5. import xmltodict
  6. #from pprint import pprint
  7. from os.path import join, exists
  8. from PIL import Image, ImageDraw
  9. # find -iname \*.jp2 -exec opj_decompress -i {} -o {}.ppm \;
  10. BASE_PATH = sys.argv[1]
  11. ITEM_ID = sys.argv[2]
  12. FILTER_PAGE_NR = None
  13. if len(sys.argv) > 3:
  14. # ex: 0020, or ANY
  15. FILTER_PAGE_NR = sys.argv[3]
  16. if len(sys.argv) > 4:
  17. OUTPATH = sys.argv[4]
  18. r = xmltodict.parse(open(join(BASE_PATH, ITEM_ID, ITEM_ID + '_scandata.xml'), 'rb'))
  19. relevant = r['book']['pageData']['page']
  20. #pprint(relevant)
  21. #PIL.ImageDraw.Draw.polygon(xy, fill=None, outline=None)
  22. for r in relevant:
  23. # Only want 'Normal' pagetype, for now
  24. pagetype = r['pageType']
  25. if pagetype != 'Normal':
  26. continue
  27. orig_file_name = r['origFileName']
  28. rotation = int(r['rotateDegree'])
  29. skew = float(r['skewAngle'])
  30. apply_skew = r['skewActive']
  31. crop = r['cropBox']
  32. crop_x, crop_y, crop_w, crop_h = int(crop['x']), int(crop['y']), int(crop['w']), int(crop['h'])
  33. if FILTER_PAGE_NR and FILTER_PAGE_NR != 'ANY' and orig_file_name[0:4] != FILTER_PAGE_NR:
  34. continue
  35. #print(pagetype, orig_file_name, rotation, skew, apply_skew, crop_x, crop_y, crop_w, crop_h)
  36. skew_r = math.radians(skew)
  37. if apply_skew:
  38. top_l = (crop_x, crop_y)
  39. top_r = (crop_x + crop_w * math.cos(skew_r), crop_y - crop_w * math.sin(skew_r))
  40. bottom_l = (crop_x + crop_h * math.sin(skew_r), crop_y + crop_h * math.cos(skew_r))
  41. bottom_r = (top_r[0], bottom_l[1] - crop_w * math.sin(skew_r))
  42. else:
  43. top_l = (crop_x, crop_y)
  44. top_r = (crop_x + crop_w, crop_y)
  45. bottom_l = (crop_x, crop_y + crop_h)
  46. bottom_r = (top_r[0], bottom_l[1])
  47. coords = [str(int(x)) for x in top_l + top_r + bottom_r + bottom_l]
  48. data = [ITEM_ID + '_orig_' + orig_file_name.replace('.jpg', '.jp2.ppm')]
  49. data.extend(coords)
  50. print(','.join(data))
  51. if OUTPATH:
  52. outbase = join(OUTPATH, ITEM_ID)
  53. if not exists(outbase):
  54. os.makedirs(outbase)
  55. i = Image.open(join(BASE_PATH + ITEM_ID, ITEM_ID + '_orig_jp2', ITEM_ID + '_orig_' + orig_file_name.replace('.jpg', '.jp2')))
  56. #i = Image.open(join(BASE_PATH + ITEM_ID, ITEM_ID + '_orig_jp2', ITEM_ID + '_orig_' + orig_file_name.replace('.jpg', '.jp2.ppm')))
  57. i = i.rotate(-rotation, expand=True)
  58. iw, ih = i.size
  59. p = join(outbase, orig_file_name).replace('.jpg', '.orig.jpg')
  60. print(p)
  61. isave=i.copy()
  62. isave.thumbnail((iw/4, ih/4), Image.ANTIALIAS)
  63. isave.save(p)
  64. #i.save(p)
  65. idraw = ImageDraw.Draw(i)
  66. idraw.rectangle([0, 0, iw, ih], outline=0, fill=0)
  67. idraw.polygon([top_l, top_r, bottom_r, bottom_l], outline=255, fill=255)
  68. p = join(outbase, orig_file_name).replace('.jpg', '.coloured.jpg')
  69. print(p)
  70. isave=i.copy()
  71. isave.thumbnail((iw/4, ih/4), Image.ANTIALIAS)
  72. isave.save(p)
  73. #i.save(p)