Page boundary detection in historical documents
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

334 lines
11KB

  1. #!/usr/bin/python
  2. #This takes a list of image files and acts as a tool to mark the crop region of the page
  3. import re
  4. import xml.etree.ElementTree as ET
  5. import os
  6. import sys
  7. from StringIO import StringIO
  8. import cv2
  9. def showControls():
  10. print(' -----------------------------------------------')
  11. print('| CONTROLS: |')
  12. print('| * set new corner (base on loc): left-click |')
  13. print('| * set new seam corner (two-page): middle-click|')
  14. print('| * confirm corners: enter |')
  15. print('| * mark page as abnormal: a |')
  16. print('| * undo: backspace |')
  17. print('| * start previous page over: backspace(+)|')
  18. #print('| * start current page over: delete |')
  19. print('| * exit: esc |')
  20. print('| |')
  21. print(' -----------------------------------------------')
  22. lastDidList=[]
  23. tl=(-1,-1)
  24. tr=(-1,-1)
  25. bl=(-1,-1)
  26. br=(-1,-1)
  27. tm=(-1,-1)
  28. bm=(-1,-1)
  29. image=None
  30. orig=None
  31. abnorm=False
  32. def draw():
  33. global image,tl,tr,bl,br,tm,bm,abnorm
  34. if tm[0]>=0 and bm[0]>=0:
  35. cv2.line(image, tm, bm, (0,255,0), 2)
  36. if tm[0]>=0 and tl[0]>=0:
  37. cv2.line(image, tm, tl, (0,255,0), 2)
  38. if tm[0]>=0 and tr[0]>=0:
  39. cv2.line(image, tm, tr, (0,255,0), 2)
  40. if bm[0]>=0 and bl[0]>=0:
  41. cv2.line(image, bm, bl, (0,255,0), 2)
  42. if bm[0]>=0 and br[0]>=0:
  43. cv2.line(image, bm, br, (0,255,0), 2)
  44. if tl[0]>=0 and tr[0]>=0:
  45. cv2.line(image, tl, tr, (255,0,0), 2)
  46. if br[0]>=0 and tr[0]>=0:
  47. cv2.line(image, br, tr, (255,0,0), 2)
  48. if br[0]>=0 and bl[0]>=0:
  49. cv2.line(image, br, bl, (255,0,0), 2)
  50. if tl[0]>=0 and bl[0]>=0:
  51. cv2.line(image, tl, bl, (255,0,0), 2)
  52. if tl[0]>=0:
  53. image[tl[1],tl[0]]=(0,0,255)
  54. cv2.circle(image, tl, 2, (0,0,200), 1)
  55. cv2.circle(image, tl, 5, (0,0,200), 2)
  56. if tr[0]>=0:
  57. image[tr[1],tr[0]]=(0,0,255)
  58. cv2.circle(image, tr, 2, (0,0,200), 1)
  59. cv2.circle(image, tr, 5, (0,0,200), 2)
  60. if bl[0]>=0:
  61. image[bl[1],bl[0]]=(0,0,255)
  62. cv2.circle(image, bl, 2, (0,0,200), 1)
  63. cv2.circle(image, bl, 5, (0,0,200), 2)
  64. if br[0]>=0:
  65. image[br[1],br[0]]=(0,0,255)
  66. cv2.circle(image, br, 2, (0,0,200), 1)
  67. cv2.circle(image, br, 5, (0,0,200), 2)
  68. if tm[0]>=0:
  69. image[tm[1],tm[0]]=(0,0,255)
  70. cv2.circle(image, tm, 2, (0,100,200), 1)
  71. cv2.circle(image, tm, 5, (0,100,200), 2)
  72. if bm[0]>=0:
  73. image[bm[1],bm[0]]=(0,0,255)
  74. cv2.circle(image, bm, 2, (0,100,200), 1)
  75. cv2.circle(image, bm, 5, (0,100,200), 2)
  76. if abnorm:
  77. cv2.putText(image, 'ABNORMAL', (image.shape[1]/2,image.shape[0]/2), cv2.FONT_HERSHEY_PLAIN, 2, (0,0,255))
  78. cv2.imshow("image", image)
# Spare image slot; no active code in the visible part of this file reads or
# assigns it. Kept so that any external reference to the name still resolves.
bimage=None
  80. def clicker(event, x, y, flags, param):
  81. # grab references to the global variables
  82. global image,tl,tr,bl,br,tm,bm,lastDidList,orig
  83. """if event == cv2.EVENT_LBUTTONDOWN:
  84. if len(segPts)>0:
  85. #change last boundary
  86. image=bimage.copy()
  87. segPts[-1]=x
  88. ll=max(0,segPts[-1]-1)
  89. rr=min(image.shape[1], segPts[-1]+1)
  90. image[:,ll:rr,0] = color[(colorIdx+len(color)-1)%len(color)][0] * image[:,ll:rr,0]
  91. image[:,ll:rr,1] = color[(colorIdx+len(color)-1)%len(color)][1] * image[:,ll:rr,1]
  92. image[:,ll:rr,2] = color[(colorIdx+len(color)-1)%len(color)][2] * image[:,ll:rr,2]
  93. cv2.imshow("image", image)
  94. """
  95. if event == cv2.EVENT_LBUTTONDOWN:
  96. # a new boundary
  97. if x<image.shape[1]/2 and y<image.shape[0]/2:
  98. tl=(x,y)
  99. if 0 in lastDidList:
  100. image=orig.copy()
  101. lastDidList.remove(0)
  102. lastDidList.append(0)
  103. if x>image.shape[1]/2 and y<image.shape[0]/2:
  104. tr=(x,y)
  105. if 1 in lastDidList:
  106. image=orig.copy()
  107. lastDidList.remove(1)
  108. lastDidList.append(1)
  109. if x<image.shape[1]/2 and y>image.shape[0]/2:
  110. bl=(x,y)
  111. if 2 in lastDidList:
  112. image=orig.copy()
  113. lastDidList.remove(2)
  114. lastDidList.append(2)
  115. if x>image.shape[1]/2 and y>image.shape[0]/2:
  116. br=(x,y)
  117. if 3 in lastDidList:
  118. image=orig.copy()
  119. lastDidList.remove(3)
  120. lastDidList.append(3)
  121. draw()
  122. elif event == cv2.EVENT_MBUTTONDOWN:
  123. # a new boundary
  124. if y<image.shape[0]/2:
  125. tm=(x,y)
  126. if 1 in lastDidList:
  127. image=orig.copy()
  128. lastDidList.remove(1)
  129. lastDidList.append(4)
  130. if y>image.shape[0]/2:
  131. bm=(x,y)
  132. if 3 in lastDidList:
  133. image=orig.copy()
  134. lastDidList.remove(3)
  135. lastDidList.append(5)
  136. draw()
  137. def segmenter(imDir,imagePath,dispHeight):
  138. global image,tl,tr,bl,br,tm,bm,lastDidList,orig,abnorm
  139. print 'opening '+imDir+imagePath
  140. orig = cv2.imread(imDir+imagePath)
  141. scale = orig.shape[0]/dispHeight
  142. orig = cv2.resize(orig,(0,0),None,1.0/scale,1.0/scale)
  143. #print 'opened'
  144. assert orig is not None
  145. redo=True
  146. while redo: #undo loop
  147. abnorm=False
  148. lastDidList=[]
  149. tl=(-1,-1)
  150. tr=(-1,-1)
  151. bl=(-1,-1)
  152. br=(-1,-1)
  153. tm=(-1,-1)
  154. bm=(-1,-1)
  155. redo=False
  156. image = orig.copy()
  157. draw()
  158. while True:
  159. # display the imageWork and wait for a keypress
  160. key = cv2.waitKey(33) & 0xFF #so it is robust on all systems
  161. #print key
  162. if key == 13 and tl[0]>=0 and tr[0]>=0 and bl[0]>=0 and br[0]>=0: #enter
  163. toWrite = imagePath+','+str(int(scale*tl[0]))+','+str(int(scale*tl[1]))+','+str(int(scale*tr[0]))+','+str(int(scale*tr[1]))+','+str(int(scale*br[0]))+','+str(int(scale*br[1]))+','+str(int(scale*bl[0]))+','+str(int(scale*bl[1]))
  164. if abnorm:
  165. if tm[0]>=0 and bm[0]>=0:
  166. toWrite += ',ABNORMAL,'+str(int(scale*tm[0]))+','+str(int(scale*tm[1]))+','+str(int(scale*bm[0]))+','+str(int(scale*bm[1]))
  167. else:
  168. toWrite += ',ABNORMAL'
  169. else:
  170. if tm[0]>=0 and bm[0]>=0:
  171. toWrite += ',DOUBLE,'+str(int(scale*tm[0]))+','+str(int(scale*tm[1]))+','+str(int(scale*bm[0]))+','+str(int(scale*bm[1]))
  172. else:
  173. toWrite += ',SINGLE'
  174. toWrite+='\n';
  175. return toWrite, False, False
  176. elif key == 8: #backspace
  177. if len(lastDidList)>0:
  178. imageWork = orig.copy()
  179. lastDid=lastDidList.pop()
  180. if lastDid==0:
  181. tl=(-1,-1)
  182. elif lastDid==1:
  183. tr=(-1,-1)
  184. elif lastDid==2:
  185. bl=(-1,-1)
  186. elif lastDid==3:
  187. br=(-1,-1)
  188. elif lastDid==4:
  189. tm=(-1,-1)
  190. elif lastDid==5:
  191. bm=(-1,-1)
  192. image=orig.copy()
  193. draw()
  194. else:
  195. return '', True, False
  196. elif key == 127: #del
  197. #if len(lastDidList)>0:
  198. print('[CLEAR]')
  199. redo=True
  200. break
  201. #else:
  202. # return '', True, False
  203. elif key == 27: #esc
  204. print('esc')
  205. return '', False, True
  206. #exit(0)
  207. #break
  208. elif key == 97: #'a'
  209. #return imagePath+',-1,-1,-1,-1,-1,-1,-1,-1,ABNORMAL\n', False, False
  210. abnorm = not abnorm
  211. image=orig.copy()
  212. draw()
  213. #return newWords, newWordBoxes
  214. if len(sys.argv)<4:
  215. print 'usage: '+sys.argv[0]+' imgDir imgList outAnn.csv [displayHeight]'
  216. print 'output format: imageFile, tlx, tly, trx, try, brx, bry, blx, bly, type (,tmx, tmy, bmx, bmy)'
  217. exit(0)
  218. inFile = sys.argv[2]
  219. imDir = sys.argv[1]
  220. if imDir[-1]!='/':
  221. imDir+='/'
  222. outFile = sys.argv[3]
  223. dispHeight=500.0
  224. if len(sys.argv)>4:
  225. dispHeight=float(sys.argv[4])
  226. cv2.namedWindow("image")
  227. cv2.setMouseCallback("image", clicker)
  228. didCount=0
  229. did=[]
  230. try:
  231. check = open(outFile,'r')
  232. did = check.read().splitlines()
  233. didCount=len(did)
  234. check.close()
  235. print 'found '+outFile+', appending. Note: this is sychronizing based on count alone, if '+inFile+' hash changed, but sure to align '+outFile
  236. except IOError:
  237. print ('making new out:'+outFile)
  238. out = open(outFile,'w')
  239. print ' =============================================== '
  240. print ' !!! INSTRUCTIONS !!!'
  241. print ' If the page does not contain a single page, or '
  242. print ' an open book, mark as abnormal with INSERT (e.g.'
  243. print ' two seperate pages).'
  244. print ' Click on the four corners to include all the '
  245. print ' full pages in the image (including two pages if '
  246. print ' fully present).'
  247. print ' If two pages a full present also mark page seam '
  248. print ' (middle-click).'
  249. print ' On placing points, prioritize the following to '
  250. print ' be included/discluded from the polygons in the '
  251. print ' following order:'
  252. print ' 1. Including the present page(s) content.'
  253. print ' 2. Discluding other pages and background.'
  254. print ' 3. Discluding the present page(s) boudary.'
  255. print ' 4. Including the present page(s) white area.'
  256. #print ' book). If a corner is torn, click where it ought'
  257. #print ' to be, based on page edges. The page seem on an '
  258. #print ' open book is the page edge.'
  259. print ' Use ESC to exit or the latest page you finished '
  260. print ' will be lost.'
  261. #i=didCount
  262. i=0
  263. #pageCount=-1
  264. prevSeg=''
  265. seg=''
  266. showControls()
  267. inF = open(inFile,'r')
  268. images = inF.read().splitlines()
  269. end=False
  270. doneOne=False
  271. while i<len(images) and not end:
  272. if i%10==9:
  273. showControls()
  274. print(str(i+1)+' of '+str(len(images)))
  275. if len(did)>i:
  276. line = did[i].strip().split(',')
  277. typ = line[8]
  278. #print typ
  279. if typ != '-1':
  280. out.write(did[i].strip()+'\n')
  281. i+=1
  282. continue
  283. seg, undo, end = segmenter(imDir, images[i],dispHeight)
  284. out.write(seg)
  285. seg=''
  286. i+=1
  287. else:
  288. seg, undo, end = segmenter(imDir, images[i],dispHeight)
  289. if len(seg)>0:
  290. doneOne=True
  291. if undo and i>0 and doneOne:
  292. prevSeg=''
  293. print(str(i)+' of '+str(len(images)))
  294. prevSeg, undo, end = segmenter(imDir, images[i-1],dispHeight)
  295. else:
  296. out.write(prevSeg)
  297. prevSeg=seg
  298. seg=''
  299. i+=1
  300. out.write(prevSeg)
  301. out.write(seg)