Page boundary detection in historical documents
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

113 lines
2.7KB

  1. #!/usr/bin/python
  2. import os
  3. import sys
  4. import argparse
  5. import numpy as np
  6. import caffe
  7. import cv2
  8. from process_pixel_labels import post_process
# Default network definition and weight files (the "cbad" model). The
# __main__ block rebinds both names to the "ohio" files when the `model`
# argument equals 'ohio'; main() reads them when constructing caffe.Net.
NET_FILE = './models/cbad_train_val.prototxt'
WEIGHT_FILE = './models/cbad_weights.caffemodel'
  11. def safe_mkdir(_dir):
  12. try:
  13. os.makedirs(_dir)
  14. except:
  15. pass
  16. def predict(network, im, output_blob, args):
  17. network.blobs["data"].data[0,:,:,:] = im
  18. network.forward()
  19. if args.model == 'ohio':
  20. # sigmoid
  21. response = network.blobs[output_blob].data[0,0,:,:].copy()
  22. response[response >= 0.5] = 1
  23. response[response <= 0.5] = 0
  24. return response
  25. else:
  26. # softmax
  27. response = network.blobs[output_blob].data[0,:].copy()
  28. return np.argmax(response, axis=0)
  29. def presolve(net, args):
  30. net.blobs["data"].reshape(args.batch_size, 3, args.image_size, args.image_size)
  31. net.blobs["gt"].reshape(args.batch_size, 1, args.image_size, args.image_size)
  32. def main(args):
  33. net = caffe.Net(NET_FILE, WEIGHT_FILE, caffe.TEST)
  34. presolve(net, args)
  35. file_list = map(lambda s: s.strip(), open(args.manifest, 'r').readlines())
  36. fd = open(args.out_file, 'w')
  37. for idx, line in enumerate(file_list):
  38. if idx % args.print_count == 0:
  39. print "Processed %d/%d Images" % (idx, len(file_list))
  40. tokens = line.split(',')
  41. f = tokens[0]
  42. resolved = os.path.join(args.image_dir, f)
  43. im = cv2.imread(resolved, 1)
  44. _input = 0.0039 * (cv2.resize(im, (256, 256)) - 127.)
  45. _input = np.transpose(_input, (2, 0, 1))
  46. raw = (255 * predict(net, _input, 'out', args)).astype(np.uint8)
  47. out_fn = os.path.join(args.out_dir, f.replace('/','_')[:-4] + "_raw.png")
  48. cv2.imwrite(out_fn, raw)
  49. post, coords = post_process(raw)
  50. out_fn = os.path.join(args.out_dir, f.replace('/','_')[:-4] + "_post.png")
  51. cv2.imwrite(out_fn, post)
  52. def get_args():
  53. parser = argparse.ArgumentParser(description="Outputs binary predictions")
  54. parser.add_argument("image_dir",
  55. help="The directory where images are stored")
  56. parser.add_argument("manifest",
  57. help="txt file listing images relative to image_dir")
  58. parser.add_argument("model",
  59. help="[cbad|ohio]")
  60. parser.add_argument("out_file", type=str,
  61. help="Output file")
  62. parser.add_argument("--out-dir", type=str, default='out',
  63. help="")
  64. parser.add_argument("--gpu", type=int, default=0,
  65. help="GPU to use for running the network")
  66. parser.add_argument("--print-count", default=10, type=int,
  67. help="Print interval")
  68. args = parser.parse_args()
  69. print args
  70. return args
  71. if __name__ == "__main__":
  72. args = get_args()
  73. safe_mkdir(args.out_dir)
  74. if args.model == 'ohio':
  75. NET_FILE = './models/ohio_train_val.prototxt'
  76. WEIGHT_FILE = './models/ohio_weights.caffemodel'
  77. if args.gpu >= 0:
  78. caffe.set_device(args.gpu)
  79. caffe.set_mode_gpu()
  80. else:
  81. caffe.set_mode_cpu()
  82. main(args)