1 package com.acumenvelocity.ath.filters.pdf;
2
3 import java.io.InputStream;
4 import java.util.ArrayList;
5 import java.util.List;
6
7 import com.acumenvelocity.ath.common.Log;
8 import com.acumenvelocity.ath.common.PdfUtil;
9
10 import net.sf.okapi.common.Event;
11 import net.sf.okapi.common.IParameters;
12 import net.sf.okapi.common.LocaleId;
13 import net.sf.okapi.common.MimeTypeMapper;
14 import net.sf.okapi.common.UsingParameters;
15 import net.sf.okapi.common.encoder.EncoderManager;
16 import net.sf.okapi.common.exceptions.OkapiIOException;
17 import net.sf.okapi.common.filters.FilterConfiguration;
18 import net.sf.okapi.common.filters.IFilter;
19 import net.sf.okapi.common.filters.IFilterConfigurationMapper;
20 import net.sf.okapi.common.filterwriter.IFilterWriter;
21 import net.sf.okapi.common.resource.RawDocument;
22 import net.sf.okapi.common.resource.StartDocument;
23 import net.sf.okapi.common.skeleton.GenericSkeletonWriter;
24 import net.sf.okapi.common.skeleton.ISkeletonWriter;
25 import net.sf.okapi.filters.openxml.ConditionalParameters;
26 import net.sf.okapi.filters.openxml.OpenXMLFilter;
27 import net.sf.okapi.filters.openxml.OpenXMLFilterWriter;
28
29
30
31
32
33 @UsingParameters(Parameters.class)
34 public class AthPdfFilter implements IFilter {
35 private EncoderManager encoderManager;
36 private RawDocument input;
37 private OpenXMLFilter docxFilter;
38 private Parameters params;
39 private LocaleId srcLoc;
40
41 public AthPdfFilter() {
42 super();
43 params = new Parameters();
44 }
45
46 @Override
47 public void cancel() {
48 if (docxFilter != null) {
49 docxFilter.cancel();
50 }
51 }
52
53 @Override
54 public void close() {
55 if (input != null) {
56 input.close();
57 }
58
59 if (docxFilter != null) {
60 docxFilter.close();
61 }
62 }
63
64 @Override
65 public String getName() {
66 return "okf_pdf";
67 }
68
69 @Override
70 public String getDisplayName() {
71 return "PDF Filter (Adobe services)";
72 }
73
74 @Override
75 public String getMimeType() {
76 return MimeTypeMapper.PDF_MIME_TYPE;
77 }
78
79 @Override
80 public Parameters getParameters() {
81 return params;
82 }
83
84 @Override
85 public void setParameters(IParameters params) {
86 this.params = (Parameters) params;
87 }
88
89 @Override
90 public void setFilterConfigurationMapper(IFilterConfigurationMapper fcMapper) {
91 }
92
93 @Override
94 public ISkeletonWriter createSkeletonWriter() {
95 return new GenericSkeletonWriter();
96 }
97
98 @Override
99 public IFilterWriter createFilterWriter() {
100 return new AthPdfFilterWriter((OpenXMLFilterWriter) docxFilter.createFilterWriter());
101 }
102
103 @Override
104 public List<FilterConfiguration> getConfigurations() {
105 List<FilterConfiguration> list = new ArrayList<>();
106
107 list.add(new FilterConfiguration("okf_pdf", MimeTypeMapper.PDF_MIME_TYPE, getClass().getName(),
108 "PDF (Portable Document Format)",
109 "Configuration for PDF documents using Adobe PDF Services",
110 null, ".pdf;"));
111
112 return list;
113 }
114
115 @Override
116 public EncoderManager getEncoderManager() {
117 if (encoderManager == null) {
118 encoderManager = new EncoderManager();
119
120 encoderManager.setMapping(MimeTypeMapper.PDF_MIME_TYPE,
121 "net.sf.okapi.common.encoder.DefaultEncoder");
122 }
123
124 return encoderManager;
125 }
126
127 @Override
128 public void open(RawDocument input) {
129 open(input, true);
130 }
131
132 @Override
133 public void open(RawDocument input, boolean generateSkeleton) {
134 this.input = input;
135
136 try {
137 Log.info(getClass(), "Converting PDF to DOCX using Adobe PDF Services...");
138
139
140 InputStream docxInputStream = PdfUtil.convertPdfToDocx(input.getStream(), srcLoc,
141 params.getOcrMode());
142
143 Log.info(getClass(), "PDF converted to DOCX successfully");
144
145
146 docxFilter = new OpenXMLFilter();
147
148 ConditionalParameters docxParams = new ConditionalParameters();
149 docxParams.fromString(params.toString());
150
151 docxFilter.setParameters(docxParams);
152
153 RawDocument docxRawDoc = new RawDocument(
154 docxInputStream,
155 input.getEncoding(),
156 input.getSourceLocale());
157
158 docxRawDoc.setFilterConfigId("okf_openxml");
159 docxRawDoc.setId(input.getId());
160
161 docxFilter.open(docxRawDoc, generateSkeleton);
162 Log.info(getClass(), "OpenXML filter opened successfully");
163
164 } catch (Exception e) {
165 throw new OkapiIOException("Error processing PDF file", e);
166 }
167 }
168
169 @Override
170 public boolean hasNext() {
171 return docxFilter != null && docxFilter.hasNext();
172 }
173
174 @Override
175 public Event next() {
176 Event e = docxFilter.next();
177
178 if (e.isStartDocument()) {
179 StartDocument sd = e.getStartDocument();
180 sd.setMimeType(MimeTypeMapper.PDF_MIME_TYPE);
181 sd.setName(input.getId());
182 this.srcLoc = sd.getLocale();
183 sd.setFilterWriter(createFilterWriter());
184 }
185
186 return e;
187 }
188 }