View Javadoc
1   package net.sf.okapi.steps.paraaligner;
2   
3   import java.util.Iterator;
4   import java.util.LinkedList;
5   import java.util.List;
6   
7   import net.sf.okapi.common.LocaleId;
8   import net.sf.okapi.common.resource.ISegments;
9   import net.sf.okapi.common.resource.ITextUnit;
10  import net.sf.okapi.common.resource.Segment;
11  import net.sf.okapi.common.resource.TextContainer;
12  
13  class AlignedParagraphs {
14  	private List<List<ITextUnit>> sourceParas;
15  	private List<List<ITextUnit>> targetParas;
16  	private List<ITextUnit> alignedParas;
17  	private LocaleId targetLocale;
18  
19  	public AlignedParagraphs(LocaleId targetLocale) {
20  		this.sourceParas = new LinkedList<>();
21  		this.targetParas = new LinkedList<>();
22  		this.alignedParas = new LinkedList<>();
23  		this.targetLocale = targetLocale;
24  	}
25  
26  	public void addAlignment(ITextUnit srcTu, ITextUnit trgTu) {
27  		List<ITextUnit> srcParas = new LinkedList<>();
28  		if (srcTu != null) {
29  			srcParas.add(srcTu);
30  		}
31  		List<ITextUnit> trgParas = new LinkedList<>();
32  		if (trgTu != null) {
33  			trgParas.add(trgTu);
34  		}
35  		sourceParas.add(srcParas);
36  		targetParas.add(trgParas);
37  	}
38  
39  	public void addAlignment(List<ITextUnit> srcTus, List<ITextUnit> trgTus) {
40  		sourceParas.add(srcTus);
41  		targetParas.add(trgTus);
42  	}
43  
44  	public List<ITextUnit> align() {
45  		// source and target lists are guaranteed to have the same number of elements, though some will be null
46  		for (List<ITextUnit> stus : sourceParas) {
47  			ITextUnit stu = null;
48  			List<ITextUnit> ttus = targetParas.remove(0);
49  			
50  			if (stus != null && !stus.isEmpty()) {
51  				stu = combineTextUnits(stus);
52  				if (stu != null && ttus != null && !ttus.isEmpty()) {
53  					ITextUnit combinedTarget = combineTextUnits(ttus);
54  					if (combinedTarget != null) {
55  						stu = addTargetTextUnitAsTarget(stu, combinedTarget, targetLocale);
56  					}
57  				}
58  			} else {
59  				if (ttus != null && !ttus.isEmpty()) {
60  					// source is null, use target tu (target content is source in this TU)
61  					ITextUnit tempTarget = combineTextUnits(ttus);
62  					if (tempTarget != null) {
63  						tempTarget.createTarget(targetLocale, true, ITextUnit.COPY_ALL);	
64  						// remove the source
65  						tempTarget.setSource(null);
66  						stu = tempTarget;
67  					}
68  				}
69  			}
70  			
71  			// Only add non-null aligned paragraphs
72  			if (stu != null) {
73  				alignedParas.add(stu);
74  			}
75  		}
76  		
77  		return alignedParas;
78  	}
79  
80  	private ITextUnit combineTextUnits(List<ITextUnit> tus) {
81  		// Guard against null or empty list to prevent NoSuchElementException
82  		if (tus == null || tus.isEmpty()) {
83  			return null;
84  		}
85  		
86  		Iterator<ITextUnit> its = tus.iterator();
87  		ITextUnit tuNew = its.next().clone();
88  		TextContainer tcAligned = tuNew.getSource();
89  		
90  		while (its.hasNext()) {
91  			ITextUnit tu2combin = its.next();
92  			TextContainer tcSource = tu2combin.getSource();
93  			tcSource.joinAll(); // joins all data parts and segments into one
94  			ISegments segs2add = tcSource.getSegments();
95  			Iterator<Segment> itSeg = segs2add.iterator();
96  			while (itSeg.hasNext()) {
97  				String cLast;
98  				String sCurNoTrim = tcAligned.toString();
99  				String sCur = sCurNoTrim.trim();
100 				int lonny = sCur.length();
101 				if (lonny > 0) {
102 					cLast = sCur.substring(lonny - 1);
103 					if (sCur.equals(sCurNoTrim)) {
104 						if (cLast.equals("!") || cLast.equals(".") || cLast.equals("?")) {
105 							// append two spaces so previous punc will be a break point
106 							tcAligned.append(" "); 
107 						}
108 							
109 						else {
110 							// didn't end with whitespace, so force a sentence break
111 							tcAligned.append(". "); 
112 						}
113 					}
114 				}
115 				
116 				// this combines content from next text unit
117 				tcAligned.append(itSeg.next().getContent()); 
118 			}
119 		}
120 		tcAligned.joinAll(); // join new parts together as one
121 		return tuNew;
122 	}
123 
124 	private ITextUnit addTargetTextUnitAsTarget(ITextUnit tuSource, ITextUnit tuTarget, LocaleId targetLocale) {
125 		tuSource.setTarget(targetLocale, tuTarget.getSource());
126 		return tuSource;
127 	}
128 }