@comment{Chapter by Dagan, Church and Gale in an edited collection; typed as incollection so that classic BibTeX styles use booktitle and accept author together with editor. LaTeX specials in the abstract are escaped.}
@incollection{be80b14fef114503ac5d4993a9676fb3,
  author    = {Dagan, Ido and Church, Kenneth and Gale, William},
  title     = {Robust bilingual word alignment for machine aided translation},
  editor    = {Armstrong, Susan and Church, Kenneth and Isabelle, Pierre and Manzi, Sandra and Tzoukermann, Evelyne and Yarowsky, David},
  booktitle = {Natural Language Processing Using Very Large Corpora},
  series    = {Text, Speech and Language Technology},
  publisher = {Springer Netherlands},
  address   = {Dordrecht},
  year      = {1999},
  pages     = {209--224},
  isbn      = {978-94-017-2390-9},
  language  = {American English},
  abstract  = {We have developed a new program called word\_align for aligning parallel text, text such as the Canadian Hansards that are available in two or more languages. The program takes the output of char\_align (Church, 1993), a robust alternative to sentence-based alignment programs, and applies word-level constraints using a version of Brown et al.'s Model 2 (Brown et al., 1993), modified and extended to deal with robustness issues. Word\_align was tested on a subset of Canadian Hansards supplied by Simard (Simard et al., 1992). The combination of word\_align plus char\_align reduces the variance (average square error) by a factor of 5 over char\_align alone. More importantly, because word\_align and char\_align were designed to work robustly on texts that are smaller and more noisy than the Hansards, it has been possible to successfully deploy the programs at AT\&T Language Line Services, a commercial translation service, to help them with difficult terminology.},
}