% Conference paper (IALP 2018); the url field points at the eprints.uet.vnu.edu.vn record.
% Acronyms are brace-protected so sentence-casing styles keep their capitals;
% month uses the standard three-letter macro so styles can format/localise it.
@inproceedings{SisLab3170,
  author    = {Ho, Thi Nga and Can, Duy Cat and Chng, Eng Siong},
  title     = {An Investigation of Word Embeddings with Deep Bidirectional {LSTM} for Sentence Unit Detection in Automatic Speech Transcription},
  booktitle = {International Conference on Asian Language Processing ({IALP} 2018)},
  year      = {2018},
  month     = nov,
  url       = {https://eprints.uet.vnu.edu.vn/eprints/id/eprint/3170/},
  abstract  = {This work investigates the effectiveness of using the word based and sub-word based embedding representations as input for a deep bidirectional Long Short-Term Memory Network for Sentence Unit Detection in Automatic Speech Recognition transcription. Our experimental results show that using sub-word based embedding can significantly improve the SUD performance when a limited text is used to train both the word embedding and the SUD model. The SUD model using the sub-word based embedding gains up to 2.07\% absolute improvement in F1-score as compared to the best model trained with the word-based embedding. When tested on a domain-mismatch condition, the SUD model with sub-word based embedding trained from the in-domain data gives an approximate 2\% and 1\% improvement over the best model using out-of-domain embedding with reference and ASR transcription with 29.5\% Word Error Rate respectively.},
}