% Conference paper (KSE, 2016) on a standardization procedure for PM10 monitoring data
% from a ground station in Hanoi; record taken from the UET ePrints repository (eprint 2776).
% Units in the abstract are written with LaTeX escapes so the file stays ASCII-only.
@inproceedings{SisLab2776,
  author    = {Nguyen, Duc Linh and Man, Duc Chuc and Bui, Quang Hung and Nguyen, Thi Nhat Thanh},
  title     = {Standardization procedure for automatic environmental data: A case study in {Hanoi}, {Vietnam}},
  booktitle = {The 8th International Conference on Knowledge and Systems Engineering ({KSE})},
  year      = {2016},
  month     = dec,
  pages     = {321--326},
  keywords  = {Atmospheric measurements;Correlation;Data models;Filling;Monitoring;Pollution measurement;Training;PM10;abnormal detection;environmental data;missing filling},
  url       = {https://eprints.uet.vnu.edu.vn/eprints/id/eprint/2776/},
  abstract  = {In Vietnam, environmental data collected from ground-based stations may contain abnormal or missing values due to several problems during operation, i.e. sensor's problems. This paper proposes a standardization procedure which try to detect unusual values and fill in missing data. Experiments were conducted for PM10 data. Two datasets measured in 01/2011 and 01/2012 at Nguyen Van Cu station in Hanoi, Vietnam is used for experiments. For the abnormal detection process, unusual data can be informed to the data analyzers at ground stations for judging. For the missing filling process, the first dataset is used as training dataset to construct regression models for predicting missing data, the second dataset is used as testing data. In the worst case, suppose 100\% PM10 is missing, Root Mean Square Error (RMSE) and Mean Absolute Percentage Error (MAPE) are 51 $\mu$g/m$^3$ and 45\% respectively. Correlation coefficient (R) between original PM10 data and predicted PM10 data is 0.56. In addition, different scenarios taking account of percentage of missing data of the whole testing dataset are also considered. Experimental results showed that it is best to perform missing filling process on datasets that contain 10\% to 30\% of missing data. For this case, RMSE ranges from 15--25 $\mu$g/m$^3$ and MAPE varies from 5 to 13\%.},
}