@comment{
  Conference paper from ICPADS 2016 (entry originally exported by a
  repository tool; the citation key is kept as exported so existing
  \cite commands keep working).
  NOTE(review): month/day below (2 July) do not match the conference
  dates recorded in the note field (13--16 December 2016). They look
  like a repository record date rather than the publication date --
  confirm against the publisher page before relying on them.
}
@inproceedings{f4882bd864684dde83c79a5ed5179c9c,
  author    = {Liu, Hongyuan and Lam, King Tin and Lin, Huanxin and Wang, Cho Li and Ma, Junchao},
  title     = {Lightweight Dependency Checking for Parallelizing Loops with Non-Deterministic Dependency on {GPU}},
  booktitle = {Proceedings - 22nd IEEE International Conference on Parallel and Distributed Systems, ICPADS 2016},
  series    = {Proceedings of the International Conference on Parallel and Distributed Systems - ICPADS},
  editor    = {Liao, Xiaofei and Lovas, Robert and Shen, Xipeng and Zheng, Ran},
  pages     = {884--893},
  year      = {2016},
  month     = jul,
  day       = {2},
  doi       = {10.1109/ICPADS.2016.0119},
  language  = {English},
  keywords  = {Code Generation, Dependency Checking, GPGPU, Loop Parallelization},
  abstract  = {General-purpose GPUs have been prevalent for a decade. Nevertheless, GPU programming remains an onerous job practically exclusive to veteran developers who must know both domain-specific knowledge and GPU architecture well. Although current parallelizing compilers that automatically parallelize and offload sizable loops onto the GPU have helped in unfettering the power of the GPU with minimal programming effort, there are still a family of loops that carry statically non-deterministic data dependencies and cannot be parallelized. To tackle this issue, we propose two lightweight dependency checking schemes that are very different from existing conservative compilers to assist parallelizing loops with non-deterministic data dependencies. Our schemes feature linear work complexity for memory operations, lower memory consumption compared to previous work, and minimal false positives by leveraging the lockstep execution on the GPU's SIMD lanes. Experiments done using microbenchmarking and real-life applications on the latest advanced AMD discrete GPUs show that our schemes can achieve 2.2$\times$ speedup over existing solutions in dependency-free cases while only taking about 20\% of time compared to existing solutions in the case with statically unproven loop-carried dependencies.},
  note      = {Publisher Copyright: {\textcopyright} 2016 IEEE.; 22nd IEEE International Conference on Parallel and Distributed Systems, ICPADS 2016 ; Conference date: 13-12-2016 Through 16-12-2016},
}