@article{561,
  title    = {{OpenICS}: Open Image Compressive Sensing Toolbox and Benchmark},
  journal  = {Software Impacts},
  volume   = {9},
  year     = {2021},
  month    = may,
  pages    = {100081},
  abstract = {The real-world application of image compressive sensing is largely limited by the lack of standardization in implementation and evaluation. To address this limitation, we present OpenICS, an image compressive sensing toolbox that implements multiple popular image compressive sensing algorithms into a unified framework with a standardized user interface. Furthermore, a corresponding benchmark is also proposed to provide a fair and complete evaluation of the implemented algorithms. We hope this work can serve the growing research community of compressive sensing and the industry to facilitate the development and application of image compressive sensing.},
  keywords = {psclab},
  doi      = {10.1016/j.simpa.2021.100081},
  url      = {https://arxiv.org/pdf/2103.00652.pdf},
  author   = {Jonathan Zhao and Matthew Westerham and Mark Lakatos-Toth and Zhikang Zhang and Avi Moskoff and Fengbo Ren}
}

@inproceedings{xu2020frequency,
  title     = {Learning in the Frequency Domain},
  booktitle = {Proceedings of the {IEEE/CVF} Conference on Computer Vision and Pattern Recognition ({CVPR})},
  year      = {2020},
  month     = jun,
  pages     = {1740--1749},
  address   = {Seattle, WA},
  abstract  = {Deep neural networks have achieved remarkable success in computer vision tasks. Existing neural networks mainly operate in the spatial domain with fixed input sizes. For practical applications, images are usually large and have to be downsampled to the predetermined input size of neural networks. Even though the downsampling operations reduce computation and the required communication bandwidth, it removes both redundant and salient information obliviously, which results in accuracy degradation. Inspired by digital signal processing theories, we analyze the spectral bias from the frequency perspective and propose a learning-based frequency selection method to identify the trivial frequency components which can be removed without accuracy loss. The proposed method of learning in the frequency domain leverages identical structures of the well-known neural networks, such as ResNet-50, MobileNetV2, and Mask R-CNN, while accepting the frequency-domain information as the input. Experiment results show that learning in the frequency domain with static channel selection can achieve higher accuracy than the conventional spatial downsampling approach and meanwhile further reduce the input data size. Specifically for ImageNet classification with the same input size, the proposed method achieves 1.41\% and 0.66\% top-1 accuracy improvements on ResNet-50 and MobileNetV2, respectively. Even with half input size, the proposed method still improves the top-1 accuracy on ResNet-50 by 1\%. In addition, we observe a 0.8\% average precision improvement on Mask R-CNN for instance segmentation on the COCO dataset.},
  keywords  = {psclab},
  author    = {Kai Xu and Minghai Qin and Fei Sun and Yuhao Wang and Yen-Kuang Chen and Fengbo Ren}
}

@article{414,
  title    = {A Review of Algorithm \& Hardware Design for {AI}-Based Biomedical Applications},
  journal  = {IEEE Transactions on Biomedical Circuits and Systems},
  volume   = {14},
  year     = {2020},
  month    = apr,
  pages    = {145--163},
  abstract = {This paper reviews the state of the arts and trends of the AI-based biomedical processing algorithms and hardwares. The algorithms and hardwares for different biomedical applications such as ECG, EEG and hearing aid have been reviewed and discussed. For algorithm design, various widely used biomedical signal classification algorithms have been discussed including support vector machine (SVM), back propagation neural network (BPNN), convolutional neural networks (CNN), probabilistic neural networks (PNN), recurrent neural networks (RNN), Long Short-term Memory Network (LSTM), fuzzy neural network and etc. The pros and cons of the classification algorithms have been analyzed and compared in the context of application scenarios. The research trends of AI-based biomedical processing algorithms and applications are also discussed. For hardware design, various AI-based biomedical processors have been reviewed and discussed, including ECG classification processor, EEG classification processor, EMG classification processor and hearing aid processor. Various techniques on architecture and circuit level have been analyzed and compared. The research trends of the AI-based biomedical processor have also been discussed.},
  keywords = {psclab},
  issn     = {1940-9990},
  doi      = {10.1109/TBCAS.2020.2974154},
  author   = {Ying Wei and Jun Zhou and Yin Wang and Yinggang Liu and Qingsong Liu and Jiansheng Luo and Chao Wang and Fengbo Ren and Li Huang}
}

@article{379,
  title    = {A {34-FPS} {698-GOP/s/W} Binarized Deep Neural Network-Based Natural Scene Text Interpretation Accelerator for Mobile Edge Computing},
  journal  = {IEEE Transactions on Industrial Electronics (TIE)},
  volume   = {66},
  year     = {2019},
  month    = oct,
  pages    = {7407--7416},
  abstract = {The scene text interpretation is a critical part of natural scene interpretation. Currently, most of the existing work is based on high-end GPU implementation, which is commonly used on the server side. However, in IoT application scenarios, the communication overhead from the edge device to the server is quite large, which sometimes even dominates the total processing time. Hence, the edge-computing oriented design is needed to solve this problem. In this paper, we present an architectural design and implementation of a natural scene text interpretation (NSTI) accelerator, which can classify and localize the text region on pixel-level efficiently in real-time on mobile devices. To target the real-time and low-latency processing, the Binary Convolutional Encoder-decoder Network (B-CEDNet) is adopted as the core architecture to enable massive parallelism due to its binary feature. Massively parallelized computations and a highly pipelined data flow control enhance its latency and throughput performance. In addition, all the binarized intermediate results and parameters are stored on chip to eliminate the power consumption and latency overhead of the off-chip communication. The NSTI accelerator is implemented in a 40nm CMOS technology, which can process scene text images (size of 128x32) at 34 fps and latency of 40 ms for pixelwise interpretation with the pixelwise classification accuracy over 90\% on ICDAR-03 and ICDAR-13 dataset. The real energy-efficiency is 698 GOP/s/W and the peak energy-efficiency can get up to 7825 GOP/s/W. The proposed accelerator is 7$\times$ more energy efficient than its optimized GPU-based implementation counterpart, while maintaining a real-time throughput with latency of 40 ms.},
  keywords = {psclab},
  doi      = {10.1109/TIE.2018.2875643},
  url      = {https://ieeexplore.ieee.org/document/8513982},
  author   = {Yixing Li and Zichuan Liu and Wenye Liu and Yu Jiang and Yongliang Wang and Wang Ling Goh and Hao Yu and Fengbo Ren}
}

@article{2016,
  title    = {Data-Driven Sampling Matrix {Boolean} Optimization for Energy-Efficient Biomedical Signal Acquisition by Compressive Sensing},
  journal  = {IEEE Transactions on Biomedical Circuits and Systems},
  volume   = {11},
  year     = {2017},
  month    = nov,
  pages    = {255--266},
  abstract = {Compressive sensing is widely used in biomedical applications, and the sampling matrix plays a critical role on both quality and power consumption of signal acquisition. It projects a high-dimensional vector of data into a low-dimensional subspace by matrix-vector multiplication. An optimal sampling matrix can ensure accurate data reconstruction and/or high compression ratio. Most existing optimization methods can only produce real-valued embedding matrices that result in large energy consumption during data acquisition. In this paper, we propose an efficient method that finds an optimal Boolean sampling matrix in order to reduce the energy consumption. Compared to random Boolean embedding, our data-driven Boolean sampling matrix can improve the image recovery quality by 9 dB. Moreover, in terms of sampling hardware complexity, it reduces the energy consumption by 4.6$\times$ and the silicon area by 1.9$\times$ over the data-driven real-valued embedding.},
  keywords = {psclab},
  author   = {Wang, Yuhao and Li, Xin and Xu, Kai and Ren, Fengbo and Yu, Hao}
}

@article{palangi2016distributed,
  title    = {Distributed Compressive Sensing: A Deep Learning Approach},
  journal  = {IEEE Transactions on Signal Processing},
  volume   = {64},
  number   = {17},
  year     = {2016},
  pages    = {4504--4518},
  keywords = {Compressive Sensing, ref},
  author   = {Palangi, Hamid and Ward, Rabab K and Deng, Li}
}

@inproceedings{wang2015optimizing,
  title     = {Optimizing {Boolean} Embedding Matrix for Compressive Sensing in {RRAM} Crossbar},
  booktitle = {Proceedings of the 20th Asia and South Pacific Design Automation Conference ({ASP-DAC})},
  year      = {2015},
  pages     = {13--18},
  publisher = {IEEE},
  keywords  = {Compressive Sensing, ref},
  author    = {Wang, Yuhao and Li, Xin and Yu, Hao and Ni, Leibin and Yang, Wei and Weng, Chuliang and Zhao, Junfeng}
}

@misc{32,
  title    = {Body Voltage Sensing Based Short Pulse Reading Circuit},
  number   = {PCT/US2012/056136},
  year     = {2014},
  note     = {Patent application published Mar 28, 2013 as WO2013043738A1; granted as US8917562B2},
  abstract = {As memory geometries continue to scale down, current density of magnetic tunnel junctions (MTJs) make conventional low current reading scheme problematic with regard to performance and reliability. A body-voltage sense circuit (BVSC) short pulse reading (SPR) circuit is described using body connected load transistors and a novel sensing circuit with second stage amplifier which allows for very short read pulses providing much higher read margins, less sensing time, and shorter sensing current pulses. Simulation results (using 65-nm CMOS model SPICE simulations) show that our technique can achieve 550 mV of read margin at 1 ns performance under a 1 V supply voltage, which is greater than reference designs achieve at 5 ns performance.},
  keywords = {psclab},
  url      = {http://www.google.com/patents/WO2013043738A1},
  author   = {Kang-Lung Wang and Chih-Kong K. Yang and Dejan Markovic and Fengbo Ren}
}

@inproceedings{chen2014dadiannao,
  title     = {{DaDianNao}: A Machine-Learning Supercomputer},
  booktitle = {Proceedings of the 47th Annual {IEEE/ACM} International Symposium on Microarchitecture ({MICRO})},
  year      = {2014},
  pages     = {609--622},
  publisher = {IEEE Computer Society},
  keywords  = {Deep Learning, ref},
  author    = {Chen, Yunji and Luo, Tao and Liu, Shaoli and Zhang, Shijin and He, Liqiang and Wang, Jia and Li, Ling and Chen, Tianshi and Xu, Zhiwei and Sun, Ninghui and others}
}

@inproceedings{chen2014diannao,
  title     = {{DianNao}: A Small-Footprint High-Throughput Accelerator for Ubiquitous Machine-Learning},
  booktitle = {Proceedings of the 19th International Conference on Architectural Support for Programming Languages and Operating Systems ({ASPLOS})},
  volume    = {49},
  number    = {4},
  year      = {2014},
  pages     = {269--284},
  publisher = {ACM},
  keywords  = {Deep Learning, ref},
  author    = {Chen, Tianshi and Du, Zidong and Sun, Ninghui and Wang, Jia and Wu, Chengyong and Chen, Yunji and Temam, Olivier}
}

@inproceedings{ouyang2014sda,
  title     = {{SDA}: Software-Defined Accelerator for Large-Scale {DNN} Systems},
  booktitle = {2014 {IEEE} Hot Chips 26 Symposium ({HCS})},
  year      = {2014},
  pages     = {1--23},
  publisher = {IEEE},
  keywords  = {Deep Learning, ref},
  author    = {Ouyang, Jian and Lin, Shiding and Qi, Wei and Wang, Yong and Yu, Bo and Jiang, Song}
}

@book{james2013introduction,
  title        = {An Introduction to Statistical Learning},
  volume       = {112},
  year         = {2013},
  publisher    = {Springer},
  organization = {Springer},
  keywords     = {Deep Learning, ref},
  url          = {https://lagunita.stanford.edu/courses/HumanitiesSciences/StatLearning/Winter2016/about},
  author       = {James, Gareth and Witten, Daniela and Hastie, Trevor and Tibshirani, Robert}
}

@inproceedings{palangi2013using,
  title     = {Using Deep Stacking Network to Improve Structured Compressed Sensing with Multiple Measurement Vectors},
  booktitle = {Proceedings of the {IEEE} International Conference on Acoustics, Speech and Signal Processing ({ICASSP})},
  year      = {2013},
  pages     = {3337--3341},
  keywords  = {Compressive Sensing, ref},
  author    = {Palangi, Hamid and Ward, Rabab K and Deng, Li}
}

@inproceedings{157,
  title     = {From {OpenCL} to High-Performance Hardware on {FPGAs}},
  booktitle = {Proceedings of the 22nd International Conference on Field Programmable Logic and Applications ({FPL})},
  year      = {2012},
  pages     = {531--534},
  publisher = {IEEE},
  keywords  = {Parallel and Reconfigurable Computing, ref},
  author    = {Czajkowski, Tomasz S and Aydonat, Utku and Denisenko, Dmitry and Freeman, John and Kinsner, Michael and Neto, David and Wong, Jason and Yiannacouras, Peter and Singh, Deshanand P}
}

@inproceedings{zinkevich2010parallelized,
  title     = {Parallelized Stochastic Gradient Descent},
  booktitle = {Advances in Neural Information Processing Systems 23 ({NIPS})},
  year      = {2010},
  pages     = {2595--2603},
  keywords  = {Deep Learning, ref},
  author    = {Zinkevich, Martin and Weimer, Markus and Li, Lihong and Smola, Alex J}
}

@article{candes2008introduction,
  title     = {An Introduction to Compressive Sampling},
  journal   = {IEEE Signal Processing Magazine},
  volume    = {25},
  number    = {2},
  year      = {2008},
  pages     = {21--30},
  publisher = {IEEE},
  keywords  = {Compressive Sensing, ref},
  author    = {Cand{\`e}s, Emmanuel J and Wakin, Michael B}
}

@article{figueiredo2007gradient,
  title     = {Gradient Projection for Sparse Reconstruction: Application to Compressed Sensing and Other Inverse Problems},
  journal   = {IEEE Journal of Selected Topics in Signal Processing},
  volume    = {1},
  number    = {4},
  year      = {2007},
  pages     = {586--597},
  publisher = {IEEE},
  keywords  = {Compressive Sensing, ref},
  author    = {Figueiredo, M{\'a}rio AT and Nowak, Robert D and Wright, Stephen J}
}