---
---
@article{mdm,
abbr={SIGAsia},
title={Motion to Dance Music Generation Using Latent Diffusion Model},
abstract={Music's role in games and animation, particularly in dance content, is essential for creating immersive experiences. Although recent studies have made strides in generating dance music from videos, their practicality in integrating music into games and animation remains limited. In this context, we present a method capable of generating plausible dance music from 3D motion data and genre labels. Our approach leverages a combination of a UNet-based latent diffusion model and a pre-trained VAE model. To evaluate our model's performance, we employed metrics that assess various audio properties, including beat alignment, audio quality, motion-music correlation, and genre score. Quantitative results show that our approach outperforms previous methods. Furthermore, we demonstrated that our model can generate audio that seamlessly fits in-the-wild motion data. This capability enables us to create plausible dance music that complements the dynamic movements of characters and enhances the overall audiovisual experience in interactive media.},
author={Tan, V.* and Nam, J.* and Nam, J. and Noh, J.},
journal={SIGGRAPH Asia 2023 Technical Communications},
year={2023},
pages={1--4},
doi={10.1145/3610543.3626164},
html={https://doi.org/10.1145/3610543.3626164},
publisher={Association for Computing Machinery},
blog={https://dmdproject.github.io/},
selected={true}
}
@article{mata-rl2,
abbr={AEROSPACE},
author={Sarmiento, J-A. and Tan, V. and Talampas, M.C. and Naval Jr., P.},
title={Sample Efficient Deep Reinforcement Learning for Diwata Microsatellite Reaction Wheel Attitude Control},
abstract={The Philippines has launched Diwata satellites to undertake different scientific missions. Low-orbit microsatellites are prone to external disturbances affecting their pointing accuracy; hence, an autonomous attitude control mechanism is vital to their operations. Deep reinforcement learning (DRL) has been proven effective in learning optimal control. There has been prior work on using DRL for the satellite’s reaction wheel attitude control in Mission, Attitude, and Telemetry Analysis (MATA), a simulation environment using Unity for Diwata satellites. However, results show that the applied methods are sample inefficient and still underperform on specific metrics against Diwata’s current attitude control system. In addition, using Unity’s Machine Learning Agents toolkit (ML-Agents) limits the training to Soft Actor-Critic (SAC) and Proximal Policy Optimization (PPO). This study aims to extend the prior research using Twin-Delayed Deep Deterministic Policy Gradient (TD3) and Prioritized Experience Replay (PER) to improve the performance and sample efficiency of the satellite agent. The training was done using OpenAI Gym connected to the MATA simulation environment. We conclude that TD3-PER outperforms the algorithms of SAC, PPO, and PID of the prior study in both sample efficiency and control performance.},
journal={Aerospace Systems},
year={2022},
html={https://link.springer.com/article/10.1007/s42401-022-00169-3},
}
@article{mata-rl,
abbr={SMALLSAT},
author={Tan, V. and Labrador, J.L. and Talampas, M.C.},
title={MATA-RL: Continuous Reaction Wheel Attitude Control Using the MATA Simulation Software and Reinforcement Learning},
abstract={As earth observation satellites, Diwata microsatellites need to have a high degree of target pointing accuracy. Additionally, being in low orbit, they could experience strong external disturbances. Current methods for attitude control have proven to be effective. However, they are prone to changes in control and mass parameters. In this paper, we explore using Deep Reinforcement Learning (RL) for attitude control. This paper also leverages Diwata’s simulator, the Mission, Attitude, and Telemetry Analysis (MATA) software, in training the RL agent. We implemented two RL algorithms: Proximal Policy Optimization (PPO) and Soft Actor-Critic (SAC). We then simulated different scenarios and compared the performance of these algorithms to that of Diwata’s current attitude controller, the Proportional-Integral-Derivative (PID) control. Our results show that reinforcement learning can outperform traditional controllers in terms of settling time, overshoot, and stability. The results of this research will help solve problems in conventional attitude controllers and enable satellite engineers to design a better Attitude Determination and Control System (ADCS).},
journal={Proceedings of the AIAA/USU Conference on Small Satellites, Year in Review - Research & Academia},
year={2021},
html={https://digitalcommons.usu.edu/smallsat/2021/all2021/246/},
blog={https://youtu.be/libBcmhUO4Q},
selected={true}
}
@article{mata-cloud,
abbr={SMALLSAT},
title={MATA-Cloud: A Cloud Detection and Dynamic Attitude Correction Evaluation Software},
abstract={With the increasing demand for high-resolution images from earth observation satellites, there is a need to optimize the usability of the images being downloaded in the ground stations. Most captured satellite images are not usable for certain applications due to high cloud cover percentage. To address this problem, this research demonstrates a cloud detection and dynamic attitude correction evaluation software. This software explores two key experiments. First is evaluating different image processing and machine learning-based approaches to detect cloud cover. The cloud detection algorithms were evaluated based on their accuracy, latency, and memory consumption. The second is exploring dynamic attitude correction to minimize the effect of cloud cover on captured images. Results show that our software can help test algorithms that increase the usability of captured images.},
author={Tan, V. and Banatao, J.A. and Labrador, J.L. and Mabaquiao, L.C. and Fortes, F.F. and Talampas, M.C.},
journal={Proceedings of the AIAA/USU Conference on Small Satellites, Flight & Ground Software},
year={2021},
blog={https://youtu.be/fj8iwdpnfYA},
html={https://digitalcommons.usu.edu/smallsat/2021/all2021/103/},
}
@article{mata,
abbr={TENCON},
title={MATA: Mission, Attitude, and Telemetry Analysis Software for Micro-Satellites},
abstract={With the rise in popularity of small satellites, there has been an increasing demand for a software tool that covers different stages of satellite development. In this paper, we extend a small satellite simulation software originally developed for earth-observation satellites Diwata-1 and Diwata-2 to support other satellite missions. This support covers various stages, from ideation and development up to post-launch assessment. This paper focuses on the Mission, Attitude, and Telemetry Analysis (MATA) software, which can simulate orbit, attitude, and camera views from planned earth-observation missions. Satellite engineers can also use MATA in a hardware-in-the-loop configuration, serving as one of the last functionality checks before launching the satellite. MATA can also read telemetry files from an orbiting satellite and re-project them in a virtual environment for a more intuitive assessment. This paper also covers the implemented framework for the simulator. This framework would help future developers to extend the simulator to other applications like star tracking simulations, mixed reality satellite training, and space educational software.},
author={Tan, V.* and Labrador, J.L.* and Talampas, M.C.},
journal={IEEE REGION 10 CONFERENCE (TENCON)},
year={2020},
pages={614--619},
doi={10.1109/TENCON50793.2020.9293937},
html={https://ieeexplore.ieee.org/abstract/document/9293937},
blog={https://youtu.be/maknppT0bT8},
publisher={IEEE},
}
@article{multitask,
abbr={TENCON},
title={Multi-task Learning for Detection, Recovery, and Separation of Polyphonic Music},
abstract={Music separation aims to extract the signals of individual sources from a given audio mixture. Recent studies explored the use of deep learning algorithms for this problem. Although these algorithms have proven to have good performance, they are inefficient as they need to learn an independent model for each sound source. In this study, we demonstrate a multi-task learning system for music separation, detection, and recovery. The proposed system separates polyphonic music into four sound sources using a single model. It also detects the presence of a source in the given mixture. Lastly, it reconstructs the input mixture to help the network further learn the audio representation. Our novel approach exploits the shared information in each task, thus improving the separation performance of the system. It was determined that the best configuration for multi-task learning is to separate the sources first, followed by parallel modules for classification and recovery. Quantitative and qualitative results show that the performance of our system is comparable to baselines for separation and classification.},
author={Tan, V. and de Leon, F.},
journal={IEEE REGION 10 CONFERENCE (TENCON)},
year={2020},
pages={1112--1117},
doi={10.1109/TENCON50793.2020.9293783},
html={https://ieeexplore.ieee.org/abstract/document/9293783/},
blog={https://youtu.be/3Q87nJLslrU},
publisher={IEEE},
}
@article{ismac,
abbr={ISMAC},
title={Time-Frequency Representations for Single-Channel Music Source Separation},
abstract={Inspired by the success of image classification and speech recognition, deep learning algorithms have been explored to solve music source separation. Solving this problem would open up a wide range of applications like automatic transcription, audio post-production, and many more. Most algorithms use the Short-Time Fourier Transform (STFT) as the Time-Frequency (T-F) input representation. Each deep learning model has a different configuration for STFT. There is no standard set of STFT parameters used in solving music source separation. This paper explores the different parameters for STFT and investigates another representation, the Constant-Q Transform (CQT), in separating three individual sound sources. Results of experiments show that dilated convolutional layers work well for STFT while standard convolutional layers work well for CQT. The best T-F representation for music source separation is STFT with dilated CNNs and a soft masking method. Furthermore, researchers should still consider the parameters of the T-F representations to have better performance for their deep learning models.},
author={Tan, V. and de Leon, F.},
journal={International Symposium on Multimedia and Communication Technology (ISMAC)},
year={2019},
pages={1--6},
doi={10.1109/ISMAC.2019.8836141},
html={https://ieeexplore.ieee.org/abstract/document/8836141},
publisher={IEEE}
}
@article{wicon,
abbr={WICON},
title={Audio Event Detection Using Wireless Sensor Networks Based on Deep Learning},
abstract={A wireless acoustic sensor network is useful for ambient assisted living (AAL) applications. Its capability of incorporating an audio event detection and classification system helps its users, especially the elderly, with their everyday needs. In this paper, we propose using convolutional neural networks (CNN) for classifying audio streams. In contrast to AAL systems using traditional machine learning, our solution is capable of learning and inferring activities in an end-to-end manner. To demonstrate the system, we developed a wireless sensor network composed of Raspberry Pi boards with microphones as nodes. The audio classification system achieves an accuracy of 83.79% using a parallel network for the Urban8k dataset, extracting constant-Q transform (CQT) features as system inputs. The overall system is scalable and flexible in terms of the number of nodes, hence it is applicable to wide areas where assisted living applications are utilized.},
author={Mendoza, J.M.* and Tan, V.* and Fuentes, V. and Perez, G. and Tiglao, N.M.},
journal={Lecture Notes of the Institute for Computer Sciences, Social Informatics and Telecommunications Engineering},
year={2019},
pages={105--115},
publisher={Springer, Cham},
doi={10.1007/978-3-030-06158-6_11},
html={https://link.springer.com/chapter/10.1007%2F978-3-030-06158-6_11},
}
@article{vrex,
abbr={TENCON},
title={Vrex: A Framework for Immersive Virtual Reality Experiences},
abstract={Virtual Reality (VR) is believed to be the future of gaming and even application platforms. However, creating a VR application from scratch takes up a lot of time and research. Virtual Reality frameworks simplify game development by allowing the developer to focus on the actual design and system rather than dealing with the core functionalities and interactions of a VR application. In this paper, we present a Virtual Reality framework using Unity3D and the HTC Vive. With this framework, any developer can easily create a VR environment with interactions, scene objectives, player explorations, and many more. This framework is used in the creation of the adventure fantasy game, Eldervine, and adapted for the scene creator application, ANEEME. Results of experiments conducted show the framework's usability in creating different VR applications and its capability to make the interactions intuitive and the experience immersive.},
author={Blonna, R. and Tan, M.S. and Tan, V. and Mora, A.P. and Atienza, R.},
journal={IEEE Region Ten Symposium (Tensymp)},
year={2018},
pages={118--123},
doi={10.1109/TENCONSpring.2018.8692018},
html={https://ieeexplore.ieee.org/abstract/document/8692018/},
blog={https://youtu.be/-fD6U2DncZU},
publisher={IEEE},
}
@article{aneeme,
abbr={SIGAsia},
title={ANEEME: Synthesizing and Sharing Animation Building Blocks for Rapid Creation of 3D Virtual Scenes},
abstract={ANEEME focuses on building technologies that rapidly synthesize animated visual scenes. This virtual reality experience immerses users in the different styles and cultures of houses around the world. There are two modes in this application: build mode and play mode. In build mode, users can integrate 3D models from the local computer or from ANEEME's online repository into the virtual environment. Using ANEEME's automatic skeletal rigging, users can easily incorporate and animate humanoid objects such as toys, figurines, and even their own avatar. These features enable users to design, build, and customize their dream house. During play mode, users can interact with the objects inside the environment. They can watch videos, listen to music, play with different instruments, and many more. Photo and video capture capabilities available in this mode also allow users to easily share their virtual environment through their social media accounts.},
author={Tan, V. and Atienza, R. and Saludares, M.I. and Casimiro, J. and Viola, M.S.},
journal={SIGGRAPH Asia VR Showcase},
year={2017},
pages={1--2},
doi={10.1145/3139468.3139479},
html={https://dl.acm.org/doi/abs/10.1145/3139468.3139479},
blog={https://youtu.be/ZC19KRPFkVw},
publisher={Association for Computing Machinery},
selected={true}
}
@article{bs,
abbr={ICCSCE},
title={Study of Automatic Melody Extraction Methods for Philippine Indigenous Music},
abstract={In this study, we compared two methods for extracting the melody pitch from select Philippine indigenous music. Pitch is expressed as the fundamental frequency of the main melodic voice or lead instrument of a music sample. Our implementation of automatic melody extraction involves blind source separation and pitch detection. For blind source separation, we implemented the Harmonic-Percussive Source Separation (HPSS) algorithm and the Shifted Non-negative Matrix Factorization (SNMF) algorithm. The HPSS algorithm identifies the harmonic component from the prominent peaks in the spectrogram of a signal while the SNMF algorithm uses timbre as its criterion. The harmonic component is used to estimate the melody pitch. The HPSS and SNMF source separation algorithms are complemented with salience-based and data-driven pitch detection algorithms, respectively. The two systems are evaluated using ten samples of Philippine indigenous music. After source separation, the estimated harmonic and percussive tracks were evaluated through subjective listening tests. Results from subjective tests show that SNMF performs better than HPSS for harmonic and percussive source separation. Moreover, objective tests using standard metrics indicate that the salience-based approach has higher accuracy in identifying the melody than the data-driven approach.},
author={Disuanco, J.* and Tan, V.* and de Leon, F.},
journal={IEEE International Conference on Control System, Computing and Engineering (ICCSCE)},
year={2015},
pages={464--469},
doi={10.1109/ICCSCE.2015.7482230},
html={https://ieeexplore.ieee.org/abstract/document/7482230},
publisher={IEEE},
}