From 168a66f412d834bd54407e48090d66d10009fe6b Mon Sep 17 00:00:00 2001 From: sindre0830 Date: Sun, 6 Mar 2022 17:19:55 +0100 Subject: [PATCH] [#59] Replace Pafy with YT-DLP to fix download speed --- NN/API/preprocessing.py | 40 ++-------------------------------------- NN/API/requirements.txt | 2 +- 2 files changed, 3 insertions(+), 39 deletions(-) diff --git a/NN/API/preprocessing.py b/NN/API/preprocessing.py index 9320041..a576bba 100644 --- a/NN/API/preprocessing.py +++ b/NN/API/preprocessing.py @@ -2,9 +2,6 @@ import dictionary as dict # import foreign modules import os -from pathlib import Path -from pydub import AudioSegment -import pafy # Downloads an audio file from given URL. @@ -14,38 +11,5 @@ def downloadAudio(id): os.makedirs(dict.NATIVE_DIR) # branch if audio file doesn't exist if not os.path.isfile(dict.getNativeAudioPath(id)): - url = "https://www.youtube.com/watch?v=" + id - audiostreams = pafy.new(url).audiostreams - # get audio format with best quality - best = 0 - for idx, val in enumerate(audiostreams): - temp = int(val.get_filesize()) - if best == 0 or temp > best: - best = idx - print(val.bitrate, val.extension, val.get_filesize()) - tempFilename = id + "." + audiostreams[best].extension - # download audio file - if os.path.exists(dict.NATIVE_DIR + tempFilename) is False: - audiostreams[best].download(filepath=dict.NATIVE_DIR + tempFilename) - # convert file to wav format and remove temporary file - convertToWav(id, tempFilename) - os.remove(dict.NATIVE_DIR + tempFilename) - - -# Attempts to convert a file into wav format. -def convertToWav(id, file): - path = dict.NATIVE_DIR + file - newPath = dict.getNativeAudioPath(id) - if os.path.exists(newPath) is False and testExt(file): - sound = AudioSegment.from_file(path) - sound.export(newPath, format="wav") - else: - pass - - -# Checks if file is a supported audio format. -def testExt(file): - if Path(file).suffix in dict.EXTENSIONS: - return True - else: - return False + # download audio file with best quality then convert to wav + os.system("yt-dlp -q -f 'ba' -x --audio-format wav https://www.youtube.com/watch?v=" + id + " -o '"+ dict.NATIVE_DIR + "%(id)s.%(ext)s'") diff --git a/NN/API/requirements.txt b/NN/API/requirements.txt index cbd2746..7c95ed8 100644 --- a/NN/API/requirements.txt +++ b/NN/API/requirements.txt @@ -1,8 +1,8 @@ flask pydub -pafy youtube-dl==2020.12.02 numpy==1.21 librosa matplotlib aubio +yt-dlp