How to use the pydub.AudioSegment class in pydub

To help you get started, we’ve selected a few pydub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

github CwbhX / Jamais-Vu / jamaisvu / testing.py View on Github external
def get_length_audio(audiopath, extension):
    """
    Return the length of the audio file at *audiopath* in whole seconds.

    Parameters
    ----------
    audiopath : str
        Path to the audio file.
    extension : str
        File extension, with or without the leading dot (e.g. ".mp3").

    Returns
    -------
    int or None
        Duration in seconds (truncated), or None if the format isn't
        supported or decoding fails.
    """
    try:
        # pydub expects the bare format name, so strip any leading dot.
        audio = AudioSegment.from_file(audiopath, extension.replace(".", ""))
    except Exception:
        # Narrowed from a bare `except:` so KeyboardInterrupt/SystemExit
        # still propagate; decode errors are logged and reported as None.
        print("Error in get_length_audio(): %s" % traceback.format_exc())
        return None
    # len(audio) is in milliseconds; truncate to whole seconds.
    return int(len(audio) / 1000.0)
github edouardpoitras / eva / clients / headless.py View on Github external
def play(filepath, content_type='audio/wav'):
    """
    Attempt to play an audio file (wav, ogg/opus, or mp3) via pydub.

    Parameters
    ----------
    filepath : str
        Path to the audio file to play.
    content_type : str
        MIME type used to select the decoder (default 'audio/wav').

    Raises
    ------
    ValueError
        If *content_type* matches none of the supported formats.
        (Previously an unsupported type crashed later with an
        UnboundLocalError because `sound` was never assigned.)
    """
    if 'wav' in content_type:
        sound = AudioSegment.from_wav(filepath)
    elif 'ogg' in content_type or 'opus' in content_type:
        sound = AudioSegment.from_ogg(filepath)
    elif 'mp3' in content_type or 'mpeg' in content_type:
        sound = AudioSegment.from_mp3(filepath)
    else:
        raise ValueError("Unsupported content type: %r" % content_type)
    pydub_play(sound)
github netankit / AudioMLProject1 / speech_noise_ir_audio_mixing_script.py View on Github external
# NOTE(review): fragment of a larger function — the enclosing `def` is not
# visible here, and names such as normalizedFile, rate_speech, data_normalized
# and noiseFile are bound earlier, outside this excerpt.
wav.write(normalizedFile , rate_speech, data_normalized)

	# Loudness test of normalized example speech:
	# compute the ReplayGain track gain of the normalized file.
	test = audiotools.open(normalizedFile)
	test_replay_gain = audiotools.calculate_replay_gain([test])
	test_track_gain = list(list(test_replay_gain)[0])[1]
	#print test_track_gain

	# Randomly choosing one noise file from the pool
	# here I just fix one, waiting for implementation later

	# Using the pydub API to calculate the length of the normalized speech file and the noise file
	speech_normalized = pydub.AudioSegment.from_wav(normalizedFile)
	
	# We have converted all the noise files to 16-bit int format and then passed the directory
	# location to randomly choose noise files, which is different for each speech file.
	noise = pydub.AudioSegment.from_wav(noiseFile)
	speech_normalized_length = speech_normalized.duration_seconds
	noise_length = noise.duration_seconds

	# Selecting a random start point in the noise file to get a segment of the required length
	start = random.randrange(0,int(noise_length-speech_normalized_length)*1000)
	# pydub does things in milliseconds
	noise_segmented = noise[start:int(start+speech_normalized_length*1000)]
	noise_segmented.export("noise_segmented.wav",format="wav")

	# Linear fading of the sharply segmented noise segment:
	# 1 sec fade in, 1 sec fade out
	noise_faded = noise_segmented.fade_in(1000).fade_out(1000)
	noise_faded.export("noise_faded.wav",format="wav")

	# how long is good? 1 sec?
github dr-pato / audio_visual_speech_enhancement / mixed_speech_generator.py View on Github external
def two_files_audio_sum(file_1_path, file_2_path, file_sum_name, volume_reduction=0):
    """
    Mix two audio files and export the result as a WAV file.

    Parameters
    ----------
    file_1_path : str
        Path of the base track.
    file_2_path : str
        Path of the track to overlay on top of the base track.
    file_sum_name : str
        Output path for the mixed WAV file.
    volume_reduction : int or float
        Attenuation (in dB) applied to the second track before mixing.

    Returns
    -------
    numpy.ndarray
        The raw samples of the mixed audio.
    """
    s1 = AudioSegment.from_file(file_1_path)
    # Subtracting on an AudioSegment attenuates it by that many dB.
    s2 = AudioSegment.from_file(file_2_path) - volume_reduction

    # Centre the shorter clip inside the longer one. Floor division keeps
    # the overlay position an integral number of milliseconds (true `/`
    # would produce a float under Python 3).
    s2_shift = (len(s1) - len(s2)) // 2 if len(s1) > len(s2) else 0

    audio_sum = s1.overlay(s2, position=s2_shift)
    audio_sum.export(file_sum_name, format='wav')

    return np.array(audio_sum.get_array_of_samples())
github phreeza / keras-GAN / simple_gan.py View on Github external
from keras.layers.core import Dense,Dropout
from keras.optimizers import SGD
from keras.initializations import normal
import numpy as np
from matplotlib import pyplot as plt
from scipy.stats import gaussian_kde
from scipy.io import wavfile
import theano.tensor as T
import theano
import pydub

# NOTE(review): Python 2 script fragment (print statements); the decoder
# definition is cut off at the end of this excerpt.
batch_size = 128*128

print "loading data"

# Decode an MP3 to raw PCM. `_data` is pydub's private raw-bytes buffer;
# np.fromstring is deprecated in modern NumPy (np.frombuffer is the
# replacement) — TODO confirm before upgrading NumPy.
f = pydub.AudioSegment.from_mp3('../ml-music/07_-_Brad_Sucks_-_Total_Breakdown.mp3')
data = np.fromstring(f._data, np.int16)
# Interpret as interleaved stereo: one row per frame, columns = L/R channels.
data = data.astype(np.float64).reshape((-1,2))
print data.shape
# Down-mix stereo to mono by summing the two channels.
data = data[:,0]+data[:,1]
#data = data[:,:subsample*int(len(data)/subsample)-1,:]
# Rescale samples into [-1, 1]: shift to zero-min, scale to [0, 2], shift down.
data -= data.min()
data /= data.max() / 2.
data -= 1.
print data.shape

print "Setting up decoder"
# Decoder network: 32768-dim input through shrinking ReLU layers with dropout
# (definition continues beyond this excerpt).
decoder = Sequential()
decoder.add(Dense(2048, input_dim=32768, activation='relu'))
decoder.add(Dropout(0.5))
decoder.add(Dense(1024, activation='relu'))
decoder.add(Dropout(0.5))
github kokimame / joytan / tools / handler / pyduber.py View on Github external
def setupAudio(self):
        """Prepare SFX and BGM segments: load each clip, attenuate it to the
        configured volume, and store the results in self.sfxMap / self.bgmLoop."""
        # Sound effects: each key maps to a list of clip configs; the adjusted
        # clips for a key are summed (concatenated) into a single segment.
        for key, clip_infos in self.setting['sfx'].items():
            if not clip_infos:
                continue
            adjusted = []
            for info in clip_infos:
                clip = Aseg.from_mp3(info['path'])
                attenuation = self.volToReduce(clip.dBFS, (1 - info['volume']/100))
                adjusted.append(clip - attenuation)
            self.sfxMap[key] = sum(adjusted)

        # Background music: attenuate each loop track and queue it for playback.
        for info in self.setting['loop']:
            track = Aseg.from_mp3(info['path'])
            attenuation = self.volToReduce(track.dBFS,(1 - info['volume']/100))
            self.bgmLoop.append(track - attenuation)
github mRokita / sMusic-core / smusicclient / player.py View on Github external
def __make_chunks(self):
        """Load the track at self.__path, normalize it to -20 dBFS, and split
        the normalized segment into 100 ms chunks for playback."""
        raw = AudioSegment.from_file(self.__path)
        self.__segment = match_target_amplitude(raw, -20)
        # pydub's make_chunks takes the chunk length in milliseconds.
        self.__chunks = make_chunks(self.__segment, 100)
github sudnya / video-classifier / lucius / deploy / audio-search / server / artifacts / artifacts.py View on Github external
# NOTE(review): fragment of a request-handler method — the enclosing `def`
# and the binding of `dataDictionary` are outside this excerpt.
raise ValueError('Missing \'data\' field in request json payload.')

        dataType = dataDictionary['type']
        key = dataDictionary['key']

        # Reject requests that do not carry a valid API key.
        if not self.validateKey(key):
            raise ValueError('Invalid api key \'' + key + '\'.')

        # Reject unsupported audio formats before attempting to decode.
        if not self.validateDataType(dataType):
            raise ValueError('Invalid data type \'' + dataType + '\'.')

        audioData = dataDictionary['data']

        # Wrap the payload in a file-like object so pydub can decode it
        # directly; `dataType` doubles as the format hint.
        fileString = self.getStringAsFile(audioData)

        audioSegment = AudioSegment.from_file(fileString, format=dataType)

        description = self.describeAudio(audioSegment)

        return self.formResponse(description, dataType)
github MaxStrange / AudioSegment / docs / api / audiosegment.py View on Github external
# NOTE(review): fragment of a method — the enclosing `def` and the bindings of
# `filter_indices`, `ms_per_input`, and `MS_PER_S` are outside this excerpt.
# Smooth the raw yes/no decisions over a ~0.25 s window.
ret = detect._homogeneity_filter(filter_indices, window_size=int(round(0.25 * MS_PER_S / ms_per_input)))

        # Group the consecutive ones together
        ret = detect._group_filter_values(self, ret, ms_per_input)

        # Take the groups and turn them into AudioSegment objects.
        # Each entry in `ret` is (yesno, next_timestamp): the previous entry's
        # next_timestamp is this group's start, so look back one element.
        real_ret = []
        for i, (this_yesno, next_timestamp) in enumerate(ret):
            if i > 0:
                _next_yesno, timestamp = ret[i - 1]
            else:
                # First group starts at the beginning of the audio.
                timestamp = 0

            # Timestamps are in seconds; slicing the segment is done in ms.
            ms_per_s = 1000
            data = self[timestamp * ms_per_s:next_timestamp * ms_per_s].raw_data
            seg = AudioSegment(pydub.AudioSegment(data=data, sample_width=self.sample_width,
                                                    frame_rate=self.frame_rate, channels=self.channels), self.name)
            real_ret.append((this_yesno, seg))
        return real_ret