How to use the pydub.AudioSegment.from_wav function in pydub

To help you get started, we’ve selected a few pydub examples, based on popular ways it is used in public projects.

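As a quick orientation before the examples: AudioSegment.from_wav loads a WAV file into an AudioSegment object that can be inspected, sliced (in milliseconds), and re-exported. A minimal sketch, assuming a local file named input.wav (the file names here are placeholders):

from pydub import AudioSegment

sound = AudioSegment.from_wav("input.wav")
print(sound.duration_seconds, sound.frame_rate, sound.channels)
first_ten_seconds = sound[:10000]   # pydub slices are in milliseconds
first_ten_seconds.export("first_ten.wav", format="wav")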

github dodo1210 / tcc_code / elementos / Spectral_Crest_Factor / executa.py
# -*- coding: utf-8 -*-
from pydub import AudioSegment
import librosa
# open the track list file
arq = open('/home/douglas/Música/musicas/wav/tristes/tristes.txt','r')
lines = arq.readlines()
arq.close()

lista = []

count=0
for l in lines:
    # load the files
    music = l.rstrip("\n")  # strip the trailing newline
    y, sr = librosa.load('/home/douglas/Música/musicas/wav/tristes/'+music,sr=44100)
    sound = AudioSegment.from_wav("/home/douglas/Música/musicas/wav/tristes/"+music)
    cent = librosa.feature.rms(y=y, frame_length=2048, hop_length=512)  # librosa 0.7+ renamed rmse to rms
    a = sound.max_dBFS/cent.mean()
    print(music,a)
    lista.append(a)

arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','r')
musics = arq.readlines()
arq.close()


count=0
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','w')
for m in musics:
    music = m.rstrip("\n")  # strip the trailing newline
    arq.write(music+","+str(lista[count])+"\n")
    count+=1
github MaxStrange / AudioSegment / audiosegment.py
        # Get a temp file to put our data and a temp file to store the result
        tmp = _get_random_tmp_file()
        othertmp = _get_random_tmp_file()

        # Store our data in the temp file
        self.export(tmp.name, format="wav")  # lowercase "wav" lets pydub write the file natively, without ffmpeg

        # Write the command to sox
        stdout = stderr = subprocess.PIPE if console_output else subprocess.DEVNULL
        command = cmd.format(inputfile=tmp.name, outputfile=othertmp.name)
        res = subprocess.call(command.split(' '), stdout=stdout, stderr=stderr)
        assert res == 0, "Sox did not work as intended, or perhaps you don't have Sox installed?"

        # Create a new AudioSegment from the other temp file (where Sox put the result)
        other = AudioSegment(pydub.AudioSegment.from_wav(othertmp.name), self.name)

        # Clean up the temp files
        if on_windows:
            os.remove(tmp.name)
            os.remove(othertmp.name)
        else:
            tmp.close()
            othertmp.close()

        return other
github CoEDL / kaldi_helpers / kaldi_helpers / input_scripts / split_eafs.py
    input_eaf = Eaf(ie)

    # Check if the tiers we have been given exist
    tier_names = input_eaf.get_tier_names()
    if tier not in tier_names:
        print('missing tier: ' + tier, file=sys.stderr)
        return False
    if silence_tier not in tier_names:
        print('missing silence tier: ' + silence_tier, file=sys.stderr)

    # get the input audio file
    inDir, name = os.path.split(ie)
    basename, ext = os.path.splitext(name)
    ia = os.path.join(inDir, basename + ".wav")
    input_audio = AudioSegment.from_wav(ia)

    # We can pass in an arg for a ref tier that has silence labels
    check_silence_ref_tier = False
    if silence_tier in tier_names:
        silence_tier_info = input_eaf.get_parameters_for_tier(silence_tier)
        if silence_tier_info.get("PARENT_REF") == tier:
            check_silence_ref_tier = True

    # Get annotation values, start and end times, and speaker id
    annotations = sorted(input_eaf.get_annotation_data_for_tier(tier))
    params = input_eaf.get_parameters_for_tier(tier)
    if 'PARTICIPANT' in params:
        speaker_id = params['PARTICIPANT']

    i = 0
    for ann in annotations:
github dodo1210 / tcc_code / elementos / loudness_std / executa.py
# -*- coding: utf-8 -*-
from pydub import AudioSegment
import numpy as np
import math
# open the track list file
arq = open('/home/douglas/Música/musicas/wav/tristes/tristes.txt','r')
lines = arq.readlines()
arq.close()

lista = []

count=0
for l in lines:
    # load the files
    music = l.rstrip("\n")  # strip the trailing newline
    sound = AudioSegment.from_wav("/home/douglas/Música/musicas/wav/tristes/"+music)
    a = math.sqrt(math.pow(float(sound.max_dBFS), 2))  # equivalent to abs(sound.max_dBFS)
    print(music,a)
    lista.append(a)
    
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','r')
musics = arq.readlines()
arq.close()


count=0
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv','w')
for m in musics:
    music = m.rstrip("\n")  # strip the trailing newline
    arq.write(music+","+str(lista[count])+"\n")
    count+=1
github jim-schwoebel / voicebook / chapter_4_modeling / load_audioclassify.py
    y, sr = librosa.load(filename)
    duration = float(librosa.get_duration(y=y, sr=sr))  # newer librosa requires keyword arguments here
    
    # Now split the audio signal into individual 100 ms segments and
    # extract all of these features per 100 ms
    segnum=round(duration/timesplit)
    deltat=duration/segnum
    timesegment=list()
    time=0

    for i in range(segnum):
        #milliseconds
        timesegment.append(time)
        time=time+deltat*1000

    newAudio = AudioSegment.from_wav(filename)
    filelist=list()
    
    for i in range(len(timesegment)-1):
        filename=exportfile(newAudio,timesegment[i],timesegment[i+1],filename,i)
        filelist.append(filename)

        featureslist=np.array([0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
                               0,0,0,0,
github netankit / AudioMLProject1 / speech_noise_ir_audio_mixing_script.py
	gain = ref_track_gain-speech_track_gain
	data_normalized = numpy.asarray(data_speech*math.pow(10, (-(gain)/20)), dtype=numpy.int16)
	normalizedFile = "speech_normalized.wav"
	wav.write(normalizedFile , rate_speech, data_normalized)

	# Loudness test of normalized example speech
	test = audiotools.open(normalizedFile)
	test_replay_gain = audiotools.calculate_replay_gain([test])
	test_track_gain = list(list(test_replay_gain)[0])[1]
	#print test_track_gain

	# Randomly choosing one noise file from the pool
	# here I just fix one waiting for implementation later

	# Using pydub API to calculate the length of normalized speech file and the noise file
	speech_normalized = pydub.AudioSegment.from_wav(normalizedFile)
	
	# All noise files were converted to 16-bit int format beforehand; the directory location is passed in so that a noise file can be chosen at random, different for each speech file.
	noise = pydub.AudioSegment.from_wav(noiseFile)
	speech_normalized_length = speech_normalized.duration_seconds
	noise_length = noise.duration_seconds

	# Select a random start point in the noise file to get a segment of the required length
	start = random.randrange(0,int(noise_length-speech_normalized_length)*1000)
	# pydub does things in milliseconds
	noise_segmented = noise[start:int(start+speech_normalized_length*1000)]
	noise_segmented.export("noise_segmented.wav",format="wav")

	# Linearly fade the sharply cut noise segment:
	# 1 sec fade in, 1 sec fade out
	noise_faded = noise_segmented.fade_in(1000).fade_out(1000)
	noise_faded.export("noise_faded.wav",format="wav")
github antiboredom / audiogrep / audiogrep / audiogrep.py
def extract_words(files):
    ''' Extracts individual words from files and exports them to individual files. '''
    output_directory = 'extracted_words'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for f in files:
        file_format = None
        source_segment = None
        if f.lower().endswith('.mp3'):
            file_format = 'mp3'
            source_segment = AudioSegment.from_mp3(f)
        elif f.lower().endswith('.wav'):
            file_format = 'wav'
            source_segment = AudioSegment.from_wav(f)
        if not file_format or source_segment is None:
            print('Unsupported audio format for ' + f)
            continue
        sentences = convert_timestamps(files)
        for s in sentences:
            for word in s['words']:
                start = float(word[1]) * 1000
                end = float(word[2]) * 1000
                word = word[0]
                total_time = end - start
                audio = AudioSegment.silent(duration=total_time)
                audio = audio.overlay(source_segment[start:end])
                number = 0
                output_path = None
                while True:
                    output_filename = word
                    if number:
github gswyhq / hello-world / speech_synthesis_语音合成 / 利用pydub对音频文件进行处理.py
# Loading raw audio additionally requires three parameters: sample_width, frame_rate, and channels.
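# For illustration only, a minimal sketch of AudioSegment.from_raw; the file
# name and parameter values below are assumptions, not part of the original:
raw_sound = AudioSegment.from_raw("some_audio.raw",
                                  sample_width=2,    # bytes per sample (16-bit)
                                  frame_rate=44100,  # samples per second
                                  channels=2)        # stereo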

# Exporting files:
# The export() method writes an AudioSegment object out to a file.
sound = AudioSegment.from_file("/path/to/sound.wav", format="wav")
file_handle = sound.export("/path/to/output.mp3", format="mp3")     # simple export
file_handle2 = sound.export("/path/to/output.mp3",
                           format="mp3",
                           bitrate="192k",
                           tags={"album": "The Bends", "artist": "Radiohead"})         # 复杂输出

# AudioSegment.empty() creates a zero-length AudioSegment object, typically used when merging several audio clips.

sounds = [
  AudioSegment.from_wav("sound1.wav"),
  AudioSegment.from_wav("sound2.wav"),
  AudioSegment.from_wav("sound3.wav"),
]
playlist = AudioSegment.empty()
for sound in sounds:
  playlist += sound



# AudioSegment.silent():
ten_second_silence = AudioSegment.silent(duration=10000)  # create a silent AudioSegment lasting 10 seconds

# In addition, AudioSegment exposes the audio's parameters, which can be read and changed.
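# For illustration only, a sketch of reading and changing parameters; the
# 16000 Hz / mono values below are arbitrary examples:
print(sound.frame_rate, sound.channels, sound.sample_width)
resampled = sound.set_frame_rate(16000)  # returns a new segment
mono = resampled.set_channels(1)         # the original is left untouched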


# AudioSegments are immutable
#
github thomasweng15 / E.V.E. / totext / google.py
def get_text(self):
		if self.text is not None:
			return self.text

		# convert wav file to FLAC
		(_,stt_flac_filename) = tempfile.mkstemp('.flac')
		sound = AudioSegment.from_wav(self.audio.filename())
		sound.export(stt_flac_filename, format="flac")

		# send to Google to interpret into text
		google_speech_url = "http://www.google.com/speech-api/v1/recognize?lang=en"
		headers = {'Content-Type': 'audio/x-flac; rate= %d;' % self.recordingRate}
		recording_flac_data = open(stt_flac_filename, 'rb').read()
		r = requests.post(google_speech_url, data=recording_flac_data, headers=headers)

		# housekeeping
		os.remove(stt_flac_filename)
		self.audio.housekeeping()

		# get response as text
		response = r.text

		if 'hypotheses' not in response: