# -*- coding: utf-8 -*-
from pydub import AudioSegment
import librosa

# Open the file listing the track names
arq = open('/home/douglas/Música/musicas/wav/tristes/tristes.txt', 'r')
lines = arq.readlines()
arq.close()

lista = []
count = 0
for l in lines:
    # Load each audio file
    music, erro = l.split("\n", 1)  # strip the trailing newline; erro holds the empty remainder
    y, sr = librosa.load('/home/douglas/Música/musicas/wav/tristes/' + music, sr=44100)
    sound = AudioSegment.from_wav("/home/douglas/Música/musicas/wav/tristes/" + music)
    # Frame-wise RMS energy (note: librosa.feature.rmse was renamed to
    # librosa.feature.rms in librosa >= 0.7)
    cent = librosa.feature.rmse(y=y, frame_length=2048, hop_length=512)
    # Ratio of the peak level (dBFS) to the mean RMS energy
    a = sound.max_dBFS / cent.mean()
    print(music, a)
    lista.append(a)

# Rewrite the results CSV, appending the value computed for each track
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv', 'r')
musics = arq.readlines()
arq.close()
count = 0
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv', 'w')
for m in musics:
    music, erro = m.split("\n", 1)
    arq.write(music + "," + str(lista[count]) + "\n")
    count += 1
arq.close()
# Get a temp file to put our data and a temp file to store the result
tmp = _get_random_tmp_file()
othertmp = _get_random_tmp_file()

# Store our data in the temp file
self.export(tmp.name, format="WAV")

# Write the command to sox
stdout = stderr = subprocess.PIPE if console_output else subprocess.DEVNULL
command = cmd.format(inputfile=tmp.name, outputfile=othertmp.name)
res = subprocess.call(command.split(' '), stdout=stdout, stderr=stderr)
assert res == 0, "Sox did not work as intended, or perhaps you don't have Sox installed?"

# Create a new AudioSegment from the other temp file (where Sox put the result)
other = AudioSegment(pydub.AudioSegment.from_wav(othertmp.name), self.name)

# Clean up the temp files
if on_windows:
    os.remove(tmp.name)
    os.remove(othertmp.name)
else:
    tmp.close()
    othertmp.close()
return other
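# `_get_random_tmp_file()` and `on_windows` come from the surrounding module and
# are not shown in the snippet; a minimal sketch of what they might look like,
# assuming tempfile.NamedTemporaryFile semantics (delete=False on Windows so the
# path can be reopened by Sox):
import os
import tempfile

on_windows = os.name == "nt"

def _get_random_tmp_file():
    if on_windows:
        # An open NamedTemporaryFile cannot be reopened by another process on
        # Windows, so create it non-deleting and remove it manually later.
        tmp = tempfile.NamedTemporaryFile(suffix=".wav", delete=False)
        tmp.close()
    else:
        # On POSIX the file can stay open; closing the handle deletes it.
        tmp = tempfile.NamedTemporaryFile(suffix=".wav")
    return tmp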
input_eaf = Eaf(ie)

# Check that the tiers we have been given exist
tier_names = input_eaf.get_tier_names()
if tier not in tier_names:
    print('missing tier: ' + tier, file=sys.stderr)
    return False
if silence_tier not in tier_names:
    print('missing silence tier: ' + silence_tier, file=sys.stderr)

# Get the input audio file that sits next to the .eaf file
inDir, name = os.path.split(ie)
basename, ext = os.path.splitext(name)
ia = os.path.join(inDir, basename + ".wav")
input_audio = AudioSegment.from_wav(ia)

# We can pass in an arg for a ref tier that has silence labels
check_silence_ref_tier = False
if silence_tier in tier_names:
    silence_tier_info = input_eaf.get_parameters_for_tier(silence_tier)
    if silence_tier_info.get("PARENT_REF") == tier:
        check_silence_ref_tier = True

# Get annotation values, start and end times, and speaker id
annotations = sorted(input_eaf.get_annotation_data_for_tier(tier))
params = input_eaf.get_parameters_for_tier(tier)
if 'PARTICIPANT' in params:
    speaker_id = params['PARTICIPANT']

i = 0
for ann in annotations:
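    # (The snippet ends here in the source.) A minimal sketch of a plausible
    # loop body, assuming pympi-style (start_ms, end_ms, value) annotation
    # tuples and a hypothetical one-wav-per-annotation output naming scheme:
    start, end, value = ann[0], ann[1], ann[2]
    clip = input_audio[start:end]  # pydub slices in milliseconds
    clip.export(os.path.join(inDir, "{}_{}.wav".format(basename, i)), format="wav")
    i += 1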
# -*- coding: utf-8 -*-
from pydub import AudioSegment
import numpy as np
import math

# Open the file listing the track names
arq = open('/home/douglas/Música/musicas/wav/tristes/tristes.txt', 'r')
lines = arq.readlines()
arq.close()

lista = []
count = 0
for l in lines:
    # Load each audio file
    music, erro = l.split("\n", 1)  # strip the trailing newline; erro holds the empty remainder
    sound = AudioSegment.from_wav("/home/douglas/Música/musicas/wav/tristes/" + music)
    # sqrt(x**2) is simply the absolute value of the peak level in dBFS
    a = math.sqrt(math.pow(float(sound.max_dBFS), 2))
    print(music, a)
    lista.append(a)

# Rewrite the results CSV, appending the value computed for each track
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv', 'r')
musics = arq.readlines()
arq.close()
count = 0
arq = open('/home/douglas/Documentos/tcc_code/resultado/resultados_tristes.csv', 'w')
for m in musics:
    music, erro = m.split("\n", 1)
    arq.write(music + "," + str(lista[count]) + "\n")
    count += 1
arq.close()
y, sr = librosa.load(filename)
duration = float(librosa.get_duration(y=y, sr=sr))

# Now splice the audio signal into individual segments of `timesplit` seconds
# (e.g. 100 ms) and extract all the features per segment
segnum = round(duration / timesplit)
deltat = duration / segnum
timesegment = list()
time = 0
for i in range(segnum):
    # segment start times in milliseconds
    timesegment.append(time)
    time = time + deltat * 1000

newAudio = AudioSegment.from_wav(filename)
filelist = list()
for i in range(len(timesegment) - 1):
    filename = exportfile(newAudio, timesegment[i], timesegment[i + 1], filename, i)
    filelist.append(filename)
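# exportfile() is not defined in this snippet; a minimal sketch, assuming it
# slices the pydub segment between two millisecond timestamps and writes a
# numbered wav next to the source file (the naming scheme is hypothetical):
import os

def exportfile(newAudio, start_ms, end_ms, source_filename, index):
    clip = newAudio[int(start_ms):int(end_ms)]
    base, _ = os.path.splitext(source_filename)
    outname = "{}_{}.wav".format(base, index)
    clip.export(outname, format="wav")
    return outname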
# Zero-initialized feature accumulator (the original literal is a long run of
# zeros, truncated in the source; 40 entries shown here)
featureslist = np.zeros(40)
gain = ref_track_gain - speech_track_gain
# Apply the gain difference (in dB) as a linear amplitude factor: 10^(-gain/20)
data_normalized = numpy.asarray(data_speech * math.pow(10, (-gain / 20)), dtype=numpy.int16)
normalizedFile = "speech_normalized.wav"
wav.write(normalizedFile, rate_speech, data_normalized)

# Loudness test of the normalized example speech
test = audiotools.open(normalizedFile)
test_replay_gain = audiotools.calculate_replay_gain([test])
test_track_gain = list(list(test_replay_gain)[0])[1]
# print(test_track_gain)

# Randomly choose one noise file from the pool
# (here a single file is fixed, awaiting a later implementation)
# Use the pydub API to get the lengths of the normalized speech file and the noise file
speech_normalized = pydub.AudioSegment.from_wav(normalizedFile)
# All noise files were converted to 16-bit int format beforehand; the directory
# location is passed in so a different noise file can be chosen for each speech file.
noise = pydub.AudioSegment.from_wav(noiseFile)
speech_normalized_length = speech_normalized.duration_seconds
noise_length = noise.duration_seconds

# Select a random start point in the noise file to get a segment of the required length
start = random.randrange(0, int(noise_length - speech_normalized_length) * 1000)
# pydub does things in milliseconds
noise_segmented = noise[start:int(start + speech_normalized_length * 1000)]
noise_segmented.export("noise_segmented.wav", format="wav")

# Linear fading of the sharply cut noise segment:
# 1 s fade in, 1 s fade out
noise_faded = noise_segmented.fade_in(1000).fade_out(1000)
noise_faded.export("noise_faded.wav", format="wav")
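# The faded noise would next be mixed into the normalized speech; a minimal
# sketch of that step, assuming pydub's overlay() and a hypothetical output name:
speech_with_noise = speech_normalized.overlay(noise_faded)
speech_with_noise.export("speech_with_noise.wav", format="wav")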
def extract_words(files):
    '''Extracts individual words from files and exports them to individual files.'''
    output_directory = 'extracted_words'
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    for f in files:
        file_format = None
        source_segment = None
        if f.lower().endswith('.mp3'):
            file_format = 'mp3'
            source_segment = AudioSegment.from_mp3(f)
        elif f.lower().endswith('.wav'):
            file_format = 'wav'
            source_segment = AudioSegment.from_wav(f)
        if not file_format or not source_segment:
            print('Unsupported audio format for ' + f)
            continue

        sentences = convert_timestamps(files)
        for s in sentences:
            for word in s['words']:
                start = float(word[1]) * 1000
                end = float(word[2]) * 1000
                word = word[0]
                total_time = end - start
                audio = AudioSegment.silent(duration=total_time)
                audio = audio.overlay(source_segment[start:end])
                number = 0
                output_path = None
                while True:
                    output_filename = word
                    if number:
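                        # (The snippet ends here in the source.) A plausible
                        # completion, assuming duplicate words get a numeric
                        # suffix and files land in output_directory:
                        output_filename += '_' + str(number)
                    output_path = os.path.join(output_directory,
                                               output_filename + '.' + file_format)
                    if not os.path.exists(output_path):
                        break
                    number += 1
                audio.export(output_path, format=file_format)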
# For raw audio, three more parameters are required: sample_width, frame_rate, and channels.
# Generating files:
# The export() method turns an AudioSegment object into a file.
sound = AudioSegment.from_file("/path/to/sound.wav", format="wav")
file_handle = sound.export("/path/to/output.mp3", format="mp3")  # simple export
file_handle2 = sound.export("/path/to/output.mp3",
                            format="mp3",
                            bitrate="192k",
                            tags={"album": "The Bends", "artist": "Radiohead"})  # export with options
# AudioSegment.empty() creates a zero-length AudioSegment, typically used when
# concatenating several clips.
sounds = [
    AudioSegment.from_wav("sound1.wav"),
    AudioSegment.from_wav("sound2.wav"),
    AudioSegment.from_wav("sound3.wav"),
]
playlist = AudioSegment.empty()
for sound in sounds:
    playlist += sound

# AudioSegment.silent():
ten_second_silence = AudioSegment.silent(duration=10000)  # a silent AudioSegment lasting 10 s

# An AudioSegment also exposes the audio's parameters, and these can be
# changed, each change producing a new object.
# AudioSegments are immutable
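# A small illustration of the parameter access mentioned above ("sound.wav" is
# a placeholder path); because AudioSegments are immutable, each set_* call
# returns a new object:
sound = AudioSegment.from_wav("sound.wav")
print(sound.channels, sound.frame_rate, sound.sample_width)  # read parameters
mono_16k = sound.set_channels(1).set_frame_rate(16000)       # derive a modified copy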
def get_text(self):
    if self.text is not None:
        return self.text

    # Convert the wav file to FLAC
    (_, stt_flac_filename) = tempfile.mkstemp('.flac')
    sound = AudioSegment.from_wav(self.audio.filename())
    sound.export(stt_flac_filename, format="flac")

    # Send to Google to interpret into text
    google_speech_url = "http://www.google.com/speech-api/v1/recognize?lang=en"
    headers = {'Content-Type': 'audio/x-flac; rate= %d;' % self.recordingRate}
    with open(stt_flac_filename, 'rb') as flac_file:
        recording_flac_data = flac_file.read()
    r = requests.post(google_speech_url, data=recording_flac_data, headers=headers)

    # Housekeeping
    os.remove(stt_flac_filename)
    self.audio.housekeeping()

    # Get the response as text
    response = r.text
    if 'hypotheses' not in response:
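        # (The snippet ends here in the source.) A plausible completion:
        return None  # no transcription came back
    # Hypothetical parsing, assuming the legacy v1 JSON shape
    # {"hypotheses": [{"utterance": ...}]} and an `import json` at module
    # level; this API has long been discontinued:
    self.text = json.loads(response)['hypotheses'][0]['utterance']
    return self.text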