How to use the pydub.AudioSegment.from_file function in pydub

To help you get started, we’ve selected a few pydub examples, based on popular ways it is used in public projects.

Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.

Example from github.com/nyumaya/nyumaya_audio_recognition — python/test_accuracy.py (view on GitHub)
def load_audio_file(filename, resize=False):
	"""Load an audio file and normalize it to mono, 16-bit, `samplerate` Hz.

	:param filename: Path to the audio file. The container format is chosen
		from the file extension (mp3, wav, ogg, flac, 3gp/3g), matched
		case-insensitively.
	:param resize: Unused; kept for backward compatibility with callers.
	:returns: Tuple ``(sound, duration_seconds)`` on success, or
		``(None, None)`` when the extension is unsupported or the file
		cannot be decoded.
	"""
	# Table-driven extension -> pydub format name dispatch (replaces the
	# original duplicated if/elif chain; '.3g' is treated as 3gp too).
	formats = (('.mp3', 'mp3'), ('.wav', 'wav'), ('.ogg', 'ogg'), ('.flac', 'flac'), ('.3gp', '3gp'), ('.3g', '3gp'))
	try:
		lowered = filename.lower()
		fmt = None
		for suffix, name in formats:
			if lowered.endswith(suffix):
				fmt = name
				break
		if fmt is None:
			raise ValueError('Unsupported audio extension: ' + filename)
		sound = AudioSegment.from_file(filename, fmt)
		# Normalize to the module-level `samplerate`, mono, 16-bit samples.
		sound = sound.set_frame_rate(samplerate)
		sound = sound.set_channels(1)
		sound = sound.set_sample_width(2)
		return sound, sound.duration_seconds
	except Exception:  # was a bare except; keep the best-effort contract
		print("Couldn't load file")
		return None, None
Example from github.com/nyumaya/nyumaya_audio_recognition — python/test_accuracy.py (view on GitHub; duplicate of the snippet above)
def load_audio_file(filename, resize=False):
	"""Read an audio file (mp3/wav/ogg/flac/3gp) and convert it to mono,
	16-bit samples at the module-level `samplerate`.

	Returns (sound, duration_seconds), or (None, None) if anything fails.
	"""
	sound = None
	try:
		if filename.endswith(('.mp3', '.MP3')):
			sound = AudioSegment.from_mp3(filename)
		elif filename.endswith(('.wav', '.WAV')):
			sound = AudioSegment.from_wav(filename)
		elif filename.endswith('.ogg'):
			sound = AudioSegment.from_ogg(filename)
		elif filename.endswith('.flac'):
			sound = AudioSegment.from_file(filename, "flac")
		elif filename.endswith(('.3gp', '.3g')):
			sound = AudioSegment.from_file(filename, "3gp")

		# Each set_* call returns a new segment, so the chain is safe.
		sound = sound.set_frame_rate(samplerate).set_channels(1).set_sample_width(2)
		duration = sound.duration_seconds
	except:
		print("Couldn't load file")
		return None, None

	return sound, duration
Example from github.com/avewells/audio-sentiment-analysis-pipeline — audio_sentiment_analysis/process_raw_data.py (view on GitHub; note: the function's `def` line and its tail were lost in extraction — only part of the body is shown below)
'''
    Attempts to split a call file into different segments each time the speaker changes using
    speaker diarization. This method assumes there are two speakers in the file (sales and customer)
    and will cut out dial tones and any receptionists before the two speakers' conversation.
    '''
    # set output directories
    no_rings_out_dir = os.path.join(out_loc, 'calls_no_ringtones')
    if not os.path.exists(no_rings_out_dir):
        os.makedirs(no_rings_out_dir)
    diarized_out_dir = os.path.join(out_loc, 'calls_split_by_speaker')
    if not os.path.exists(diarized_out_dir):
        os.makedirs(diarized_out_dir)

    # load in raw audio file
    print(call_file)
    raw_audio = AudioSegment.from_file(call_file, 'wav')
    file_name = os.path.splitext(os.path.basename(call_file))[0]

    # uses trained HMM to determine where the ringtones are and only use audio from after
    # last detected ring and exports intermediate file
    curr_path = os.path.dirname(os.path.realpath(__file__))
    ring_labels = aS.hmmSegmentation(call_file, os.path.join(curr_path, 'hmmRingDetect'), False)
    segs, flags = aS.flags2segs(ring_labels[0], 1.0) # 1.0 is the mid-term window step from above model
    no_rings_audio = raw_audio[segs[-1, 0]*1000:segs[-1, 1]*1000]
    temp_out_loc = os.path.join(no_rings_out_dir, file_name) + '.wav'
    no_rings_audio.export(temp_out_loc, format='wav')

    # split on speakers now setting num speakers to 2
    diarized = aS.speakerDiarization(temp_out_loc, 2, mtSize=0.5, mtStep=0.1)

    # determine which label was given to customer and salesperson
    cust = diarized[0]
Example from github.com/PatrickDuncan/cleansio — cleansio/audio/convert.py (view on GitHub)
def read_and_convert_audio(file_path):
    """ Create a GCS AudioSegment from the file_path.

    Loads the file with pydub and converts it to mono, 16-bit samples at
    the rate chosen by the module-level `__sample_rate` helper.
    """
    audio_segment = AudioSegment.from_file(file_path)
    # BUG FIX: pydub AudioSegments are immutable — each set_* call returns
    # a NEW segment. The original chained calls discarded their result, so
    # the conversion never took effect; rebind the result instead.
    audio_segment = (
        audio_segment
        .set_channels(1)
        .set_sample_width(2)
        .set_frame_rate(__sample_rate(audio_segment))
    )
    return audio_segment
Example from github.com/MaxStrange/AudioSegment — audiosegment.py (view on GitHub)
def from_file(path):
    """
    Returns an AudioSegment object from the given file based on its file extension.
    If the extension is wrong, this will throw some sort of error.

    :param path: The path to the file, including the file extension.
    :returns: An AudioSegment instance from the file.
    """
    # Derive the pydub format name from the extension, minus the leading dot.
    extension = os.path.splitext(path)[1][1:].lower()
    segment = pydub.AudioSegment.from_file(path, extension)
    return AudioSegment(segment, path)
Example from github.com/tterb/yt2mp3 — yt-to-mp3.py (view on GitHub)
def convertMP3(title, ext):
  """Convert the file `<title>.<ext>` to `<title>.mp3` using pydub/ffmpeg."""
  source = AudioSegment.from_file('.'.join((title, ext)), format=ext)
  source.export(title + '.mp3', format='mp3')
Example from github.com/deltachat/deltabot — plugins/simplebot_mastodon/simplebot_mastodon/__init__.py (view on GitHub)
def toot(cls, ctx, acc, visibility=None, in_reply_to=None):
        """Post a status ("toot") to Mastodon from a Delta Chat message.

        NOTE(review): takes `cls` — presumably a @classmethod; the decorator
        is not visible in this excerpt, confirm against the class.

        :param ctx: Delta Chat context; `ctx.msg` is the incoming message
            and `ctx.text` its text body.
        :param acc: account identifier passed to `cls.get_session`.
        :param visibility: Mastodon status visibility, forwarded as-is.
        :param in_reply_to: optional status id; when set, posts a reply.
        """
        m = cls.get_session(acc)
        # Media message: attach the file as Mastodon media.
        if ctx.msg.is_image() or ctx.msg.is_gif() or ctx.msg.is_video() or ctx.msg._view_type in (dc.const.DC_MSG_AUDIO, dc.const.DC_MSG_VOICE):
            # Mastodon does not accept .aac uploads here, so transcode
            # to mp3 first via pydub (requires ffmpeg).
            if ctx.msg.filename.endswith('.aac'):
                aac_file = AudioSegment.from_file(ctx.msg.filename, 'aac')
                filename = ctx.msg.filename[:-4]+'.mp3'
                aac_file.export(filename, format='mp3')
            else:
                filename = ctx.msg.filename
            media = [m.media_post(filename).id]
            if in_reply_to:
                m.status_reply(m.status(in_reply_to), ctx.text,
                               media_ids=media, visibility=visibility)
            else:
                m.status_post(ctx.text, media_ids=media, visibility=visibility)
        # Text-only message: post/reply without media. A message with
        # neither media nor text is silently ignored.
        elif ctx.text:
            if in_reply_to:
                m.status_reply(m.status(in_reply_to),
                               ctx.text, visibility=visibility)
            else:
                m.status_post(ctx.text, visibility=visibility)
Example from github.com/Vernacular-ai/kaldi-serve — kaldi_serve/app/server.py (view on GitHub)
def transcribe(lang: str='en', model: str='tdnn'):
    """
    Transcribe audio.

    Flask POST handler: saves the uploaded 'file', splits it on silence
    into chunks, and runs each chunk through the Kaldi inference config
    for the given language/model.

    :param lang: language key into the config table.
    :param model: model key into the config table.
    :returns: a JSON error response on failure.
        NOTE(review): the success-path return is missing from this
        excerpt (truncated) — confirm against the full source.
    """
    if request.method == "POST":
        try:
            f = request.files['file']
            filename = secure_filename(f.filename)
            wav_filename = os.path.join(app.config['UPLOAD_FOLDER'], filename)
            f.save(wav_filename)
            complete_audio = AudioSegment.from_file(wav_filename)
            chunks = split_on_silence(complete_audio, silence_thresh=-26, min_silence_len=500, keep_silence=500)
            # Fall back to the whole file when no silence was detected.
            chunks = chunks if len(chunks) > 0 else [complete_audio]
        except Exception:  # was a bare except
            return jsonify(status='error', description="Unable to find 'file'")

        try:
            transcriptions = []
            # BUG FIX: filename.strip(".wav") strips any leading/trailing
            # '.', 'w', 'a', 'v' characters (e.g. "audio.wav" -> "udio");
            # it does NOT remove the suffix. Use splitext instead.
            base_name = os.path.splitext(filename)[0]
            for i, chunk in enumerate(chunks):
                chunk_filename = os.path.join(app.config['UPLOAD_FOLDER'], base_name + "chunk" + str(i) + ".wav")
                chunk.export(chunk_filename, format="wav")
                config_obj = config.config[lang][model]
                config_obj["wav_filename"] = chunk_filename
                transcription = inference.inference(config_obj)
                transcriptions.append(transcription)
        except Exception:  # was a bare except
            return jsonify(status='error', description="Wrong lang or model")
Example from github.com/antlarr/bard — bard/utils.py (view on GitHub)
def calculateAudioTrackSHA256_pyav(path):
    """Decode `path` with pyav and return (sha256, raw samples, properties).

    When config['enable_internal_checks'] is enabled, the same input is
    also decoded with pydub and both decodings must match bit for bit,
    otherwise an Exception is raised.
    """
    samples, properties = audioSamplesFromAudioFile(path)
    checksum = calculateSHA256_data(samples)
    if config['enable_internal_checks']:
        # Rewind file-like inputs so pydub reads from the start again.
        if hasattr(path, 'seek'):
            path.seek(0)
        segment = AudioSegment.from_file(path)
        pydub_checksum = calculateSHA256_data(segment.raw_data)
        mismatch = (segment.raw_data != samples
                    or pydub_checksum != checksum)
        if mismatch:
            raise Exception('SHA256sum IS DIFFERENT BETWEEN PYAV AND PYDUB')
        print('pyav/pydub decode check ' +
              TerminalColors.Ok + 'OK' + TerminalColors.ENDC)
    return checksum, samples, properties