Ich habe ffmpeg heruntergeladen.
Code: Select all
# Import necessary libraries
from pydub import AudioSegment
import speech_recognition as sr
import os
import pydub
# Length of each exported chunk and the overlap shared by neighbouring
# chunks, both in milliseconds (pydub slices audio by milliseconds).
INTERVAL_MS = 20 * 1000
OVERLAP_MS = 1 * 1000

# Folder that is scanned for input ".wav" recordings.
directory = r'C:\Users\zach.blair\Downloads\speechRecognition\New folder'


def iter_chunk_bounds(length_ms, interval_ms=INTERVAL_MS, overlap_ms=OVERLAP_MS):
    """Yield ``(start, end)`` millisecond offsets of overlapping chunks.

    The first chunk starts at 0. Every following chunk starts ``overlap_ms``
    before the previous chunk's end. The final chunk is clamped to
    ``length_ms`` and iteration stops right after it, so the tail of the
    audio is produced exactly once.

    Args:
        length_ms: Total length of the audio in milliseconds.
        interval_ms: Maximum length of one chunk in milliseconds.
        overlap_ms: Amount of audio shared by consecutive chunks.

    Yields:
        Tuples ``(start, end)`` with ``0 <= start < end <= length_ms``.

    Raises:
        ValueError: If ``interval_ms`` is not greater than ``overlap_ms``
            (the window would never advance).
    """
    if interval_ms <= overlap_ms:
        raise ValueError("interval_ms must be greater than overlap_ms")
    if length_ms <= 0:
        return
    start = 0
    while True:
        end = min(start + interval_ms, length_ms)
        yield start, end
        if end >= length_ms:
            # End of the audio reached: stop instead of re-emitting the tail.
            return
        start = end - overlap_ms


def transcribe_file(path, file_index, chunk_dir, recognizer, out):
    """Slice one WAV file into chunks and append the recognized text to ``out``.

    Args:
        path: Full path of the input WAV file.
        file_index: 1-based number of the input file, used in chunk names.
        chunk_dir: Directory that receives the exported chunk files.
        recognizer: ``sr.Recognizer`` instance used for every chunk.
        out: Writable text file object collecting the recognized text.
    """
    # NOTE(review): the decode error reported for this call appears to come
    # from ffmpeg being asked to write pcm_s8 into a WAV container for an
    # 8-bit mu-law input. Converting the inputs to 16-bit PCM beforehand
    # (or using a newer pydub) may avoid it - confirm.
    audio = AudioSegment.from_file(path, format="wav")

    bounds = iter_chunk_bounds(len(audio))
    for counter, (start, end) in enumerate(bounds, start=1):
        # Export the slice so that speech_recognition can read it from disk.
        chunk_path = os.path.join(
            chunk_dir, str(file_index) + 'chunk' + str(counter) + '.wav')
        audio[start:end].export(chunk_path, format="wav")
        print(str(file_index) + str(counter) + ". Start = "
              + str(start) + " end = " + str(end))

        # record() reads the whole chunk; listen() would stop at the first
        # pause in speech and drop the remainder of the chunk.
        with sr.AudioFile(chunk_path) as source:
            audio_listened = recognizer.record(source)

        try:
            rec = recognizer.recognize_google(audio_listened)
        except sr.UnknownValueError:
            # Google could not understand the audio.
            print("Empty Value")
        except sr.RequestError:
            # The request failed, e.g. because of a connection problem.
            print("Could not request results.")
        else:
            out.write(rec + " ")


def main():
    """Transcribe every ".wav" file in ``directory`` into recognized.txt."""
    # Chunks go into a sub-folder so that a later run does not pick them up
    # as new input files.
    chunk_dir = os.path.join(directory, "chunks")
    os.makedirs(chunk_dir, exist_ok=True)

    recognizer = sr.Recognizer()
    chunk_count = 0
    # The context manager closes the output file even if a file fails.
    with open("recognized.txt", "w+") as fh:
        for filename in os.listdir(directory):
            if not filename.endswith(".wav"):
                continue
            chunk_count += 1
            # listdir() returns bare names; join them with the folder so the
            # script does not depend on the current working directory.
            transcribe_file(os.path.join(directory, filename),
                            chunk_count, chunk_dir, recognizer, fh)


if __name__ == "__main__":
    main()
Code: Select all
Traceback (most recent call last):
File "C:\Users\zach.blair\Downloads\speechRecognition\New folder\speechRecognition3.py", line 17, in <module>
audio = AudioSegment.from_file(filename,format="wav")
File "C:\Users\zach.blair\AppData\Local\Programs\Python\Python37-32\lib\site-packages\pydub\audio_segment.py", line 704, in from_file
p.returncode, p_err))
pydub.exceptions.CouldntDecodeError: Decoding failed. ffmpeg returned error code: 1
Code: Select all
ffmpeg version N-95027-g8c90bb8ebb Copyright (c) 2000-2019 the FFmpeg developers
built with gcc 9.2.1 (GCC) 20190918
configuration: --enable-gpl --enable-version3 --enable-sdl2 --enable-fontconfig --enable-gnutls --enable-iconv --enable-libass --enable-libdav1d --enable-libbluray --enable-libfreetype --enable-libmp3lame --enable-libopencore-amrnb --enable-libopencore-amrwb --enable-libopenjpeg --enable-libopus --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libtheora --enable-libtwolame --enable-libvpx --enable-libwavpack --enable-libwebp --enable-libx264 --enable-libx265 --enable-libxml2 --enable-libzimg --enable-lzma --enable-zlib --enable-gmp --enable-libvidstab --enable-libvorbis --enable-libvo-amrwbenc --enable-libmysofa --enable-libspeex --enable-libxvid --enable-libaom --enable-libmfx --enable-ffnvcodec --enable-cuvid --enable-d3d11va --enable-nvenc --enable-nvdec --enable-dxva2 --enable-avisynth --enable-libopenmpt --enable-amf
libavutil 56. 35.100 / 56. 35.100
libavcodec 58. 58.101 / 58. 58.101
libavformat 58. 33.100 / 58. 33.100
libavdevice 58. 9.100 / 58. 9.100
libavfilter 7. 58.102 / 7. 58.102
libswscale 5. 6.100 / 5. 6.100
libswresample 3. 6.100 / 3. 6.100
libpostproc 55. 6.100 / 55. 6.100
Guessed Channel Layout for Input Stream #0.0 : mono
Input #0, wav, from '2a.wav.wav':
Duration: 00:09:52.95, bitrate: 64 kb/s
Stream #0:0: Audio: pcm_mulaw ([7][0][0][0] / 0x0007), 8000 Hz, mono, s16, 64 kb/s
Stream mapping:
Stream #0:0 -> #0:0 (pcm_mulaw (native) -> pcm_s8 (native))
Press [q] to stop, [?] for help
[wav @ 0000024307974400] pcm_s8 codec not supported in WAVE format
Could not write header for output file #0 (incorrect codec parameters ?): Function not implemented
Error initializing output stream 0:0 --
Conversion failed!