scipy.cluster kmeans analisis de voz
joolivar en ing.uchile.cl
joolivar en ing.uchile.cl
Mie Mar 12 16:19:19 CET 2008
Saludos a todos
Tengo que realizar un análisis de voz para un pequeño diccionario;
las palabras son (anterior, siguiente). La idea es poder adelantar y
retroceder por voz las páginas de impress (openppt). Para ello he
realizado las siguientes rutinas.
El problema es que el error de distorsión para todas las palabras es
casi igual
por lo que no puedo diferenciar entre ellas.
Me gustaría saber si alguien ha usado kmeans y fft de scipy para
corroborar que
realmente funcionan, ya que si, por ejemplo, genero el codebook para la
palabra 'anterior' y luego uso vq para encontrar el error de
distorsión, este entrega un valor de 480, que parece exagerado. Junto a
esto, otras palabras entregan casi el mismo valor.
Cualquier ayuda se agradece.
PD: Para la parte de impress pienso usar UNO con Python.
-----------------------para el codebook--------------------
from wav_array import wavread
from scipy.fftpack import fft,rfft
from math import log
from scipy.cluster.vq import whiten,kmeans
from scipy import hamming
import pickle
def leer_book(nombre):
    """Load a pickled object (a codebook) from disk.

    Parameters
    ----------
    nombre : str
        Path of the pickle file to read.

    Returns
    -------
    object
        The object that was stored in the file.
    """
    # Pickle streams are binary data, so the file is opened in binary mode;
    # the context manager closes it even when unpickling raises.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(nombre, 'rb') as f:
        return pickle.load(f)
def guardar_book(archivo, datos):
    """Pickle ``datos`` into the file ``archivo``, replacing any old content.

    Parameters
    ----------
    archivo : str
        Destination path.
    datos : object
        Any picklable object.
    """
    # The original never closed the handle, so the data could stay unflushed.
    # Binary mode is what pickle expects; ``with`` guarantees flush and close.
    with open(archivo, 'wb') as f:
        pickle.dump(datos, f)
def ventanas_wav(archivo_wav):
    """Cut a WAV file into windows and compute two whitened feature sets.

    The samples are split into consecutive frames with a hop of 256
    samples. Each frame uses the first 255 samples of its hop (the original
    slice ``data[IN:OUT]`` with ``OUT = V - 1`` excludes the last one); this
    length is kept so feature vectors stay 255 wide.

    Parameters
    ----------
    archivo_wav : str
        Path of the WAV file, read with ``wavread``.

    Returns
    -------
    tuple
        ``(whiten(F), whiten(C))`` where each row of ``F`` is the magnitude
        of the FFT of one Hamming-windowed frame and each row of ``C`` is
        ``rfft`` of the natural log of that magnitude spectrum.
    """
    # Fixed: read the file given as argument; the original used the
    # module-level name ``wv`` and silently ignored ``archivo_wav``.
    data, Fs, Bits = wavread(archivo_wav)
    V = 256
    M = len(data) // V          # number of complete hops in the signal
    ventana = hamming(V - 1)    # loop-invariant, so built once
    F = []
    C = []
    # Fixed: the original looped ``range(M - V)``, which skipped the last
    # 256 frames and produced no frames at all for signals shorter than
    # 256 * 256 samples.
    for k in range(M):
        ini = k * V
        ft = abs(fft(data[ini:ini + V - 1] * ventana))
        F.append(ft)
        # math.log raises ValueError on an exactly-zero bin (e.g. digital
        # silence); behaviour unchanged from the original.
        C.append(rfft([log(x) for x in ft]))
    return whiten(F), whiten(C)
# --- Codebook training: refine the stored codebooks with one sample -------
print('\n----Inicio----')

# Training recording and its two feature matrices.
wv = '/home/joolivar/Desktop/Nueva/voz/muestras/anterior1.wav'
F, C = ventanas_wav(wv)

# Files holding the cepstral (CC) and FFT (FF) books.
CC = 'CFT_book.pdb'
FF = 'FFT_book.pdb'

# Element 0 of each stored book is passed to kmeans below as the second
# argument (number of centroids or initial guess).
CFTB = leer_book(CC)
print(CFTB[0])
FFTB = leer_book(FF)

CDBKF = kmeans(F, FFTB[0], iter=80, thresh=1e-5)
CDBKC = kmeans(C, CFTB[0], iter=80, thresh=1e-5)

# Overwrite both book files with the new kmeans results.
print('guardando datos')
guardar_book(FF, CDBKF)
guardar_book(CC, CDBKC)
print('----FIN----')
------------------para el análisis--------------
import pickle,commands,string
from scipy.cluster.vq import whiten,vq
from wav_array import wavread
from scipy.fftpack import fft,fftshift,rfft
from math import log,sqrt
from pylab import hamming,fromstring, Int16, UInt8,
clip,plot,xlabel,ylabel,title,grid,savefig,show,subplot,figure
def ceptrums(datos):
    """Compute whitened spectral and log-spectral features of a signal.

    The samples are split into consecutive frames with a hop of 256
    samples. Each frame uses the first 255 samples of its hop (the original
    slice ``datos[IN:OUT]`` with ``OUT = V - 1`` excludes the last one);
    this length is kept so feature vectors stay 255 wide.

    Parameters
    ----------
    datos : sequence of float
        Audio samples. An array is expected, since each slice is multiplied
        by the Hamming window.

    Returns
    -------
    tuple
        ``(whiten(F), whiten(C))`` where each row of ``F`` is the magnitude
        of the FFT of one Hamming-windowed frame and each row of ``C`` is
        ``rfft`` of the natural log of that magnitude spectrum.
    """
    # Fixed: measure the argument; the original used ``len(data)``, a
    # module-level name that only exists when the calling script set it.
    V = 256
    M = len(datos) // V         # number of complete hops in the signal
    ventana = hamming(V - 1)    # loop-invariant, so built once
    F = []
    C = []
    # Fixed: the original looped ``range(M - V)``, which skipped the last
    # 256 frames and produced no frames at all for signals shorter than
    # 256 * 256 samples.
    for k in range(M):
        ini = k * V
        ft = abs(fft(datos[ini:ini + V - 1] * ventana))
        F.append(ft)
        # math.log raises ValueError on an exactly-zero bin (e.g. digital
        # silence); behaviour unchanged from the original.
        C.append(rfft([log(x) for x in ft]))
    return whiten(F), whiten(C)
def leer_book(nombre):
    """Load a pickled object (a codebook) from disk.

    Parameters
    ----------
    nombre : str
        Path of the pickle file to read.

    Returns
    -------
    object
        The object that was stored in the file.
    """
    # Pickle streams are binary data, so the file is opened in binary mode;
    # the context manager closes it even when unpickling raises.
    # NOTE: pickle.load can execute arbitrary code -- only load trusted files.
    with open(nombre, 'rb') as f:
        return pickle.load(f)
def leer_palabra(nombre):
    """Return only the sample data of the WAV file ``nombre``.

    ``wavread`` yields three values; the second and third (bound here as
    sampling rate and bit depth) are discarded.
    """
    muestras, _fs, _bits = wavread(nombre)
    return muestras
# --- Analysis: quantise every sample word against the stored codebooks ----
print('\n+++Inicio+++\n')
C = 'CFT_book.pdb'
F = 'FFT_book.pdb'
CFTB = leer_book(C)   # codebook trained on the log-spectrum features
FFTB = leer_book(F)   # codebook trained on the FFT-magnitude features

# glob replaces the original `ls` shell call: no subprocess, and an empty
# directory gives an empty list instead of an error message being treated
# as a file name. sorted() keeps a deterministic order.
import glob
lista = sorted(glob.glob('/home/joolivar/Desktop/Nueva/voz/muestras/*.wav'))
i = 0
CL = []
figure(1)
for i, ruta in enumerate(lista, 1):
    ## nombre = ruta.split('muestras/')[1][:-3] + 'png'
    data = leer_palabra(ruta)
    FF, CC = ceptrums(data)
    # Fixed: each feature set is now matched against its own codebook. The
    # original compared the FFT features with the cepstral book and the
    # cepstral features with the FFT book, so the distortions it printed
    # did not measure how well a word matched.
    INF, VQF = vq(FF, FFTB[0])
    INC, VQC = vq(CC, CFTB[0])
    # NOTE(review): the three-digit subplot codes only work for up to 9
    # files, and the 8x1 and 8x2 grids overlap -- confirm intended layout.
    subplot(810 + i)
    plot(INF, 'x')
    subplot(820 + i)
    plot(INC, 'x')
    # Mean distortion per frame for the FFT and cepstral features.
    print('%s %s' % (sum(VQF) / len(VQF), sum(VQC) / len(VQC)))
show()
print('++++fin++++')
------------wav_array---------
#!/usr/bin/python
# -*- coding: UTF-8 -*-
#
# Copyright (C) 2006 by Hernán Ordiales
# <audiocode en uint8.com.ar>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# ``sys`` is imported outside the guard so the error handler can use it;
# the original called sys.exit() without ever importing ``sys``.
import sys
try:
    import wave
    from sys import exit
    import math
    import array
    from pylab import (fromstring, Int16, UInt8,
                       clip, plot, xlabel, ylabel, title, grid, savefig, show)
    from FFT import fft, inverse_fft
    from numpy.fft import fftshift
except ImportError:
    # Only import failures are handled here; anything else propagates.
    print('libraries import error!')
    sys.exit(1)
# Note: at the moment only mono wav files
# Example: [ y, Fs, bits ] = wavread( 'filename' )
# Note: Only supports 8 and 16 bits wav files
def wavread( name ):
    """Read an 8- or 16-bit WAV file into floats scaled to roughly -1..1.

    Example: [ y, Fs, bits ] = wavread( 'filename' )

    Parameters
    ----------
    name : str
        Path of the WAV file.

    Returns
    -------
    tuple
        ``(Data, Fs, Bits)``: the scaled samples, the sampling frequency and
        the bits per sample. For any other sample width an error is printed
        and ``(-1, -1, -1)`` is returned.
    """
    # numpy is already a dependency of this module; frombuffer replaces the
    # obsolete fromstring/Int16/UInt8 names the original took from pylab.
    from numpy import frombuffer

    wav = wave.open( name, 'r' )
    try:
        # (nchannels, sampwidth in bytes, sampling frequency, nframes,
        #  comptype, compname)
        Channels, Bytes, Fs, Frames, Compress, CompressName = wav.getparams()
        Data = wav.readframes( Frames )
    finally:
        # Fixed: the original leaked the handle on the unsupported-width path.
        wav.close()

    Bits = Bytes*8
    if Bits == 16: # 16 bits per sample, little-endian signed
        Data = frombuffer( Data, dtype='<i2' ) / 32767.0 # -1..1 values
    elif Bits == 8: # 8 bits per sample, unsigned
        Data = ( frombuffer( Data, dtype='u1' ) / 128.0 ) - 1.0 # -1..1 values
    else:
        print("Error. Sorry, this wavread function only supports 8 or 16 bits wav files.")
        return -1, -1, -1
    ## print "Fs: ",Fs,"\nBits: ",Bits,"\nChannels: ",Channels
    return Data, Fs, Bits
# Example: wavwrite( y, Fs, filename )
def wavwrite( data_array, Fs, name ):
    """Write ``data_array`` as a mono 16-bit WAV file.

    Example: wavwrite( y, Fs, filename )

    Parameters
    ----------
    data_array : sequence of float
        Samples, nominally in -1..1. Each is multiplied by 32767, truncated
        to an integer and limited to -32767..32767.
    Fs : int
        Sampling frequency stored in the file header.
    name : str
        Destination path.

    A warning is printed if any sample had to be limited, followed by the
    size of the written sample data in kilobytes.
    """
    import struct  # local import: packs a whole block in one call

    a_max = 32767 # max amp
    a_min = -32767 # min amp
    block_size = 1024*10 # write block size: 10k samples
    clipped = False
    len_data_array = len( data_array ) # 2 bytes (int16) per sample

    wav = wave.open( name, 'w' )
    wav.setframerate( Fs ) # sets sampling frequency
    wav.setnchannels( 1 ) # sets number of channels
    wav.setsampwidth( 2 ) # number of bytes: 16bits/8=2, 16 bits per sample
    try:
        for start in range( 0, len_data_array, block_size ):
            samples = []
            for value in data_array[start:start + block_size]:
                sample = int( value * a_max )
                if sample > a_max or sample < a_min:
                    clipped = True
                    sample = min( max( sample, a_min ), a_max ) # -32767..32767
                samples.append( sample )
            # '<h' is a little-endian signed 16-bit value: the same two
            # bytes the original built by hand with chr(), without the
            # repeated string concatenation.
            wav.writeframes( struct.pack( '<%dh' % len( samples ), *samples ) )
    finally:
        # Fixed: close the file even if a sample cannot be converted.
        wav.close()

    if clipped: print("Warning: Some values were clipped")
    # n*2/1024 (bytes size/1024) = n/512
    print("Final length: %d kb" % ( len_data_array // 512 ))
# Example: wavwrite8bits( y, Fs, filename )
def wavwrite8bits( data_array, Fs, name ):
    """Write ``data_array`` as a mono 8-bit (unsigned) WAV file.

    Example: wavwrite8bits( y, Fs, filename )

    Parameters
    ----------
    data_array : sequence of float
        Samples, nominally in -1..1. Each is mapped with
        ``int((x + 1.0) * 128)`` and limited to 0..255.
    Fs : int
        Sampling frequency stored in the file header.
    name : str
        Destination path.

    A warning is printed if any sample had to be limited (an input of
    exactly 1.0 maps to 256 and therefore counts), followed by the size of
    the written sample data in kilobytes.
    """
    import struct  # local import: packs a whole block in one call

    a_max = 255 # max amp
    a_min = 0 # min amp
    block_size = 1024*10 # write block size: 10k samples
    clipped = False
    len_data_array = len( data_array ) # 1 byte (UInt8) per sample

    wav = wave.open( name, 'w' )
    wav.setframerate( Fs ) # sets sampling frequency
    wav.setnchannels( 1 ) # sets number of channels
    wav.setsampwidth( 1 ) # number of bytes, 8 bits per sample
    try:
        for start in range( 0, len_data_array, block_size ):
            samples = []
            for value in data_array[start:start + block_size]:
                newbyte = int( ( value + 1.0 ) * 128 ) # ~ 255/2
                if newbyte > a_max or newbyte < a_min:
                    clipped = True
                    newbyte = min( max( newbyte, a_min ), a_max ) # 0..255
                samples.append( newbyte )
            # 'B' is one unsigned byte per sample, as the original built
            # with chr(), without the repeated string concatenation.
            wav.writeframes( struct.pack( '%dB' % len( samples ), *samples ) )
    finally:
        # Fixed: close the file even if a sample cannot be converted.
        wav.close()

    if clipped: print("Warning: Some values were clipped")
    # Fixed: one byte per sample, so the size is n/1024; the original
    # reused the 16-bit formula n/512 and reported double the real size.
    print("Final length: %d kb" % ( len_data_array // 1024 ))
----------------------------------------------------------------
This message was sent using IMP, the Internet Messaging Program.
_______________________________________________
Lista de correo Python-es
http://listas.aditel.org/listinfo/python-es
FAQ: http://listas.aditel.org/faqpyes
Más información sobre la lista de distribución Python-es