import psyco,os,kohoWrap,pywt
psyco.full()
# Tunable parameters for the sound-map script.

# Number of quantisation steps applied to the rescaled wavelet coefficients
# in fft_vect; the resulting histogram has quant + 1 entries.
quant = 200

# Passed to kohoWrap.KohTrain together with the input vectors and also
# embedded in the output file name (presumably the number of training
# cycles -- confirm against kohoWrap).
cycs = 1000

# Passed to kohoWrap.KohTrain; the HTML table is rendered with dim1 + 2 rows
# and dim2 + 2 columns.
dim1 = 10
dim2 = 10

# Passed to kohoWrap.KohTrain (presumably the learning rate -- confirm).
learnrate = 0.001

# HTML snippet used as the clickable content of every sound link.
play = '<img src="play.gif" width="20" height="20" alt="play" style="border:none"/>'
def toLittleEndianDWORD(i):
    """Return the low 32 bits of ``i`` as four characters, low byte first."""
    return ''.join(chr((i >> shift) & 0xff) for shift in (0, 8, 16, 24))
def fromLittleEndianDWORD(s):
    """Return the integer formed by the first four characters of ``s``.

    The characters are combined least-significant first; anything past the
    fourth character is ignored.
    """
    return sum(ord(s[pos]) << (8 * pos) for pos in range(4))
def fromLittleEndianWORD(s):
    """Return the integer formed by the first two characters of ``s``.

    The first character is the low byte and the second the high byte;
    anything past the second character is ignored.
    """
    low = ord(s[0])
    high = ord(s[1])
    return low + (high << 8)
def get_area(x):
    """Return the number encoded at the end of a file name's stem.

    The stem is everything before the first ``.``; the text after the stem's
    last ``_`` (or the whole stem when it has no ``_``) is converted with
    ``float``.
    """
    stem = x.split('.')[0]
    return float(stem.split('_')[-1])
def fft_vect(fn):
    """Return a histogram of the wavelet approximation of a WAV file.

    ``fn`` is read as a 16-bit mono PCM WAV file whose ``fmt`` fields sit at
    the canonical header offsets (format tag at byte 20, channel count at
    byte 22, bits per sample at byte 34).  Everything after the first
    ``data`` chunk header is decoded as signed little-endian 16-bit samples
    and passed through a single-level ``db1`` transform (``pywt.dwt``).  The
    approximation coefficients are rescaled to [0, 1], quantised into
    ``quant + 1`` integer bins and counted.

    Despite the name, no Fourier transform is involved.

    Args:
        fn: path of the WAV file to read.

    Returns:
        A list of ``quant + 1`` floats; entry ``i`` is the number of
        coefficients that landed in bin ``i`` divided by the number of
        samples read from the file.

    Raises:
        AssertionError: the header does not declare PCM or mono data.
        ValueError: the file is too short, is not 16-bit, has no ``data``
            chunk, or contains no samples.
    """
    import struct  # local import keeps this function self-contained

    # Binary mode is essential: text mode mangles sample bytes on platforms
    # that translate line endings.  The context manager closes the handle.
    with open(fn, 'rb') as wav_file:
        wav = wav_file.read()
    if len(wav) < 36:
        raise ValueError('%s is too short to be a WAV file' % fn)

    # The two pre-existing checks stay as asserts so that the exception type
    # seen by callers does not change.
    fmt_tag, channels = struct.unpack_from('<HH', wav, 20)
    assert fmt_tag == 1, 'Only PCM'
    assert channels == 1, 'Only mono'
    bits_per_sample = struct.unpack_from('<H', wav, 34)[0]
    if bits_per_sample != 16:
        raise ValueError('%s: only 16-bit samples are supported, got %d'
                         % (fn, bits_per_sample))

    marker = wav.find(b'data')
    if marker < 0:
        raise ValueError('%s has no data chunk' % fn)
    data_pos = marker + 8  # skip the 4-byte chunk id and 4-byte chunk size
    num_samp = (len(wav) - data_pos) // 2
    if num_samp <= 0:
        raise ValueError('%s contains no samples' % fn)

    # 16-bit PCM samples are signed; reading them as unsigned would wrap
    # every negative sample to a value near 65535.
    samples = list(struct.unpack('<%dh' % num_samp,
                                 wav[data_pos:data_pos + 2 * num_samp]))
    print('%d samples loaded from %s' % (num_samp, fn))

    approx = pywt.dwt(samples, 'db1')[0]  # keep approximation coefficients
    lowest = min(approx)
    span = float(max(approx) - lowest)

    counts = {}
    for coeff in approx:
        # A constant signal has zero span; put every coefficient in bin 0
        # instead of dividing by zero.
        slot = int((coeff - lowest) / span * quant) if span else 0
        counts[slot] = counts.get(slot, 0) + 1

    # NOTE(review): counts are divided by the number of samples, not by the
    # number of coefficients, exactly as before.  If pywt returns fewer
    # coefficients than samples the entries will not sum to 1 -- confirm
    # whether that scaling is intended.
    return [float(counts.get(slot, 0)) / num_samp
            for slot in range(quant + 1)]
# Script body: build one feature vector per WAV file in the current
# directory, train the map with kohoWrap.KohTrain, and write an HTML table
# that places a play link for every sound in the cell it was assigned to.
inlist, fnlist = [], []
dir_entries = os.listdir('.')  # scan the directory once and reuse it

# NOTE(review): mx_area is not used anywhere in the code visible here; it is
# still computed in case something outside this view relies on it.
mx_area = max(map(get_area,
                  filter(lambda x: x.find('wav') > -1, dir_entries)))

for f in dir_entries:
    if not f[-3:] == 'wav':
        continue
    # Links point at "<name>.wav.mp3", i.e. the WAV name with ".mp3" added.
    fnlist.append(f + '.mp3')
    inlist.append(fft_vect(f))

kmp = kohoWrap.KohTrain(inlist, cycs, dim1, dim2, learnrate)

# Group link targets by the (x, y) pair taken from the last entries of
# kmp.x and kmp.y; the n-th pair belongs to the n-th input file.
pd = {}
for n, (x, y) in enumerate(zip(kmp.x[-1], kmp.y[-1])):
    pd.setdefault((x, y), []).append(fnlist[n])

# Collect the markup in a list and join once instead of repeated "+=".
# NOTE(review): file names are inserted into href attributes unescaped.
parts = ['<table border="1">\n']
for x in range(dim1 + 2):
    parts.append('<tr>\n')
    for y in range(dim2 + 2):
        k = (x, y)
        if k in pd:
            links = ['<a href="%s">%s</a>' % (target, play)
                     for target in pd[k]]
            parts.append('<td>%s</td>\n' % ' '.join(links))
        else:
            parts.append('<td> </td>\n')
    parts.append('</tr>\n')
parts.append('</table>\n')
h = ''.join(parts)

ofn = 'km_c%d.html' % cycs
# The context manager guarantees the page is flushed and closed.
with open(ofn, 'w') as out:
    out.write(h)
print('%s written' % ofn)