#!/usr/bin/python
# Finds groups of PCM sound peaks using a Kohonen SOM with the quantized wavelet transform as input vector
# Nick Fankhauser, 2009
import psyco,os,kohoWrap,pywt
psyco.full()
# Tunable settings for the histogram input vectors and the Kohonen map training.
quant = 200  # quantisation steps for the transformed values (fft_vect returns quant+1 bins)
cycs = 1000  # training cycles handed to kohoWrap.KohTrain; also part of the output file name
dim1 = 10  # x map size
dim2 = 10  # y map size
learnrate = 0.001  # learning rate handed to kohoWrap.KohTrain
# HTML snippet used as the clickable "play" icon inside every link of the result table
play = '<img src="play.gif" width="20" height="20" alt="play" style="border:none"/>'

def toLittleEndianDWORD(i):
    """Return the low four bytes of integer i as a 4-character string, least significant byte first."""
    return ''.join([chr((i >> shift) & 0xff) for shift in (0, 8, 16, 24)])
def fromLittleEndianDWORD(s):
    """Return the unsigned integer stored little-endian in the first four characters of s."""
    # Character k contributes its ordinal shifted left by 8*k bits.
    return sum(ord(s[k]) << (8 * k) for k in range(4))
def fromLittleEndianWORD(s):
    """Return the unsigned integer stored little-endian in the first two characters of s."""
    low, high = ord(s[0]), ord(s[1])
    return low + (high << 8)
def get_area(x):
    """Return, as a float, the text after the last '_' in the part of x before its first '.'."""
    stem = x.split('.', 1)[0]
    return float(stem.rsplit('_', 1)[-1])

def fft_vect(fn):
    """Build the histogram input vector for one 16-bit mono PCM WAV file.

    Despite the name no FFT is involved: the samples are passed through a
    single-level 'db1' discrete wavelet transform (pywt.dwt), the approximation
    coefficients are scaled to the range [0, 1], quantised into quant+1 bins
    and the bin counts are returned as a histogram.

    fn -- path of the WAV file. Header fields are read at fixed byte offsets
          (format tag at 20, channel count at 22, bits per sample at 34), so
          the usual header layout is assumed.

    Returns a list of quant+1 floats, each a bin count divided by the number
    of samples read from the file.

    Raises AssertionError for non-PCM or non-mono files, ValueError when the
    samples are not 16 bit, the 'data' chunk is missing or holds no samples,
    and struct.error when the file is too short to contain the header fields.
    """
    import struct  # function-scope import: keeps the file's import line untouched

    f = open(fn, 'rb')  # binary mode: text mode may alter bytes on some platforms
    try:
        wav = f.read()
    finally:
        f.close()

    fmt_tag, channels = struct.unpack_from('<HH', wav, 20)
    assert fmt_tag == 1, 'Only PCM'
    assert channels == 1, 'Only mono'
    bits = struct.unpack_from('<H', wav, 34)[0]
    if bits != 16:
        # The decoding below reads two bytes per sample.
        raise ValueError('Only 16 bit samples are supported, %s has %d' % (fn, bits))

    tag_pos = wav.find('data')
    if tag_pos < 0 or tag_pos + 8 > len(wav):
        raise ValueError('No data chunk found in %s' % fn)
    data_pos = tag_pos + 8  # skip the 4-byte chunk tag and the 4-byte chunk size
    data_len = len(wav) - data_pos
    declared = struct.unpack_from('<I', wav, tag_pos + 4)[0]
    if 0 < declared < data_len:
        # Bytes stored after the data chunk are not audio; do not decode them.
        data_len = declared

    num_samp = data_len // 2
    if num_samp == 0:
        raise ValueError('No samples in %s' % fn)
    # 16-bit PCM samples are signed little-endian ('h'); reading them as
    # unsigned would wrap every negative sample to 32768..65535.
    ints = list(struct.unpack_from('<%dh' % num_samp, wav, data_pos))
    print('%d samples loaded from %s' % (num_samp, fn))

    cA = pywt.dwt(ints, 'db1')[0]  # approximation coefficients only
    mn = min(cA)
    df = float(max(cA) - mn)
    counts = [0] * (quant + 1)
    if df == 0:
        # Constant signal: every coefficient equals the minimum, i.e. bin 0.
        counts[0] = len(cA)
    else:
        for x in cA:
            counts[int((x - mn) / df * quant)] += 1
    # The divisor is the raw sample count, not the coefficient count
    # (kept from the original so vectors stay on the same scale).
    return [float(c) / num_samp for c in counts]

# --- Script body: vectorise every WAV file, train the map, write the HTML table ---

# List the directory once and use one filter for everything below; the
# original matched any name containing 'wav' for mx_area but only names
# ending in 'wav' for the training input.
wavs = [f for f in os.listdir('.') if f[-3:] == 'wav']
if not wavs:
    raise SystemExit('no wav files found in the current directory')
mx_area = max(map(get_area, wavs))  # not used further in this part of the script

inlist, fnlist = [], []
for f in wavs:
    fnlist.append(f + '.mp3')  # the table links to '<name>.wav.mp3'
    inlist.append(fft_vect(f))

kmp = kohoWrap.KohTrain(inlist, cycs, dim1, dim2, learnrate)

# Group the link targets by the map position taken from the last entries of
# kmp.x / kmp.y (presumably the positions after the final training cycle --
# confirm against kohoWrap).
pd = {}
for n, (x, y) in enumerate(zip(kmp.x[-1], kmp.y[-1])):
    pd.setdefault((x, y), []).append(fnlist[n])

# Render the map as an HTML table with one cell per position. The "+2" on
# both ranges is kept from the original; NOTE(review): presumably kohoWrap
# positions can lie outside 0..dim-1 -- confirm.
rows = ['<table border="1">\n']
for x in range(dim1 + 2):
    rows.append('<tr>\n')
    for y in range(dim2 + 2):
        names = pd.get((x, y))
        if names:
            links = ['<a href="%s">%s</a>' % (name, play) for name in names]
            rows.append('<td>%s</td>\n' % ' '.join(links))
        else:
            rows.append('<td>&nbsp;</td>\n')
    rows.append('</tr>\n')
rows.append('</table>\n')
h = ''.join(rows)

ofn = 'km_c%d.html' % cycs
out = open(ofn, 'w')
try:
    out.write(h)
finally:
    out.close()  # close explicitly instead of relying on garbage collection
print('%s written' % ofn)