#!/usr/bin/python
# Finds and saves peaks in WAV (PCM wave audio) files
import sys,os,numpy,string,psyco
psyco.full()

window_duration=3 # window when scanning for peaks [seconds]
scan_wind=50 # window when scanning for peak width [samples]
thresh_fact=1.6 # threshold factor [fraction of median]

def get_nb(s):
 rs=''
 for c in s:
  if c in string.digits: rs+=c
 return rs

def toLittleEndianDWORD(i): return ''.join(map(chr,[x&0xff for x in [i,i>>8,i>>16,i>>24]]))
def fromLittleEndianDWORD(s): return ord(s[0]) + (ord(s[1])<<8) + (ord(s[2])<<16) + (ord(s[3])<<24)
def fromLittleEndianWORD(s): return ord(s[0]) + (ord(s[1])<<8)

fn=sys.argv[1]
fn_time=int(get_nb(os.path.basename(fn)))
wav=open(fn).read()
# checks
format=fromLittleEndianWORD(wav[20:22])
assert format==1, 'Only PCM'
channels=fromLittleEndianWORD(wav[22:24])
assert channels==1, 'Only mono'
data_pos=wav.find('data') + 8 # start of wave data
samp2wav=lambda x : int(x) * 2 + data_pos
samplerate=fromLittleEndianDWORD(wav[24:28]) # samples per second
window=samplerate*window_duration # window size in samples
header=wav[:data_pos-4] # WAV header
data_len=len(wav)-data_pos
total=data_len/2/samplerate
print '%s: %2.1f MB -> %d min, %d sec' % (fn,data_len/1024.0/1024.0,total/60,total%60)
ints=[]
for n in range(data_len/2):
 p=data_pos + n*2
 ints.append(fromLittleEndianWORD(wav[p:p+2]))
num_samp=len(ints)
num_areas=num_samp/window
areas=[]
for w in range(num_areas):
 area=0
 for i in range(window): area+=ints[w*window+i]
 areas.append(area/window)
median=numpy.median(areas)
thresh=int(median*thresh_fact)
ends=[]
for n,a in enumerate(areas):
 if a<thresh: continue
 start=n*window
 end=(n+1)*window
 iv=ints[start:end]
 mxp=iv.index(max(iv))
 last=0
 for i1 in range(0,start+mxp,scan_wind):
  current=int(numpy.mean(ints[start+mxp-i1-scan_wind:start+mxp-i1]))
  if current==last: break
  last=current
 for i2 in range(0,num_samp-start+mxp,scan_wind):
  current=int(numpy.mean(ints[start+mxp+i2:start+mxp+i2+scan_wind]))
  if current==last: break
  last=current
 p1=mxp-i1 + n*window
 p2=mxp+i2 + n*window
 overlap=False
 for e in ends: 
  if p1<e: overlap=True
 if overlap: continue
 ends.append(p2)
 print '-> Peak %2.2f - %2.2f' % (float(p1)/samplerate,float(p2)/samplerate)
 data=wav[samp2wav(p1):samp2wav(p2)]
 lstr=toLittleEndianDWORD(len(data))
 pfn='peak_%d_%d.wav' % (fn_time+n*window_duration,a)
 open(pfn,'w').write(header + lstr + data)
 os.system('lame --silent %s' % pfn)
 os.unlink(pfn)