#!/usr/bin/python
# Finds original photos of scaled web images
import urllib,Image,StringIO,os,shutil,sys,logging,psyco
psyco.full()
# --- tuning -----------------------------------------------------------------
# Quantization factor: thumbnails are 1/quant of the image size and every
# colour value is divided by quant before comparison
# (10 = slow, >20 = fast but possibly inaccurate).
quant = 20

# --- web gallery and local archive ------------------------------------------
url1 = 'http://foto.nyk.ch/'  # page that links to the newest photo number
url2 = 'http://foto.nyk.ch/?n=%d'  # photo number -> page containing the image url
foto_dir = 'foto'  # directory on webserver
number = 'n'  # "number" parameter name
data_dir = '/mnt/max/foto/years'  # local photos sorted in year (YYYY) directories

# Settings for the alternative gallery (replace the four values above):
#url1='http://dan.nyk.ch/cgi-bin/photo.cgi?gd=1'
#url2='http://dan.nyk.ch/cgi-bin/photo.cgi?nb=%d'
#foto_dir='photos'
#number='nb'

# --- logging ----------------------------------------------------------------
# Everything goes to mapping.log; the extra stream handler echoes the same
# records on the default logging stream.
logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s %(message)s',
    filename='mapping.log',
)
logging.getLogger('').addHandler(logging.StreamHandler())

# Command line: no argument -> start at photo 1 and search the year directory
# only; "global" -> search the whole archive; anything else is taken as the
# photo number to start with.
glob = len(sys.argv) > 1 and sys.argv[1] == 'global'
start = 1
if len(sys.argv) > 1 and not glob:
    start = int(sys.argv[1])
logging.info('Global = %s, quant = %d, start = %d', glob, quant, start)

def scanDir(d):
    """Return the paths of all non-directory entries below d, recursively.

    d -- directory to scan, with or without a trailing slash.

    Returns a list of path strings built from d, in os.listdir() order.
    An empty list is returned when d is not a directory.

    Paths are assembled with os.path.join(), so a directory given without a
    trailing separator is scanned correctly too; for a slash-terminated d the
    returned strings are the same as plain concatenation would give.
    """
    if not os.path.isdir(d):
        return []
    rl = []
    for fn in os.listdir(d):
        path = os.path.join(d, fn)
        if os.path.isdir(path):
            rl += scanDir(path)
        else:
            rl.append(path)
    return rl

def fingerprint(f, sx2=0, sy2=0):
    """Return a small resized copy of image f to be used as its fingerprint.

    f        -- file name or file-like object handed to Image.open()
    sx2, sy2 -- target size; when sx2 is 0 the size is derived from the
                image itself as (width/quant, height/quant), at least 1x1

    Returns the resized image, or None when the image cannot be opened or
    resized (e.g. not an image, truncated file).

    Image decoding may only fail once the pixel data is needed, so the
    resize is guarded as well; callers already skip a None result.
    """
    try:
        im = Image.open(f)
        sx, sy = im.size
        if not sx2:
            # never ask for a 0-sized thumbnail for images smaller than quant
            sx2, sy2 = max(1, sx // quant), max(1, sy // quant)
        return im.resize((sx2, sy2))
    except Exception:
        # unlike a bare except this lets KeyboardInterrupt/SystemExit through
        return None

def jheadTime(fn):
    """Return the date/time that the jhead tool reports for file fn.

    fn -- path of the image file

    Returns a list of six strings [year, month, day, hour, minute, second]
    taken from the first output line starting with 'Date/Time    : '.
    ['0']*6 is returned when jhead cannot be started, prints no such line,
    or the line does not start with six numeric fields.

    jhead is started with an argument list instead of a shell command line,
    so file names containing spaces or shell metacharacters are passed
    through unchanged.
    """
    import subprocess  # local import: not imported at file level
    dstr = 'Date/Time    : '  # search string to find date in jhead output
    unknown = ['0'] * 6
    try:
        proc = subprocess.Popen(['jhead', fn], stdout=subprocess.PIPE,
                                universal_newlines=True)
    except OSError:
        # jhead is not installed / not executable
        return unknown
    output = proc.communicate()[0]
    for line in output.splitlines():
        if not line.startswith(dstr):
            continue
        # "2008:05:17 14:32:10" -> ['2008', '05', '17', '14', '32', '10']
        tp = []
        for token in line.split(':', 1)[1].split():
            tp += token.split(':')
        if len(tp) >= 6 and not [p for p in tp[:6] if not p.isdigit()]:
            return tp
        return unknown
    return unknown

# Determine the number of the newest photo: the start page refers to it as
# "?<number>=<N>"; the last line containing such a reference is used.
sep = '?' + number + '='
li = None
for i in urllib.urlopen(url1).read().split('\n'):
    if i.find(sep) > -1:
        li = i
if li is None:
    # without this check the script died with a NameError on "li"
    logging.error('No "%s" link found on %s, cannot determine newest photo' % (sep, url1))
    sys.exit(1)
last = int(li.split(sep)[1].split('&')[0])
def cc(p):
    """Quantize one pixel value by integer division with quant.

    p is whatever getpixel() returned: a tuple with one value per band, or a
    single number for one-band images. Tuples stay tuples, so pixels of
    images with different band counts simply compare unequal instead of
    raising.
    """
    if isinstance(p, tuple):
        return tuple([c // quant for c in p])
    return p // quant


def pix(im, x, y):
    """Return the quantized pixel of image im at position (x, y)."""
    return cc(im.getpixel((x, y)))


# cache:      fingerprints of local files, keyed by file name + fingerprint size
# c1, c2, c3: counters for mapped / not found / image error
# lastyear:   year of the previously processed photo (cache is reset on change)
cache, c1, c2, c3, lastyear = {}, 0, 0, 0, 0
# Map every web photo number in [start, last] to a local original and copy it
# to best<NNNNNN>.jpg in the current directory.  Three strategies are tried in
# this order: unique file name, timestamp encoded in the file name, and a
# pixel-by-pixel comparison of quantized thumbnails ("fingerprints").
for i in range(start, last + 1):
    ofn = 'best%06d.jpg' % i
    if os.path.isfile(ofn):
        # already present from an earlier run: count as mapped
        c1 += 1
        continue

    # image url = last line of the photo page with an <img> into foto_dir
    img = False
    for l in urllib.urlopen(url2 % i).read().split('\n'):
        if l.find('img src') > -1 and l.find(foto_dir) > -1:
            img = l.split('"')[1]
    if not img:
        continue

    # the path component following foto_dir is the year
    year = int(img.split(foto_dir + '/')[1].split('/')[0])
    if year != lastyear:
        cache = {}  # drop the fingerprints cached for the previous year
    lastyear = year

    img_data = urllib.urlopen(img).read()
    fp = fingerprint(StringIO.StringIO(img_data))
    if not fp:
        c3 += 1
        continue

    # rough cache size for the log: entries * (key length + pixel count)
    if cache:
        sample = cache.values()[0]
        memo = len(cache) * (len(cache.keys()[0]) + sample.size[0] * sample.size[1])
        memo = float(memo) / 1024.0 / 1024.0
    else:
        memo = 0
    logging.info('%d, %s, %dx%dx%d, %2.1f MB cache' % (i, img, fp.size[0], fp.size[1], 256 // quant, memo))

    # candidates: the year directory, or the whole archive in global mode
    d = '%s/%d/' % (data_dir, year)
    if glob:
        d = data_dir + '/'
    files = scanDir(d)

    # search by unique filename
    bfn = os.path.basename(img).lower().replace('%20', ' ')
    bfiles = [os.path.basename(p).lower() for p in files]
    if bfiles.count(bfn) == 1:  # found by unique filename
        mxf = files[bfiles.index(bfn)]
        logging.info('%d -=> %s (%2.1f%%)' % (i, mxf, 100))
        shutil.copy(mxf, ofn)
        c1 += 1
        continue

    # search by timestamp (file name like YYYY-MM-DD_HH-MM-SS.jpg)
    found = False
    if bfn.count('-') == 4 and bfn.count('_') == 1:
        bfn2 = os.path.splitext(bfn)[0]
        ds = bfn2.split('_')[0].split('-')
        ts = bfn2.split('_')[1].split('-')
        # only names that really consist of 3 + 3 numeric fields qualify
        if len(ds) == 3 and len(ts) == 3 and not [p for p in ds + ts if not p.isdigit()]:
            want = [int(p) for p in ds + ts]
            d = '%s/%d/%02d/%02d.%02d.%d/' % (data_dir, want[0], want[1], want[2], want[1], want[0])
            logging.debug('timestamp search in %s' % d)
            for f in scanDir(d):
                tt = jheadTime(f)
                try:
                    got = [int(p) for p in tt[:6]]
                except ValueError:
                    continue  # unusable date for this file, try the next one
                if got == want:  # found by timestamp
                    logging.info('%d --> %s (%2.1f%%)' % (i, f, 100))
                    shutil.copy(f, ofn)
                    c1 += 1
                    found = True
                    break
    if found:
        # a "continue" inside the loop above would only affect that inner
        # loop; the photo would be searched, copied and counted a second time
        continue

    # searching by fingerprint
    mxp, mxf = 0, False
    for f in files:
        if f.lower().find('.jpg') == -1:
            continue
        key = f + str(fp.size)
        if key in cache:
            fi = cache[key]
        else:
            fi = fingerprint(f, sx2=fp.size[0], sy2=fp.size[1])
            if not fi:
                continue
            cache[key] = fi
        if fi.size != fp.size:
            continue
        # percentage of pixels that are equal after quantization
        bad = 0
        for x in range(fp.size[0]):
            for y in range(fp.size[1]):
                if pix(fi, x, y) != pix(fp, x, y):
                    bad += 1
        prz = 100 - float(bad) / float(fp.size[0] * fp.size[1]) * 100
        if prz > mxp:
            mxp = prz
            mxf = f
    if not mxf or mxp < 15.0:
        logging.info('==> Not found!')
        c2 += 1
        continue

    # found by fingerprint
    logging.info('%d -> %s (%2.1f%%)' % (i, mxf, mxp))
    shutil.copy(mxf, ofn)
    c1 += 1
logging.info('%d mapped, %d not found, %d image error' % (c1, c2, c3))