#!/usr/bin/python
# Finds original photos of scaled web images
# --- Site configuration: foto.nyk.ch ---
# Page that is scanned for the number of the newest photo.
url1 = 'http://foto.nyk.ch/'
# Per-photo page; the photo number is substituted for %d and the returned
# page is scanned for the image URL.
url2 = 'http://foto.nyk.ch/?n=%d'
# Directory on the webserver that holds the photos.
foto_dir = 'foto'
# Name of the "number" URL parameter.
number = 'n'

# --- Alternative site configuration: dan.nyk.ch (disabled) ---
# url1 = 'http://dan.nyk.ch/cgi-bin/photo.cgi?gd=1'
# url2 = 'http://dan.nyk.ch/cgi-bin/photo.cgi?nb=%d'
# foto_dir = 'photos'
# number = 'nb'

import urllib,os,shutil,sys,logging
import os, shutil
from imgSeekLib import ImageDB,__version__
from imgSeekLib.Settings import Env
# Image database that the downloaded web images are matched against,
# opened with the settings object produced by Env().
curdb = ImageDB.ImgDB(Env())

# Record every message (DEBUG and above) with a timestamp in mapping.log ...
logging.basicConfig(
    filename='mapping.log',
    format='%(asctime)s %(message)s',
    level=logging.DEBUG
)
# ... and additionally echo each message through a stream handler attached
# to the root logger (StreamHandler writes to stderr by default).
root_logger = logging.getLogger('')
root_logger.addHandler(logging.StreamHandler())

def _fetch(url):
    """Return the body served at `url`, always closing the connection."""
    response = urllib.urlopen(url)
    try:
        return response.read()
    finally:
        response.close()


# The first photo number to process is the single command-line argument.
if len(sys.argv) < 2:
    sys.exit('usage: %s START_NUMBER' % sys.argv[0])
start = int(sys.argv[1])
logging.info('start = %d' % start)

# Marker that precedes the photo number in links on the index page.
# NOTE(review): the original script used `sep` without ever defining it
# (NameError on the first line of the index page).  It is derived here from
# `number` so that it matches the '?<number>=<digits>' form used by url2 --
# confirm against the actual page markup.
sep = '?%s=' % number

# Find the number of the newest photo: scan the index page and keep the
# last line that contains the marker (later matches override earlier ones).
newest_line = None
for line in _fetch(url1).split('\n'):
    if sep in line:
        newest_line = line
if newest_line is None:
    logging.error('no line containing %r found at %s' % (sep, url1))
    sys.exit(1)
# The number runs from the marker up to the next '&' (or end of line).
last = int(newest_line.split(sep)[1].split('&')[0])

mapped, not_found, image_errors = 0, 0, 0
# Scratch file holding the downloaded web image while it is being queried.
tfn = '/tmp/query.jpg'
for photo_no in range(start, last + 1):
    ofn = 'best%06d.jpg' % photo_no
    # Already mapped by an earlier run: count it and move on.
    if os.path.isfile(ofn):
        mapped += 1
        continue

    # Locate the image URL on the photo page: the first double-quoted string
    # on a line that has both an 'img src' attribute and the photo directory.
    # If several lines qualify, the last one wins.
    img = None
    for line in _fetch(url2 % photo_no).split('\n'):
        if 'img src' in line and foto_dir in line:
            img = line.split('"')[1]
    if not img:
        image_errors += 1
        continue

    # Downloading is best effort: a failure is logged and counted, and the
    # run continues.  Catch Exception rather than using a bare except so
    # that Ctrl-C and sys.exit() still stop the script.
    try:
        img_data = _fetch(img)
    except Exception as err:
        logging.warning('cannot download %s: %s' % (img, err))
        image_errors += 1
        continue

    # Image data is binary, so write it in 'wb' mode; `with` guarantees the
    # file is flushed and closed before the database reads it.
    with open(tfn, 'wb') as scratch:
        scratch.write(img_data)
    try:
        # NOTE(review): presumably asks for the single best match of the
        # scratch image -- confirm the argument meanings in imgSeekLib.
        res = curdb.queryImage(tfn, 1, 1, removeFirst=0)
    finally:
        # Remove the scratch file even if the query raises.
        os.unlink(tfn)

    if not res:
        not_found += 1
        continue
    rid, rsc = res[0]
    # Results whose second field is below 40 are rejected (presumably a
    # similarity score -- verify against imgSeekLib).
    if rsc < 40:
        not_found += 1
        continue

    # First field of the database entry is used as the original's file path.
    rfn = curdb.img[rid][0]
    shutil.copy(rfn, ofn)
    # Count newly mapped photos too; the original only counted files that
    # already existed, so the summary under-reported.
    mapped += 1

logging.info('%d mapped, %d not found, %d image error'
             % (mapped, not_found, image_errors))