# Index page of the photo site; the script reads it to find the last photo
# number it should process.
url1 = "http://foto.nyk.ch/"

# Per-photo page template; %d is filled in with the photo number.
url2 = "http://foto.nyk.ch/?n=%d"

# Substring that an "img src" line must also contain for its URL to be taken
# as the photo to download.
foto_dir = "foto"

# Presumably the name of the query parameter carrying the photo number
# (cf. "?n=" in url2) -- TODO confirm.
number = "n"
import urllib,os,shutil,sys,logging
import os, shutil
from imgSeekLib import ImageDB,__version__
from imgSeekLib.Settings import Env
# Map every photo page of the site to its best match in the local imgSeek
# database: for each photo number, download the displayed image, query the
# database with it and copy the best-matching local file to bestNNNNNN.jpg.

# Image database handle, configured by imgSeekLib's Env().
curdb = ImageDB.ImgDB(Env())

# Log to mapping.log and mirror every record to the console (stderr).
logging.basicConfig(level=logging.DEBUG, format='%(asctime)s %(message)s',
                    filename='mapping.log')
logging.getLogger('').addHandler(logging.StreamHandler())

# The first command-line argument is the photo number to start from.
if len(sys.argv) < 2:
    sys.exit('usage: %s START_NUMBER' % sys.argv[0])
start = int(sys.argv[1])
logging.info('start = %d', start)

# Text that precedes a photo number inside a link on the index page.
# NOTE(review): `sep` was used below without ever being assigned; it is
# derived here from `number` to match the "?n=" form used in url2 -- confirm
# this is the marker the index page actually uses.
sep = '?%s=' % number

# Results whose score is below this value are treated as "not found".
MIN_SCORE = 40

# Scratch file handed to the image database for each query.
QUERY_FILE = '/tmp/query.jpg'

# Find the last photo number: take the LAST line of the index page that
# contains the marker and read the digits between the marker and the next '&'.
index_line = None
for line in urllib.urlopen(url1).read().split('\n'):
    if sep in line:
        index_line = line
if index_line is None:
    logging.error('no link containing %r found on %s', sep, url1)
    sys.exit(1)
last = int(index_line.split(sep)[1].split('&')[0])

mapped, not_found, image_errors = 0, 0, 0

for photo_no in range(start, last + 1):
    ofn = 'best%06d.jpg' % photo_no

    # Already mapped in a previous run: count it and move on.
    if os.path.isfile(ofn):
        mapped += 1
        continue

    # Locate the photo URL on the page: the second '"'-delimited field of the
    # last line that contains both 'img src' and foto_dir.
    img = None
    for line in urllib.urlopen(url2 % photo_no).read().split('\n'):
        if 'img src' in line and foto_dir in line:
            img = line.split('"')[1]
    if not img:
        image_errors += 1
        continue

    # Best effort download; `except Exception` (rather than a bare except)
    # keeps Ctrl-C and sys.exit() working during a long run.
    try:
        img_data = urllib.urlopen(img).read()
    except Exception:
        logging.warning('could not download %s', img)
        image_errors += 1
        continue

    # JPEG data is binary, so write in 'wb' mode and close the file before
    # the database reads it.
    with open(QUERY_FILE, 'wb') as query_file:
        query_file.write(img_data)
    try:
        res = curdb.queryImage(QUERY_FILE, 1, 1, removeFirst=0)
    finally:
        # Remove the scratch file even if the query raises.
        os.unlink(QUERY_FILE)

    if not res:
        not_found += 1
        continue

    # The first result is an (image id, score) pair.
    rid, score = res[0]
    if score < MIN_SCORE:
        not_found += 1
        continue

    # curdb.img[rid][0] is used as the path of the matched local file.
    shutil.copy(curdb.img[rid][0], ofn)
    mapped += 1  # count freshly mapped photos too, not only pre-existing ones

# Final summary for the processed range.
logging.info('%d mapped, %d not found, %d image error'
             % (mapped, not_found, image_errors))