import urllib,Image,StringIO,os,shutil,sys,logging,psyco
# Ask psyco to compile every function it can.
psyco.full()

# Divisor applied both to image dimensions (thumbnail size) and to colour
# channel values when fingerprints are compared.
quant = 20
# Index page; scanned for the last "?n=<number>" link.
url1 = 'http://foto.nyk.ch/'
# Page of one photo, formatted with the photo number.
url2 = 'http://foto.nyk.ch/?n=%d'
# Path component that marks a photo URL on the site (followed by the year).
foto_dir = 'foto'
# Name of the query parameter that carries the photo number.
number = 'n'
# Root of the local archive; searched per year unless running "global".
data_dir = '/mnt/max/foto/years'

# Log to mapping.log and, through the extra stream handler, to the console.
logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(message)s',
                    filename='mapping.log')
logging.getLogger('').addHandler(logging.StreamHandler())

# Command line: either the word "global" (search the whole archive instead
# of only the photo's year) or the photo number to start from.
glob = False
start = 1
if len(sys.argv) > 1:
    if sys.argv[1] == 'global':
        glob = True
    else:
        try:
            start = int(sys.argv[1])
        except ValueError:
            # A typo used to surface as a bare ValueError traceback.
            sys.exit('usage: %s [global|START_NUMBER]' % sys.argv[0])
logging.info('Global = %s, quant = %d, start = %d' % (glob, quant, start))
def scanDir(d):
    """Recursively list every file below directory `d`.

    Sub-directories are descended into but not listed themselves. Paths are
    built by joining `d` with the entry names, in `os.listdir` order.

    `d` may be given with or without a trailing path separator; with a
    trailing separator the returned paths are exactly `d + name`.

    Returns an empty list when `d` is not a directory.
    """
    if not os.path.isdir(d):
        return []
    rl = []
    for fn in os.listdir(d):
        # os.path.join instead of plain concatenation, so a directory passed
        # without a trailing slash no longer yields paths like "dirname".
        path = os.path.join(d, fn)
        if os.path.isdir(path):
            rl.extend(scanDir(path))
        else:
            rl.append(path)
    return rl
def fingerprint(f, sx2=0, sy2=0):
    """Open image `f` and return it resized to a small comparison thumbnail.

    f   -- file name or file-like object accepted by `Image.open`.
    sx2 -- target width; when 0 (the default) the target size is derived
           from the image itself as (width // quant, height // quant),
           each clamped to at least 1 pixel.
    sy2 -- target height, used together with `sx2`.

    Returns the resized image, or None when the image cannot be opened or
    resized (best effort: callers treat None as "unusable image").
    """
    try:
        im = Image.open(f)
    except Exception:
        # Was a bare `except:`, which also swallowed KeyboardInterrupt.
        return None
    sx, sy = im.size
    if not sx2:
        # `//` keeps the integer division explicit; the clamp avoids a
        # zero-sized thumbnail for images smaller than `quant` pixels.
        sx2, sy2 = max(1, sx // quant), max(1, sy // quant)
    try:
        # Resizing needs the pixel data, so errors from truncated or corrupt
        # files can surface here rather than in Image.open.
        return im.resize((sx2, sy2))
    except Exception:
        return None
def jheadTime(fn):
    """Return the timestamp that the `jhead` tool reports for file `fn`.

    Runs ``jhead fn`` and looks for the first output line whose label is
    ``Date/Time``. The value after the first colon is split on whitespace
    and colons, so ``2001:08:05 15:39:33`` becomes
    ``['2001', '08', '05', '15', '39', '33']``.

    Returns ``['0'] * 6`` when jhead cannot be started, prints no such line,
    or the line carries fewer than six fields.
    """
    import subprocess  # local import: not part of the file's import line

    fallback = ['0'] * 6
    try:
        # Argument list instead of a shell string: file names with spaces or
        # shell metacharacters are passed through verbatim.
        proc = subprocess.Popen(['jhead', fn], stdout=subprocess.PIPE,
                                universal_newlines=True)
        out = proc.communicate()[0]  # also reaps the child and closes the pipe
    except OSError:
        # jhead missing or not executable: same result as "no date found".
        return fallback
    for line in out.splitlines():
        label, colon, value = line.partition(':')
        # Compare the label without its padding, so both "Date/Time : ..."
        # and a column-aligned "Date/Time    : ..." are recognised.
        if colon and label.rstrip() == 'Date/Time':
            parts = []
            for chunk in value.split():
                parts.extend(chunk.split(':'))
            if len(parts) < 6:
                return fallback
            return parts
    return fallback
# Main script: for every photo number published on the site, find the
# matching original in the local archive and copy it to best<number>.jpg.
# Three strategies are tried in order:
#   1. exactly one archive file with the same base name           ("-=>")
#   2. date-style file name matched against the jhead timestamp   ("-->")
#   3. best pixel-wise match of the down-scaled, quantised images ("->")

# Query-string marker that precedes a photo number in the site's links.
sep = '?' + number + '='


def _fetch(url):
    """Download `url` and return its body, always closing the connection."""
    handle = urllib.urlopen(url)
    try:
        return handle.read()
    finally:
        handle.close()


def _sameStamp(tt, stamp):
    """Tell whether the first six jhead fields in `tt` equal `stamp` as integers."""
    try:
        return [int(v) for v in tt[:6]] == stamp
    except (TypeError, ValueError):
        return False


def cc(rgb):
    """Quantise an (r, g, b) pixel by integer-dividing each channel by `quant`."""
    x, y, z = rgb
    return (x // quant, y // quant, z // quant)


def pix(im, x, y):
    """Return the quantised colour of pixel (x, y) of image `im`."""
    return cc(im.getpixel((x, y)))


# The last line of the index page that contains a "?n=" link gives the
# highest photo number.
li = None
for line in _fetch(url1).split('\n'):
    if sep in line:
        li = line
if li is None:
    # Used to fail with a NameError on `li`.
    sys.exit('No "%s" link found on %s' % (sep, url1))
last = int(li.split(sep)[1].split('&')[0])

# cache: fingerprints of archive files, keyed by path + thumbnail size;
# c1/c2/c3: photos mapped / not found / unreadable.
cache, c1, c2, c3, lastyear = {}, 0, 0, 0, 0
for i in range(start, last + 1):
    ofn = 'best%06d.jpg' % i
    if os.path.isfile(ofn):
        # Already mapped by an earlier run.
        c1 += 1
        continue

    # Locate the photo URL on the photo's page (last matching line wins).
    img = False
    for l in _fetch(url2 % i).split('\n'):
        if l.find('img src') > -1 and l.find(foto_dir) > -1:
            img = l.split('"')[1]
    if not img:
        continue
    year = int(img.split(foto_dir + '/')[1].split('/')[0])
    if year != lastyear:
        # Fingerprints of another year's directory are of no further use.
        cache = {}
    lastyear = year

    img_data = _fetch(img)
    fp = fingerprint(StringIO.StringIO(img_data))
    if not fp or 0 in fp.size:
        # Unreadable image, or a thumbnail without pixels (would divide by
        # zero in the match percentage below).
        c3 += 1
        continue
    # Compare in RGB so greyscale/CMYK/RGBA sources do not break cc().
    fp = fp.convert('RGB')

    # Rough estimate of the cache size, extrapolated from one entry.
    if cache:
        first_key, first_img = list(cache.items())[0]
        memo = len(cache) * (len(first_key) + first_img.size[0] * first_img.size[1])
        memo = float(memo) / 1024.0 / 1024.0
    else:
        memo = 0
    logging.info('%d, %s, %dx%dx%d, %2.1f MB cache' % (i, img, fp.size[0], fp.size[1], 256 / quant, memo))

    d = '%s/%d/' % (data_dir, year)
    if glob:
        d = data_dir + '/'
    files = scanDir(d)

    # Strategy 1: a unique file with the same (lower-cased) base name.
    bfn = os.path.basename(img).lower().replace('%20', ' ')
    bfiles = [os.path.basename(x).lower() for x in files]
    if bfiles.count(bfn) == 1:
        mxf = files[bfiles.index(bfn)]
        logging.info('%d -=> %s (%2.1f%%)' % (i, mxf, 100))
        shutil.copy(mxf, ofn)
        c1 += 1
        continue

    # Strategy 2: names shaped like YYYY-MM-DD_HH-MM-SS are looked up in the
    # directory of that day and matched against the jhead timestamp.
    if bfn.count('-') == 4 and bfn.count('_') == 1:
        bfn2 = os.path.splitext(bfn)[0]
        ds = bfn2.split('_')[0].split('-')
        ts = bfn2.split('_')[1].split('-')
        stamp = None
        if len(ds) == 3 and len(ts) == 3:
            try:
                stamp = [int(v) for v in ds + ts]
            except ValueError:
                pass  # right punctuation, but not a numeric date/time
        if stamp is not None:
            d = '%s/%d/%02d/%02d.%02d.%d/' % (data_dir, stamp[0], stamp[1], stamp[2], stamp[1], stamp[0])
            logging.debug(d)
            matched = False
            for f in scanDir(d):
                if _sameStamp(jheadTime(f), stamp):
                    logging.info('%d --> %s (%2.1f%%)' % (i, f, 100))
                    shutil.copy(f, ofn)
                    c1 += 1
                    matched = True
                    break
            if matched:
                # Move on to the next photo; a `continue` inside the inner
                # loop could not do that and let a matched photo fall
                # through to the fingerprint search.
                continue

    # Strategy 3: pixel-wise comparison of quantised thumbnails.
    width, height = fp.size
    coords = [(x, y) for x in range(width) for y in range(height)]
    # Quantise the reference thumbnail once, not once per candidate file.
    wanted = [pix(fp, x, y) for (x, y) in coords]
    mxp, mxf = 0, False
    for f in files:
        if f.lower().find('.jpg') == -1:
            continue
        key = f + str(fp.size)
        if key in cache:
            fi = cache[key]
        else:
            fi = fingerprint(f, sx2=fp.size[0], sy2=fp.size[1])
            if not fi:
                continue
            fi = fi.convert('RGB')
            cache[key] = fi
        if fi.size != fp.size:
            continue
        bad = 0
        for (x, y), q in zip(coords, wanted):
            if pix(fi, x, y) != q:
                bad += 1
        # Percentage of pixels whose quantised colour agrees.
        prz = 100 - float(bad) / float(len(coords)) * 100
        if prz > mxp:
            mxp = prz
            mxf = f
    if not mxf or mxp < 15.0:
        logging.info('==> Not found!')
        c2 += 1
        continue
    logging.info('%d -> %s (%2.1f%%)' % (i, mxf, mxp))
    shutil.copy(mxf, ofn)
    c1 += 1

logging.info('%d mapped, %d not found, %d image error' % (c1, c2, c3))