#!/usr/bin/env python

# Parser for Igglo's search result list
# Produces an XML document that gives, among other things, the coordinates of the apartments for sale on Igglo
# Osma Suominen 28.3.2006 <ozone@iki.fi>
#    updated 19.1.2007:
#	- the city is selected by code (HKI); free text no longer works
#	- address parsing works again
#
# TODO: use a more sensible HTML parser (e.g. Beautiful Soup)
#
# Public domain: may be used freely for whatever you like

import cgi
import cgitb; cgitb.enable()
import urllib
import HTMLParser
from urlparse import urlparse, urljoin

# Coefficients of the linear mapping from Igglo's x/y coordinate pairs to
# WGS84 latitude/longitude pairs. These are only approximate!
#   latitude  = baseN + y * dN_dy
#   longitude = baseE + x * dE_dx

# North/south axis (latitude, driven by y)
baseN = 1.4409053793612756       # latitude that y == 0 maps to
dN_dy = 8.798502382575044e-06    # change in latitude per unit of y (dN/dy)

# East/west axis (longitude, driven by x)
baseE = -42.862012611225019      # longitude that x == 0 maps to
dE_dx = 2.0025510204081788e-05   # change in longitude per unit of x (dE/dx)

# Coordinate conversion functions

def to_lat(y):
  """Return the approximate WGS84 latitude for an Igglo y coordinate.

  y may be anything int() accepts (an int or a string of digits).
  """
  northing = int(y) * dN_dy   # scale the y value into latitude units
  return baseN + northing

def to_lng(x):
  """Return the approximate WGS84 longitude for an Igglo x coordinate.

  x may be anything int() accepts (an int or a string of digits).
  """
  easting = int(x) * dE_dx   # scale the x value into longitude units
  return baseE + easting

class V:
  """Tiny value holder exposing its payload as the ``value`` attribute.

  The default query below wraps its entries in V so they can be read with
  ``.value`` exactly like the entries of the parsed CGI form.
  """

  def __init__(self, val):
    # Keep the payload untouched; callers read it back through .value
    self.value = val

# Parse CGI parameters and pass them on to the Igglo backend
form = cgi.FieldStorage()
if 'fromwhere' not in form:
  # No city code in the request: run a complete default query instead.
  form = { 'fromwhere': V('HKI'), 'minsize': V(50), 'maxsize': V(75), 'minprice': V(1000), 'maxprice': V(150000) }

def _quoted_param(name):
  """Return the request value for *name*, percent-encoded for use in a URL.

  The values come straight from the client, so every reserved character is
  encoded (safe='' also encodes '/'). Without this, a value containing '&'
  or '=' would inject extra parameters into the backend query.
  """
  return urllib.quote(str(form[name].value), '')

# Search URL on the Igglo backend; the five %s slots are city code,
# min/max total price and min/max size, in that order.
URL = "http://www.igglo.fi/search.php?freetext=&strict_location[city]=%s&min[total_price]=%s&max[total_price]=%s&min[rooms]=&max[rooms]=&min[size]=%s&max[size]=%s&strict_ad[floor_level]=&strict_ad[a_condition]=&strict_location[building_type]=&show_locations=0&admode[sell]=1&admode[silentsell]=1&admode[forrent]=0&admode[silentforrent]=0&&results_perpage=64&results_sorting=6" % \
  (_quoted_param('fromwhere'), _quoted_param('minprice'), _quoted_param('maxprice'),
   _quoted_param('minsize'), _quoted_param('maxsize'))

class IggloResultParser(HTMLParser.HTMLParser):
  """Scrape Igglo's search result page and print one <marker/> per listing.

  The parser is a small state machine driven by the tag/data callbacks:
  a <div class="resultblock"> (or "resultblocklast") opens a listing, the
  pieces of the listing are collected as they stream past, and the
  "katso kartalta" map link closes the listing and prints its marker.

  Text attributes are XML-escaped when the marker is printed, so self.href
  and the other fields hold the raw (unescaped) values.
  """

  # State flags: 1 while the parser is inside the corresponding piece.
  in_result = 0
  in_size = 0
  in_price = 0
  in_address = 0
  # NOTE(review): nothing in this class ever sets in_location to 1, so
  # self.location stays None and is emitted as 'None' -- confirm intent.
  in_location = 0

  def _xml_attr(self, value):
    """Return *value* as text that is safe inside a single-quoted XML attribute."""
    # '%s' formatting keeps the original rendering (e.g. None -> 'None')
    text = '%s' % (value,)
    # '&' must be replaced first so the entities added below stay intact
    for raw, entity in (('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ("'", '&apos;')):
      text = text.replace(raw, entity)
    return text

  def handle_starttag(self, tag, attrs):
    """Track listing structure and print the marker at the map link."""
    attrs = dict(attrs)
    css_class = attrs.get('class')

    if tag == 'div' and css_class in ('resultblock', 'resultblocklast'):
      # A new listing starts: forget everything about the previous one
      self.in_result = 1
      self.size = None
      self.price = ''
      self.address = None
      self.location = None
      self.href = None
      self.imgsrc = None
      return

    if not self.in_result: return

    if tag == 'img' and attrs.get('alt') == 'Katso koko ilmoitus':
      src = attrs.get('src')
      if src is not None:
        self.imgsrc = src
        # The second-to-last path component of the thumbnail URL is used
        # as a provisional address until a <b> element supplies its text
        parts = src.split('/')
        if len(parts) >= 2:
          self.address = parts[-2]

    if tag == 'div' and css_class == 'ctitle':
      self.in_size = 1
      return

    if tag == 'b':
      self.in_address = 1

    if self.in_size and tag == 'a':
      # Link inside the title block: this is the listing's own page
      link = attrs.get('href')
      if link is not None:
        self.href = urljoin(URL, link)
      return

    if tag == 'a' and attrs.get('title') == 'katso kartalta':
      # The map link carries the x/y coordinates in its query string
      query = urlparse(attrs.get('href') or '')[4]
      args = {}
      for pair in query.split('&'):
        if '=' in pair:
          key, value = pair.split('=', 1)
          args[key] = value
      try:
        lat = to_lat(args['y'])
        lng = to_lng(args['x'])
      except (KeyError, ValueError):
        # Missing or non-numeric coordinates: skip this listing rather
        # than abort the whole response
        lat = lng = None
      if lat is not None:
        print("<marker size='%s' price='%s' address='%s' location='%s' href='%s' imgsrc='%s' lat='%s' lng='%s'/>" % \
         (self._xml_attr(self.size), self._xml_attr(self.price),
          self._xml_attr(self.address), self._xml_attr(self.location),
          self._xml_attr(self.href), self._xml_attr(self.imgsrc), lat, lng))
      self.in_result = 0

  def handle_data(self, data):
    """Assign a text chunk to whichever field the state flags point at."""
    if self.in_size:
      self.size = data
      self.in_size = 0
      self.in_price = 1  # a single charref (&sup2;) comes in between here
      return

    if self.in_price:
      # Only the first chunk after the size is considered; blank text is
      # ignored but still ends the price state
      if data.strip() != '':
        self.price = data.strip()
      self.in_price = 0
      return

    if self.in_location:
      self.location = data
      self.in_location = 0
      return

    if self.in_address:
      self.address = data
      self.in_address = 0

  def handle_endtag(self, tag):
    """End tags carry no information this parser needs."""
    pass

  

# Emit the CGI response: an XML document whose <marker/> elements are
# printed by the parser while it walks the fetched result page.
print("Content-type: text/xml")
print("")
print("<markers>")
data = urllib.urlopen(URL)
try:
  page = data.read()
finally:
  # Release the connection even if reading the response fails
  data.close()
parser = IggloResultParser()
parser.feed(page)
print("</markers>")