#!/usr/bin/python
# Image to HTML converter
# Copyright (C) January 2019 Neil Fraser
# https://neil.fraser.name/

# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General
# Public License as published by the Free Software Foundation.

# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
# http://www.gnu.org/

# Loads JPEGs, PNGs and GIFs from websites and converts them to HTML.
# Usage:  img2html.py?img=www.example.com/image.jpg

from PIL import Image
import cgi
import cStringIO
import math
import re
import sys
import urllib


def die(msg):
  """Report a fatal error to the client and stop the script.

  Writes a CGI response with status 500 and a plain-text body of the
  form 'Error: <msg>' to standard output, then exits via sys.exit().

  Args:
    msg: Human-readable description of the failure (a string).
  """
  # Header lines first; the empty string ends the CGI header section.
  for header in ('Status:500', 'Content-type: text/plain', ''):
    print(header)
  print('Error: ' + msg)
  sys.exit()

# Compiled once at import time and matched case-insensitively.
_URL_RE = re.compile(
    r'^https?://'  # http:// or https://
    r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+'  # dot-terminated labels
    # Top-level domain: letters only (DNS labels may be up to 63 characters,
    # so modern TLDs such as .photography fit), or a punycode "xn--" label.
    r'(?:[A-Z]{2,63}|XN--[A-Z0-9-]{0,58}[A-Z0-9])\.?|'
    r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})'  # ...or IP
    r'(?::\d+)?'  # optional port
    r'(?:/?|[/?]\S+)$', re.IGNORECASE)


def is_valid_url(url):
  """Tell whether `url` looks like an absolute http(s) URL.

  The host must be either a dotted domain name ending in a top-level
  domain or a dotted-quad IP address; an optional port and an optional
  path/query without whitespace may follow.  Dot-less hosts such as
  'localhost' are rejected.

  Args:
    url: The string to check, or None.

  Returns:
    True if `url` matches, False otherwise (including when it is None).
  """
  return url is not None and _URL_RE.search(url) is not None

# Read and validate the 'img' request parameter.  On success `remoteurl`
# holds an absolute http(s) URL; on failure die() reports the problem and
# terminates the script.
form = cgi.FieldStorage()
if 'img' not in form:
  die('"img" parameter not found.')

# getfirst() also copes with the parameter being supplied more than once,
# in which case form['img'] is a list that has no .value attribute.
remoteurl = (form.getfirst('img') or '').strip()
if not remoteurl:
  die('No image URL specified.')
# Default to http:// when the caller left out the scheme.
if not re.match(r'\w+://', remoteurl):
  remoteurl = 'http://' + remoteurl
if not is_valid_url(remoteurl):
  die('Invalid image URL.  Please go back and check the address.')

# Reduce load on my server, use cached tux.html if the request is tux.jpg
if remoteurl == 'https://neil.fraser.name/software/img2html/tux.jpg':
  # Read the whole cache file before emitting any header, so that a missing
  # or unreadable cache simply falls through to the normal conversion below
  # instead of aborting the request.
  try:
    with open('/home/neil/html/software/img2html/tux.html') as f:
      cached = f.read()
  except (IOError, OSError):
    cached = None
  if cached is not None:
    print("Content-type: text/html\n\n")
    print(cached)
    sys.exit()

# Download the remote file.  `data` holds its raw bytes and `oldsize` its
# size in kilobytes, rounded to the nearest integer.
# NOTE(review): the download is unbounded; consider capping the number of
# bytes read, since the URL is supplied by the client.
try:
  f = urllib.urlopen(remoteurl)
  try:
    data = f.read()
  finally:
    f.close()  # Always release the connection, even if the read fails.
except Exception as err:
  die('Invalid image URL: %s' % err)
# Divide by a float: integer division would truncate before round() runs.
oldsize = int(round(len(data) / 1024.0))

# Decode the image and flatten any transparency onto a white background,
# leaving `image` as a plain RGB image.  Decoding can fail when the URL
# did not return an image at all (e.g. an HTML error page), so report that
# to the user instead of crashing with a traceback.
try:
  image = Image.open(cStringIO.StringIO(data))
  image = image.convert('RGBA')
  background = Image.new('RGBA', image.size, (255, 255, 255))
  background.paste(image, mask=image.split()[3])  # 3 is the alpha channel
  image = background.convert('RGB')
except Exception as err:
  die('Unable to read image: %s' % err)

# Shrink the image (in place) so that it has roughly maxPixels pixels at
# most.  Afterwards (oldX, oldY) is the original size, (newX, newY) is the
# size of `image` as it will be rendered, and `msg` is an HTML notice that
# is empty unless the image was scaled.
maxPixels = 100*100
(oldX, oldY) = image.size
if oldX * oldY > maxPixels:
  factor = math.sqrt(maxPixels / float(oldX * oldY))
  # Clamp to one pixel so that extremely thin images do not request a
  # zero-sized thumbnail.
  targetX = max(1, int(round(oldX * factor)))
  targetY = max(1, int(round(oldY * factor)))
  image.thumbnail((targetX, targetY), Image.ANTIALIAS)
  # thumbnail() preserves the aspect ratio and may therefore produce a
  # slightly different size than requested; use the real dimensions so that
  # later pixel reads stay inside the image.
  (newX, newY) = image.size
  msg = '[This image is too large to comfortably handle, so it has been scaled down to %s%% of its original size.]<P>' % int(100.0 * newX / oldX)
else:
  newX = oldX
  newY = oldY
  msg = ''


# Scan the image pixel by pixel and build the HTML table.
# The result is `table` (the table rows, possibly split into several
# consecutive tables) and `newsize` (its length in kilobytes, rounded).

def _cell(rgb, span):
  """Return one <TD> covering `span` pixels of colour `rgb` (hex RRGGBB)."""
  if span == 1:  # One pixel.
    return "<TD BGCOLOR=#%s><img width=1 height=1></TD>" % rgb
  # A run of multiple pixels with the same colour.
  return "<TD BGCOLOR=#%s COLSPAN=%d><img width=1 height=1></TD>" % (rgb, span)


def _encode_row(colours, use_rle):
  """Return the <TD> cells for one row of hex colour strings.

  With `use_rle` true, adjacent pixels of equal colour are merged into a
  single cell with a COLSPAN (Run Length Encoding); otherwise every pixel
  gets its own cell.  Either way the cells always span len(colours) columns.
  """
  cells = []
  run_rgb = None
  run_len = 0
  for rgb in colours:
    # Close the pending run when the colour changes (or always, without RLE).
    if run_len and (not use_rle or rgb != run_rgb):
      cells.append(_cell(run_rgb, run_len))
      run_len = 0
    run_rgb = rgb
    run_len += 1
  if run_len:  # Flush the final run of the row.
    cells.append(_cell(run_rgb, run_len))
  return ''.join(cells)


pieces = []  # Joined once at the end; avoids quadratic string building.
firstrow = True  # Disable RLE for first row of each table segment (dodge Mozilla bug)
for y in range(newY):
  colours = []
  for x in range(newX):
    try:
      (r, g, b) = image.getpixel((x, y))
    except IndexError:
      # Some images throw a range error on some (transparent?) pixels.
      (r, g, b) = (255, 255, 255)
    colours.append('%02x%02x%02x' % (r, g, b))
  pieces.append("<TR>%s</TR>\n" % _encode_row(colours, not firstrow))

  # Segment the table so that MSIE renders it in pieces instead of waiting till the end.
  if y != 0 and (y == 5 or y % 15 == 0) and y < newY - 10:
    pieces.append("</TABLE><TABLE CELLPADDING=0 CELLSPACING=0 BORDER=0>\n")
    firstrow = True
  else:
    firstrow = False
table = ''.join(pieces)
# Divide by a float: integer division would truncate before round() runs.
newsize = int(round(len(table) / 1024.0))


# We're done.  Now print it all out (this is what takes the time).
# The URL comes straight from the request and may contain characters such
# as <, > or ", so it must be HTML-escaped before being echoed into the
# page (title, link target and link text) to prevent script injection.
safe_url = cgi.escape(remoteurl, True)

print("Content-type: text/html\n")

print("""
<HTML>
<HEAD>
<TITLE>img2html: {remoteurl}</TITLE>
</HEAD>

<BODY>
<DL>
<DT><B>Original Image</B>
<DD><A HREF="{remoteurl}">{remoteurl}</A>
<DD>{oldX}x{oldY}
<DD>{oldsize}KB
<P>
{msg}
<DT><B>Text Image</B>
<DD>{newX}x{newY}
<DD>{newsize}KB
<DD>

<TABLE CELLPADDING=0 CELLSPACING=0 BORDER=0>
{table}</TABLE>

<P>
<DT><B>Done</B>
</DL>
</BODY></HTML>
""".format(remoteurl=safe_url, table=table, msg=msg,
    oldsize=oldsize, newsize=newsize,
    oldX=oldX, oldY=oldY, newX=newX, newY=newY))
