#!/usr/bin/perl

# Image to HTML converter
# Copyright (C) 2003 Neil Fraser, Scotland
# http://neil.fraser.name/

# This program is free software; you can redistribute it and/or modify it under the terms of version 2 of the GNU General Public License as published by the Free Software Foundation.
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.  http://www.gnu.org/

# Loads JPEGs, PNGs and GIFs from websites and converts them to HTML.
# Usage:  img2html.pl?img=www.example.com/image.jpg

use strict;
use GD;
use CGI;

my $query = new CGI;
my $remoteurl = $query->param('img');
$remoteurl =~ s/^\s+//;
$remoteurl =~ s/^http:\/\///;
$remoteurl =~ s/\s+$//;
$remoteurl || die("No image URL specified.");
($remoteurl =~ /([^-\w.\/?&=~%*+;:])/) && die("Invalid image URL", 'Please go back and check the address.');
$remoteurl = 'http://'.$remoteurl;
my $filename = $remoteurl;
$filename =~ s/\W/_/g;

# Reduce load on my server, use cached tux.html if the request is tux.jpg
if ($remoteurl eq 'http://neil.fraser.name/software/img2html/tux.jpg') {
  open (TUX, '/home/neil/html/software/img2html/tux.html');
    my @tux = <TUX>;
  close (TUX);
  print "Content-type: text/html\n\n";
  print join '', @tux;
  exit;
}

# Rate limiter, added when Digg hit this script with thousands of users.
my $ps_count = 0;
`/bin/ps aux > /tmp/img2html.txt`;
open (PS, '/tmp/img2html.txt');
  while(<PS>) {
    if (/img2html/) {
      $ps_count++;
    }
  }
close (PS);
if ($ps_count > 4) {
  print "Content-type: text/plain\n\n";
  print "Rate-limiter: Too many simultaneous processes.  Try again in a few seconds.";
  exit;
}

# Fetch the image using Lynx
`/usr/bin/lynx -source '$remoteurl' > /tmp/$filename`;

# Log this incase of security investigations
require 'ctime.pl';
open (LOG, '>>/home/neil/html/software/img2html/log.txt');
  print LOG $remoteurl.' - '.($ENV{'REMOTE_HOST'} || $ENV{'REMOTE_ADDR'}).' - '.ctime(time);
close (LOG);

# Load the original image.
my $oldsize = int((-s "/tmp/$filename")/1024 + 0.5);
GD::Image->trueColor(1);
my $image = GD::Image->new("/tmp/$filename");
if (!$image && index(`/usr/bin/file /tmp/$filename`, ' GIF ') != -1) {
  # This image may be a GIF.  Try converting it to PNG.
  `/usr/bin/gif2png /tmp/$filename`;
  unlink("/tmp/$filename");
  $filename =~ s/.gif$/.png/i;
  $image = GD::Image->new("/tmp/$filename");
} elsif (!$image && index(`/usr/bin/file /tmp/$filename`, ' JPEG ') != -1) {
  # This image is a JPEG but isn't in JFIF.  Try converting it.
  my $newfilename = 'jpg2jpg_'.$filename;
  `/usr/bin/jpegtran /tmp/$filename > /tmp/$newfilename`;
  unlink("/tmp/$filename");
  $filename = $newfilename;
  $image = GD::Image->new("/tmp/$filename");
}

unlink("/tmp/$filename");
$image || die("Can't load image:", $remoteurl, "/tmp/$filename", $!, 'Note that only JPEGs, PNGs and GIFs are supported.');

# If the image is too large, scale it down.
my $msg = '';
my $maxPixels = 100*100;
my ($oldX, $oldY) = $image->getBounds();
my ($newX, $newY);
if ($oldX*$oldY > $maxPixels) {
  my $factor = sqrt($maxPixels/($oldX*$oldY));
  $newX = int($oldX * $factor);
  $newY = int($oldY * $factor);
  my $newimage;
  $newimage = new GD::Image($newX, $newY);
  my $bg = $newimage->colorAllocate(255, 255, 255); # White bg incase of transparency
  $newimage->filledRectangle(0, 0, $newX, $newY, $bg);
  $newimage->copyResampled($image,0,0,0,0,$newX,$newY,$oldX,$oldY);
  $image = $newimage;
  $msg = '[This image is too large to comfortably handle, so it has been scaled down to '.int(100*$newX/$oldX).'% of its original size.]<P>';
} else {
  $newX = $oldX;
  $newY = $oldY;
}

# Scan the image pixel by pixel and build the HTML table.
my $table = '';
my $row;
my ($x, $y);
my ($r, $g, $b, $rgb);
my ($prev_rgb, $span);
my $firstrow = 1; # Disable RLE for first row of each table segment (dodge Mozilla bug)
for($y=0; $y<$newY; $y++) {
  $row = '';
  for($x=0; $x<=$newX; $x++) {
    ($r,$g,$b) = $image->rgb($image->getPixel($x, $y));
    if ($x == $newX) {
      # Dummy run to clear the colspan buffer.
      $rgb='';
    } else {
      $rgb = sprintf('%02lx%02lx%02lx', $r,$g,$b);
    }
    if ($x == 0) { # Initialise the RLE (Run Length Encoding)
      $prev_rgb = $rgb;
      $span = 0;
    }
    $span++;
    if ($rgb ne $prev_rgb || $firstrow) {
      if ($span == 1) { # One pixel.
        $row .= "<TD BGCOLOR=#$prev_rgb><img width=1 height=1></TD>";
      } else { # A run of multiple pixels with the same colour.
        $row .= "<TD BGCOLOR=#$prev_rgb COLSPAN=$span><img width=1 height=1></TD>";
      }
      $span = 0;
      $prev_rgb = $rgb;
    }
  }
  $table .= "<TR>$row</TR>\n";

  # Segment the table so that MSIE renders it in pieces instead of waiting till the end.
  if ($y != 0 && ($y == 5 || $y % 15 == 0) && $y < $newY-10) {
    $table .= "</TABLE><TABLE CELLPADDING=0 CELLSPACING=0 BORDER=0>\n";
    $firstrow = 1;
  } else {
    $firstrow = 0;
  }
}
my $newsize = int(length($table)/1024 + 0.5);

# We're done.  Now print it all out (this is what takes the time).
print "Content-type: text/html\n\n";
print <<END;
<HTML>
<HEAD>
<TITLE>img2html: $remoteurl</TITLE>
</HEAD>

<BODY>
<DL>
<DT><B>Original Image</B>
<DD><A HREF="$remoteurl">$remoteurl</A>
<DD>${oldX}x$oldY
<DD>${oldsize}KB
<P>
$msg
<DT><B>Text Image</B>
<DD>${newX}x$newY
<DD>${newsize}KB
<DD>

<TABLE CELLPADDING=0 CELLSPACING=0 BORDER=0>
$table</TABLE>

<P>
<DT><B>Done</B>
</DL>
</BODY></HTML>
END
