#!/usr/bin/perl
# vim: set ft=perl ts=3 sw=3 expandtab:
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
#              C E D A R
#          S O L U T I O N S       "Software done right."
#           S O F T W A R E
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Copyright (c) 2006-2007,2015 Kenneth J. Pronovici.
# All rights reserved.
#
# Based on software originally written by Bharat Mediratta, with no explicit
# copyright statement or license.  The original source can be found here:
# http://codex.gallery2.org/index.php/Downloads:PHP_Deserialize
#
# Redistribution and use in source and binary forms, with or without
# modification, is permitted without restrictions.  
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE.
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #
#
# Authors  : Kenneth J. Pronovici <pronovic@ieee.org>
# Language : Perl 5.8.
# Revision : $Id: gallery-extract 1378 2015-02-21 21:28:16Z pronovic $
# Purpose  : Extract a Gallery (v1) album into a directory
#
# # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # #

########
# Notes
########

# NOTE: As of 2015, this script is obsolete and no longer works with the
# serialized file format generated by the latest version of Gallery and PHP.  I
# haven't had time to figure out how to parse the new file format.
# 
# Extracts Gallery (v1) albums into a directory and generates a printable index
# including captions.  I use this when I want to put an album or albums onto a CD
# to show on a TV using someone's DVD player.  It's tedious to copy the files by
# hand, so this script parses the Gallery data files, dumps the pictures into a
# directory, and renames them so the correct viewing order is obvious to the DVD
# player.
#
# To run it, use something like this:
#
#      extract.pl target-dir index-file gallery-dir [gallery-dir] [gallery-dir]
#
# The complicated part of this code was originally written by Bharat Mediratta
# and was distributed on the Gallery web site.  Bharat's code parses the
# Gallery data file format for album.dat and photos.dat (which is just PHP's
# standard data serialization format) and dumps the result to the screen for
# debugging purposes.  I've used that code as the starting point for my
# purposes.  The only major change I made was a tweak to let the parser handle
# negative numbers (sometimes, "-1" shows up where a number is expected).


################
# Package setup
################

use strict;
use Data::Dumper;
use File::Basename;
use File::Copy;
use Getopt::Long;


###############
# Global setup
###############

my $data;  # Global variable used by Bharat's parsing routines

my %parse_func =
  ("O:" => \&parse_object,
   "a:" => \&parse_array,
   "s:" => \&parse_string,
   "i:" => \&parse_integer,
   "b:" => \&parse_integer,
   "N;" => \&parse_null);


###############
# Main routine
###############

my $help;
my $dry_run;
my $interactive;

my $return = GetOptions("interactive" => \$interactive,
                        "dry-run"     => \$dry_run,
                        "help"        => \$help);

if($return == 0 or $help) {
   usage();
   exit 1;
}

# Check arguments
if($#ARGV < 2) {
   usage();
   exit 1;
}

# Extract arguments
my $target_dir = shift @ARGV;
my $index_file = shift @ARGV;
my @albums = @ARGV;
$target_dir =~ s/\/$//g;
for my $album (@albums) {
   $album =~ s/\/$//g;
}

# Note what we're doing
print "Extracting a total of " . ($#albums+1) . " album(s) to [$target_dir].\n";
print "Cumulative index information will be written in [$index_file].\n";

# Tell the user about dry-run setting
if(defined $dry_run) {
   print "Dry run: index will be generated, but no data will be copied.\n";
}

# Tell the user about interactive setting
if(defined $interactive) {
   print "Interactive session: you will get the chance to choose pictures from each album.\n";
}

# Generate the album set
my @albumset = &generate_albumset(\@albums);
print "Completed generating album set using album.dat and photos.dat.\n";

# Create the index file
&generate_index(\@albumset, $index_file);
print "Completed generating index file.\n";

# Copy the images files, renumbered properly
my $count = &copy_images(\@albumset, $target_dir);
print "Completed copying $count images to target directory.\n";


######################
# Utility subroutines
######################

sub usage {
   print "usage: extract.pl [options] target-dir index-file gallery-dir [gallery-dir] [gallery-dir]\n";
   print "--interactive Do an interactive run\n";
   print "--dry-run     Do a dry run (don't copy any pictures)\n";
   print "--help        Print this usage statement\n";             
   print "If you choose an interactive session, you can indicate pictures in individual galleries.\n";
}

# Parses picture-list input, which is like a MS Word print dialog page listing,
# i.e. 1,2,3-10,14,15 We split on ",", trim spaces from fields, then deal with
# ranges.  A hash lets us make sure we're doing only unique pages.
sub parse_picture_list {
   my %requested;
   my $answer = shift;
   my @split = split(",", $answer);
   for my $item (@split) {
      $item =~ s/ //g;
      if($item) {
         if($item =~ /-/) {
            my ($start, $end) = split("-", $item);
            for my $index ($start .. $end) {
               $requested{$index} = $index;
            }
         }
         else
         {
            $requested{$item} = $item;
         }
      }
   }
   return sort {$a <=> $b} keys %requested;
}


##################################
# High-level function subroutines
##################################

sub generate_albumset {
   my $albums = shift;

   my @albumset;
   for my $album (@$albums) {
      my $albumkey = &basename($album);

      my @requested;
      if(defined $interactive) {
         print "Enter picture numbers for album [$album], i.e. \"1,2,3-10,14,15\".\n";
         print "Pictures: ";
         my $answer = <STDIN>;
         chop $answer;
         @requested = &parse_picture_list($answer);
         my $count = $#requested + 1;
         print "Requested " . $count . " pictures from [$album].\n";
         print "\n";
      }

      my $path = $album . "/album.dat";
      open my $fh, "<$path" or die "Unable to open $path\n";
      my $contents; { local $/; undef $/; $contents = <$fh>; } # read without changing line separator
      close $fh;
      my ($tree, $size) = &call_parse($contents);
      my ($title, $description) = &untangle_album_data($tree);

      $path = $album . "/photos.dat";
      open my $fh, "<$path" or die "Unable to open $path\n";
      my $contents; { local $/; undef $/; $contents = <$fh>; } # read without changing line separator
      close $fh;
      my ($tree, $size) = &call_parse($contents);
      my @images = &untangle_image_data($tree, $album, \@requested);

      my %albumdata;
      $albumdata{'key'} = $albumkey;
      $albumdata{'path'} = $album;
      $albumdata{'title'} = $title;
      $albumdata{'description'} = $description;
      $albumdata{'count'} = scalar @images;
      $albumdata{'images'} = \@images;

      push @albumset, \%albumdata;
   }

   return @albumset;
}

sub generate_index {
   my $albumset = shift;
   my $path = shift;

   open my $fh, ">$path" or die "Unable to open $path\n";

   my $index = 0;
   for my $album (@$albumset) {

      print $fh "\n";
      print $fh "\n";
      print $fh "===============================================================================\n";
      print $fh "Album: " . $$album{'title'} . "\n";
      print $fh "Description: " . $$album{'description'} . "\n";
      print $fh "Original path: " . $$album{'path'} . "\n";
      print $fh "Album contains a total of " . $$album{'count'} . " images.\n";
      print $fh "===============================================================================\n";
      print $fh "\n";

      my $images = $$album{'images'};
      for my $image (@$images) {
         my $relpath = $$image{'relpath'};
         my $caption = $$image{'caption'};
         $caption =~ s///g;
         printf $fh "Image %06d: [%15s] %s\n", $index, $relpath, substr($caption, 0, 47);
         $index += 1;
      } 
   }

   close $fh;
}


sub copy_images {
   my $albumset = shift;
   my $target_dir = shift;

   if(! -d $target_dir) {
      mkdir $target_dir;
   }

   $| = 1; # perform flush after each write to STDOUT, so status info gets written

   my $index = 0;
   print "Copying files";
   for my $album (@$albumset) {
      my $images = $$album{'images'};
      for my $image (@$images) {
         my $relpath = $$image{'relpath'};
         my $abspath = $$image{'abspath'};
         my $extension = $$image{'extension'};
         my $newpath = sprintf "%s/img%06d.%s", $target_dir, $index, $extension;
         if(!defined $dry_run) {
            &copy($abspath, $newpath) or die "\nError copying [$abspath] to [$newpath].\n";
         }
         print ".";
         $index += 1;
      } 
   }
   print "\n";

   return $index;
}


################################
# Data-manipulation subroutines
################################

sub untangle_image_data {
   my $tree = shift;
   my $album = shift;
   my $requested = shift;

   my $treesize = scalar @$tree;
   
   my @indices;
   if(defined @$requested) {
      @indices = @$requested;
   }
   else {
      @indices = (1 .. $treesize);
   }

   my @images;
   for my $index (@indices) {
      $index = $index - 1;  # true index is zero-based
      my %image;
      my $name = $$tree[$index]{$index}{'albumitem (class)'}[0]{'image'}{'image (class)'}[0]{'name'};
      my $type = $$tree[$index]{$index}{'albumitem (class)'}[0]{'image'}{'image (class)'}[1]{'type'};
      my $caption = $$tree[$index]{$index}{'albumitem (class)'}[3]{'caption'};
      $image{'relpath'} = $name . "." . $type;
      $image{'abspath'} = $album . "/" . $name . "." . $type;
      $image{'extension'} = $type;
      $image{'caption'} = $caption;
      push @images, \%image;
   } 

   return @images;
}

sub untangle_album_data {
   my $tree = shift;

   my $title = $$tree{'album (class)'}[0]{'fields'}[0]{'title'};
   my $description = $$tree{'album (class)'}[0]{'fields'}[1]{'description'};

   return ($title, $description)
}


######################
# Parsing subroutines
######################

sub call_parse {
   my $parsedata = shift;
   $data = $parsedata;
   my ($tree, $size) = &parse(0);
   return ($tree, $size);
}

sub parse {
  my $index = shift;

  my $type = substr($data, $index, 2);
  my $func = $parse_func{$type};
  if (!$func) {
    die "Unknown type '$type' at position $index";
  }
  &$func($index+2);
}

sub parse_object {
  my $index = shift;

  my $self = {};

  my ($name, $members);
  ($name, $index) = &get_string($index);
  $index = skip($index, ':');
  ($members, $index) = &parse_array($index);

  $self->{"$name (class)"} = $members;

  return ($self, $index);
}

sub parse_null {
  my $index = shift;

  return ("<null>", $index);
}

sub parse_integer {
  my $index = shift;

  my $value;
  my $self = {};
  ($value, $index) = get_number($index);
  $index = skip($index, ';');

  return ($value, $index);
}

sub parse_string {
  my $index = shift;

  my $value;
  my $size = 0;

  ($value, $index) = get_string($index);
  $index = skip($index, ';');

  return ($value, $index);
}

sub parse_array {
  my $index = shift;

  my $self = [];
  my $count = 0;
  ($count, $index) = get_number($index);
  $index = skip($index, ':{');
  for (my $i = 0; $i < $count; $i++) {
    my ($key, $value);
    ($key, $index) = &parse($index);
    ($value, $index) = &parse($index);
    push(@$self, { $key => $value });
  }
  $index = skip($index, '}');
  return ($self, $index);
}

sub get_string {
  my $index = shift;

  my $size;
  my $value;
  ($size, $index) = get_number($index);
  $index = skip($index, ':"');
  $value = substr($data, $index, $size);
  $index += $size;
  $index = skip($index, '"');

  return ($value, $index);
}

sub get_number {
  my $index = shift;

  my $ch;
  my $num = undef;
  while(($ch = substr($data, $index, 1)) =~ /\d/) {
    $index++;
    if (!defined($num)) {
      $num = 0;
    }
    $num = 10 * $num + int($ch);
  }

  if (!defined($num)) {
    if($ch eq '-') {
       $index += 2;
    }
    else {
       die "Expected number at $index, found '$ch' instead\n";
    }
  }

  return ($num, $index);
}


sub skip {
  my $index = shift;
  my $expected = shift;

  my $size = length($expected);
  my $actual = substr($data, $index, $size);
  if ($actual ne $expected) {
    die "Expected '$expected' at $index, found '$actual' instead\n";
    exit;
  }

  return $index + $size;
}

