#!/usr/bin/env perl
# convert a HVSC .sid to .mp3
# get the High Voltage SID Collection at http://www.hvsc.c64.org/
#
# By default this perl program uses sidplay2 and lame to convert to WAV and MP3 file formats.
# You can use other programs by changing the $ENV variables below.
#
# this program was written by Dirk Jagdmann <doj@cubic.org> 2017-06-08

use strict;
use Digest::MD5;

# check command line
# The song length database is opened as DOCUMENTS/Songlengths.txt relative to
# the current directory, so the presence of DOCUMENTS is used as the test for
# "started from the HVSC top level directory".
die "you have to start convert.pl from the HVSC top level directory" unless -d 'DOCUMENTS';
my $sid = $ARGV[0];
# Without an argument $sid is undef; print a usage hint instead of the
# uninformative "could not find " message.
die "usage: $0 <path/to/file.sid>\n" unless defined $sid && length $sid;
die "could not find $sid" unless -f $sid;

# read song length database
#
# The database is parsed line by line:
#   "[Database]"            header line, ignored
#   "; /<path>"             names the SID file the following entry belongs to
#   "<32 hex digits>=<...>" the song lengths of that SID file
# Results:
#   %sl   maps SID path (without the leading '/') to its song lengths string
#   %md5  maps the 32 digit hex key to the same song lengths string
my %sl;
my %md5;
my $fn;
my $songlengths = 'DOCUMENTS/Songlengths.txt';
open(my $db, '<', $songlengths) or die "could not open $songlengths: $!";
while (my $line = <$db>)
{
    # remove newline characters (unix and dos line endings)
    $line =~ s/\r?\n$//;
    # skip first line
    next if $line =~ /^\[Database\]$/;
    # match a filename line
    if ($line =~ m!^; /(.+)!) {
        $fn = $1;
        #print "found $fn\n";
        die "could not find SID file $fn" unless -f $fn;
        next;
    }
    # match a song lengths line
    if ($line =~ /^([0-9a-f]{32})=(.+)/) {
        my ($digest, $songs) = ($1, $2);
        # a lengths line is only meaningful after a filename line; without
        # one there is no key to store it under in %sl
        unless (defined $fn) {
            print STDERR "song lengths line without preceding filename: $line\n";
            next;
        }
        $sl{$fn} = $songs;
        $md5{$digest} = $songs;
        next;
    }
    # complain about lines we don't parse
    print STDERR "unknown Songlengths.txt line: $line\n";
}
close($db);

die "could not find $sid in song length database" unless defined $sl{$sid};

# convert all songs
# Every "min:sec" entry (optionally followed by a one character marker in
# parentheses) of the database line is one song. $o counts all songs, also
# the ones that are skipped, so it ends up as the total number of songs and
# each job keeps the song's position within the SID file.
my @job;
my $o = 0;
while ($sl{$sid} =~ /(\d+):(\d+)(\(\w\))?/g) {
    my ($min, $sec) = ($1, $2);
    ++$o;

    # only songs of at least 20 seconds get a conversion job
    if ($min != 0 || $sec >= 20) {
        push @job, {
            'sid' => $sid,
            'o'   => $o,
            'len' => "${min}:${sec}",
        };
    }
}

# set total number of songs and create WAV and MP3 filenames
# The output files are written to the current directory and are named
# "<sid basename without .sid>-<song number>.wav" / ".mp3". The song number is
# zero padded to the width of the total number of songs (see formatstr).
my $total = $o;
my $numfmt = formatstr($total);
foreach my $h (@job) {
    $h->{total} = $total;

    my $num = sprintf($numfmt, $h->{o});

    # strip the directory part
    my $base = $h->{sid};
    $base =~ s!.+/!!;
    # Strip only a trailing ".sid". An unanchored match would rewrite the
    # first ".sid" anywhere in the name (e.g. "a.sidekick.sid").
    $base =~ s/\.sid\z//;

    # Build both names from the base name instead of substituting ".wav" in
    # the WAV name, so a ".wav" inside the base name can not be hit and the
    # two names always differ from each other and from the SID file name.
    $h->{wav} = "$base-$num.wav";
    $h->{mp3} = "$base-$num.mp3";
}

# setup defaults for conversion programs
# Both values are pasted verbatim into a shell command line, so they may
# contain additional options (like the LAME default does).
$ENV{SIDPLAY} ||= 'sidplay2';
$ENV{LAME} ||= 'lame --quiet -b 64 -h --add-id3v2 --tg Instrumental';

# Quote a string as a single word for a unix shell: wrap it in single quotes
# and write every embedded single quote as '\''. Inside single quotes the
# shell interprets nothing, so ", $, ` and \ are passed through unchanged.
my $shell_quote = sub {
    my $text = shift;
    $text =~ s/'/'\\''/g;
    return "'$text'";
};

# convert all jobs
for my $i (0 .. $#job) {
    my $h = $job[$i];
    my $num = "$h->{o}/$h->{total}";
    print "$i/$#job $h->{sid} #$num $h->{len}\n";

    # convert SID to WAV; stderr is merged into the captured output
    unlink($h->{wav});
    my $sid_cmd = join(' ',
                       $ENV{SIDPLAY},
                       "-o$h->{o}",
                       "-t$h->{len}",
                       '-w' . $shell_quote->($h->{wav}),
                       $shell_quote->($h->{sid}));
    my $output = `$sid_cmd 2>&1`;

    # without a WAV file there is nothing to encode
    unless (-e $h->{wav}) {
        warn "$ENV{SIDPLAY} did not create $h->{wav} for $h->{sid} #$num:\n$output\n";
        next;
    }

    # parse output of sidplay to get information
    # The value is matched non-greedy and up to the end of its line, so the
    # padding before an optional closing '|' is not part of the capture.
    my %tag = ('Title' => '', 'Author' => '', 'Released' => '');
    for my $field (sort keys %tag) {
        if ($output =~ /$field\s+: (.+?)\s*\|?\s*$/m) {
            $tag{$field} = $1;
        }
    }
    my $title = $tag{Title};
    my $author = $tag{Author};
    my $released = $tag{Released};

    # the release info may start with a four digit year
    my $year = '';
    if ($released =~ /^((?:19|20)\d\d)/) {
        $year = $1;
    }

    # add number to title
    $title .= " $num" if $num ne '1/1';

    # convert WAV to MP3
    # All tag values and file names are shell quoted, no characters have to
    # be removed from them.
    unlink($h->{mp3});
    my $lame_cmd = join(' ',
                        $ENV{LAME},
                        '--tt', $shell_quote->($title),
                        '--ta', $shell_quote->($author),
                        '--tl', $shell_quote->($released),
                        '--tn', $shell_quote->($num),
                        '--ty', $shell_quote->($year),
                        $shell_quote->($h->{wav}),
                        $shell_quote->($h->{mp3}));
    if (system($lame_cmd) != 0) {
        warn "encoding $h->{wav} to $h->{mp3} failed (exit status $?)\n";
    }
    unlink($h->{wav});
}

# Return the sprintf format used to print a song number, chosen by the total
# number of songs $count: the number is zero padded to the number of digits
# of $count for totals below 10000; larger totals are printed with '%s'.
sub formatstr
{
    my ($count) = @_;

    # [ exclusive upper limit of $count, format string ], ascending
    my @formats = (
        [    10, '%i'   ],
        [   100, '%02i' ],
        [  1000, '%03i' ],
        [ 10000, '%04i' ],
    );

    foreach my $entry (@formats) {
        my ($limit, $format) = @$entry;
        return $format if $count < $limit;
    }
    return '%s';
}
