#!/usr/bin/env perl
# convert the HVSC Top 100 from SID into WAV/MP3 files.
# http://www.transbyte.org/SID/HVSC_Top100.html
# By default this Perl program uses sidplay2 and lame to convert to the WAV and MP3 file formats.
# You can use other programs by setting the SIDPLAY and LAME environment variables
# (or by changing the $ENV defaults below).
#
# this program was written by Dirk Jagdmann <doj@cubic.org> 2017-06-05

use strict;
use Digest::MD5;

# Download and unpack the HVSC Top 100, unless the unpacked directory is
# already present.  An archive that already exists in the current directory
# is reused without downloading it again.
my $dir = 'HVSC_Top_100';
unless (-d $dir) {
    my $fn  = 'HVSC_Top_100.zip';
    my $url = "http://www.transbyte.org/SID/$fn";

    # Downloaders are tried in order until the archive exists.  The commands
    # are given as lists so that no shell is involved.  curl gets -f so that
    # an HTTP error page is not stored as if it were the archive.
    my @downloaders = (
        [ 'wget', $url ],
        [ 'curl', '-f', '-o', $fn, $url ],
    );
    foreach my $cmd (@downloaders) {
        last if -f $fn;
        # A non-zero status means the tool is missing or the transfer failed.
        # Remove any partial file so that neither the next downloader nor a
        # later run of this program mistakes it for a complete archive.
        unlink($fn) if system(@$cmd) != 0;
    }
    die "could not get $url" unless -f $fn;

    # unzip's exit status is deliberately not checked here; success is judged
    # by the presence of the unpacked directory.
    system('unzip', $fn);
    die "could not unpack $dir" unless -d $dir;
}
# all following file names are relative to the unpacked directory
chdir($dir) or die "could not chdir $dir: $!";

my $fn;     # SID file named by the most recent "; /path" line of the database
my @job;    # one hash ref per song to convert, in database order

# Read the song length database.  Apart from the "[Database]" header it
# consists of pairs of lines:
#   ; /path/to/file.sid
#   <32 hex digit md5>=<min:sec> <min:sec> ...
# Every listed song that is at least 20 seconds long becomes a job hash with
# the keys: sid, o (1-based song number), len, total, wav and mp3.
my $songlengths = 'DOCUMENTS/Songlengths.txt';
open(my $db, '<', $songlengths) or die "could not open $songlengths: $!";
while (my $line = <$db>)
{
    # remove newline characters (also handles DOS style line endings)
    $line =~ s/\r?\n$//;
    # skip first line
    next if $line =~ /^\[Database\]$/;
    # match a filename line
    if ($line =~ m!^; /(.+)!) {
        $fn = $1;
        #print "found $fn\n";
        die "could not find SID file $fn" unless -f $fn;
        next;
    }
    # match a song lengths line
    if ($line =~ /^([0-9a-f]{32})=(.+)/) {
        my $md5 = $1;
        my $songs = $2;

        # a lengths line is only meaningful after a filename line
        die "$songlengths line $.: song lengths without a preceding filename line"
            unless defined $fn;

        # This check is disabled; change the condition to 1 to enable it.
        if (0) {
            # check SID file MD5
            open(my $sid_fh, '<', $fn) or die "could not open $fn: $!";
            binmode($sid_fh);
            my $ctx = Digest::MD5->new or die;
            $ctx->addfile($sid_fh);
            my $sid_md5 = $ctx->hexdigest();
            close($sid_fh);
            die "SID file $fn MD5 sum $sid_md5 does not match songlength database $md5" unless $sid_md5 eq $md5;
        }

        # parse songs lengths
        my @local_jobs;
        my $song_no = 0;
        while ($songs =~ /(\d+):(\d+)(?:\(\w\))?/g) {
            my $min = $1;
            my $sec = $2;
            # every listed song is counted, even if it is skipped below, so
            # that the number is the song's position on the line
            ++$song_no;
            # don't convert songs that are shorter than 20 seconds
            next if $min == 0 && $sec < 20;
            my $len = "${min}:${sec}";
            #print "$fn $len\n";

            # setup a conversion job
            my $h = { 'sid' => $fn,
                      'o' => $song_no,
                      'len' => $len,
                    };
            push(@job, $h);
            push(@local_jobs, $h);
        }

        # set total number of songs and create WAV and MP3 filenames
        my $total = $song_no;
        foreach my $h (@local_jobs) {
            $h->{total} = $total;

            # zero pad the song number to the width of the total
            my $num = sprintf(formatstr($total), $h->{o});

            # Output files are written to the current directory: strip the
            # directory part and only a trailing ".sid" extension, so that a
            # ".sid" or ".wav" elsewhere in the name is left untouched.
            my $base = $h->{sid};
            $base =~ s!.+/!!;
            $base =~ s/\.sid$//i;
            $h->{wav} = "$base-$num.wav";
            $h->{mp3} = "$base-$num.mp3";
        }
    }
}
close($db);

# setup defaults for conversion programs
# Both values are passed to the shell as written, so they may contain
# additional command line options.
$ENV{SIDPLAY} ||= 'sidplay2';
$ENV{LAME} ||= 'lame --quiet -b 64 -h --add-id3v2 --tg Instrumental';

# convert all jobs
for my $i (0 .. $#job) {
    my $h = $job[$i];
    my $num = "$h->{o}/$h->{total}";
    print "$i/$#job $h->{sid} #$num $h->{len}\n";

    # file names as single, safely quoted shell words
    my $sid = _shell_quote($h->{sid});
    my $wav = _shell_quote($h->{wav});
    my $mp3 = _shell_quote($h->{mp3});

    # convert SID to WAV
    # stderr is captured as well, it is part of the output parsed below
    unlink($h->{wav});
    my $o = `$ENV{SIDPLAY} -o$h->{o} -t$h->{len} -w$wav $sid 2>&1`;
    $o = '' unless defined $o;
    unless (-f $h->{wav}) {
        # without a WAV file there is nothing to encode
        warn "could not convert $h->{sid} #$num to $h->{wav}\n";
        next;
    }

    # parse output of sidplay to get information
    my $title = '';
    my $author = '';
    my $released = '';
    my $year = '';
    if ($o =~ /Title\s+: (.+)\s+\|/) {
        $title = $1;
    }
    if ($o =~ /Author\s+: (.+)\s+\|/) {
        $author = $1;
    }
    if ($o =~ /Released\s+: (.+)\s+\+/) {
        $released = $1;
        # a leading 4 digit year (19xx or 20xx) is used for the year tag
        if ($released =~ /^((19|20)\d\d)/)
        {
            $year = $1;
        }
    }
    # add number to title
    $title .= " $num" if $num ne '1/1';

    # convert WAV to MP3
    # All tag values are quoted for the shell instead of having problematic
    # characters removed, so they reach the encoder unchanged.
    unlink($h->{mp3});
    my $cmd = join(' ',
                   $ENV{LAME},
                   '--tt', _shell_quote($title),
                   '--ta', _shell_quote($author),
                   '--tl', _shell_quote($released),
                   '--tn', _shell_quote($num),
                   '--ty', _shell_quote($year),
                   $wav, $mp3);
    system($cmd) == 0 or warn "could not encode $h->{wav} to $h->{mp3}\n";
    unlink($h->{wav});
}

# Quote a string so that a POSIX shell treats it as one literal word.
# The string is wrapped in single quotes; an embedded single quote is written
# as '\'' (close the quotes, an escaped quote, reopen the quotes).
# parameter: the string to quote
# returns: the quoted string
sub _shell_quote
{
    my ($str) = @_;
    $str =~ s/'/'\\''/g;
    return "'$str'";
}

# Return the sprintf format used to print a song number, chosen from the
# total number of songs so that all numbers of one SID file have equal width.
# parameter: the total number of songs
# returns: '%i' below 10, a zero padded 2, 3 or 4 digit format below 100,
#          1000 or 10000, and '%s' for anything larger
sub formatstr
{
    my ($count) = @_;

    # exclusive upper limit => format, in ascending order of the limit
    my @formats = (
        [    10, '%i'   ],
        [   100, '%02i' ],
        [  1000, '%03i' ],
        [ 10000, '%04i' ],
    );
    foreach my $entry (@formats) {
        my ($limit, $format) = @$entry;
        return $format if $count < $limit;
    }
    return '%s';
}
