#!/usr/bin/env perl

use strict;
use warnings;
use diagnostics;

# Sebastien Moretti from the Vital-IT (https://www.vital-it.ch) Center
# for high-performance computing of the SIB Swiss Institute of Bioinformatics.
my $VERSION = '0.1.9';

use Cwd 'abs_path';
use DateTime;
use File::Copy;
use File::Slurp;
use Getopt::Long;
use List::MoreUtils qw(uniq);
use LWP::Simple;
use Text::Format;


# Configuration
my $desc_file = 'DESCRIPTION';
my $temp_dir  = 'ze_temp';
my $R_path    = '/usr/bin';
my %url       = ('bioc'            => 'http://bioconductor.org/packages/__RELEASE__/bioc/VIEWS',
                 'cran'            => 'https://cran.r-project.org/web/packages/',
                 'data-annotation' => 'http://bioconductor.org/packages/__RELEASE__/data/annotation/VIEWS',
                 'data-experiment' => 'http://bioconductor.org/packages/__RELEASE__/data/experiment/VIEWS',
                 'extra'           => 'http://bioconductor.org/packages/__RELEASE__/extra/VIEWS',
                 # Can use Bioconductor extra here because CRAN is checked before in alphanumeric order
                 );


my ($package, $R_version, $R_package, $B_version) = ('', 0, 0, 'release');
my ($debug)  = (0);
my ($infile) = ('');
my %opts = ('debug'       => \$debug,     # more verbose
            'package=s'   => \$package,   # Package name
            'R_version=s' => \$R_version,
            'R_package=s' => \$R_package,
            'B_version=s' => \$B_version,
            'file=s'      => \$infile,
           );

# Check arguments
my $test_options = Getopt::Long::GetOptions(%opts);
if ( !$test_options || ($package eq '' && $infile eq '') || ($package ne '' && $infile ne '') || $R_version eq '0' ){
    warn "\n\tInvalid or missing argument:
\te.g. $0 -package=<PACKAGE NAME>  -R_version=<VERSION OF R> [-R_package=<R PACKAGE VERSION> -B_version=<Bioconductor version> -debug]
\t\t   OR with a LOCAL file:
\te.g. $0 -file=<DESCRIPTION file> -R_version=<VERSION OF R> [-R_package=<R PACKAGE VERSION> -B_version=<Bioconductor version> -debug]
\t-package          Package name             e.g. SRAdb
\t-R_version        Version of R             e.g. 3.5.1
\t-R_package        Version of R package     e.g. 351 (by default R_version without dots)
\t-B_version        Version of Bioconductor  (by default last release)
\t-debug            More verbose
\t-file             Read a local R DESCRIPTION file
\n";
    exit 1;
}

if ( $infile ne '' ){
    if ( !-e $infile || !-r $infile || -z $infile ){
        die "Cannot read file [$infile]\n";
    }
    $infile = abs_path($infile);
}


if ( $R_package==0 ){
    $R_package = $R_version;
    $R_package =~ s{\.}{}g;
}

if ( !$infile && get_R_base_package($package) ){
    die "$package is already available with base R libraries\n";
}


mkdir $temp_dir;
chdir $temp_dir;
binmode STDOUT, ':utf8';

# Try to get package description file from different sources, in order
my $Source = '';
if ( $infile ){
    # Copy in $temp_dir where we are!
    copy("$infile", "./$desc_file");
    $Source = 'LOCAL';
}
else {
    URL:
    for my $url ( sort keys %url ){ # Luckyly sorting respect package source frequencies
        if ( $url eq 'cran' ){
            $url{$url} .= "$package/$desc_file";
        }
        else {
            $url{$url} =~ s{__RELEASE__}{$B_version};
        }

        my $content = get("$url{$url}");
        if ( defined $content && $content ne '' ){
            if ( $content =~ /Package: $package\n/ ){
                $Source = $url;
                # Shorten file
                $content =~ s{^.*?Package: $package\n}{Package: $package\n}sm;
                $content =~ s{\n\n.*$}{\n}sm;
                write_file("$desc_file", $content);
                last;
            }
        }
    }
}
die "Cannot get package info\n"  if ( $Source eq '' );


# Parse DESCRIPTION file
my %bioc_packages;
if ( -e "$desc_file" && -s "$desc_file" ){
    my ($Name, $Summary, $Version, $Description, $License) = ('', '', 0, '', '');
    my @Deps;
    my $requirements = '';
    my ($flag_desc, $flag_require, $flag_summary) = (0, 0, 0);
    my $ori_version  = 0;
    LINE:
    for my $line ( read_file("$desc_file") ){
        if ( $flag_desc==1 && $line =~ /^\w+:/ ){
            $flag_desc = 0;
        }
        elsif ( $flag_desc==1 ){
            $Description .= $line;
        }

        if ( $flag_require==1 && $line =~ /^\w+:/ ){
            $flag_require = 0;
        }
        elsif ( $flag_require==1 ){
            chomp $line;
            $requirements .= $line;
        }

        if ( $flag_summary==1 && $line =~ /^\w+:/ ){
            $flag_summary = 0;
        }
        elsif ( $flag_summary==1 ){
            $Summary .= $line;
            $Summary =~ s{\n}{ };
        }

        if ( $line =~ /^Package:\s*(.+)/ ){
            $Name = trim($1);
            if ( $Source eq 'LOCAL' ){
                $package = $Name;
            }
            die "Description name and provided package name do not match: [$package]-[$Name]\n"  if ( $package ne $Name );
        }
        elsif ( $line =~ /^Title:\s*(.+)/ ){
            $Summary = $1;
            $flag_summary = 1;
        }
        elsif ( $line =~ /^Version:\s*(.+)/ ){
            $Version     = trim($1);
            $ori_version = $Version;
            $Version     =~ s{\-}{\.}g; # To respect RPM version format
        }
        elsif ( $line =~ /^License:\s*(.+)/ ){
            $License = format_license( trim($1) );
        }
        elsif ( $line =~ /^Description:\s*(.+)/ ){
            $Description = $1."\n";
            $flag_desc = 1;
        }
        elsif ( $line =~ /^Depends:\s*(.+)/ || $line =~ /^Imports:\s*(.+)/ || $line =~ /^LinkingTo:\s*(.+)/ ){
            if ( $requirements ne '' ){
                $requirements .= ', ';
            }
            $requirements .= $1;
            $flag_require = 1;
        }
        #TODO Add SystemRequirements but strange format most of the time
    }

    $requirements =~ s{  +}{ }g;
    $requirements =~ s{\s*Authors\@R.+}{};
    push @Deps, map { trim($_) }
                split(/ *, */, $requirements);

    # Clean and format Description & Summary
    my $text = Text::Format->new;
    $text->firstIndent(0);
    $text->justify(1);
    chomp $Description;
    $Description =~ s{\n}{ }g;
    $Description =~ s{  +}{ }g;
    $Description =~ s{\s*Authors\@R.+}{};
    $Description .= '.'  if ( $Description !~ /[\.\!\?]$/ );
    $Description = $text->format($Description);
    chomp $Description;
    $Summary     =~ s{  +}{ }mg;
    $Summary     =~ s{\s*Authors\@R.+}{};
    $Summary     = trim($Summary);
    $Summary     =~ s{\.$}{}  if ( $Summary !~ /\.\.$/ );
    $Summary     = ucfirst($Summary);

    my $deps = join(',', uniq @Deps);
    # Get Bioconductor package list for right dependencies between Bioconductor and CRAN
#    if ( $Source ne 'cran' ){
        my @Bioc_urls = ("http://bioconductor.org/packages/$B_version/bioc/src/contrib/PACKAGES",
                         "http://bioconductor.org/packages/$B_version/data/annotation/src/contrib/PACKAGES",
                         "http://bioconductor.org/packages/$B_version/data/experiment/src/contrib/PACKAGES",
#                         "http://bioconductor.org/packages/$B_version/extra/src/contrib/PACKAGES",
                        # Do not use Bioconductor extra here because some packages are duplicated with CRAN,
                        # and more up-to-date in CRAN
                        );
        for my $url ( @Bioc_urls ){
            my $content = get("$url");
            if ( defined $content && $content ne '' ){
                map  { /^Package:\s+(.+)/; $bioc_packages{$1}=1; }
                grep { /^Package:\s+/ }
                split("\n", $content);
            }
        }
#    }

    # Print raw spec
    get_template($Version, $Summary, $License, $Description, $Source, $ori_version, $deps);
}
else {
    die "No information file to parse\n";
}


# Cleaning
END {
    chdir '..';
    system("rm -Rf $temp_dir");
}

exit 0;


sub trim {
    my ($string) = @_;

    $string =~ s{^\s+}{}m;
    $string =~ s{\s+$}{}m;
    return $string;
}

sub format_license {
    my ($string) = @_;

    # See with "rpmlint -I invalid-license"
    #TODO 'ACM | file LICENSE'  => ???
    my %license = ('AGPL-3'                                 => 'AGPLv3',
                   'Apache License 2.0'                     => 'AGPLv2',
                   'Artistic License 2.0'                   => 'Artistic 2.0',
                   'Artistic-1.0'                           => 'Artistic 1.0',
                   'Artistic-1.0 | file LICENSE'            => 'Artistic 1.0',
                   'Artistic-2.0'                           => 'Artistic 2.0',
                   'Artistic-2.0 + file LICENSE'            => 'Artistic 2.0',
                   'Artistic-2.0 | GPL-2 + file LICENSE'    => 'GPLv2 or Artistic',
                   'Artistic-2.0 | file LICENSE'            => 'Artistic 2.0',
                   'BSD'                                    => 'BSD',
                   'BSD_2_clause'                           => 'BSD Protection',
                   'BSD_2_clause + file LICENSE'            => 'BSD Protection',
                   'BSD_3_clause'                           => 'BSD Protection',
                   'BSD_3_clause + file LICENSE'            => 'BSD Protection',
                   'CC BY-NC 3.0'                           => 'CC-BY-NC 3.0',
                   'CC BY-NC-ND 4.0'                        => 'CC-BY-NC-ND 4.0',
                   'CC0'                                    => 'CC0',
                   'CPL'                                    => 'CPL',
                   'EPL'                                    => 'EPL',
                   'EPL (>= 1.0)'                           => 'EPL',
                   'FreeBSD | GPL-2 | file LICENSE'         => 'GPLv2',
                   'GPL'                                    => 'GPL+',
                   'GPL (== 2)'                             => 'GPLv2',
                   'GPL (> 2)'                              => 'GPLv3+',
                   'GPL (>=2)'                              => 'GPLv2+',
                   'GPL (>= 2)'                             => 'GPLv2+',
                   'GPL (>= 2) + file LICENSE'              => 'GPLv2+ with exceptions',
                   'GPL (>= 2.0)'                           => 'GPLv2+',
                   'GPL (>=2.0)'                            => 'GPLv2+',
                   'GPL (>= 2.1)'                           => 'GPLv2+',
                   'GPL (>= 2.14)'                          => 'GPLv2+',
                   'GPL (>= 3)'                             => 'GPLv3+',
                   'GPL (>= 3.0)'                           => 'GPLv3+',
                   'GPL (>=3.0)'                            => 'GPLv3+',
                   'GPL-2'                                  => 'GPLv2',
                   'GPL-2 + file LICENSE'                   => 'GPLv2 with exceptions',
                   'GPL-2 | GPL-3'                          => 'GPLv2+',
                   'GPL-2 | file LICENSE'                   => 'GPLv2 with exceptions',
                   'GPL (>= 2) | file LICENCE'              => 'GPLv2+ with exceptions',
                   'GPL-3'                                  => 'GPLv3',
                   'GPL-3 + file LICENSE'                   => 'GPLv3 with exceptions',
                   'GPL-3 | file LICENSE'                   => 'GPLv3 with exceptions',
                   'LGPL'                                   => 'LGPLv2',
                   'LGPL (>= 2)'                            => 'LGPLv2+',
                   'LGPL (>= 2.0)'                          => 'LGPLv2+',
                   'LGPL (>= 2.1)'                          => 'LGPLv2+',
                   'LGPL (>= 2.1) | file LICENSE'           => 'LGPLv2+ with exceptions',
                   'LGPL (>= 3)'                            => 'LGPLv3+',
                   'LGPL (>= 3.0)'                          => 'LGPLv3+',
                   'LGPL-2'                                 => 'LGPLv2',
                   'LGPL-2.1'                               => 'LGPLv2+',
                   'LGPL-3'                                 => 'LGPLv3',
                   'LGPL-3 | Apache License 2.0'            => 'LGPLv3 and AGPLv2',
                   'MIT'                                    => 'MIT',
                   'MIT + file LICENSE'                     => 'MIT with advertising',
                   'Unlimited'                              => 'Public Use',
                   'file LICENSE'                           => '',
                  );

    return $license{$string} || 'UNKNOWN';
}

sub get_R_base_package {
    my ($package) = @_;

    #FIXME This list is hardcoded, from Fedora R spec file!!!
    my %base = ('base'          => 1,
                #'boot'          => 1,
                #'class'         => 1,
                #'cluster'       => 1,
                #'codetools'     => 1,
                'compiler'      => 1,
                'datasets'      => 1,
                #'foreign'       => 1,
                'graphics'      => 1,
                'grDevices'     => 1,
                'grid'          => 1,
                #'KernSmooth'    => 1,
                #'lattice'       => 1,
                #'MASS'          => 1,
                #'Matrix'        => 1,
                'methods'       => 1,
                #'mgcv'          => 1,
                #'nlme'          => 1,
                #'nnet'          => 1,
                'parallel'      => 1,
                #'rpart'         => 1,
                #'spatial'       => 1,
                'splines'       => 1,
                'stats'         => 1,
                'stats4'        => 1,
                #'survival'      => 1,
                'tcltk'         => 1,
                'tools'         => 1,
                'translations'  => 1,
                'utils'         => 1,
               );

    return exists $base{$package} ? 1 : 0;
}

sub get_template {
    my ($Version, $Summary, $License, $Description, $Source, $original_version, $deps) = @_;

    my $Name    = $Source eq 'bioc'            ? "R$R_package-Bioconductor-%{_realname}"
                : $Source eq 'cran'            ? "R$R_package-%{_realname}"
                : $Source eq 'data-annotation' ? "R$R_package-Bioconductor-%{_realname}"
                : $Source eq 'data-experiment' ? "R$R_package-Bioconductor-%{_realname}"
                : $Source eq 'extra'           ? "R$R_package-Bioconductor-%{_realname}"
                : $Source eq 'LOCAL'           ? "R$R_package-%{_realname}"
                :                                '';

    my $URL     = $Source eq 'bioc'            ? 'http://www.bioconductor.org/packages/__RELEASE__/bioc/html/%{_realname}.html'
                : $Source eq 'cran'            ? 'https://cran.r-project.org/package=%{_realname}'
                : $Source eq 'data-annotation' ? 'http://www.bioconductor.org/packages/__RELEASE__/data/annotation/html/%{_realname}.html'
                : $Source eq 'data-experiment' ? 'http://www.bioconductor.org/packages/__RELEASE__/data/experiment/html/%{_realname}.html'
                : $Source eq 'extra'           ? 'http://www.bioconductor.org/packages/__RELEASE__/extra/html/%{_realname}.html'
                : $Source eq 'LOCAL'           ? 'TODO:LOCAL'
                :                                '';

    my $Source0 = $Source eq 'bioc'            ? 'http://www.bioconductor.org/packages/__RELEASE__/bioc/src/contrib/%{_realname}_%{version}.tar.gz'
                : $Source eq 'cran'            ? "https://cran.r-project.org/src/contrib/${package}_$original_version.tar.gz"
                : $Source eq 'data-annotation' ? 'http://www.bioconductor.org/packages/__RELEASE__/data/annotation/src/contrib/%{_realname}_%{version}.tar.gz'
                : $Source eq 'data-experiment' ? 'http://www.bioconductor.org/packages/__RELEASE__/data/experiment/src/contrib/%{_realname}_%{version}.tar.gz'
                : $Source eq 'extra'           ? 'http://www.bioconductor.org/packages/__RELEASE__/extra/src/contrib/%{_realname}_%{version}.tar.gz'
                : $Source eq 'LOCAL'           ? 'TODO:LOCAL'
                :                                '';

    $URL     =~ s{__RELEASE__}{$B_version};
    $Source0 =~ s{__RELEASE__}{$B_version};

    # Dependencies
    my $DepsB = '';
    my $DepsR = '';
    for my $dep ( split(/,\s*/, $deps) ){
        my $is_R = 0;
        my ($depsB, $depsR) = ('', '');
        next  if ( $dep eq '' );
        if ( $dep =~ /[<>=]/ ){ # has version restriction
            $dep =~ s{(\S)\(}{$1 (}; # For cases like R(>= 2.10)
            my @field = split(/\s+/, $dep);
            $is_R = 1  if ( $field[0] eq 'R' ); # If R is mentioned it is associated with a version
            $is_R = 2  if ( get_R_base_package($field[0]) );
            my $ori_version   = $field[-1];
            $ori_version     =~ s{\)}{}g;
            my $fixed_version = $ori_version;
            $fixed_version   =~ s{\-}{\.}g;
            $dep =~ s{[\(\)]}{}g;
            $dep =~ s{ $ori_version}{ $fixed_version}g;
        }

        if ( $is_R==1 ){
            $depsB .= "BuildRequires:  $dep\n";
            $depsR .= "Requires:       $dep\n";
        }
        elsif ( get_R_base_package($dep) || $is_R==2 ){
            $depsB .= "BuildRequires:  R$R_package-$dep\n";
            $depsR .= "Requires:       R$R_package-$dep\n";
        }
        # Distinct between CRAN (e.g. R311-XML) and Bioconductor packages (e.g. R311-Bioconductor-Biobase)
        else {
            my @field = split(' ', $dep);
            if ( exists $bioc_packages{$field[0]} ){
                $depsB .= "BuildRequires:  R$R_package-Bioconductor-$dep\n";
                $depsR .= "Requires:       R$R_package-Bioconductor-$dep\n";
            }
            else {
                $depsB .= "BuildRequires:  R$R_package-$dep\n";
                $depsR .= "Requires:       R$R_package-$dep\n";
            }
        }
        my @part = split(/\s+/, $depsB);
        if ( scalar @part == 2 ){ # Without version dependencies
            $DepsB .= $depsB;
            $DepsR .= $depsR;
        }
        else {
            shift @part;
            # Align versions for better display
            $DepsB .= sprintf("%-15s %-47s %2s %s\n", 'BuildRequires:', @part);
            $DepsR .= sprintf("%-15s %-47s %2s %s\n", 'Requires:',      @part);
        }
    }

    # Log
    my $dt       = DateTime->now;
    my $Date     = $dt->day_abbr.' '.$dt->month_abbr.' '.$dt->day.' '.$dt->year;
    my $Packager = '';
    my $username  = `git config --get user.name`;  chomp $username;
    my $useremail = `git config --get user.email`; chomp $useremail;
    if ( $username ne '' || $useremail ne '' ){
        $Packager = "$username <$useremail>";
    }
    if ( $Packager eq '' && -e "$ENV{'HOME'}/.rpmmacros" ){
        my @field = map  { /^\%packager\s+(.+)$/; $1 }
                    grep { /^\%packager/i }
                    read_file("$ENV{'HOME'}/.rpmmacros", chomp => 1);
        $Packager = $field[0] || '';
    }


    # Build template
    my $template = "# Define R version, package prefix and category
%define _R_version      $R_version
%define _prefix         $R_path/R/%{_R_version}
%define category        Applications/R-BioC

%define         _realname  $package
Name:           $Name
Version:        $Version
Release:        1%{?dist}
Summary:        $Summary
Group:          %{category}
License:        $License
URL:            $URL
Source0:        $Source0
Prefix:         %{_prefix}

BuildRequires:  R                                                = %{_R_version}
BuildRequires:  R-core                                           = %{_R_version}
BuildRequires:  R-devel                                          = %{_R_version}
$DepsB

Requires:       R                                                = %{_R_version}
Requires:       R-core                                           = %{_R_version}
$DepsR

%description
$Description


%prep


%build


%install
# Special R trick to allow mock to write in ../R/library/ folder
mkdir -p  \$RPM_BUILD_ROOT%{_libdir}/R/library
export R_LIBS=\$RPM_BUILD_ROOT%{_libdir}/R/library
export LIBS=-ldl
$R_path/R CMD INSTALL  %{SOURCE0}

# Installation cleaning
rm -rf \$RPM_BUILD_ROOT%{_libdir}/R/library/R.css
find \$RPM_BUILD_ROOT%{_libdir}/R/library/ -type f -name COPYING -exec chmod -x {} \\;
find \$RPM_BUILD_ROOT%{_libdir}/R/library/ -type f -name INDEX   -exec chmod -x {} \\;


%files
%dir %{_libdir}/R/library/
%{_libdir}/R/library/%{_realname}


%changelog
* $Date $Packager - $Version-1
- First import";

    print $template, "\n";
    return;
}

