#!/usr/bin/perl
# #
# Software subject to following license(s):
#   Apache 2 License (http://www.opensource.org/licenses/apache2.0)
#   Copyright (c) Responsible Organization
#

# #
# Current developer(s):
#   Luis Fernando Muñoz Mejías <Luis.Munoz@UGent.be>
#

# 
# #
# ccm, 26.2.0, 1, Tue Apr 07 2026
#

use strict;
use warnings;

BEGIN {
    unshift(@INC, '/usr/lib/perl');
    pop @INC if $INC[-1] eq '.';
}

use Getopt::Long;
use Digest::MD5 qw(md5_hex);
use Errno qw(ESRCH EPERM);
use File::Path;
use EDG::WP4::CCM::CCfg qw(initCfg);
use EDG::WP4::CCM::CacheManager::DB qw(read_db close_db);
use EDG::WP4::CCM::Fetch::Download;
use LC::Stat qw(:ST);

#
# Global Variables
#
# $CacheDir     cache directory path
# $DataDir      data subdirectory path
# $TimeAging    time limit for not used profiles and temp files
# $CurrentCID   content of current.cid file
# $LatestCID    content of latest.cid file
# @Profile      all the available profiles
# %NonactiveURL list of non active URLs
# $Tolerant     strict or tolerant behaviour to errors
# $keepCID      oldest CID to keep
#

my ($CacheDir, $DataDir, $TmpDir);
my ($TimeAging, $KeepCID);
my ($CurrentCID, $LatestCID);
my (@Profile);
my (%NonactiveURL);
my ($Tolerant);

# Perl warnings could be transformed into fatal errors
$SIG{__WARN__} = sub { $Tolerant ? warn $_[0] :  die $_[0] };

sub Warn
{
    my $msg = $_[0];
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    $msg = sprintf("%04d/%02d/%02d-%02d:%02d:%02d [WARN] %s",
                   $year+1900, $mon+1, $mday, $hour, $min, $sec,$msg);
    print STDERR $msg . "\n";
}


sub Info
{
    my $msg = shift;
    my ($sec, $min, $hour, $mday, $mon, $year) = localtime(time);
    $msg = sprintf("%04d/%02d/%02d-%02d:%02d:%02d [INFO] %s",
                   $year+1900, $mon+1, $mday, $hour, $min, $sec,$msg);
    print "$msg\n";
}


#
# read_cid
#
# Read from file a configuration ID.
#
# Open the file given as parameter and read a profile CID number,
# checking the proper CID format.
#
sub read_cid
{
    my ($filename) = @_;
    my ($id);

    open(my $FILE , '<', $filename) or die("$filename open failed: $!");
    $id = join("", <$FILE>);
    $id =~ /^(\d+)\n$/ or die("$filename unknown CID format");
    $id = $1;
    close($FILE);

    return($id);
}

#
# init
#
# Initialise global variables.
#
# The init() function does the following:
#   - process command line options
#   - get current and latest CID
#   - get list of available profiles
#   - get list of cached URLs
#
sub init
{
    my (@url_list);
    my ($ccm_conf);

    # Process command line options
    my $usage = "$0 [--time seconds] [--conf ccm.conf] [--tolerant]";

    $ccm_conf = "";
    $Tolerant  = 0;
    my $help = 0;
    GetOptions('config=s'  => \$ccm_conf,
               'time=i'    => \$TimeAging,
               'tolerant'  => \$Tolerant,
               'help'      => \$help,
        ) or die("error processing command line: $!\n $usage");

    if (!$ccm_conf) {
        initCfg();
    } else {
        initCfg($ccm_conf);
    }

    $TimeAging = EDG::WP4::CCM::CCfg::getCfgValue("purge_time") if (!defined($TimeAging));
    die("Usage: $usage\n") if ($help);

    $CacheDir = EDG::WP4::CCM::CCfg::getCfgValue("cache_root");
    my $numKeep = EDG::WP4::CCM::CCfg::getCfgValue("keep_old");

    $DataDir = $CacheDir . "/data";
    $TmpDir = $CacheDir . "/tmp";

    # Get current and latest CID

    $CurrentCID = read_cid("${CacheDir}/current.cid");
    $LatestCID  = read_cid("${CacheDir}/latest.cid");
    $KeepCID    = $CurrentCID - $numKeep;

    # Get list of configuration profiles

    my $DIR;
    opendir($DIR , $CacheDir) or die("$CacheDir open failed: $!");
    @Profile = grep (/profile\.\d+/, readdir($DIR));
    close($DIR);

    # Get list of cached URLs in data directory

    opendir($DIR , $DataDir) or die("$DataDir open failed: $!");
    @url_list = grep { $_ ne '.' and $_ ne '..' } readdir($DIR);
    @NonactiveURL{@url_list} = ("") x @url_list;
    close($DIR);

    return;
}

#
# clean_profile
#
# The clean_profile() funtion, given a profile name, removes
# all dead active.pid files.
# For each active.pid file, it checks if the processes is
# running, and if not, remove the file.
#
sub clean_profile
{
    my ($profile) = @_;

    my $DIR;
    if( !opendir($DIR , "${CacheDir}/$profile") ) {
        Warn("${CacheDir}/$profile open failed: $!");
        return;
    }

    while (my $file = readdir($DIR)) {

        next unless ($file =~ /ccm-active-$profile-(\d+)/);
        my $active_pid = $1;

        if( kill (0 => $active_pid) ) {
            # the process is alive
            Info("process ${active_pid} holding CID=$profile is still alive, not cleaning up");
            next;
        } elsif ( $! == EPERM ) {
            # it is alive but I am not the owner, that's ok
            Warn("process is not owned by me: ${active_pid}, cannot clean up");
            next;
        } elsif ( $! == ESRCH ) {
            # the process has died
            Info("process ${active_pid} holding CCM cache CID=$profile has died without removing active flag, cleaning up");
            $file = "${CacheDir}/${profile}/ccm-active-$profile-$active_pid";
            unlink($file) or Warn("error unlinking file $file: $!");
        } else {
            # this is an error
            Warn("error signaling pid ${active_pid}: $!");
        }

    }

    close($DIR);

    return;
}

#
# num_active_pids
#
# Given a profile, return the number of active.pid files
#
sub num_active_pids
{
    my ($profile) = @_;
    my (@files, $n_files);

    opendir(my $DIR , "${CacheDir}/$profile")
                  or die("${CacheDir}/$profile open failed $!");
    @files = grep (/ccm-active-profile/, readdir($DIR));
    $n_files = @files;
    close($DIR);

    return($n_files);
}

#
# read_url
#
# Read from file an URL.
#
sub read_url
{
    my ($filename) = @_;
    my $url;

    open(my $FILE, '<', $filename) or die("$filename failed to open $!");
    $url = <$FILE>;
    chomp($url) if ($url);
    close($FILE);

    return($url);
}

#
# save_active_urls
#
# Delete from NonactiveURL hash those URLs used by the profile.
#
# The URL to be deleted are:
#   - URL stored at profile.url
#   - all URLs stored in eid2data.db recods of type 'fetch'
#
sub save_active_urls
{
    my ($profile) = @_;

    # remove from NonactiveURL hash those URLs contained
    # in files profile.url

    my $url = read_url("${CacheDir}/${profile}/profile.url");
    my $encoded_url = EDG::WP4::CCM::Fetch::Download->EncodeURL($url);
    delete($NonactiveURL{$encoded_url});

    # search URLs in fetch porperties
    my %hash;
    my $fn = "${CacheDir}/${profile}/eid2data";
    my $readErr = read_db(\%hash, $fn);
    die("Failed to read eid2data: $readErr") if defined($readErr);

    foreach my $key ( keys(%hash) ) {
        my $ukey = unpack("L", $key);
        next unless ($ukey & 0x10000000);
        next unless ($hash{$key} eq 'fetch');
        $ukey = $ukey & 0xEFFFFFFF;
        $key = pack("L", $ukey);
        $url = $hash{$key};
        my $encoded_url = EDG::WP4::CCM::Fetch::Download->EncodeURL($url);
        delete( $NonactiveURL{$encoded_url} );
    }

    close_db($fn);
    return;
}

#
# clean_temp_dir
#
# Clean up temporary directory
#
sub clean_temp_dir
{
    my ($mod_time, $curr_time);

    $curr_time = time();

    opendir(my $DIR , "$TmpDir") or die("$TmpDir open failed $!");

    while (my $dir = readdir($DIR)) {
        next if ($dir eq "." or $dir eq "..");
        $mod_time = (stat("$TmpDir/$dir"))[ST_MTIME];
        if ($mod_time + $TimeAging >= $curr_time ) {
            next;
        }
        if( !rmtree("$TmpDir/$dir")  ) {
            Warn("failed to remove $TmpDir/$dir: $1");
        }
    }

    close($DIR);
    return;
}

#
# main
#
# Purge main() algorithm
#
# Remove all dead active.pid files of every profile.
# If a profile is not the current or latest profiles, and if it is not
# active, and the last time it was modified is older than TimeAging,
# then remove the profile.
# Finally, remove unused URLs files, and temporary files.
#
sub main
{
    foreach my $profile (@Profile) {

        $profile =~ /profile\.(\d+)/;
        my $prof_cid = $1;

        my $mod_time = (stat("$CacheDir/$profile"))[ST_MTIME];

        clean_profile($profile);

        my $curr_time = time();

        if (($prof_cid eq $CurrentCID)
                or ($prof_cid eq $LatestCID)
                or (int($prof_cid) >= $KeepCID)
                or num_active_pids($profile)
                or ($mod_time + $TimeAging >= $curr_time)) {
            save_active_urls($profile);
            next;
        }

        if( !rmtree("$CacheDir/$profile")  ) {
            Warn("cannot remove directory $CacheDir/$profile: $!");
        }

    }

    foreach my $url (keys(%NonactiveURL)) {
        unlink( "$DataDir/$url" )
            or Warn("error unlinking file $DataDir/$url: $!");
    }

    clean_temp_dir();

    return;
}

init();
main();
exit(0);

=head1 NAME

ccm-purge - Clean up the configuration cache directory

=head1 DESCRIPTION

This program cleans up the configuration cache directory
removing unused configuration profiles, temporary files, and
unused cached files.

=head1 SYNOPSIS

ccm-purge --time time_aging [--config ccm.conf] [--tolerant]

=head1 OPTIONS

=over

=item B<--time>

the maximun time (in seconds) before to remove a profile that
it is not in use and files from tmp directory

=item B<--config>

the absolute PATH of configuration file of the Configuration
Cache Client

=item B<--tolerant>

Controls how tolerant is the program to errors, the default behaviour
is strict, that means that the program will stop whenever an error is
found, if this option is specified, the program will continue even if
small errors are detected

=back

=cut
