Revision as of 13:41, 14 September 2007

Import Plugin Tutorial 1: CSV

Before You Start

It is sensible to separate the plugins you create for EPrints from those included with it. Create a directory for your import plugins in the main plugin directory (usually /opt/eprints3/perl_lib/EPrints/Plugin/import) for example /opt/eprints3/perl_lib/EPrints/Plugin/import/MyPlugins.

To prepare for this tutorial you should install the Text::CSV module. The following command as root, or using sudo should work.

cpan Text::CSV

CSV.pm

package EPrints::Plugin::Import::MyPlugins::CSV;

use EPrints::Plugin::Import::TextFile;
use strict;

our @ISA = ('EPrints::Plugin::Import::TextFile');

sub new
{
        my( $class, %params ) = @_;

        my $self = $class->SUPER::new( %params );

        $self->{name} = 'CSV';
        $self->{visible} = 'all';
        $self->{produce} = [ 'list/eprint' ];

        my $rc = EPrints::Utils::require_if_exists('Text::CSV');
        unless( $rc )
        {
                $self->{visible} = '';
                $self->{error} = 'Failed to load required module Text::CSV';
        }

        return $self;
}

sub input_fh
{
        my( $plugin, %opts ) = @_;
        my @ids;
        my $fh = $opts{fh};
        my $csv = Text::CSV->new();
        my @records = <$fh>;
        my @fields;

        if ($csv->parse(shift @records))
        {
                @fields = $csv->fields();
        }
        else
        {
                $plugin->error($csv->error_input);
                return undef;
        }

        foreach my $row (@records)
        {
                my @input_data = (join(',',@fields),$row);

                my $epdata = $plugin->convert_input(\@input_data);
                next unless defined $epdata;

                my $dataobj = $plugin->epdata_to_dataobj($opts{dataset},$epdata);
                if( defined $dataobj )
                {
                        push @ids, $dataobj->get_id;
                }
        }

        return EPrints::List->new(
                        dataset => $opts{dataset},
                        session => $plugin->{session},
                        ids=>\@ids );
}

sub convert_input
{
        my $plugin = shift;
        my @input = @{shift @_};
        my $csv = Text::CSV->new();

        my @record;
        if ($csv->parse($input[1]))
        {
                @record = $csv->fields();
        }
        else
        {
                $plugin->error($csv->error_input);
                return undef;
        }

        my @fields = split(',',$input[0]);
        #Check length of row
        if (scalar @fields != scalar @record)
        {
                $plugin->warning('Row length mismatch');
                return undef;
        }

        my %output = ();

        my $dataset = $plugin->{session}->{repository}->get_dataset('archive');

        my $i = 0;
        foreach my $field (@fields)
        {
                #Check field exists
                unless ($dataset->has_field($field))
                {
                        $i++;
                        next;
                }

                my $metafield = $dataset->get_field($field);
                #Check for multiple
                if ($metafield->get_property('multiple'))
                {
                        #Split
                        my @values = split(';',$record[$i]);

                        #Check for name
                        if ($metafield->{type} eq 'name')
                        {
                                my @names = ();

                                foreach my $value (@values)
                                {
                                        my $name = $value;

                                        next unless ($value =~ /^(.*?),(.*?)(,(.*?))?$/);
                                        push @names, { family => $1, given => $2, lineage => $4 };
                                }

                                $output{$field} = \@names;
                        }
                        else
                        {
                                $output{$field} = \@values;
                        }
                }
                else
                {
                        $output{$field} = $record[$i];
                }
                $i++;
        }
        return \%output;
}

1;