#!/usr/bin/perl -wT

#
# populate-medline-table-online.pl
# This script shows a basic INSERT INTO command
#

use strict;
use diagnostics;
use DBI;

use LWP::Simple;

# Main flow: connect to the local_medline database, prepare one
# parameterised INSERT, then fetch and insert a record for every id given
# on the command line.

# database connection; abort with the driver's message if it fails
my $dbh = DBI->connect('DBI:Pg:dbname=local_medline', 'dewey', '')
  or die "Unable to make database connection: $DBI::errstr\n";

# One placeholder per column, so values are bound by the driver and never
# spliced into the SQL text.
my $insert_sql = <<"EOSQL";
INSERT INTO refs
  (pubmed_id, authors, title, keywords, citation, address, abstract, comments, num_refs, pub_date)
VALUES
  (?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
EOSQL

my $sth = $dbh->prepare($insert_sql)
  or die "Unable to prepare insert statement: " . $dbh->errstr . "\n";

# MEDLINE field tags, listed in the same order as the columns above.
my @field_tags = qw(PMID AU TI MH SO AD AB CM RF DA);

# Iterate over the arguments directly rather than testing the truth of
# each shifted value, so an id of "0" or an empty argument is reported as
# an error instead of silently ending the run.
LOOP:
foreach my $id (@ARGV) {
  my $record = get_record_by_id($id);

  # get_record_by_id returns a hash reference on success and a plain
  # error string otherwise.
  unless (ref($record) eq 'HASH') {
    print STDERR "Error with id $id: $record\n";
    next LOOP;
  }

  # The hash slice yields exactly one value per tag (undef for a tag the
  # record lacks), matching the ten placeholders.
  $sth->execute(@{$record}{@field_tags})
    or print STDERR "Problem inserting id $id: ", $sth->errstr, "\n";
}

$dbh->disconnect;

#---------------------
# get_record_by_id($uid)
#
# Fetches the MEDLINE-format text for one PubMed id and parses it.
#
# Parameters:
#   $uid - the record id; must consist solely of ASCII digits.
#
# Returns:
#   On success, a reference to a hash keyed by field tag (the capital
#   letters before the "-" on each line).  Tags listed in
#   %multiple_values hold every occurrence, each followed by a newline;
#   any other tag holds its last occurrence.  Continuation lines are
#   joined to the value they continue with a single space.
#   On failure, a plain string describing the problem ("Invalid id",
#   "No response from server" or "No Record").
#---------------------
sub get_record_by_id {
  my ($uid) = @_;

  # Tags that may appear several times within one record.
  my %multiple_values = ( 'AU' => 1,
                          'MH' => 1,
                        );

  # Only digits are allowed into the URL: this stops extra query
  # parameters being smuggled in, and re-assigning from the capture
  # untaints the value when running under -T.
  return "Invalid id" unless defined $uid && $uid =~ /\A([0-9]+)\z/;
  $uid = $1;

  my $url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&dopt=MEDLINE&uid=$uid";
  my $response = get($url);
  return "No response from server" unless $response;

  # Drop the <pre> / </pre> wrapper; what is left must begin with a UI line.
  $response =~ s/\<\/*pre\>//gi;
  return "No Record" unless ($response =~ /^UI/);

  my $tag;        # tag of the field currently being read
  my %value_of;   # tag => accumulated value

  # Walk every line.  Blank lines match neither pattern and are skipped,
  # so a blank line inside the text does not cut the record short.
  foreach my $line (split /\n/, $response) {
    if ($line =~ /^([A-Z]+)\s*\-\s+(.*)/) {
      # "TAG - value": start of a new field
      $tag = $1;
      my $value = $2;
      if ($multiple_values{$tag}) {
        $value_of{$tag} .= "$value\n";
      } else {
        $value_of{$tag} = $value;
      }
    } elsif (defined $tag && $line =~ /^\s+(.*)/) {
      # continuation line; ignored if no tagged line has been seen yet
      my $more = $1;
      if ($multiple_values{$tag}) {
        # Put the continuation before the entry's terminating newline so
        # it stays part of the entry it belongs to.
        $value_of{$tag} =~ s/\n\z//;
        $value_of{$tag} .= " $more\n";
      } else {
        $value_of{$tag} .= " $more";
      }
    }
  }
  return \%value_of;
}
                     

# syntax highlighted by Code2HTML, v. 0.8.11