#!/usr/bin/perl -wT
#
# populate-medline-table-online.pl
# This script shows a basic INSERT INTO command
#
use strict;
use diagnostics;
use DBI;
use LWP::Simple;
# database connection (user 'dewey', empty password)
my $dbh = DBI->connect('DBI:Pg:dbname=local_medline', 'dewey', '')
    or die "Unable to make database connection: $DBI::errstr\n";

# One placeholder per column; the values are bound at execute() time, so
# the fetched text never has to be quoted by hand.
my $insert_sql = <<"EOSQL";
INSERT INTO refs
(pubmed_id, authors, title, keywords, citation, address, abstract, comments, num_refs, pub_date)
VALUES
(?, ?, ?, ?, ?, ?, ?, ?, ?, ?);
EOSQL

my $sth = $dbh->prepare($insert_sql)
    or die "Unable to prepare insert statement: " . $dbh->errstr . "\n";

# MEDLINE field tags, listed in the same order as the columns named in
# $insert_sql.
my @field_tags = qw(PMID AU TI MH SO AD AB CM RF DA);

# Every command-line argument is treated as one id to fetch and insert.
# Iterating over @ARGV (rather than testing the truth of a shifted value)
# keeps an argument such as "0" or "" from ending the run early.
ID:
for my $id (@ARGV) {
    my $record = get_record_by_id($id);

    # get_record_by_id hands back a hash reference on success and a plain
    # error string otherwise.
    unless (ref($record) eq 'HASH') {
        print STDERR "Error with id $id: $record\n";
        next ID;
    }

    # Fields missing from the record are passed as undef.
    $sth->execute(@{$record}{@field_tags})
        or print STDERR "Problem inserting id $id: ", $sth->errstr, "\n";
}

$dbh->disconnect;
#---------------------
# get_record_by_id($uid)
#
# Fetches the MEDLINE-format text for one id from the NCBI Entrez query
# URL and parses it into a hash keyed by field tag.
#
# Parameters:
#   $uid - the id to look up; must consist of digits only.
#
# Returns:
#   On success, a reference to a hash mapping each field tag (e.g. 'TI')
#   to its value.  The multi-valued tags (AU, MH) hold all of their values
#   in a single string, each value followed by a newline.
#   On failure, a plain string describing the problem ("Invalid id ...",
#   "No response from server" or "No Record").  Callers tell the two
#   cases apart with ref().
#---------------------
sub get_record_by_id {
    my ($uid) = @_;

    # The id is interpolated into the query string below, so accept digits
    # only.  Taking the value from the capture also untaints it under -T.
    my ($clean_uid) = defined $uid ? $uid =~ /\A([0-9]+)\z/ : ();
    return "Invalid id: expected digits only" unless defined $clean_uid;

    my $url = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi?cmd=Text&db=PubMed&dopt=MEDLINE&uid=$clean_uid";
    my $response = get($url);
    return "No response from server" unless $response;

    # Strip the <pre> ... </pre> wrapper around the record text.
    $response =~ s/\<\/*pre\>//gi;
    return "No Record" unless ($response =~ /^UI/);

    return _parse_medline_record($response);
}

# _parse_medline_record($text)
#
# Turns MEDLINE-format text ("TAG - value" lines, optionally followed by
# indented continuation lines) into a hash reference keyed by tag.
sub _parse_medline_record {
    my ($text) = @_;

    # Tags that may occur several times in one record; their values are
    # accumulated one per line instead of overwriting each other.
    my %multiple_values = (
        'AU' => 1,
        'MH' => 1,
    );

    my %value_of;
    my $current_tag;    # tag of the field most recently started

    # A for loop is used instead of "while (shift ...)" because an empty
    # line (or a line that is just "0") is false and would otherwise end
    # parsing in the middle of the record.
    LINE:
    for my $line (split /\r?\n/, $text) {
        next LINE unless $line =~ /\S/;

        if ($line =~ /^([A-Z]+)\s*\-\s+(.*)/) {
            $current_tag = $1;
            my $value = $2;
            if ($multiple_values{$current_tag}) {
                $value_of{$current_tag} .= "$value\n";
            }
            else {
                $value_of{$current_tag} = $value;
            }
        }
        elsif (defined $current_tag && $line =~ /^\s+(.*)/) {
            # continuation line
            my $more = $1;
            if ($multiple_values{$current_tag}) {
                # Re-open the last value so the continuation stays on the
                # same line as the value it extends.
                $value_of{$current_tag} =~ s/\n\z//;
                $value_of{$current_tag} .= " $more\n";
            }
            else {
                $value_of{$current_tag} .= " $more";
            }
        }
    }

    return \%value_of;
}
# syntax highlighted by Code2HTML, v. 0.8.11