Escidoc2ris.pl

From MPDLMediaWiki
Revision as of 13:03, 5 January 2011 by Kristina (talk | contribs) (→‎Input file example)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

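The eSciDoc source file must be named "escidoc.txt" and contain one field per line in the form "field=data". The fields should appear in the same order as in the eSciDoc specification. The script writes its result to "ris.ris" in the current directory. See #Input file example below.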

Conversion script[edit]

use strict;
use warnings;

# Converts a single eSciDoc record, stored as "field=data" lines in
# escidoc.txt, into one RIS record written to ris.ris.
#
# Input:  escidoc.txt in the current directory. Each line is split at the
#         first "="; lines without "=" are ignored. The mapping relies on
#         the fields appearing in the order of the eSciDoc specification
#         (e.g. Genre before Source.Title, Identifier.Id before
#         Identifier.IdType).
# Output: ris.ris in the current directory (overwritten).
# Dies if either file cannot be opened or the output cannot be flushed.

# eSciDoc genre => RIS reference type.
my %ris_type_for = (
	'Article'            => 'JOUR',  # becomes MGZN when Source.Genre is not "Journal"; NEWS cannot be supported in this mapping
	'Book'               => 'BOOK',
	'Book Item'          => 'CHAP',
	'Proceedings'        => 'CONF',
	'Conference Paper'   => 'CHAP',
	'Poster'             => 'GEN',   # or ART? PAMP?
	'Courseware/Lecture' => 'GEN',
	'Thesis'             => 'THES',
	'Paper'              => 'GEN',
	'Report'             => 'RPRT',
	'Journal'            => 'JFULL',
	'Issue'              => 'GEN',   # or SER? JFULL?
	'Series'             => 'SER',
	'Manuscript'         => 'UNPB',
	'Other'              => 'GEN',
);

# RIS types grouped by how the source (container) title is mapped.
my $periodical_like = qr/JOUR|MGZN|RPRT|GEN|NEWS/;          # Source.Title -> JO
my $monograph_like  = qr/BOOK|CONF|THES|UNPB|JFULL|SER/;    # Source.Title -> T3
my $section_like    = qr/CHAP/;                             # Source.Title -> T2

# Creator roles grouped by the RIS tag they end up in.
my $author_roles      = qr/Author/;
my $maybeauthor_roles = qr/Artist|Editor|Painter|Photographer|Illustrator|Commentator/;
my $editor_roles      = qr/Advisor|Transcriber|Translator|Contributor/;

# Scalar RIS fields; initialised to '' so that missing input fields are
# printed as empty tags without "uninitialized" warnings.
my $ty = '';
my $id = '';
my $ti = '';
my $t2 = '';
my $t3 = '';
my $pb = '';
my $cy = '';
my $jo = '';
my $vl = '';
my $is = '';
my $sp = '';
my $ep = '';
my $sn = '';
my $av = '';
my $m1 = '';
my $n2 = '';

# Repeatable RIS fields.
my @au;           # creators with role Author
my @maybeauthor;  # become AU if there is no Author, ED otherwise
my @ed;
my @a3;
my @ad;
my @kw;
my @n1;

# Parser state.
my $creatorrole = '';
my $identifier  = '';
my $idtype      = '';
my $firsttype   = 0;   # set to 1 once the IdType of the first Identifier was seen
my @maybepy;           # all Date.Date values, in input order
my @datetype;          # all Date.DateType values, in input order

# Open the input first so that a missing escidoc.txt does not truncate an
# existing ris.ris.
open(my $esd_fh, '<', 'escidoc.txt') or die "Cannot open escidoc.txt for reading: $!";
open(my $ris_fh, '>', 'ris.ris')     or die "Cannot open ris.ris for writing: $!";

while (my $line = <$esd_fh>) {
	chomp $line;

	# Split at the first "=" only; the data part may itself contain "=".
	next unless $line =~ /\A([^=]+)=(.*)\z/s;
	my ($field, $value) = ($1, $2);

	if ($field eq 'Genre') {
		# Unknown genres leave the type untouched.
		$ty = $ris_type_for{$value} if exists $ris_type_for{$value};
	}
	elsif ($field eq 'Creator.CreatorRole') {
		# Remembered for the creator name line(s) that follow.
		$creatorrole = $value;
	}
	elsif ($field eq 'Creator.Person.CompleteName' or $field eq 'Creator.Organization.Name') {
		# to do: GivenName and FamilyName
		if ($creatorrole =~ $author_roles) {
			push @au, $value;
		}
		if ($creatorrole =~ $maybeauthor_roles) {
			# problem: Editor becomes Author when no Author is given.
			push @maybeauthor, "$value ($creatorrole)";
		}
		if ($creatorrole =~ $editor_roles) {
			push @ed, "$value ($creatorrole)";
		}
	}
	elsif ($field eq 'Creator.Organization.Address'
		or $field eq 'Creator.Person.Organization.Name'
		or $field eq 'Creator.Person.Organization.Address') {
		push @ad, $value;
	}
	elsif ($field eq 'Title') {
		$ti = $value;
	}
	elsif ($field eq 'Language') {
		push @n1, "Language: $value";
	}
	elsif ($field eq 'AlternativeTitle') {
		$t2 = $value;
	}
	elsif ($field eq 'Identifier.Id') {
		# Only the first Identifier is used for the ID field to prevent
		# multiple ID fields; all others are later mapped to N1. Note that
		# neither EndNote nor Reference Manager support ID. JabRef maps ID
		# to the non-standard BibTeX field "refid".
		if ($id eq '') {
			$id = $value;
		}
		else {
			$identifier = $value;
		}
	}
	elsif ($field eq 'Identifier.IdType') {
		if ($id ne '' and $firsttype != 1) {
			# Type of the first identifier: it went to ID, so it is not
			# repeated in N1.
			$firsttype = 1;
		}
		elsif ($id ne '' and $firsttype == 1) {
			$idtype = $value;
			$idtype = 'Identifier' if $idtype eq 'Other';
			push @n1, "$idtype: $identifier";
		}
	}
	elsif ($field eq 'PublishingInfo.Publisher') {
		$pb = $value;
	}
	elsif ($field eq 'PublishingInfo.Place') {
		$cy = $value;
	}
	elsif ($field eq 'PublishingInfo.Edition') {
		# If there is a "real" volume number, $vl gets overwritten later.
		$vl = $value;
	}
	elsif ($field eq 'Date.Date') {
		push @maybepy, $value;
	}
	elsif ($field eq 'Date.DateType') {
		push @datetype, $value;
	}
	elsif ($field eq 'ReviewMethod') {
		push @n1, "Review method: $value";
	}
	elsif ($field eq 'Source.Genre') {
		# An article whose source is not a journal is treated as a
		# magazine article.
		$ty = 'MGZN' if $ty eq 'JOUR' and $value ne 'Journal';
	}
	elsif ($field eq 'Source.Title') {
		if ($ty =~ $periodical_like) {
			$jo = $value;
			# To prevent JO from being overwritten with T2 in EndNote, T2
			# has to be emptied here; its content is kept as a note.
			if ($t2 ne '') {
				push @n1, "Alternative title: $t2";
				$t2 = '';
			}
		}
		elsif ($ty =~ $monograph_like) {
			$t3 = $value;
		}
		elsif ($ty =~ $section_like) {
			# Except for book sections, T2 should not be used for
			# Source.Title data because it can also hold alternative titles.
			$t2 = $value;
		}
	}
	elsif ($field eq 'Source.Creator.Person.CompleteName') {
		# to do: add GivenName / FamilyName and Organization.Name
		# The original negated test was written "!=~", which is a numeric
		# comparison and not a negated match, so every source creator was
		# also emitted as ED. A plain if/else gives the intended either-or.
		if ($ty =~ $monograph_like) {
			push @a3, $value;
		}
		else {
			push @ed, $value;
		}
	}
	elsif ($field eq 'Source.Volume') {
		# A previously stored edition gives way to the real volume number
		# and is preserved as a note.
		push @n1, "Edition: $vl" if $vl ne '';
		$vl = $value;
	}
	elsif ($field eq 'Source.Issue') {
		$is = $value;
	}
	elsif ($field eq 'Source.StartPage') {
		$sp = $value;
	}
	elsif ($field eq 'Source.EndPage') {
		$ep = $value;
	}
	elsif ($field eq 'Source.SequenceNumber') {
		push @n1, "Sequence number: $value";
	}
	elsif ($field eq 'Source.PublishingInfo.Publisher') {
		$pb = $value if $pb eq '';
	}
	elsif ($field eq 'Source.PublishingInfo.Place') {
		$cy = $value if $cy eq '';
	}
	elsif ($field eq 'Source.PublishingInfo.Edition') {
		$vl = $value if $vl eq '';
	}
	elsif ($field eq 'Source.Identifier.Id') {
		$identifier = $value;
	}
	elsif ($field eq 'Source.Identifier.IdType') {
		$idtype = $value;
		$idtype = 'Identifier' if $idtype eq 'Other';
		# The first ISSN/ISBN of the source goes to SN, everything else to N1.
		if ($idtype =~ /ISSN|ISBN/ and $sn eq '') {
			$sn = $identifier;
		}
		else {
			push @n1, "$idtype: $identifier";
		}
	}
	elsif ($field eq 'Source.Source.Title') {
		# This can only be a Series.
		if ($t3 eq '') {
			$t3 = $value;
		}
		else {
			push @n1, $value;
		}
	}
	elsif ($field eq 'Source.Source.Creator.Person.CompleteName') {
		# to do: again, add GivenName/FamilyName and Organization.Name
		push @a3, $value if $t3 eq '';
	}
	elsif ($field eq 'Event.Title') {
		# to do: concatenation with other Event fields
		push @n1, $value;
	}
	elsif ($field eq 'TotalNumberOfPages') {
		$sp = $value if $sp eq '';
	}
	elsif ($field eq 'Degree') {
		# M1 is used by EndNote and Reference Manager.
		$m1 = $value;
	}
	elsif ($field eq 'Abstract') {
		$n2 = $value;
	}
	elsif ($field eq 'Subject') {
		@kw = split /;/, $value;
	}
	elsif ($field eq 'TableOfContents') {
		push @n1, "Table of contents: $value";
	}
	elsif ($field eq 'Location') {
		$av = "Location: $value";
	}
}

close($esd_fh);

# Publication year: the (last) date typed "published in print", otherwise
# the first date given.
# to do: check with order in the OpenURL table in the eSciDoc specification;
# map other dates with DateType as prefix to N1
my $py = '';
for my $i (0 .. $#maybepy) {
	my $type = defined $datetype[$i] ? $datetype[$i] : '';
	$py = $maybepy[$i] if $type =~ /published in print/;
}
$py = $maybepy[0] if $py eq '' and @maybepy;

# Without a real author, the "maybe" authors (editors, artists, ...) are
# promoted to AU; otherwise they are listed as ED.
my $no_author = (!@au or $au[0] eq '');

print {$ris_fh} "TY  - $ty\n";
print {$ris_fh} "ID  - $id\n";
print {$ris_fh} "PY  - $py\n";
print {$ris_fh} "Y2  -\n"; # empty - Y2 is usually used for access dates (not supported in eSciDoc)

print {$ris_fh} "AU  - $_\n" for @au;
for my $name (@maybeauthor) {
	my $tag = $no_author ? 'AU' : 'ED';
	print {$ris_fh} "$tag  - $name\n";
}
print {$ris_fh} "ED  - $_\n" for @ed;
print {$ris_fh} "A3  - $_\n" for @a3;

print {$ris_fh} "TI  - $ti\n";
print {$ris_fh} "T2  - $t2\n";
print {$ris_fh} "T3  - $t3\n";
print {$ris_fh} "AD  - " . join('; ', @ad) . "\n";
print {$ris_fh} "PB  - $pb\n";
print {$ris_fh} "CY  - $cy\n";
print {$ris_fh} "JO  - $jo\n";
print {$ris_fh} "VL  - $vl\n";
print {$ris_fh} "IS  - $is\n";
print {$ris_fh} "SP  - $sp\n";
print {$ris_fh} "EP  - $ep\n";
print {$ris_fh} "SN  - $sn\n";
print {$ris_fh} "KW  - $_\n" for @kw;
print {$ris_fh} "AV  - $av\n";
print {$ris_fh} "M1  - $m1\n";
print {$ris_fh} "N1  - " . join('; ', @n1) . "\n";
print {$ris_fh} "N2  - $n2\n";
# The end-of-record tag is written in full ("ER", two spaces, dash, space)
# and terminated with a newline so that the file ends on a complete line.
print {$ris_fh} "ER  - \n";

# Buffered write errors only surface at close, so check it.
close($ris_fh) or die "Cannot close ris.ris: $!";

Input file example[edit]

Example of an escidoc.txt input file that works with the above script:

Genre=Thesis
Creator.CreatorRole=Author
Creator.Person.CompleteName=Bowman,B.F
Creator.Person.Organization.Name=Max-Planck-Institut für Biochemie
Creator.Person.Organization.Address=Martinsried
Creator.CreatorRole=Advisor
Creator.Person.CompleteName=Ziegler,H
Creator.Person.Title=Prof. Dr.
Creator.CreatorRole=Advisor
Creator.Person.CompleteName=Schleifer,K.H
Creator.Person.Title=Prof. Dr.
Creator.CreatorRole=Advisor
Creator.Person.CompleteName=Hofschneider,P.H
Creator.Person.Title=Prof. Dr. Dr.
Title=Die Entwicklung und Verwendung eines zellfreien Systems aus Weizenkeimen zur Translation viraler RNA
Language=de
PublishingInfo.Place=Martinsried
Date.Date=1978/12/18/
Date.DateType=submitted
Date.Date=1979/01/10/
Date.DateType=accepted
Date.Date=1979
Date.DateType=published in print
ReviewMethod=internal
Event.Name=Promotion
TotalNumberOfPages=127
Degree=phd
Abstract=In der vorliegenden Arbeit wird...
Subject=Weizen;Translation;RNA
TableOfContents=ZUSAMMENFASSUNG	1; Abkürzungen	2; A. Einleitung	4; ...
Location=T-21