Escidoc2ris.pl
Jump to navigation
Jump to search
The eSciDoc source file should contain one "field=data" pair per line. Fields should appear in the same order as in the eSciDoc specification. See the "Input file example" section below.
Conversion script[edit]
# escidoc2ris.pl
#
# Converts a single eSciDoc record into a single RIS record.
#
# Input : escidoc.txt -- one "field=data" pair per line, with the fields in
#         the order of the eSciDoc specification. Several mappings depend on
#         that order (e.g. Identifier.Id must precede its Identifier.IdType,
#         and Genre must precede the Source.* fields).
# Output: ris.ris     -- the RIS record; every tag is always written, even
#         when its value is empty.

use strict;
use warnings;

my $input_file  = 'escidoc.txt';
my $output_file = 'ris.ris';

# eSciDoc Genre => RIS reference type.
my %ris_type_for_genre = (
    'Article'            => 'JOUR',     # Changes to MGZN later if there's no source journal. Unfortunately, NEWS cannot be supported in this mapping.
    'Book'               => 'BOOK',
    'Book Item'          => 'CHAP',
    'Proceedings'        => 'CONF',
    'Conference Paper'   => 'CHAP',
    'Poster'             => 'GEN',      # or ART? PAMP?
    'Courseware/Lecture' => 'GEN',
    'Thesis'             => 'THES',
    'Paper'              => 'GEN',
    'Report'             => 'RPRT',
    'Journal'            => 'JFULL',
    'Issue'              => 'GEN',      # or SER? JFULL?
    'Series'             => 'SER',
    'Manuscript'         => 'UNPB',
    'Other'              => 'GEN',
);

# Reference types, grouped by how the Source.* fields are mapped for them.
my $periodical_types = qr/JOUR|MGZN|RPRT|GEN|NEWS/;          # Source.Title -> JO
my $series_types     = qr/BOOK|CONF|THES|UNPB|JFULL|SER/;    # Source.Title -> T3, source creators -> A3
my $section_types    = qr/CHAP/;                             # Source.Title -> T2

# Creator roles, grouped by the RIS field they end up in.
my $author_roles       = qr/Author/;
my $maybe_author_roles = qr/Artist|Editor|Painter|Photographer|Illustrator|Commentator/;
my $editor_roles       = qr/Advisor|Transcriber|Translator|Contributor/;

# Scalar RIS fields. All start as '' so that every tag can be printed.
my ( $ty, $id, $py, $ti, $t2, $t3, $pb, $cy, $jo ) = ('') x 9;
my ( $vl, $is, $sp, $ep, $sn, $av, $m1, $n2 )      = ('') x 8;

# Repeatable RIS fields.
my ( @au, @maybeauthor, @ed, @a3, @ad, @kw, @n1 );

# Parser state carried from one input line to the next.
my $creatorrole     = '';    # role announced by the latest Creator.CreatorRole
my $identifier      = '';    # latest identifier value, waiting for its IdType
my $first_type_seen = 0;     # true once the IdType of the ID identifier has passed
my ( @maybepy, @datetype );  # parallel lists: Date.Date / Date.DateType

# Files a creator name under AU, "maybe author" or ED, depending on the role
# announced by the most recent Creator.CreatorRole line.
sub add_creator {
    my ($name) = @_;

    push @au, $name
        if $creatorrole =~ $author_roles;

    # Problem: an Editor becomes an Author when no Author is given.
    push @maybeauthor, "$name ($creatorrole)"
        if $creatorrole =~ $maybe_author_roles;

    push @ed, "$name ($creatorrole)"
        if $creatorrole =~ $editor_roles;

    return;
}

# Formats one RIS line: two-letter tag, two spaces, hyphen, space, value.
sub ris_line {
    my ( $tag, $value ) = @_;
    return "$tag  - $value\n";
}

open my $esd_fh, '<', $input_file
    or die "Cannot open $input_file for reading: $!\n";

LINE:
while ( defined( my $line = <$esd_fh> ) ) {
    chomp $line;
    $line =~ s/\r\z//;    # tolerate files with CRLF line endings

    # Everything before the first "=" is the field name, the rest is data.
    # Lines that are not of the form "field=data" carry nothing to map.
    my ( $field, $value ) = $line =~ /\A([^=]+)=(.*)\z/s
        or next LINE;

    if ( $field eq 'Genre' ) {
        $ty = $ris_type_for_genre{$value}
            if exists $ris_type_for_genre{$value};
    }
    elsif ( $field eq 'Creator.CreatorRole' ) {
        $creatorrole = $value;
    }
    elsif ( $field eq 'Creator.Person.CompleteName' ) {
        add_creator($value);    # to do: GivenName and FamilyName
    }
    elsif ( $field eq 'Creator.Organization.Name' ) {
        add_creator($value);
    }
    elsif ($field eq 'Creator.Organization.Address'
        or $field eq 'Creator.Person.Organization.Name'
        or $field eq 'Creator.Person.Organization.Address' )
    {
        push @ad, $value;
    }
    elsif ( $field eq 'Title' ) {
        $ti = $value;
    }
    elsif ( $field eq 'Language' ) {
        push @n1, "Language: $value";
    }
    elsif ( $field eq 'AlternativeTitle' ) {
        $t2 = $value;
    }
    elsif ( $field eq 'Identifier.Id' ) {

        # Only the first Identifier is used for the ID field to prevent
        # multiple ID fields; all others are later mapped to N1. Note that
        # neither EndNote nor Reference Manager support ID. JabRef maps ID
        # to the non-standard BibTeX field "refid".
        if   ( $id eq '' ) { $id         = $value }
        else               { $identifier = $value }
    }
    elsif ( $field eq 'Identifier.IdType' ) {
        if ( $id ne '' ) {
            if ( !$first_type_seen ) {

                # This IdType belongs to the identifier already used as ID.
                $first_type_seen = 1;
            }
            else {
                my $idtype = $value eq 'Other' ? 'Identifier' : $value;
                push @n1, "$idtype: $identifier";
            }
        }
    }
    elsif ( $field eq 'PublishingInfo.Publisher' ) {
        $pb = $value;
    }
    elsif ( $field eq 'PublishingInfo.Place' ) {
        $cy = $value;
    }
    elsif ( $field eq 'PublishingInfo.Edition' ) {
        $vl = $value;    # If there's a "real" volume number, $vl gets overwritten later.
    }
    elsif ( $field eq 'Date.Date' ) {
        push @maybepy, $value;
    }
    elsif ( $field eq 'Date.DateType' ) {
        push @datetype, $value;
    }
    elsif ( $field eq 'ReviewMethod' ) {
        push @n1, "Review method: $value";
    }
    elsif ( $field eq 'Source.Genre' ) {

        # An article whose source is not a journal is a magazine article.
        $ty = 'MGZN' if $ty eq 'JOUR' and $value ne 'Journal';
    }
    elsif ( $field eq 'Source.Title' ) {
        if ( $ty =~ $periodical_types ) {
            $jo = $value;

            # To prevent JO from being overwritten with T2 in EndNote, T2 has
            # to be moved to the notes here.
            if ( $t2 ne '' ) {
                push @n1, "Alternative title: $t2";
                $t2 = '';
            }
        }
        if ( $ty =~ $series_types ) {
            $t3 = $value;
        }

        # Except for book sections, T2 should not be used for Source.Title
        # data because it can also be used for alternative titles.
        if ( $ty =~ $section_types ) {
            $t2 = $value;
        }
    }
    elsif ( $field eq 'Source.Creator.Person.CompleteName' ) {

        # to do: add GivenName / FamilyName and Organization.Name
        # The original test for the second case was written "!=~", which Perl
        # reads as a numeric "!=" and which was therefore always true; the
        # two cases are meant to be mutually exclusive.
        if   ( $ty =~ $series_types ) { push @a3, $value }
        else                          { push @ed, $value }
    }
    elsif ( $field eq 'Source.Volume' ) {

        # A volume number takes precedence over an edition stored in VL
        # earlier; the edition is kept as a note.
        push @n1, "Edition: $vl" if $vl ne '';
        $vl = $value;
    }
    elsif ( $field eq 'Source.Issue' ) {
        $is = $value;
    }
    elsif ( $field eq 'Source.StartPage' ) {
        $sp = $value;
    }
    elsif ( $field eq 'Source.EndPage' ) {
        $ep = $value;
    }
    elsif ( $field eq 'Source.SequenceNumber' ) {
        push @n1, "Sequence number: $value";
    }
    elsif ( $field eq 'Source.PublishingInfo.Publisher' ) {
        $pb = $value if $pb eq '';
    }
    elsif ( $field eq 'Source.PublishingInfo.Place' ) {
        $cy = $value if $cy eq '';
    }
    elsif ( $field eq 'Source.PublishingInfo.Edition' ) {
        $vl = $value if $vl eq '';
    }
    elsif ( $field eq 'Source.Identifier.Id' ) {
        $identifier = $value;
    }
    elsif ( $field eq 'Source.Identifier.IdType' ) {
        my $idtype = $value eq 'Other' ? 'Identifier' : $value;

        # The first ISSN/ISBN of the source becomes SN, the rest are notes.
        if ( $idtype =~ /ISSN|ISBN/ and $sn eq '' ) {
            $sn = $identifier;
        }
        else {
            push @n1, "$idtype: $identifier";
        }
    }
    elsif ( $field eq 'Source.Source.Title' ) {

        # This can only be a Series.
        if   ( $t3 eq '' ) { $t3 = $value }
        else               { push @n1, $value }
    }
    elsif ( $field eq 'Source.Source.Creator.Person.CompleteName' ) {

        # to do: again, add GivenName/FamilyName and Organization.Name
        push @a3, $value if $t3 eq '';
    }
    elsif ( $field eq 'Event.Title' ) {
        push @n1, $value;    # to do: concatenation with other Event fields
    }
    elsif ( $field eq 'TotalNumberOfPages' ) {
        $sp = $value if $sp eq '';
    }
    elsif ( $field eq 'Degree' ) {
        $m1 = $value;        # used by EndNote and Reference Manager
    }
    elsif ( $field eq 'Abstract' ) {
        $n2 = $value;
    }
    elsif ( $field eq 'Subject' ) {
        @kw = split /;/, $value;
    }
    elsif ( $field eq 'TableOfContents' ) {
        push @n1, "Table of contents: $value";
    }
    elsif ( $field eq 'Location' ) {
        $av = "Location: $value";
    }
}

close $esd_fh
    or die "Cannot close $input_file: $!\n";

# Publication year: the date typed "published in print" wins; otherwise fall
# back to the first date given.
# to do: check with order in the OpenURL table in the eSciDoc specification;
# map other dates with DateType as prefix to N1.
for my $i ( 0 .. $#maybepy ) {
    my $type = defined $datetype[$i] ? $datetype[$i] : '';
    $py = $maybepy[$i] if $type =~ /published in print/;
}
$py = $maybepy[0] if $py eq '' and @maybepy;

# "Maybe authors" are promoted to AU only when there is no real author.
my $have_author      = ( @au and $au[0] ne '' );
my $maybe_author_tag = $have_author ? 'ED' : 'AU';

my @record = (
    ris_line( 'TY', $ty ),
    ris_line( 'ID', $id ),
    ris_line( 'PY', $py ),
    ris_line( 'Y2', '' ),    # empty - Y2 is usually used for access dates (not supported in eSciDoc)
    ( map { ris_line( 'AU',              $_ ) } @au ),
    ( map { ris_line( $maybe_author_tag, $_ ) } @maybeauthor ),
    ( map { ris_line( 'ED',              $_ ) } @ed ),
    ( map { ris_line( 'A3',              $_ ) } @a3 ),
    ris_line( 'TI', $ti ),
    ris_line( 'T2', $t2 ),
    ris_line( 'T3', $t3 ),
    ris_line( 'AD', join( '; ', @ad ) ),
    ris_line( 'PB', $pb ),
    ris_line( 'CY', $cy ),
    ris_line( 'JO', $jo ),
    ris_line( 'VL', $vl ),
    ris_line( 'IS', $is ),
    ris_line( 'SP', $sp ),
    ris_line( 'EP', $ep ),
    ris_line( 'SN', $sn ),
    ( map { ris_line( 'KW', $_ ) } @kw ),
    ris_line( 'AV', $av ),
    ris_line( 'M1', $m1 ),
    ris_line( 'N1', join( '; ', @n1 ) ),
    ris_line( 'N2', $n2 ),
    ris_line( 'ER', '' ),
);

open my $ris_fh, '>', $output_file
    or die "Cannot open $output_file for writing: $!\n";
print {$ris_fh} @record
    or die "Cannot write to $output_file: $!\n";
close $ris_fh
    or die "Cannot close $output_file: $!\n";
Input file example[edit]
Example of an escidoc.txt input file that works with the above script:
Genre=Thesis
Creator.CreatorRole=Author
Creator.Person.CompleteName=Bowman,B.F
Creator.Person.Organization.Name=Max-Planck-Institut für Biochemie
Creator.Person.Organization.Address=Martinsried
Creator.CreatorRole=Advisor
Creator.Person.CompleteName=Ziegler,H
Creator.Person.Title=Prof. Dr.
Creator.CreatorRole=Advisor
Creator.Person.CompleteName=Schleifer,K.H
Creator.Person.Title=Prof. Dr.
Creator.CreatorRole=Advisor
Creator.Person.CompleteName=Hofschneider,P.H
Creator.Person.Title=Prof. Dr. Dr.
Title=Die Entwicklung und Verwendung eines zellfreien Systems aus Weizenkeimen zur Translation viraler RNA
Language=de
PublishingInfo.Place=Martinsried
Date.Date=1978/12/18/
Date.DateType=submitted
Date.Date=1979/01/10/
Date.DateType=accepted
Date.Date=1979
Date.DateType=published in print
ReviewMethod=internal
Event.Name=Promotion
TotalNumberOfPages=127
Degree=phd
Abstract=In der vorliegenden Arbeit wird...
Subject=Weizen;Translation;RNA
TableOfContents=ZUSAMMENFASSUNG 1; Abkürzungen 2; A. Einleitung 4; ...
Location=T-21