Difference between revisions of "Ris2escidoc.pl"

From MPDLMediaWiki
Jump to navigation Jump to search
(New page: <pre><nowiki> open (RIS,"ris.ris"); open (ESD,"> escidoc.txt"); while(defined(my $ris=<RIS>)) { $zeile++; chomp($ris); if(substr($ris,0,2) ne "TY" and $zeile==1){print "Keine gueltige R...)
 
m
 
Line 205: Line 205:
close (ESD);
close (ESD);
</nowiki></pre>
</nowiki></pre>
[[Category:eSciDoc]]

Latest revision as of 13:46, 5 January 2011

use strict;
use warnings;

# ris2escidoc, phase 1: read the RIS record in "ris.ris" line by line and
# collect its fields into the variables below. Phase 2 (further down in this
# file) prints them to the ESD handle, which is opened here on "escidoc.txt".
#
# Each RIS line has the form "XX  - value": a two-character tag at offset 0
# and the value starting at offset 6.
#
# Limitation: the ER (end of record) tag only increments $recordnumber; no
# field is reset or flushed. A file with several records therefore has all of
# them merged into a single output.

my $zeile        = 0;    # number of the input line being processed
my $recordnumber = 0;    # number of ER tags seen so far

# All fields start out as empty strings/lists so that a tag that never occurs
# is printed as an empty value.
my ($genre, $sourcegenre) = ("", "");
my ($title, $alternativetitle, $sourcetitle) = ("", "", "");
my ($date, $abstract) = ("", "");
my ($sourcevolume, $sourceissue) = ("", "");
my ($sourcestartpage, $sourceendpage, $totalnumberofpages) = ("", "", "");
my ($publishinginfoplace, $publishinginfopublisher, $publishinginfoedition) = ("", "", "");
my ($sourcepublishinginfoplace, $sourcepublishinginfopublisher, $sourcepublishinginfoedition) = ("", "", "");
my (@id, @idtype, @sourceid, @sourceidtype);
my (@creatornames, @creatorroles, @sourcecreatornames, @sourcecreatorroles);
my (@creatororganizationaddress, @sourcealternativetitle, @subject, @location);

# RIS reference type -> genre. Keys are the prefixes that are compared against
# the TY value: four characters for ABST/ADVS, three for everything else.
# The original TY value itself is not kept.
my %genre_for_type = (
	ABST => "Other",
	ADVS => "Other",
	ART  => "Other",
	BIL  => "Other",
	BOO  => "Book",
	CAS  => "Other",
	CHA  => "Book Item",
	COM  => "Other",
	CON  => "Proceedings",
	CTL  => "Other",
	DAT  => "Other",
	ELE  => "Other",
	GEN  => "Other",
	HEA  => "Other",
	ICO  => "Other",
	INP  => "Other",
	JFU  => "Journal",
	JOU  => "Article",
	MAP  => "Other",
	MGZ  => "Article",
	MPC  => "Other",
	MUS  => "Other",
	NEW  => "Article",
	PAM  => "Other",
	PAT  => "Other",
	PCO  => "Other",
	RPR  => "Report",
	SER  => "Series",
	SLI  => "Other",
	SOU  => "Other",
	STA  => "Other",
	THE  => "Thesis",
	UNB  => "Other",
	UNP  => "Other",
	VID  => "Other",
);

# Reference types that also determine the genre of the containing source.
my %sourcegenre_for_type = (
	CHA => "Book",
	JOU => "Journal",
	MGZ => "Series",
	NEW => "Series",
);

# Fail loudly instead of silently producing an output file of empty fields.
open(RIS, "<", "ris.ris")     or die "Cannot open ris.ris for reading: $!\n";
open(ESD, ">", "escidoc.txt") or die "Cannot open escidoc.txt for writing: $!\n";

while (defined(my $ris = <RIS>)) {
	$zeile++;
	chomp($ris);
	$ris =~ s/\r\z//;    # RIS files often use CRLF; keep the CR out of the values

	my $tag   = substr($ris, 0, 2);
	my $value = length($ris) > 6 ? substr($ris, 6) : "";

	if ($tag ne "TY" and $zeile == 1) {
		# A RIS file has to start with a TY line.
		print "Keine gueltige RIS-Datei.\n";
	}
	elsif ($tag eq "TY") {
		my $type4 = substr($value, 0, 4);
		my $type3 = substr($value, 0, 3);
		my $type  = exists $genre_for_type{$type4} ? $type4
		          : exists $genre_for_type{$type3} ? $type3
		          :                                  undef;
		if (defined $type) {
			$genre = $genre_for_type{$type};
			$sourcegenre = $sourcegenre_for_type{$type} if exists $sourcegenre_for_type{$type};
		}
		else {
			print "Keine gueltige RIS-Datei.\n";
		}
	}
	elsif ($tag eq "ER") { $recordnumber++ }
	elsif ($tag eq "ID") {
		# TODO: syntax recognition to specify the idType; default = Other
		push(@idtype, "Other");
		push(@id, $value);
	}
	elsif ($tag eq "T1" or $tag eq "TI" or $tag eq "CT") { $title = $value }
	# BT is the title of the item itself for stand-alone genres, and the title
	# of the source for genres that are part of something else.
	elsif ($tag eq "BT" and $genre =~ /Book$|Proceedings|Thesis|Journal|Series|Other/) { $title = $value }
	elsif ($tag eq "BT" and $genre =~ /Article|Book Item|Conference Paper|Talk at event|Conference Report|Poster|Courseware\/Lecture|^Paper|Issue|Manuscript/) { $sourcetitle = $value }
	elsif ($tag eq "T2") { $alternativetitle = $value }
	elsif ($tag eq "T3") { $sourcetitle = $value }
	elsif ($tag eq "AU" or $tag eq "A1") {
		push(@creatornames, $value);
		push(@creatorroles, "Author");
	}
	elsif ($tag eq "A2" or $tag eq "ED") {
		push(@creatornames, $value);
		push(@creatorroles, "Contributor");
	}
	elsif ($tag eq "A3") {
		push(@sourcecreatornames, $value);
		push(@sourcecreatorroles, "Author");
	}
	# Dates are copied verbatim; they still need to be converted into the
	# correct format. Y2 is only used when no other date has been seen yet.
	elsif ($tag eq "Y1" or $tag eq "PY") { $date = $value }
	elsif ($tag eq "Y2" and $date eq "") { $date = $value }
	# AB is not officially used for abstracts, but some retrieval systems do
	# so. Whichever of AB/N2 comes last in the file wins.
	elsif ($tag eq "AB" or $tag eq "N2") { $abstract = $value }
	elsif ($tag eq "KW") { push(@subject, $value) }
	elsif ($tag eq "JF" or $tag eq "JO") {
		$sourcegenre = "Journal";
		if ($sourcetitle eq "") { $sourcetitle = $value }
		else                    { push(@sourcealternativetitle, $value) }
	}
	elsif ($tag eq "JA" or $tag eq "J1" or $tag eq "J2") { push(@sourcealternativetitle, $value) }
	elsif ($tag eq "VL" and $genre ne "Book") { $sourcevolume = $value }
	elsif ($tag eq "VL" and $genre eq "Book") { $publishinginfoedition = $value }
	# The edition of a stand-alone work belongs to the item itself; for every
	# other genre it describes the source. (These two targets used to be
	# swapped, which contradicted the VL, CY, PB and SN handling in this chain.)
	elsif ($tag eq "ET" and $genre =~ /Book$|Thesis|Proceedings|^Report/) { $publishinginfoedition = $value }
	elsif ($tag eq "ET") { $sourcepublishinginfoedition = $value }
	elsif ($tag eq "IS" or $tag eq "CP") { $sourceissue = $value }
	elsif ($tag eq "SP") {
		# SP may hold either a start page or a page count; which one it is
		# gets decided after the loop, once it is known whether EP and a
		# source title exist.
		$sourcestartpage    = $value;
		$totalnumberofpages = $value;
	}
	elsif ($tag eq "EP") { $sourceendpage = $value }
	elsif ($tag eq "CY" and $genre =~ /Article|^Paper|Issue|Other|Conference Paper|Book Item/) { $sourcepublishinginfoplace = $value }
	elsif ($tag eq "CY") { $publishinginfoplace = $value }
	elsif ($tag eq "PB" and $genre =~ /Article|^Paper|Issue|Other|Conference Paper|Book Item/) { $sourcepublishinginfopublisher = $value }
	elsif ($tag eq "PB") { $publishinginfopublisher = $value }

	# The SN field probably needs to be scanned to determine its format
	# (ISSN vs. ISBN); for now the format is guessed from the genre.
	elsif ($tag eq "SN" and $genre =~ /Journal|Series/) {
		push(@id, $value);
		push(@idtype, "ISSN");
	}
	elsif ($tag eq "SN" and $genre =~ /Book$|Thesis|Proceedings|^Report/) {
		push(@id, $value);
		push(@idtype, "ISBN");
	}
	elsif ($tag eq "SN" and $genre =~ /Article|^Paper|Issue|Other/) {
		push(@sourceid, $value);
		push(@sourceidtype, "ISSN");
	}
	elsif ($tag eq "SN" and $genre =~ /Conference Paper|Book Item/) {
		push(@sourceid, $value);
		push(@sourceidtype, "ISBN");
	}
	elsif ($tag eq "AD") { push(@creatororganizationaddress, $value) }
	elsif ($tag eq "AV") { push(@location, "Availability: " . $value) }
	elsif ($tag eq "UR" or $tag eq "L1" or $tag eq "L2") {
		# TODO (L1/L2): check for previous URI-type ids
		push(@id, $value);
		push(@idtype, "URI");
	}
	elsif ($tag =~ /\A(?:N1|RP|M[123]|U[1-5]|L[34])\z/) {
		# Notes (N1), reprint status (RP), miscellaneous and custom fields
		# (M1-M3, U1-U5), related records (L3) and images (L4) are not
		# supported in this mapping.
	}
}
close(RIS);

# ris2escidoc, phase 2: write the fields collected from the RIS file to the
# already opened ESD handle as "Key=value" lines, one per line, in a fixed
# order. Keys without a mapping are written with an empty value.

# SP was stored both as start page and as total number of pages. Without a
# source title the start page is dropped; with an end page the page count is.
if ($sourcetitle eq "" and $totalnumberofpages ne "") { $sourcestartpage = "" }
if ($sourceendpage ne "") { $totalnumberofpages = "" }

print ESD "Genre=" . $genre . "\n";

# Creators: role and name are kept in two parallel lists.
# Organization is not supported in this mapping.
for my $i (0 .. $#creatorroles) {
	print ESD "Creator.CreatorRole=$creatorroles[$i]\n";
	print ESD "Creator.Person.CompleteName=$creatornames[$i]\n";
}
for my $address (@creatororganizationaddress) {
	print ESD "Creator.Person.Organization.Address=$address\n";
}

print ESD "Title=$title\n";
print ESD "Language=\n";    # empty
print ESD "AlternativeTitle=$alternativetitle\n";

# Identifiers: type and value are kept in two parallel lists.
for my $i (0 .. $#idtype) {
	print ESD "Identifier.IdType=$idtype[$i]\n";
	print ESD "Identifier.Id=$id[$i]\n";
}

print ESD "PublishingInfo.Publisher=$publishinginfopublisher\n";
print ESD "PublishingInfo.Place=$publishinginfoplace\n";
print ESD "PublishingInfo.Edition=$publishinginfoedition\n";

# A source that has a title but no known genre defaults to "Series".
if ($sourcetitle ne "" and $sourcegenre eq "") { $sourcegenre = "Series" }

print ESD "Date.Date=$date\n";
print ESD "Date.DateType=\n";    # empty
print ESD "ReviewMethod=\n";     # empty
print ESD "Source.Genre=$sourcegenre\n";
print ESD "Source.Title=$sourcetitle\n";
for my $alternative (@sourcealternativetitle) {
	print ESD "Source.AlternativeTitle=$alternative\n";
}
for my $i (0 .. $#sourcecreatorroles) {
	print ESD "Source.Creator.Person.CompleteName=$sourcecreatornames[$i]\n";
	print ESD "Source.Creator.CreatorRole=$sourcecreatorroles[$i]\n";
}
print ESD "Source.Volume=$sourcevolume\n";
print ESD "Source.Issue=$sourceissue\n";
print ESD "Source.StartPage=$sourcestartpage\n";
print ESD "Source.EndPage=$sourceendpage\n";
print ESD "Source.SequenceNumber=\n";    # empty
print ESD "Source.PublishingInfo.Place=$sourcepublishinginfoplace\n";
print ESD "Source.PublishingInfo.Publisher=$sourcepublishinginfopublisher\n";
print ESD "Source.PublishingInfo.Edition=$sourcepublishinginfoedition\n";
for my $i (0 .. $#sourceidtype) {
	print ESD "Source.Identifier.IdType=$sourceidtype[$i]\n";
	print ESD "Source.Identifier.Id=$sourceid[$i]\n";
}

print ESD "Event=\n";    # empty
print ESD "TotalNumberOfPages=$totalnumberofpages\n";
print ESD "Degree=\n";   # empty
print ESD "Abstract=$abstract\n";

# Multi-valued fields are written on one line: subjects separated by ";",
# locations by "; ".
print ESD "Subject=" . join(";", @subject) . "\n";
print ESD "TableOfContents=\n";    # empty

# The Location line is terminated like every other line, so the output file
# no longer ends in the middle of a line.
print ESD "Location=" . join("; ", @location) . "\n";

# Buffered write errors only surface when the handle is closed.
close(ESD) or die "Cannot finish writing escidoc.txt: $!\n";