<sourcesyntaxhighlight lang="perl">
#!/usr/bin/perl -i.old
#Modify svg file from BKchem to work with librsvg, for use on Wikimedia.
use Image::Magick;
# original source: https://en.wikipedia.org/wiki/Wikipedia:WikiProject_Chemistry/Structure_drawing_workgroup/Mysid%27s_script
#Does the input file exist?
if(! -f $ARGV[0])
{print "$ARGV[0]: File not found\n";
exit};
#Open files. TODO: check that output file is writeable.
open INPUT,"<$ARGV[0]";
open OUTPUT,">$ARGV[0]_tmp.svg";
# Note: "-i.old" means original file will be renamed to <filename>.old
#initialize variables for minimum an maximum image size.
# Modify svg file from BKchem to work with librsvg, for use on Wikimedia.
$miny=-1;
$maxy=0;
use warnings;
$minx=-1;
use strict;
$maxx=0;
while (<INPUT>) {
my $font_size = 24;
#I must skip over "defs" blocks, because they have objects with relative
#dimensions. TODO:This is nasty code. I'm sure there's a better way to do
while (<>) {
#this.
if (/defs/) {
print OUTPUT $_;
my $line = $_;
while (!($line =~ /\/defs/)){
$line = <INPUT>;
print OUTPUT $line;
}
next;
}
#Sans is the most general font definition we can use, and librsvg chokes
#on Helvetica.
s/helvetica/Sans/gi;
if(m#<text[^>]+font-size="([0-9]+)pt"#){
#Figure out max and min coordinates for later.
$font_size = $1;
if (!/viewBox/ && /\d/) {
#We're already matching digits, might as well round the numbers here.
s/(\d+\.\d+)/sprintf("%.1f", $&)/ge unless(/version/);
#and fix font sizes
#s/(\d+)pt/$1*4 ."pt"/ge; # commented out, doesn't seem to be needed with "sans".
s/75\%/9pt/g;
#now let's find the approximate max and min coords, so the crop algorithm
#runs faster:
#Match "x1=" and "y1=", but don't match on x radius and y radius.
while(/[^r]([xy])[12]{0,1}="(\d+)/g) {
if($1 eq "y"){
$maxy = $2 if($2>$maxy);
$miny = $2 if(($2<$miny) || ($miny<0));
}
if($1 eq "x"){
$maxx = $2 if($2>$maxx);
$minx = $2 if(($2<$minx) || ($minx<0));
}
}
#Match on comma-separated coordinates
while(/(\d+),(\d+)/g) {
$maxy = $2 if($2>$maxy);
$miny = $2 if(($2<$miny) || ($miny<0));
$maxx = $1 if($1>$maxx);
$minx = $1 if(($1<$minx) || ($minx<0));
}
#Match on space-separated coordinates, but not when preceded
#by a comma.
while(/[^\d,.]([\d.]+)\s+([\d.]+)/g){
$maxy = $2 if($2>$maxy);
$miny = $2 if(($2<$miny) || ($miny<0));
$maxx = $1 if($1>$maxx);
$minx = $1 if(($1<$minx) || ($minx<0));
}
}
# Convert relative font sizes to absolute font sizes
#Put in prelim. coordinates.
s#(font-size=")([0-9]+)%(")#$1.($2*$font_size/100)."pt".$3#ge;
if(/<svg .*viewBox="0 0 (\d+) (\d+)"/) {
# Replace baseline-shift="super" with a numeric baseline-shift
$_ = "<svg width=\"".$1."px\" height=\"".$2."px\" version=\"1.0\" viewBox=\"0 0 $1 $2\" xmlns=\"http://www.w3.org/2000/svg\">\n";
if (/y="([\d\.]+)">.*<tspan baseline-shift="super"/) {
my $vy = $1;
my $oy = $1-4;
s/baseline-shift="super"/y="$oy"/g;
s#</tspan>([^<]+)<#</tspan><tspan y="$vy">$1</tspan><#g;
}
# Replace baseline-shift="sub" with a numeric baseline-shift
if (/y="([\d\.]+)">.*<tspan baseline-shift="sub"/) {
my $vy = $1;
my $oy = $1+3.25;
s/baseline-shift="sub"/y="$oy"/g;
s/#<\/tspan>([^<]+)</#<\/tspan><tspan y="$vy">$1<\/tspan></#g;
}
#write each line out after mangling it.
print OUTPUT $_;
}
#close the files (done with the original input file, output file becomes
#input for next pass.
close OUTPUT;
close INPUT;
#If you have a file called tmp.png, now's the time to say goodbye.
unlink("tmp.png");
print STDERR "bitmap...\n";
#use librsvg, because that's what Wikimedia uses.
system("rsvg-convert -o tmp.png $ARGV[0]_tmp.svg");
$img = Image::Magick->new;
$x = $img->ReadImage("tmp.png");
warn "$x" if "$x";
$he = $img->Get('height');
$wi = $img->Get('width');
#Blur the image so that we don't miss the edge later on, and to make a nice
#border
$x = $img->Blur(geometry=>'5.0x5.0', sigma=>5.0, radius=>5.0, channel=>"All");
print "$x\n";
$img->Write(filename=>'test.png');
print STDERR "crop...\n";
$sumpix=0;
#read the file one pixel row at a time, always starting at the edge value
#detected, going outwards until we hit a completely empty line (pure
#transparent pixels) This seems to work for all BKchem files, but would of
#course fail for general SVG files.
for ($y=$maxy; $y<$he; $y++) {
@row = $img->GetPixels(width=>$wi, height=>1, x=>1, y=>$y, map=>'RGBA');
foreach(@row){$sumpix+=$_;}
if (!$sumpix) {
$maxy=$y;
last;
}
#Have to reset the sum each time, because we are reading non-zero data until we
#hit the edge.
else {
$sumpix=0;
}
}
$sumpix=0;
for ($y=$miny; $y>0; $y--) {
@row = $img->GetPixels(width=>$wi, height=>1, x=>1, y=>$y, map=>'RGBA');
foreach(@row){$sumpix+=$_;}
if (!$sumpix) {
$miny=$y;
last;
}
else {
$sumpix=0;
}
}
$sumpix=0;
for ($x=$minx; $x>0; $x--) {
@col = $img->GetPixels(width=>1, height=>$he, x=>$x, y=>1, map=>'RGBA');
foreach(@col){$sumpix+=$_;}
if (!$sumpix) {
$minx=$x;
last;
}
else {
$sumpix=0;
}
}
$sumpix=0;
for ($x=$maxx; $x<$wi; $x++) {
@col = $img->GetPixels(width=>1, height=>$he, x=>$x, y=>1, map=>'RGBA');
foreach(@col){$sumpix+=$_;}
if (!$sumpix) {
$maxx=$x;
last;
}
else {
$sumpix=0;
}
}
$w=($maxx-$minx);
$h=($maxy-$miny);
$outfilename = $ARGV[0];
$outfilename =~ s/.svg/2.svg/;
open OUTPUT,">$outfilename";
open INPUT,"<$ARGV[0]_tmp.svg";
#$firstgroupflag=0;
while (<INPUT>) {
if(/<svg .*viewBox="0 0 \d+ \d+"/) {
$_ = "<svg width=\"".$w."\" height=\"".$h."\" version=\"1.0\" viewBox=\"0 0 $w $h\"
xmlns=\"http://www.w3.org/2000/svg\">\n";
}
#Have to move the diagram into the view box. A simple translation works
#well. There is a group which encompasses all the tags, so put it there.
if (!$firstgroupflag && /<g/) {
s/<g/<g transform=\"translate(-$minx,-$miny)\"/;
#Only do it once!
$firstgroupflag = 1 ;
}
#Take out redundant groups.
if (/<\/g>/) {
my $line = $_;
$nextline = <INPUT>;
if (!($nextline =~ /<g stroke="\#000000" stroke-width="[\d.]+">\s/ )) {
print OUTPUT $line;
print OUTPUT $nextline;
}
next;
}
print OUTPUT $_;
}
</syntaxhighlight>
#clean up
close OUTPUT;
unlink($ARGV[0]."_tmp.svg");
print "$outfilename written\n";
</source>
|