Wikipedia:WikiProject Chemistry/Structure drawing workgroup/Mysid's script: Difference between revisions

Content deleted Content added
Remove cropping code (better to do it in BKchem)
Line 1:
<source lang="perl">
#!/usr/bin/perl
#Modify svg file from BKchem to work with librsvg, for use on Wikimedia.
use Image::Magick;
Line 9 ⟶ 10:
#Open files. TODO: check that output file is writeable.
open INPUT,"<$ARGV[0]";
open$outfile1 = OUTPUT,">$ARGV[0]_tmp.svg";
$outfile1 =~ s/.svg/1.svg/;
open OUTPUT,">$outfile1";
 
#initialize variables for minimum an maximum image size.
$miny=-1;
$maxy=0;
$minx=-1;
$maxx=0;
while (<INPUT>) {
#I must skip over "defs" blocks, because they have objects with relative
Line 32 ⟶ 30:
#on Helvetica.
s/helvetica/Sans/gi;
#Figure out max and min coordinates for later.
if (!/viewBox/ && /\d/) {
#We're already matching digits, might as well roundRound the numbers here.
s/(\d+\.\d+)/sprintf("%.1f", $&)/ge unless(/version/);
#and fix font sizes for super- and subscripts
#s/(\d+)pt/$1*4 ."pt"/ge; # commented out, doesn't seem to be needed with "sans".
s/75\%/9pt/g;
#now let's find the approximate max and min coords, so the crop algorithm
#runs faster:
#Match "x1=" and "y1=", but don't match on x radius and y radius.
while(/[^r]([xy])[12]{0,1}="(\d+)/g) {
if($1 eq "y"){
$maxy = $2 if($2>$maxy);
$miny = $2 if(($2<$miny) || ($miny<0));
}
if($1 eq "x"){
$maxx = $2 if($2>$maxx);
$minx = $2 if(($2<$minx) || ($minx<0));
}
}
#Match on comma-separated coordinates
while(/(\d+),(\d+)/g) {
$maxy = $2 if($2>$maxy);
$miny = $2 if(($2<$miny) || ($miny<0));
$maxx = $1 if($1>$maxx);
$minx = $1 if(($1<$minx) || ($minx<0));
}
#Match on space-separated coordinates, but not when preceded
#by a comma.
while(/[^\d,.]([\d.]+)\s+([\d.]+)/g){
$maxy = $2 if($2>$maxy);
$miny = $2 if(($2<$miny) || ($miny<0));
$maxx = $1 if($1>$maxx);
$minx = $1 if(($1<$minx) || ($minx<0));
}
}
#Put in prelim. coordinates.
if(/<svg .*viewBox="0 0 (\d+) (\d+)"/) {
$_ = "<svg width=\"".$1."px\" height=\"".$2."px\" version=\"1.0\" viewBox=\"0 0 $1 $2\" xmlns=\"http://www.w3.org/2000/svg\">\n";
}
# Replace baseline-shift="super" with a numeric baseline-shift
if (/y="([\d\.]+)">.*<tspan baseline-shift="super"/) {
Line 94 ⟶ 58:
close INPUT;
 
#If you have a file called tmp.png, now's the time to say goodbye.
unlink("tmp.png");
print STDERR "bitmap...\n";
#use librsvg, because that's what Wikimedia uses.
system("rsvg-convert -o tmp.png $ARGV[0]_tmp.svg");
 
open INPUT,"<$outfile1";
$img = Image::Magick->new;
$outfile2 = $ARGV[0];
$x = $img->ReadImage("tmp.png");
$outfile2 =~ s/.svg/2.svg/;
warn "$x" if "$x";
open OUTPUT,">$outfile2";
$he = $img->Get('height');
$wi = $img->Get('width');
#Blur the image so that we don't miss the edge later on, and to make a nice
#border
$x = $img->Blur(geometry=>'5.0x5.0', sigma=>5.0, radius=>5.0, channel=>"All");
print "$x\n";
$img->Write(filename=>'test.png');
 
 
print STDERR "crop...\n";
 
$sumpix=0;
#read the file one pixel row at a time, always starting at the edge value
#detected, going outwards until we hit a completely empty line (pure
#transparent pixels) This seems to work for all BKchem files, but would of
#course fail for general SVG files.
for ($y=$maxy; $y<$he; $y++) {
@row = $img->GetPixels(width=>$wi, height=>1, x=>1, y=>$y, map=>'RGBA');
foreach(@row){$sumpix+=$_;}
if (!$sumpix) {
$maxy=$y;
last;
}
#Have to reset the sum each time, because we are reading non-zero data until we
#hit the edge.
else {
$sumpix=0;
}
}
$sumpix=0;
for ($y=$miny; $y>0; $y--) {
@row = $img->GetPixels(width=>$wi, height=>1, x=>1, y=>$y, map=>'RGBA');
foreach(@row){$sumpix+=$_;}
if (!$sumpix) {
$miny=$y;
last;
}
else {
$sumpix=0;
}
}
$sumpix=0;
for ($x=$minx; $x>0; $x--) {
@col = $img->GetPixels(width=>1, height=>$he, x=>$x, y=>1, map=>'RGBA');
foreach(@col){$sumpix+=$_;}
if (!$sumpix) {
$minx=$x;
last;
}
else {
$sumpix=0;
}
}
$sumpix=0;
for ($x=$maxx; $x<$wi; $x++) {
@col = $img->GetPixels(width=>1, height=>$he, x=>$x, y=>1, map=>'RGBA');
foreach(@col){$sumpix+=$_;}
if (!$sumpix) {
$maxx=$x;
last;
}
else {
$sumpix=0;
}
}
 
$w=($maxx-$minx);
$h=($maxy-$miny);
 
$outfilename = $ARGV[0];
$outfilename =~ s/.svg/2.svg/;
open OUTPUT,">$outfilename";
 
open INPUT,"<$ARGV[0]_tmp.svg";
#$firstgroupflag=0;
while (<INPUT>) {
if(/<svg .*viewBox="0 0 \d+ \d+"/) {
$_ = "<svg width=\"".$w."\" height=\"".$h."\" version=\"1.0\" viewBox=\"0 0 $w $h\"
xmlns=\"http://www.w3.org/2000/svg\">\n";
}
#Have to move the diagram into the view box. A simple translation works
#well. There is a group which encompasses all the tags, so put it there.
if (!$firstgroupflag && /<g/) {
s/<g/<g transform=\"translate(-$minx,-$miny)\"/;
#Only do it once!
$firstgroupflag = 1 ;
}
#Take out redundant groups.
if (/<\/g>/) {
Line 198 ⟶ 72:
print OUTPUT $nextline;
}
} else next;{
print OUTPUT $_;}
print OUTPUT $_;
}
#clean up
close OUTPUT;
close INPUT;
unlink($ARGV[0]."_tmp.svg");
unlink("$outfile1");
print "$outfilename written\n";
print "$outfile2 written\n";
</source>