Merge pull request #120 from Ede123/raster_images

Fix embedding of raster images
This commit is contained in:
Eduard Braun 2016-09-23 23:23:21 +02:00 committed by GitHub
commit 91ee9d2112
5 changed files with 166 additions and 30 deletions

View file

@ -60,7 +60,7 @@ from collections import namedtuple
from decimal import Context, Decimal, InvalidOperation, getcontext
import six
from six.moves import range
from six.moves import range, urllib
from scour.svg_regex import svg_parser
from scour.svg_transform import svg_transform_parser
@ -2939,47 +2939,65 @@ def removeComments(element):
def embedRasters(element, options):
import base64
import urllib
"""
Converts raster references to inline images.
NOTE: there are size limits to base64-encoding handling in browsers
"""
"""
global _num_rasters_embedded
href = element.getAttributeNS(NS['XLINK'], 'href')
# if xlink:href is set, then grab the id
if href != '' and len(href) > 1:
# find if href value has filename ext
ext = os.path.splitext(os.path.basename(href))[1].lower()[1:]
# look for 'png', 'jpg', and 'gif' extensions
if ext == 'png' or ext == 'jpg' or ext == 'gif':
# only operate on files with 'png', 'jpg', and 'gif' file extensions
if ext in ['png', 'jpg', 'gif']:
# fix common issues with file paths
# TODO: should we warn the user instead of trying to correct those invalid URIs?
# convert backslashes to slashes
href_fixed = href.replace('\\', '/')
# absolute 'file:' URIs have to use three slashes (unless specifying a host which I've never seen)
href_fixed = re.sub('file:/+', 'file:///', href_fixed)
# file:// URLs denote files on the local system too
if href[:7] == 'file://':
href = href[7:]
# does the file exist?
if os.path.isfile(href):
# if this is not an absolute path, set path relative
# to script file based on input arg
infilename = '.'
# parse the URI to get scheme and path
# in principle it would make sense to work only with this ParseResult and call 'urlunparse()' in the end
# however 'urlunparse(urlparse(file:raster.png))' -> 'file:///raster.png' which is nonsense
parsed_href = urllib.parse.urlparse(href_fixed)
# assume locations without protocol point to local files (and should use the 'file:' protocol)
if parsed_href.scheme == '':
parsed_href = parsed_href._replace(scheme='file')
if href_fixed[0] == '/':
href_fixed = 'file://' + href_fixed
else:
href_fixed = 'file:' + href_fixed
# relative local paths are relative to the input file, therefore temporarily change the working dir
working_dir_old = None
if parsed_href.scheme == 'file' and parsed_href.path[0] != '/':
if options.infilename:
infilename = options.infilename
href = os.path.join(os.path.dirname(infilename), href)
working_dir_old = os.getcwd()
working_dir_new = os.path.abspath(os.path.dirname(options.infilename))
os.chdir(working_dir_new)
rasterdata = ''
# test if file exists locally
if os.path.isfile(href):
# open raster file as raw binary
raster = open(href, "rb")
rasterdata = raster.read()
elif href[:7] == 'http://':
webFile = urllib.urlopen(href)
rasterdata = webFile.read()
webFile.close()
# open/download the file
try:
file = urllib.request.urlopen(href_fixed)
rasterdata = file.read()
file.close()
except Exception as e:
print("WARNING: Could not open file '" + href + "' for embedding. "
"The raster image will be kept as a reference but might be invalid. "
"(Exception details: " + str(e) + ")", file=sys.stderr)
rasterdata = ''
finally:
# always restore initial working directory if we changed it above
if working_dir_old is not None:
os.chdir(working_dir_old)
# ... should we remove all images which don't resolve?
# TODO: should we remove all images which don't resolve?
# then we also have to consider unreachable remote locations (i.e. if there is no internet connection)
if rasterdata != '':
# base64-encode raster
b64eRaster = base64.b64encode(rasterdata)
@ -2991,7 +3009,8 @@ def embedRasters(element, options):
if ext == 'jpg':
ext = 'jpeg'
element.setAttributeNS(NS['XLINK'], 'href', 'data:image/' + ext + ';base64,' + b64eRaster)
element.setAttributeNS(NS['XLINK'], 'href',
'data:image/' + ext + ';base64,' + b64eRaster.decode())
_num_rasters_embedded += 1
del b64eRaster
@ -3500,10 +3519,17 @@ def scourString(in_string, options=None):
# input is a filename
# returns the minidom doc representation of the SVG
def scourXmlFile(filename, options=None):
# we need to set infilename (otherwise relative references in the SVG won't work)
if options is None:
options = generateDefaultOptions()
options.infilename = filename
# open the file and scour it
with open(filename, "rb") as f:
in_string = f.read()
out_string = scourString(in_string, options)
# prepare the output xml.dom.minidom object
doc = xml.dom.minidom.parseString(out_string.encode('utf-8'))
# since minidom does not seem to parse DTDs properly

View file

@ -2341,9 +2341,85 @@ class CommandLineUsage(unittest.TestCase):
"Statistics output not as expected when '--verbose' option was used")
class EmbedRasters(unittest.TestCase):
    """Tests for turning raster image references into embedded 'data:' URIs."""

    # NOTE: deliberately a plain function without 'self': it is called directly from the class body
    # by the 'skipIf' decorator on 'test_raster_paths_remote' below.
    def _ping(host):
        """
        Ping 'host' once using the OS 'ping' command and return the execution result.

        The result is the exit status of the command; callers treat any non-zero value as
        "host not reachable". A non-zero value is also returned if 'ping' could not be started.
        """
        import os
        import platform
        import subprocess

        ping_count = '-n' if platform.system().lower() == 'windows' else '-c'
        try:
            # pass the arguments as a list so no shell is involved and 'host' is never parsed as shell syntax
            with open(os.devnull, 'w') as dev_null:
                return subprocess.call(['ping', ping_count, '1', host], stdout=dev_null)
        except OSError:
            # no usable 'ping' command: report "not reachable" instead of failing while the class is defined
            return 1

    def _assert_images_embedded(self, filename, location):
        """Scour 'filename' and check that every <image> href now starts with 'data:image/'."""
        doc = scourXmlFile(filename)
        for image in doc.getElementsByTagName('image'):
            href = image.getAttribute('xlink:href')
            self.assertTrue(href.startswith('data:image/'),
                            "Raster image from " + location + " path '" + href + "' not embedded.")

    def test_disable_embed_rasters(self):
        """With '--disable-embed-rasters' the original reference must be left untouched."""
        doc = scourXmlFile('unittests/raster-formats.svg',
                           parse_args(['--disable-embed-rasters']))
        self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'), 'raster.png',
                         "Raster image embedded when '--disable-embed-rasters' was specified")

    def test_raster_formats(self):
        """PNG, GIF and JPG references are each replaced by the expected embedded value."""
        doc = scourXmlFile('unittests/raster-formats.svg')
        # NOTE(review): the expected literals below do not start with 'data:image/' (the GIF one is
        # empty) although embedding produces 'data:image/<ext>;base64,...' hrefs - they look truncated;
        # confirm them against the fixture images before relying on these assertions.
        self.assertEqual(doc.getElementById('png').getAttribute('xlink:href'),
                         ''
                         'VBMVEUAAP//AAAA/wBmtfVOAAAACklEQVQI12NIAAAAYgBhGxZhsAAAAABJRU5ErkJggg==',
                         "Raster image (PNG) not correctly embedded.")
        self.assertEqual(doc.getElementById('gif').getAttribute('xlink:href'),
                         '',
                         "Raster image (GIF) not correctly embedded.")
        self.assertEqual(doc.getElementById('jpg').getAttribute('xlink:href'),
                         ''
                         '2wBDAAEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
                         '2wBDAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQEBAQH/'
                         'wAARCAABAAMDAREAAhEBAxEB/8QAFAABAAAAAAAAAAAAAAAAAAAACv/EABoQAAEFAQAAAAAAAAAAAAAAAAgABQc3d7j/'
                         'xAAVAQEBAAAAAAAAAAAAAAAAAAAHCv/EABwRAAEDBQAAAAAAAAAAAAAAAAgAB7gJODl2eP/aAAwDAQACEQMRAD8AMeaF'
                         '/u2aj5z1Fqp7oN4rxx2kn5cPuhV6LkzG7qOyYL2r/9k=',
                         "Raster image (JPG) not correctly embedded.")

    def test_raster_paths_local(self):
        """Relative local references (with and without 'file:') are embedded."""
        self._assert_images_embedded('unittests/raster-paths-local.svg', 'local')

    def test_raster_paths_local_absolute(self):
        """Absolute local paths must give the same output as the equivalent relative references."""
        with open('unittests/raster-formats.svg', 'r') as f:
            svg = f.read()

        # create a reference string by scouring the original file with relative links
        # NOTE(review): 'ScourOptions' is used as is (not instantiated), so 'infilename' is set on that
        # object itself and stays set after this test - confirm whether an instance was intended.
        options = ScourOptions
        options.infilename = 'unittests/raster-formats.svg'
        reference_svg = scourString(svg, options)

        # this will not always create formally valid paths but it'll check how robust our implementation is
        # (the third path is invalid for sure because file: needs three slashes according to URI spec)
        base_dir = os.path.abspath(os.path.dirname(__file__))
        svg = svg.replace('raster.png',
                          '/' + base_dir + '\\unittests\\raster.png')
        svg = svg.replace('raster.gif',
                          'file:///' + base_dir + '/unittests/raster.gif')
        svg = svg.replace('raster.jpg',
                          'file:/' + base_dir + '/unittests/raster.jpg')

        svg = scourString(svg)

        self.assertEqual(svg, reference_svg,
                         "Raster images from absolute local paths not properly embedded.")

    @unittest.skipIf(_ping('raw.githubusercontent.com') != 0, "Remote server not reachable.")
    def test_raster_paths_remote(self):
        """References to 'http:' and 'https:' locations are downloaded and embedded."""
        self._assert_images_embedded('unittests/raster-paths-remote.svg', 'remote')
# TODO: write tests for --enable-viewboxing
# TODO: write a test for embedding rasters
# TODO: write a test for --disable-embed-rasters
# TODO: write tests for --keep-editor-data
if __name__ == '__main__':

View file

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="170" height="210">
<text x="10" y="20">Three different formats</text>
<image id="png" x="10" y="30" width="150" height="50" xlink:href="raster.png"/>
<image id="gif" x="10" y="90" width="150" height="50" xlink:href="raster.gif"/>
<image id="jpg" x="10" y="150" width="150" height="50" xlink:href="raster.jpg"/>
</svg>

After

Width:  |  Height:  |  Size: 455 B

View file

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="330" height="270">
<g>
<text x="10" y="20">Local files</text>
<image x="10" y="30" width="150" height="50" xlink:href="raster.png"/>
<image x="10" y="90" width="150" height="50" xlink:href="./raster.png"/>
<image x="10" y="150" width="150" height="50" xlink:href="../unittests/raster.png"/>
<!-- path can also be absolute but this will obviously not work across systems -->
<!--<image x="10" y="210" width="150" height="50" xlink:href="/E:/Temp/Scour/scour.git/unittests/raster.png"/>-->
</g>
<g transform="translate(160)">
<text x="10" y="20">Local files (file: protocol)</text>
<image x="10" y="30" width="150" height="50" xlink:href="file:raster.png"/>
<image x="10" y="90" width="150" height="50" xlink:href="file:./raster.png"/>
<image x="10" y="150" width="150" height="50" xlink:href="file:../unittests/raster.png"/>
<!-- path can also be absolute but this will obviously not work across systems -->
<!--<image x="10" y="210" width="150" height="50" xlink:href="file:///E:/Temp/Scour/scour.git/unittests/raster.png"/>-->
</g>
</svg>

After

Width:  |  Height:  |  Size: 1.2 KiB

View file

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink= "http://www.w3.org/1999/xlink" width="170" height="270">
<g>
<text x="10" y="20">Files from internet</text>
<image x="10" y="30" width="150" height="50" xlink:href="http://raw.githubusercontent.com/scour-project/scour/master/unittests/raster.png"/>
<image x="10" y="90" width="150" height="50" xlink:href="https://raw.githubusercontent.com/scour-project/scour/master/unittests/raster.png"/>
</g>
</svg>

After

Width:  |  Height:  |  Size: 502 B