Fixed scour to handle entities in url references

This commit is contained in:
JSCHILL1 2009-05-17 23:07:22 -05:00
parent a03439573e
commit 9375bd69a9
9 changed files with 1973 additions and 13 deletions

View file

@ -2,6 +2,7 @@
mkdir $1 mkdir $1
for FILE in `ls fulltests` for FILE in `ls fulltests`
do do
echo Doing $FILE:
./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt ./scour.py -i fulltests/$FILE -o $1/$FILE >> $1/report.txt
done done

1100
fulltests/Web20Map.svg Normal file

File diff suppressed because it is too large Load diff

After

Width:  |  Height:  |  Size: 57 KiB

555
fulltests/poster3.svg Normal file

File diff suppressed because one or more lines are too long

After

Width:  |  Height:  |  Size: 277 KiB

View file

@ -1,5 +1,5 @@
#!/bin/bash #!/bin/bash
SCOURVER="0.12" SCOURVER="0.13"
cd .. cd ..
tar cvf scour/tarballs/scour-$SCOURVER.tar scour/scour.py scour/svg_regex.py scour/LICENSE scour/NOTICE scour/README.txt scour/release-notes.html tar cvf scour/tarballs/scour-$SCOURVER.tar scour/scour.py scour/svg_regex.py scour/LICENSE scour/NOTICE scour/README.txt scour/release-notes.html
gzip scour/tarballs/scour-$SCOURVER.tar gzip scour/tarballs/scour-$SCOURVER.tar

View file

@ -9,6 +9,16 @@
<p>Copyright 2009, Jeff Schiller</p> <p>Copyright 2009, Jeff Schiller</p>
<section id="0.13">
<header>
<h2><a href="#0.13">Version 0.13</a></h2>
</header>
<ul>
<li>properly deal with fill="url(&amp;quot;#foo&amp;quot;)"</li>
</ul>
</section>
<section id="0.12"> <section id="0.12">
<header> <header>
<h2><a href="#0.12">Version 0.12</a></h2> <h2><a href="#0.12">Version 0.12</a></h2>

View file

@ -41,6 +41,7 @@
# * Put id attributes first in the serialization (or make the d attribute last) # * Put id attributes first in the serialization (or make the d attribute last)
# Next Up: # Next Up:
# + recognize that fill="url(&quot;#grd1&quot;)" is legal and do not remove grd1 gradient
# - prevent elements from being stripped if they are referenced in a <style> element # - prevent elements from being stripped if they are referenced in a <style> element
# (for instance, filter, marker, pattern) - need a crude CSS parser # (for instance, filter, marker, pattern) - need a crude CSS parser
# - Remove any unused glyphs from font elements? # - Remove any unused glyphs from font elements?
@ -65,7 +66,7 @@ import gzip
getcontext().prec = 6 getcontext().prec = 6
APP = 'scour' APP = 'scour'
VER = '0.12' VER = '0.13'
COPYRIGHT = 'Copyright Jeff Schiller, 2009' COPYRIGHT = 'Copyright Jeff Schiller, 2009'
NS = { 'SVG': 'http://www.w3.org/2000/svg', NS = { 'SVG': 'http://www.w3.org/2000/svg',
@ -403,13 +404,29 @@ def findReferencedElements(node,ids={}):
if len(propval) == 2 : if len(propval) == 2 :
prop = propval[0].strip() prop = propval[0].strip()
val = propval[1].strip() val = propval[1].strip()
if prop in referencingProps and val != '' and val[0:5] == 'url(#' : if prop in referencingProps and val != '' :
if len(val) >= 7 and val[0:5] == 'url(#' :
id = val[5:val.find(')')] id = val[5:val.find(')')]
if ids.has_key(id) : if ids.has_key(id) :
ids[id][0] += 1 ids[id][0] += 1
ids[id][1].append(node) ids[id][1].append(node)
else: else:
ids[id] = [1,[node]] ids[id] = [1,[node]]
# if the url has a quote in it, we need to compensate
elif len(val) >= 8 :
id = None
# double-quote
if val[0:6] == 'url("#' :
id = val[6:val.find('")')]
# single-quote
elif val[0:6] == "url('#" :
id = val[6:val.find("')")]
if id != None:
if ids.has_key(id) :
ids[id][0] += 1
ids[id][1].append(node)
else:
ids[id] = [1,[node]]
if node.hasChildNodes() : if node.hasChildNodes() :
for child in node.childNodes: for child in node.childNodes:
@ -823,7 +840,7 @@ def convertColor(value):
if len(b) == 1: b='0'+b if len(b) == 1: b='0'+b
s = '#'+r+g+b s = '#'+r+g+b
if s[0] == '#' and s[1]==s[2] and s[3]==s[4] and s[5]==s[6]: if s[0] == '#' and len(s)==7 and s[1]==s[2] and s[3]==s[4] and s[5]==s[6]:
s = s.upper() s = s.upper()
s = '#'+s[1]+s[3]+s[5] s = '#'+s[1]+s[3]+s[5]
@ -1397,9 +1414,9 @@ if __name__ == '__main__':
if inputfilename != '': if inputfilename != '':
print ' File:', inputfilename print ' File:', inputfilename
print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's' print ' Time taken:', str(endTimes[0]-startTimes[0]) + 's'
print ' Number of unreferenced id attributes removed:', numIDsRemoved
print ' Number of elements removed:', numElemsRemoved print ' Number of elements removed:', numElemsRemoved
print ' Number of attributes removed:', numAttrsRemoved print ' Number of attributes removed:', numAttrsRemoved
print ' Number of unreferenced id attributes removed:', numIDsRemoved
print ' Number of style properties fixed:', numStylePropsFixed print ' Number of style properties fixed:', numStylePropsFixed
print ' Number of raster images embedded inline:', numRastersEmbedded print ' Number of raster images embedded inline:', numRastersEmbedded
print ' Number of path segments reduced/removed:', numPathSegmentsReduced print ' Number of path segments reduced/removed:', numPathSegmentsReduced
@ -1407,7 +1424,7 @@ if __name__ == '__main__':
print ' Number of bytes saved in colors:', numBytesSavedInColors print ' Number of bytes saved in colors:', numBytesSavedInColors
oldsize = os.path.getsize(inputfilename) oldsize = os.path.getsize(inputfilename)
newsize = os.path.getsize(outputfilename) newsize = os.path.getsize(outputfilename)
sizediff = (newsize / oldsize); sizediff = (newsize / oldsize) * 100;
print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + 'x)' print ' Original file size:', oldsize, 'bytes; new file size:', newsize, 'bytes (' + str(sizediff)[:5] + '%)'

263
statistics.html Normal file
View file

@ -0,0 +1,263 @@
<!DOCTYPE html>
<html>
<head>
<title>Scour Statistics</title>
<style type="text/css">
table { border-width: 1px; border-style: solid; }
th { text-align:center; }
td { text-align:right; }
th,td { border-width: 1px; border-style: solid; }
</style>
</head>
<body>
<h1><a href="http://codedread.com/scour/">Scour</a> Statistics</h1>
<p>Copyright 2009, Jeff Schiller</p>
<table>
<tr>
<th>Filename</th>
<th>Original Size</th>
<th colspan="2">0.01</th>
<th colspan="2">0.02</th>
<th colspan="2">0.03</th>
<th colspan="2">0.04</th>
<th colspan="2">0.05</th>
<th colspan="2">0.06</th>
<th colspan="2">0.07</th>
<th colspan="2">0.08</th>
<th colspan="2">0.09</th>
<th colspan="2">0.10</th>
<th colspan="2">0.11</th>
<th colspan="2">0.12</th>
</tr>
<tr>
<td><a href="fulltests/Degri_Energy_Saving_Lightbulb.svg">Degri_Energy_Saving_Lightbulb.svg</a></td>
<td>139390</td>
<td>137303</td><td>98.50%</td>
<td>135132</td><td>96.95%</td>
<td>133107</td><td>95.49%</td>
<td>132978</td><td>95.40%</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>126213</td><td>90.55%</td>
<td>83403</td><td>59.83%</td>
<td>82696</td><td>59.33%</td>
<td>82696</td><td>59.33%</td>
</tr>
<tr>
<td><a href="fulltests/GusEinstein_Angel.svg">GusEinstein_Angel.svg</a></td>
<td>611109</td>
<td>609343</td><td>99.71%</td>
<td>600297</td><td>98.23%</td>
<td>582691</td><td>95.35%</td>
<td>582562</td><td>95.33%</td>
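<td>554762</td><td>90.78%</td> <!-- NOTE(review): 554762 is identical to the OperaMarketShareEEhover.svg 0.05 size and breaks this row's trend (582562 before, 582209 after); possibly a copy/paste slip - confirm against a real 0.05 run -->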
<td>582209</td><td>95.27%</td>
<td>581834</td><td>95.21%</td>
<td>583456</td><td>95.47%</td>
<td>581937</td><td>95.23%</td>
<td>344813</td><td>56.42%</td>
<td>342135</td><td>55.99%</td>
<td>342135</td><td>55.99%</td>
</tr>
<tr>
<td><a href="fulltests/News_Paper.svg">News_Paper.svg</a></td>
<td>2364860</td>
<td>2363403</td><td>99.94%</td>
<td>2322822</td><td>98.22%</td>
<td>2316925</td><td>97.97%</td>
<td>2316796</td><td>97.97%</td>
<td>N/A</td><td>N/A</td>
<td>2316857</td><td>97.97%</td>
<td>2316560</td><td>97.96%</td>
<td>2317537</td><td>98.00%</td>
<td>2316459</td><td>97.95%</td>
<td>1325837</td><td>56.06%</td>
<td>1315944</td><td>55.65%</td>
<td>1315944</td><td>55.65%</td>
</tr>
<tr>
<td><a href="fulltests/OperaMarketShareEEhover.svg">OperaMarketShareEEhover.svg</a></td>
<td>560524</td>
<td>559059</td><td>99.74%</td>
<td>554859</td><td>98.99%</td>
<td>554784</td><td>98.98%</td>
<td>554784</td><td>98.98%</td>
<td>554762</td><td>98.97%</td>
<td>554762</td><td>98.97%</td>
<td>554321</td><td>98.89%</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>300608</td><td>53.63%</td>
<td>300608</td><td>53.63%</td>
</tr>
<tr>
<td><a href="fulltests/Simon_Printer_on_fire.svg">Simon_Printer_on_fire.svg</a></td>
<td>37808</td>
<td>36875</td><td>97.53%</td>
<td>34383</td><td>90.94%</td>
<td>31405</td><td>83.06%</td>
<td>31276</td><td>82.72%</td>
<td>31272</td><td>82.71%</td>
<td>31273</td><td>82.72%</td>
<td>31038</td><td>82.09%</td>
<td>31827</td><td>84.18%</td>
<td>31328</td><td>82.86%</td>
<td>24734</td><td>65.42%</td>
<td>23956</td><td>63.36%</td>
<td>23956</td><td>63.36%</td>
</tr>
<tr>
<td><a href="fulltests/Wave.svg">Wave.svg</a></td>
<td>96409</td>
<td>95513</td><td>99.07%</td>
<td>95513</td><td>99.07%</td>
<td>95513</td><td>99.07%</td>
<td>61265</td><td>63.55%</td>
<td>61265</td><td>63.55%</td>
<td>61234</td><td>63.51%</td>
<td>61252</td><td>63.53%</td>
<td>61088</td><td>63.36%</td>
<td>61088</td><td>63.36%</td>
<td>58858</td><td>61.05%</td>
<td>58985</td><td>61.18%</td>
<td>58985</td><td>61.18%</td>
</tr>
<tr>
<td><a href="fulltests/Web20Map.svg">Web20Map.svg</a></td>
<td>58239</td>
<td>48319</td><td>82.97%</td>
<td>45374</td><td>77.91%</td>
<td>40291</td><td>69.18%</td>
<td>40162</td><td>68.96%</td>
<td>42063</td><td>72.22%</td>
<td>42063</td><td>72.22%</td>
<td>42721</td><td>73.35%</td>
<td>45052</td><td>77.36%</td>
<td>44254</td><td>75.99%</td>
<td>44254</td><td>75.99%</td>
<td>43134</td><td>74.06%</td>
<td>43134</td><td>74.06%</td>
</tr>
<tr>
<td><a href="fulltests/acid.svg">acid.svg</a></td>
<td>13514</td>
<td>13120</td><td>97.08%</td>
<td>11555</td><td>85.50%</td>
<td>9675</td><td>71.59%</td>
<td>9476</td><td>70.12%</td>
<td>9515</td><td>70.41%</td>
<td>9515</td><td>70.41%</td>
<td>9410</td><td>69.63%</td>
<td>9647</td><td>71.39%</td>
<td>9493</td><td>70.25%</td>
<td>6883</td><td>50.93%</td>
<td>6381</td><td>47.22%</td>
<td>6381</td><td>47.22%</td>
</tr>
<tr>
<td><a href="fulltests/dragonfly.svg">dragonfly.svg</a></td>
<td>679018</td>
<td>678772</td><td>99.96%</td>
<td>660839</td><td>97.32%</td>
<td>660826</td><td>97.32%</td>
<td>660666</td><td>97.30%</td>
<td>660659</td><td>97.30%</td>
<td>660656</td><td>97.30%</td>
<td>660651</td><td>97.30%</td>
<td>660713</td><td>97.30%</td>
<td>660713</td><td>97.30%</td>
<td>N/A</td><td>N/A</td>
<td>608151</td><td>89.56%</td>
<td>608151</td><td>89.56%</td>
</tr>
<tr>
<td><a href="fulltests/gimp.svg">gimp.svg</a></td>
<td>27379</td>
<td>25407</td><td>92.80%</td>
<td>14361</td><td>52.45%</td>
<td>12500</td><td>45.66%</td>
<td>12419</td><td>45.36%</td>
<td>12214</td><td>44.61%</td>
<td>12211</td><td>44.60%</td>
<td>12045</td><td>43.99%</td>
<td>12715</td><td>46.44%</td>
<td>11698</td><td>42.73%</td>
<td>10706</td><td>39.10%</td>
<td>10136</td><td>37.02%</td>
<td>10136</td><td>37.02%</td>
</tr>
<tr>
<td><a href="fulltests/notification-audio-next.svg">notification-audio-next.svg</a></td>
<td>102987</td>
<td>99716</td><td>96.82%</td>
<td>98991</td><td>96.12%</td>
<td>98999</td><td>96.13%</td>
<td>98855</td><td>95.99%</td>
<td>98855</td><td>95.99%</td>
<td>98807</td><td>95.94%</td>
<td>3685</td><td>3.58%</td>
<td>3919</td><td>3.81%</td>
<td>3919</td><td>3.81%</td>
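<td>3418</td><td>3.43%</td> <!-- NOTE(review): size and percentage disagree (3418 / 102987 is about 3.32%, as in the 0.11 and 0.12 columns); either the 0.10 size or its percentage was mistyped - confirm against a real 0.10 run -->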
<td>3418</td><td>3.32%</td>
<td>3418</td><td>3.32%</td>
</tr>
<tr>
<td><a href="fulltests/poster3.svg">poster3.svg</a></td>
<td>283647</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>N/A</td><td>N/A</td>
<td>243726</td><td>85.93%</td>
<td>243726</td><td>85.93%</td>
</tr>
<tr>
<td><a href="fulltests/strawberry.svg">strawberry.svg</a></td>
<td>255918</td>
<td>250885</td><td>98.03%</td>
<td>190004</td><td>74.24%</td>
<td>148950</td><td>58.20%</td>
<td>148821</td><td>58.15%</td>
<td>147604</td><td>57.68%</td>
<td>147605</td><td>57.68%</td>
<td>146180</td><td>57.12%</td>
<td>151069</td><td>59.03%</td>
<td>146925</td><td>57.41%</td>
<td>112220</td><td>43.85%</td>
<td>111701</td><td>43.65%</td>
<td>111701</td><td>43.65%</td>
</tr>
<tr>
<td><a href="fulltests/wifi.svg">wifi.svg</a></td>
<td>14983</td>
<td>14368</td><td>95.90%</td>
<td>10872</td><td>72.56%</td>
<td>9143</td><td>61.02%</td>
<td>9014</td><td>60.16%</td>
<td>8951</td><td>59.74%</td>
<td>8948</td><td>59.72%</td>
<td>8792</td><td>58.68%</td>
<td>9149</td><td>61.06%</td>
<td>8778</td><td>58.59%</td>
<td>7041</td><td>46.99%</td>
<td>6930</td><td>46.25%</td>
<td>6930</td><td>46.25%</td>
</tr>
</table>
</body>
</html>

View file

@ -537,6 +537,11 @@ class TranslateLongHexColorIntoShortHex(unittest.TestCase):
self.assertEquals( elem.getAttribute('fill'), '#FFF', self.assertEquals( elem.getAttribute('fill'), '#FFF',
'Not converting long hex color into short hex') 'Not converting long hex color into short hex')
class AllowQuotEntitiesInUrl(unittest.TestCase):
    """Regression test: a gradient referenced through a quoted url() must survive scouring."""
    def runTest(self):
        # The fixture's <rect> points at its only gradient with
        # fill="url(&quot;#g&quot;)", i.e. the id is wrapped in quotes.
        doc = scour.scourXmlFile('unittests/quot-in-url.svg')
        gradients = doc.getElementsByTagNameNS(SVGNS, 'linearGradient')
        self.assertEquals( len(gradients), 1,
            'Removed referenced gradient when &quot; was in the url')
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()

View file

@ -0,0 +1,9 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
<defs>
<linearGradient id="g" x1="0" y1="0" x2="1" y2="0">
<stop offset="0" stop-color="#0F0" />
<stop offset="1" stop-color="#00F"/>
</linearGradient>
</defs>
<rect width="100" height="100" fill="url(&quot;#g&quot;)"/>
</svg>

After

Width:  |  Height:  |  Size: 316 B