Bug ID: JDK-6805750 Improve handling of Attributes.Name

JDK-6805750 : Improve handling of Attributes.Name

Type: Enhancement
Component: core-libs
Sub-Component: java.util.jar
Affected Version: 6

Priority: P4
Status: Resolved
Resolution: Fixed
OS: generic
CPU: generic

Submitted: 2009-02-15
Updated: 2025-04-05
Resolved: 2018-04-23

Versions (Unresolved/Resolved/Fixed)

The Version table provides details related to the release that this issue/RFE will be addressed.

Unresolved : Release in which this issue/RFE will be addressed.
Resolved: Release in which this issue/RFE has been resolved.
Fixed : Release in which this issue/RFE has been fixed. The release containing this fix may be available for download as an Early Access Release or a General Availability Release.

To download the current JDK release, click here.

JDK 11
11 b11Fixed

Related Reports

Relates :	JDK-6797730 - Umbrella: Applet startup performance improvements
Relates :	JDK-8353787 - Increased number of SHA-384-Digest java.util.jar.Attributes$Name instances leading to higher memory footprint

Description

Parsing manifests is noticeable chunk of start time. Especially for signed jars where manifests are large
and have attributes per each class. For FX runtime several thousands of Attibutes.Name objects are created
(even if without full security validation).

We should make this code faster and less memory consuming.
In particular, following improvements are possible:
1) Attributes.Name.equals() should compare references to names before using case insensitive comparator
In majority of real-world cases names are identical and they are interned.

2) Attribute.Names are mostly used for hash map lookups, i.e. for almost every Attributes.Name instance hashcode will be called. We can precalculate hashcode at time of creation/validation. This helps of avoid multiple iterations over char array.

3) Attribute names are often occur multiple times in teh same and different jars.
One very typical scenario is signed jars where every entry has signature with same name.
We can reuse Attributes.Name instances, at least for the same Manifest -
this helps to reduce footprint
(in simplest FX testcase number of instances of Name droped from 3500 to 50)

See suggested fix in attachment for further details.
Yet another idea is to try to avoid of use utf8 decoder if possible.
decoder was significantly improved but for short strings overhead of wrapping arrays with byte buffers, etc. seems to be significant. Majority of real world manifests/attributes are ascii anyway.

Quick test with replacing the utf8 based String construction with ASCII only shows additional 4% improvement using same test.

One possible approach for implementation is following:
1) Check if any of non ascii symbols (with code > 0x7f) were read in the FastInputStream.readline() (we iterate through byte array there anyway)
2) if all symbols were ascii during last line read - use ascii only String constructor

Comments

URL: http://hg.openjdk.java.net/jdk/jdk/rev/e286fa159ff1 User: redestad Date: 2018-04-23 11:30:19 +0000

23-04-2018

EVALUATION As noticed by the submitter, the gain is not that big as was expected, 4-5% of total attributes reading (not the 4-5% of the overal starup, or jar file reading...). It might be worth spending some time to evaluate it further, including the risk and cost of changing the existing working code for the possible performance gain, when the resource is available. Anyway, this is definitely not a P2 bug as originally filed.

09-07-2009

SUGGESTED FIX diff -r af84cae36e3c addon/java/util/jar/Manifest.java --- a/addon/java/util/jar/Manifest.java Sun Feb 15 15:25:37 2009 +0300 +++ b/addon/java/util/jar/Manifest.java Sun Feb 15 18:57:28 2009 +0300 @@ -14,7 +14,9 @@ import java.io.IOException; import java.io.IOException; import java.util.Map; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; +import java.util.Set; /** * The Manifest class is used to maintain Manifest entry names and their @@ -190,6 +192,9 @@ public class Manifest implements Cloneab boolean skipEmptyLines = true; byte[] lastline = null; + Map<String, Attributes.Name> namecache = + new HashMap<String, Attributes.Name>(); + while ((len = fis.readLine(lbuf)) != -1) { if (lbuf[--len] != '\n') { throw new IOException("manifest line too long"); @@ -231,7 +236,7 @@ public class Manifest implements Cloneab attr = new Attributes(asize); entries.put(name, attr); } - attr.read(fis, lbuf); + attr.read(fis, lbuf, namecache); ecount++; acount += attr.size(); //XXX: Fix for when the average is 0. When it is 0, diff -r af84cae36e3c addon/java/util/jar/Attributes.java --- a/addon/java/util/jar/Attributes.java Sun Feb 15 15:25:37 2009 +0300 +++ b/addon/java/util/jar/Attributes.java Sun Feb 15 19:01:44 2009 +0300 @@ -7,14 +7,13 @@ package java.util.jar; -import java.io.DataInputStream; +import com.sun.deploy.util.Trace; import java.io.DataOutputStream; import java.io.IOException; import java.util.HashMap; import java.util.Map; import java.util.Set; import java.util.Collection; -import java.util.AbstractSet; import java.util.Iterator; import java.util.logging.Logger; import java.util.Comparator; @@ -352,6 +351,11 @@ public class Attributes implements Map<O * XXX Need to handle UTF8 values. */ void read(Manifest.FastInputStream is, byte[] lbuf) throws IOException { + read(is, lbuf, null); + } + + void read(Manifest.FastInputStream is, byte[] lbuf, + Map<String, Attributes.Name> cache) throws IOException { String name = null, value = null; byte[] lastline = null; @@ -401,7 +405,19 @@ public class Attributes implements Map<O value = new String(lbuf, i, len - i, "UTF8"); } try { - if ((putValue(name, value) != null) && (!lineContinued)) { + Name n = null; + name.intern(); + if (cache != null) { + n = cache.get(name); + if (n == null) { + n = new Name(name); + cache.put(name, n); + } + } else { + n = new Name(name); + } + + if ((put(n, value) != null) && (!lineContinued)) { Logger.getLogger("java.util.jar").warning( "Duplicate name in Manifest: " + name + ".\n" @@ -449,29 +465,27 @@ public class Attributes implements Map<O this.name = name.intern(); } - private static boolean isValid(String name) { + private boolean isValid(String name) { int len = name.length(); if (len > 70 || len == 0) { return false; } + int h = 0; for (int i = 0; i < len; i++) { - if (!isValid(name.charAt(i))) { - return false; - } + char c = name.charAt(i); + if ((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') + || c == '_' || c == '-') { + h = h*31 + c; + } else if (c >= 'A' && c <= 'Z') { + //hashcode must be identical for upper and lower case + h = h*31 + (c - 0x20); + } else { + //not valid + return false; + } } + hashCode = h; return true; - } - - private static boolean isValid(char c) { - return isAlpha(c) || isDigit(c) || c == '_' || c == '-'; - } - - private static boolean isAlpha(char c) { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - } - - private static boolean isDigit(char c) { - return c >= '0' && c <= '9'; } /** @@ -481,23 +495,23 @@ public class Attributes implements Map<O * specified attribute object */ public boolean equals(Object o) { - if (o instanceof Name) { - Comparator c = ASCIICaseInsensitiveComparator.CASE_INSENSITIVE_ORDER; - return c.compare(name, ((Name)o).name) == 0; + if (o instanceof Name) { + Name n = (Name) o; + //names are interned and are reused often + if (name == n.name) return true; + Comparator c = ASCIICaseInsensitiveComparator.CASE_INSENSITIVE_ORDER; + return c.compare(name, n.name) == 0; } else { - return false; + return false; } } /** * Computes the hash value for this attribute name. */ - public int hashCode() { - if (hashCode == -1) { - hashCode = ASCIICaseInsensitiveComparator.lowerCaseHashCode(name); - } - return hashCode; - } + public int hashCode() { + return hashCode; + } /** * Returns the attribute name as a String.

15-02-2009