sonos

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs

commit 27c25a8457b4ad616c9ff602e06e5557034066e9
parent ea033c3ea31772f5e5b0f24902c99d0890e86ceb
Author: Brian Swetland <swetland@frotz.net>
Date:   Sun,  7 Aug 2011 07:35:16 -0700

XML/XMLSequence: remove use of Pattern and Matcher

On OpenJDK 1.6 on Linux, this is in the noise (listing 1900 tracks
still takes ~5 seconds).

On Android 2.3, this is a huge performance improvement (listing 1900
tracks takes ~6 seconds, whereas with the regex version it took ~110
seconds, due to Matcher.reset() and String.contentEquals() calling
CharSequence.toString() excessively).

Diffstat:
Mnet/frotz/sonos/XML.java | 165++++++++++++++++++++++++++++++++++++++++++++++---------------------------------
Mnet/frotz/sonos/XMLSequence.java | 124++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 220 insertions(+), 69 deletions(-)

diff --git a/net/frotz/sonos/XML.java b/net/frotz/sonos/XML.java @@ -31,15 +31,19 @@ import java.io.PrintStream; // TODO: &apos; -> ' public class XML { + private static final boolean DEBUG = false; + XMLSequence seq; /* entire buffer */ - XMLSequence tag; /* most recent tag */ XMLSequence tmp; /* for content return */ char[] xml; - int offset; - int count; - Matcher mTag; - Matcher mAttr; + /* offset and length of the name of the current tag */ + int tag_off; + int tag_len; + /* offset and length of the attr area of the current tag */ + int att_off; + int att_len; + /* true if the current tag is <...>, false if </...> */ boolean isOpen; CharsetDecoder decoder; @@ -50,10 +54,7 @@ public class XML { public XML(int size) { decoder = cs.newDecoder(); seq = new XMLSequence(); - tag = new XMLSequence(); tmp = new XMLSequence(); - mTag = pTag.matcher(seq); - mAttr = pAttr.matcher(tmp); xml = new char[size]; buf = CharBuffer.wrap(xml); } @@ -73,40 +74,48 @@ public class XML { } void reset() { seq.init(xml, buf.arrayOffset(), buf.length()); - tag.init(xml, 0, 0); tmp.init(xml, 0, 0); - offset = 0; nextTag(); - //System.err.println("XML reset, "+buf.length()+" bytes."); } public void rewind() { - offset = 0; + seq.pos = seq.offset; nextTag(); } public XMLSequence getAttr(String name) { - int off = mTag.start(3); - int end = off + mTag.end(3); - - tmp.offset = 0; - tmp.count = end; - - /* hack: work around Android bug */ - mAttr.reset(tmp); - - while (mAttr.find(off)) { - //System.err.println("ANAME: " + mAttr.group(1)); - //System.err.println("ATEXT: " + mAttr.group(2)); - tmp.offset = mAttr.start(1); - tmp.count = mAttr.end(1) - tmp.offset; - if (name.contentEquals(tmp)) { - tmp.offset = mAttr.start(2); - tmp.count = mAttr.end(2) - tmp.offset; - return tmp; - } - tmp.offset = 0; - tmp.count = end; - off = mAttr.end(); + int nlen = name.length(); + int n; + + tmp.offset = att_off; + tmp.count = att_len; + tmp.pos = att_off; + + for (;;) { + int off = tmp.space(); + int len = tmp.name(); + if (len < 0) + break; + if (DEBUG) System.err.println("ANAME: ["+new String(tmp.data,off,len)+"]"); + int voff = tmp.value(); + if (voff < 0) + break; + int vend = tmp.next('"'); + if (vend < 0) + break; + vend--; + if (DEBUG) System.err.println("ATEXT: ["+new String(tmp.data,voff,vend-voff)+"]"); + + if (nlen != len) + continue; + for (n = 0; n < len; n++) + if (name.charAt(n) != tmp.data[off+n]) /* XXX yuck */ + break; + if (nlen != n) + continue; + + tmp.offset = voff; + tmp.count = vend - voff; + return tmp; } return null; } @@ -118,7 +127,7 @@ public class XML { char[] data = xml; int n; tmp.data = data; - n = tmp.offset = mTag.end(); + n = tmp.offset = (att_off + att_len); try { for (;;) { if (data[n] == '<') @@ -173,26 +182,35 @@ public class XML { return isOpen; } + public boolean tag_eq(CharSequence name) { + if (name.length() != tag_len) + return false; + for (int n = 0; n < tag_len; n++) + if (name.charAt(n) != seq.data[tag_off + n]) /* XXX yuck */ + return false; + return true; + } /* require <tag> and consume it */ public void open(String name) throws XML.Oops { - if (!isOpen || !name.contentEquals(tag)) + if (!isOpen || !tag_eq(name)) throw new XML.Oops("expecting <"+name+"> but found " + str()); nextTag(); } /* require </tag> and consume it */ public void close(String name) throws XML.Oops { - if (isOpen || !name.contentEquals(tag)) + if (isOpen || !tag_eq(name)) throw new XML.Oops("expecting </"+name+"> but found " + str()); nextTag(); } /* require <tag> text </tag> and return text */ public XMLSequence read(String name) throws XML.Oops { - int start = mTag.end(); + int start = att_off + att_len; open(name); - tmp.adjust(start, mTag.start()); + tmp.adjust(start, tag_off - 2); close(name); + if (DEBUG) System.err.println("VAL ["+tmp+"]"); return tmp; } @@ -202,42 +220,41 @@ public class XML { return false; name.data = xml; - name.offset = tag.offset; - name.count = tag.count; + name.offset = tag_off; + name.count = tag_len; value.data = xml; - value.offset = mTag.end(); - + value.offset = att_off + att_len; nextTag(); + value.count = tag_off - value.offset - 2; - value.count = mTag.start() - value.offset; close(name); return true; } public void close(XMLSequence name) throws XML.Oops { if (isOpen) - throw new XML.Oops("1expected </"+name+">, found <"+tag+">"); - if (!name.eq(tag)) - throw new XML.Oops("2expected </"+name+">, found </"+tag+">"); + throw new XML.Oops("1expected </"+name+">, found "+str()); + if (!tag_eq(name)) + throw new XML.Oops("2expected </"+name+">, found "+str()); nextTag(); } public boolean tryRead(String name, XMLSequence value) throws XML.Oops { - if (!isOpen || !name.contentEquals(tag)) + if (!isOpen || !tag_eq(name)) return false; value.data = xml; - value.offset = mTag.end(); + value.offset = att_off + att_len; nextTag(); - value.count = mTag.start() - value.offset; + value.count = tag_off - value.offset; close(name); return true; } /* eat the current tag and any children */ public void consume() throws XML.Oops { - tmp.offset = mTag.start(2); - tmp.count = mTag.end(2) - tmp.offset; + tmp.offset = tag_off; + tmp.count = tag_len; nextTag(); while (isOpen) consume(); @@ -247,30 +264,42 @@ public class XML { /* format current begin/end tag as a string. for error messages */ String str() { if (isOpen) - return "<" + tag + ">"; + return "<" + new String(seq.data, tag_off, tag_len) + ">"; else - return "</" + tag + ">"; + return "</" + new String(seq.data, tag_off, tag_len) + ">"; } void nextTag() { - /* hack: work around Android bug */ - mTag.reset(seq); - /* can't deal with comments or directives */ - if (!mTag.find(offset)) { - tag.adjust(0,0); + int off = seq.next('<'); + boolean opn = seq.isOpen(); + int len = seq.name(); + int att = seq.pos; + int nxt = seq.next('>'); + + /* don't advance if we're in a strange state */ + if ((off < 0) || (len < 0) || (nxt < 0)) return; - } - isOpen = (mTag.start(1) == -1); - tag.adjust(mTag.start(2), mTag.end(2)); - offset = mTag.end(); - } + if (!opn) + off++; - /* G1: \ G2: tagname G3: attributes */ - static Pattern pTag = Pattern.compile("<(/)?([a-zA-Z:_][a-zA-Z0-9:\\-\\._]*)([^>]*)>",Pattern.DOTALL); - /* G1: name G2: value */ - static Pattern pAttr = Pattern.compile("\\s*([a-zA-Z:_][a-zA-Z0-9:\\-\\._]*)=\"([^\"]*)\""); + tag_off = off; + tag_len = len; + isOpen = opn; + + att_off = att; + att_len = nxt - att; + + if (DEBUG) { + if (opn) { + System.err.println("TAG ["+new String(seq.data,tag_off,tag_len)+"]"); + System.err.println("ATR ["+new String(seq.data,att_off,att_len)+"]"); + } else { + System.err.println("tag ["+new String(seq.data,tag_off,tag_len)+"]"); + } + } + } static Charset cs = Charset.forName("UTF-8"); diff --git a/net/frotz/sonos/XMLSequence.java b/net/frotz/sonos/XMLSequence.java @@ -20,6 +20,7 @@ class XMLSequence implements CharSequence { char[] data; int offset; int count; + int pos; public XMLSequence() { } @@ -27,15 +28,18 @@ class XMLSequence implements CharSequence { this.data = data; offset = start; count = end - start; + pos = offset; } public void init(XMLSequence s) { data = s.data; offset = s.offset; count = s.count; + pos = offset; } void adjust(int start, int end) { offset = start; count = end - start; + pos = offset; } boolean eq(XMLSequence other) { int count = this.count; @@ -115,8 +119,9 @@ class XMLSequence implements CharSequence { } break; } + pos = offset; } - + /* CharSequence interface */ public int length() { return count; @@ -136,4 +141,121 @@ class XMLSequence implements CharSequence { return ""; return new String(data, offset, count); } + + /* Parsing Tools */ + + /* returns next offset after a match with c, or -1 if no match */ + int next(char c) { + char[] x = data; + int n = pos; + int end = offset + count; + while (n < end) { + if (x[n++] == c) { + pos = n; + return n; + } + } + return -1; + } + + /* advance pos beyond whitespace, returns updated position */ + int space() { + char[] x = data; + int n = pos; + int end = offset + count; + while (n < end) { + switch (x[n]) { + case ' ': + case '\t': + case '\r': + case '\n': + n++; + break; + default: + pos = n; + return n; + } + } + pos = n; + return n; + } + + /* matches an XML element or attribute name, returns length (-1==failure), pos will be after */ + int name() { + char[] x = data; + int n = pos; + int end = offset + count; + if (n >= end) + return -1; + try { + if (xName0[x[n]] == 0) + return -1; + } catch (ArrayIndexOutOfBoundsException e) { + return -1; + } + while (++n < end) { + try { + if(xNameX[x[n]] == 1) + continue; + } catch (ArrayIndexOutOfBoundsException e) { } + break; + } + end = n - pos; + pos = n; + return end; + } + + /* match =" returning position or -1 if no match */ + int value() { + char[] x = data; + int n = pos; + int end = offset + count; + if (n >= end) + return -1; + if (x[n++] != '=') + return -1; + if (n >= end) + return -1; + if (x[n++] != '"') + return -1; + pos = n; + return n; + } + + /* returns false (and advances) if data[pos] == '/', otherwise true */ + boolean isOpen() { + if (data[pos] == '/') { + pos++; + return false; + } else { + return true; + } + } + + static char xName0[]; + static char xNameX[]; + + static { + xName0 = new char[127]; + xNameX = new char[127]; + int n; + for (n = 'a'; n <= 'z'; n++) { + xName0[n] = 1; + xNameX[n] = 1; + } + for (n = 'A'; n <= 'Z'; n++) { + xName0[n] = 1; + xNameX[n] = 1; + } + xName0['_'] = 1; + xName0[':'] = 1; + + for (n = '0'; n <= '9'; n++) { + xNameX[n] = 1; + } + xNameX['_'] = 1; + xNameX[':'] = 1; + xNameX['-'] = 1; + xNameX['.'] = 1; + } }