From 743d4e70400d83228b4076ff38bf5469feb33b97 Mon Sep 17 00:00:00 2001 From: Sergiusz Kierat Date: Mon, 5 Jun 2017 09:31:17 +0200 Subject: [PATCH] strict selective pulling for longer paths (cherry picked from commit f4f110827f2c510c6d2cca280b61940339d6576c) --- .../src/test/scala/scales/xml/PullTest.scala | 201 +++++++++++ .../scales/xml/parser/pull/PullIterator.scala | 8 +- scales-xml/src/test/resources/data/nature.xml | 313 ++++++++++++++++++ .../src/test/resources/data/plant_catalog.xml | 309 ----------------- .../src/test/scala/scales/xml/PullTest.scala | 3 +- 5 files changed, 521 insertions(+), 313 deletions(-) create mode 100644 core-tests/src/test/scala/scales/xml/PullTest.scala create mode 100644 scales-xml/src/test/resources/data/nature.xml delete mode 100644 scales-xml/src/test/resources/data/plant_catalog.xml diff --git a/core-tests/src/test/scala/scales/xml/PullTest.scala b/core-tests/src/test/scala/scales/xml/PullTest.scala new file mode 100644 index 00000000..afd4ee94 --- /dev/null +++ b/core-tests/src/test/scala/scales/xml/PullTest.scala @@ -0,0 +1,201 @@ +package scales.xml + +class PullTest extends junit.framework.TestCase { + + import ScalesXml._ + import junit.framework.Assert._ + import scales.utils._ + import io._ + import resources._ + + val Default = Namespace("urn:default") + val DefaultRoot = Default("Default") + + + import scales.utils.{resource => sresource} + + /* + * pump up number when cold, but make sure it's even. + * + * 5000000 takes around 185s (as of 14.01.2010) and shows no leaking/unnecessary retention. 
+ */ + val maxIterations = 500//0000 + + def testSimpleLoad = { + val pull = pullXml(sresource(this, "/data/BaseXmlTest.xml")) + assertTrue("didn't have any events",pull.hasNext) + assertFalse("should not have been closed", pull.isClosed) + var next = pull.next + assertTrue("should have been left", next.isLeft) + + next.left.get match { + case Elem(DefaultRoot,_,_) => () // is ok + case a @ _ => fail("Should have been {urn:default}Default was "+a) + } + + pull.close + assertTrue("Should have been closed", pull.isClosed) + } + + def testResourceLoad = { + val (resource, pull) = pullXmlResource(sresource(this, "/data/BaseXmlTest.xml")) + assertTrue("didn't have any events",pull.hasNext) + assertFalse("should not have been closed", resource.isClosed) + var next = pull.next + assertTrue("should have been left", next.isLeft) + + next.left.get match { + case Elem(DefaultRoot,_,_) => () // is ok + case a @ _ => fail("Should have been {urn:default}Default was "+a) + } + + resource.closeResource + assertTrue("Should have been closed", resource.isClosed) + } + + def testProxiedCloser = { + val strr = new java.io.StringReader("My String") + val proxied = new ProxiedCloseOnNeedReader(strr) + + proxied.close() // should be a noop + assertFalse("Was closed for some reason", proxied.isClosed) + + var arr = Array[Char](10) + + val count = proxied.read(arr) + assertEquals("My String".substring(0,count),arr.mkString("")) + + proxied.closeResource + assertTrue("Should have been closed", proxied.isClosed) + } + + def testChainedCloseOnNeed = { + import java.io.StringReader + def getOne = new StringReader("My String") with CloseOnNeed { + def doClose() { + super[StringReader].close() + } + } + val str1 = getOne + val str2 = getOne + val str3 = getOne + + val joined = str1 ++ str2 ++ str3 + assertFalse("1 Should not have been closed", str1.isClosed) + assertFalse("2 Should not have been closed", str2.isClosed) + assertFalse("3 Should not have been closed", str3.isClosed) + 
assertFalse("Joined Should not have been closed", joined.isClosed) + + joined.closeResource + + assertTrue("1 Should have been closed", str1.isClosed) + assertTrue("2 Should have been closed", str2.isClosed) + assertTrue("3 Should have been closed", str3.isClosed) + assertTrue("Joined Should have been closed", joined.isClosed) + } + + def testDump = { + val pull = pullXml(sresource(this, "/data/BaseXmlTest.xml")) + + def out(it : String) : Unit = + ()//it + + for{event <- pull}{ + event match { + case Left(x) => x match { + case Elem(qname, attrs, ns) => + out("<" + qname + attrs.map( x => " "+x.name +"='"+x.value+"'" ).mkString(" ") + ">") + + case item : XmlItem => + out("item "+item) + } + case Right(EndElem(qname, ns)) => + out("") + + } + } + + assertTrue("Should have been closed", pull.isClosed) + } + +/* + def testEphemeral = { + def ntimes( i : Int, max : Int) : EphemeralStream[Int] = + if (i < max) + EphemeralStream[Int](i) ++ (ntimes( i + 1, max)) + else + EphemeralStream.empty + + val itr = ntimes(0, 300000).iterator + println(itr.next) + }*/ + + type FiveStrings = (String,String,String,String,String) + + val expectedHead = ("264","chris.twiner","dir","M","/trunk/scalesUtils") + + def testIterator = { + val pull = pullXml(sresource(this, "/data/svnLogIteratorEg.xml")) + val LogEntries = List("log"l,"logentry"l) + + var i = 0 + + val it = scales.xml.iterate(LogEntries, pull.it) + val bits = for{ entry : XmlPath <- it + revision <- entry.\.*@("revision"l).one + author <- entry.\*("author"l).one + path <- entry.\*("paths"l).|>{x=> i+=1;x}.\*("path"l) + kind <- path.\.*@("kind"l) + action <- path.\.*@("action"l) + } yield (text(revision), value(author), text(kind), text(action), value(path)) + + val t = bits.next//iterator.next + //println( t._1+ " " + t._2 + " " + t._3 + " " + t._4 + " " + t._5 ) + assertEquals(1, i) + assertEquals(expectedHead, t) + + // sanity check + if (it.isTraversableAgain) assertTrue(it.nonEmpty) + + pull.close + } + + def 
testIteratorForStrictSelectivePulling() = { + val expectedPlants = List( + ("English ivy","Hedera helix", "3", "Mostly Shady", "$9.99", "000100"), + ("Dwarf periwinkle","Vinca minor", "3", "Mostly Shady", "$12.10", "000409") + ) + + val pull = pullXml( + sresource(this, "/data/nature.xml"), + strictPath = List( + NoNamespaceQName("CATALOG"), + NoNamespaceQName("EUKARYOTE"), + NoNamespaceQName("PLANT") + ) + ) + val PlantEntries = List("CATALOG"l, "PLANT"l) + + val it = scales.xml.iterate(PlantEntries, pull.it) + val plantsIt = for { + entry: XmlPath <- it + common <- entry.\*("COMMON"l).one + botanical <- entry.\*("BOTANICAL"l).one + zone <- entry.\*("ZONE"l).one + light <- entry.\*("LIGHT"l).one + price <- entry.\*("PRICE"l).one + availability <- entry.\*("AVAILABILITY"l).one + } yield (value(common), value(botanical), value(zone), value(light), value(price), value(availability)) + + val plants = plantsIt.toList + + assertEquals(expectedPlants.size, plants.size) + for ((plant, idx) <- plants.zipWithIndex) + assertEquals(expectedPlants(idx), plant) + + // sanity check + if (it.isTraversableAgain) assertTrue(it.nonEmpty) + + pull.close + } +} \ No newline at end of file diff --git a/scales-xml/src/main/scala/scales/xml/parser/pull/PullIterator.scala b/scales-xml/src/main/scala/scales/xml/parser/pull/PullIterator.scala index d69df25d..c5d5a693 100644 --- a/scales-xml/src/main/scala/scales/xml/parser/pull/PullIterator.scala +++ b/scales-xml/src/main/scala/scales/xml/parser/pull/PullIterator.scala @@ -231,6 +231,8 @@ object PullUtils { vpath = vpath.take(vpath.size - 1) None } + case XMLStreamConstants.END_DOCUMENT => + Some(null) case _ => None } @@ -258,12 +260,12 @@ object PullUtils { val elemQName = PullUtils.getElemQName(parser, strategy, token) vpath = ipath :+ elemQName val validSubtree = vpath.take(strictPath.size).equals(strictPath) - if (idepth != StartDepth && !validSubtree) { - dropWhile() - } else { + if (idepth == StartDepth || validSubtree) { val 
attributes = PullUtils.getAttributes(parser, strategy, token) val namespaces = PullUtils.getNamespaces(parser, strategy, token) strategy.elem(elemQName, attributes, namespaces, token) + } else { + dropWhile() } } case XMLStreamConstants.END_ELEMENT => //2 diff --git a/scales-xml/src/test/resources/data/nature.xml b/scales-xml/src/test/resources/data/nature.xml new file mode 100644 index 00000000..20563e33 --- /dev/null +++ b/scales-xml/src/test/resources/data/nature.xml @@ -0,0 +1,313 @@ + + + + + + Bloodroot + Sanguinaria canadensis + 4 + Mostly Shady + $2.44 + 031599 + + + Columbine + Aquilegia canadensis + 3 + Mostly Shady + $9.37 + 030699 + + + Marsh Marigold + Caltha palustris + 4 + Mostly Sunny + $6.81 + 051799 + + + Cowslip + Caltha palustris + 4 + Mostly Shady + $9.90 + 030699 + + + Dutchman's-Breeches + Dicentra cucullaria + 3 + Mostly Shady + $6.44 + 012099 + + + Ginger, Wild + Asarum canadense + 3 + Mostly Shady + $9.03 + 041899 + + + Hepatica + Hepatica americana + 4 + Mostly Shady + $4.45 + 012699 + + + Liverleaf + Hepatica americana + 4 + Mostly Shady + $3.99 + 010299 + + + Jack-In-The-Pulpit + Arisaema triphyllum + 4 + Mostly Shady + $3.23 + 020199 + + + Mayapple + Podophyllum peltatum + 3 + Mostly Shady + $2.98 + 060599 + + + Phlox, Woodland + Phlox divaricata + 3 + Sun or Shade + $2.80 + 012299 + + + Phlox, Blue + Phlox divaricata + 3 + Sun or Shade + $5.59 + 021699 + + + Spring-Beauty + Claytonia Virginica + 7 + Mostly Shady + $6.59 + 020199 + + + Trillium + Trillium grandiflorum + 5 + Sun or Shade + $3.90 + 042999 + + + Wake Robin + Trillium grandiflorum + 5 + Sun or Shade + $3.20 + 022199 + + + Violet, Dog-Tooth + Erythronium americanum + 4 + Shade + $9.04 + 020199 + + + Trout Lily + Erythronium americanum + 4 + Shade + $6.94 + 032499 + + + Adder's-Tongue + Erythronium americanum + 4 + Shade + $9.58 + 041399 + + + Anemone + Anemone blanda + 6 + Mostly Shady + $8.86 + 122698 + + + Grecian Windflower + Anemone blanda + 6 + Mostly Shady + $9.16 + 
071099 + + + Bee Balm + Monarda didyma + 4 + Shade + $4.59 + 050399 + + + Bergamot + Monarda didyma + 4 + Shade + $7.16 + 042799 + + + Black-Eyed Susan + Rudbeckia hirta + Annual + Sunny + $9.80 + 061899 + + + Buttercup + Ranunculus + 4 + Shade + $2.57 + 061099 + + + Crowfoot + Ranunculus + 4 + Shade + $9.34 + 040399 + + + Butterfly Weed + Asclepias tuberosa + Annual + Sunny + $2.78 + 063099 + + + Cinquefoil + Potentilla + Annual + Shade + $7.06 + 052599 + + + Primrose + Oenothera + 3 - 5 + Sunny + $6.56 + 013099 + + + Gentian + Gentiana + 4 + Sun or Shade + $7.81 + 051899 + + + Blue Gentian + Gentiana + 4 + Sun or Shade + $8.56 + 050299 + + + Jacob's Ladder + Polemonium caeruleum + Annual + Shade + $9.26 + 022199 + + + Greek Valerian + Polemonium caeruleum + Annual + Shade + $4.36 + 071499 + + + California Poppy + Eschscholzia californica + Annual + Sun + $7.89 + 032799 + + + Shooting Star + Dodecatheon + Annual + Mostly Shady + $8.60 + 051399 + + + Snakeroot + Cimicifuga + Annual + Shade + $5.63 + 071199 + + + Cardinal Flower + Lobelia cardinalis + 2 + Shade + $3.02 + 022299 + + + + English ivy + Hedera helix + 3 + Mostly Shady + $9.99 + 000100 + + + Dwarf periwinkle + Vinca minor + 3 + Mostly Shady + $12.10 + 000409 + + + + + diff --git a/scales-xml/src/test/resources/data/plant_catalog.xml b/scales-xml/src/test/resources/data/plant_catalog.xml deleted file mode 100644 index 291002b5..00000000 --- a/scales-xml/src/test/resources/data/plant_catalog.xml +++ /dev/null @@ -1,309 +0,0 @@ - - - - - Bloodroot - Sanguinaria canadensis - 4 - Mostly Shady - $2.44 - 031599 - - - Columbine - Aquilegia canadensis - 3 - Mostly Shady - $9.37 - 030699 - - - Marsh Marigold - Caltha palustris - 4 - Mostly Sunny - $6.81 - 051799 - - - Cowslip - Caltha palustris - 4 - Mostly Shady - $9.90 - 030699 - - - Dutchman's-Breeches - Dicentra cucullaria - 3 - Mostly Shady - $6.44 - 012099 - - - Ginger, Wild - Asarum canadense - 3 - Mostly Shady - $9.03 - 041899 - - - Hepatica - Hepatica 
americana - 4 - Mostly Shady - $4.45 - 012699 - - - Liverleaf - Hepatica americana - 4 - Mostly Shady - $3.99 - 010299 - - - Jack-In-The-Pulpit - Arisaema triphyllum - 4 - Mostly Shady - $3.23 - 020199 - - - Mayapple - Podophyllum peltatum - 3 - Mostly Shady - $2.98 - 060599 - - - Phlox, Woodland - Phlox divaricata - 3 - Sun or Shade - $2.80 - 012299 - - - Phlox, Blue - Phlox divaricata - 3 - Sun or Shade - $5.59 - 021699 - - - Spring-Beauty - Claytonia Virginica - 7 - Mostly Shady - $6.59 - 020199 - - - Trillium - Trillium grandiflorum - 5 - Sun or Shade - $3.90 - 042999 - - - Wake Robin - Trillium grandiflorum - 5 - Sun or Shade - $3.20 - 022199 - - - Violet, Dog-Tooth - Erythronium americanum - 4 - Shade - $9.04 - 020199 - - - Trout Lily - Erythronium americanum - 4 - Shade - $6.94 - 032499 - - - Adder's-Tongue - Erythronium americanum - 4 - Shade - $9.58 - 041399 - - - Anemone - Anemone blanda - 6 - Mostly Shady - $8.86 - 122698 - - - Grecian Windflower - Anemone blanda - 6 - Mostly Shady - $9.16 - 071099 - - - Bee Balm - Monarda didyma - 4 - Shade - $4.59 - 050399 - - - Bergamot - Monarda didyma - 4 - Shade - $7.16 - 042799 - - - Black-Eyed Susan - Rudbeckia hirta - Annual - Sunny - $9.80 - 061899 - - - Buttercup - Ranunculus - 4 - Shade - $2.57 - 061099 - - - Crowfoot - Ranunculus - 4 - Shade - $9.34 - 040399 - - - Butterfly Weed - Asclepias tuberosa - Annual - Sunny - $2.78 - 063099 - - - Cinquefoil - Potentilla - Annual - Shade - $7.06 - 052599 - - - Primrose - Oenothera - 3 - 5 - Sunny - $6.56 - 013099 - - - Gentian - Gentiana - 4 - Sun or Shade - $7.81 - 051899 - - - Blue Gentian - Gentiana - 4 - Sun or Shade - $8.56 - 050299 - - - Jacob's Ladder - Polemonium caeruleum - Annual - Shade - $9.26 - 022199 - - - Greek Valerian - Polemonium caeruleum - Annual - Shade - $4.36 - 071499 - - - California Poppy - Eschscholzia californica - Annual - Sun - $7.89 - 032799 - - - Shooting Star - Dodecatheon - Annual - Mostly Shady - $8.60 - 051399 - - - Snakeroot - 
Cimicifuga - Annual - Shade - $5.63 - 071199 - - - Cardinal Flower - Lobelia cardinalis - 2 - Shade - $3.02 - 022299 - - - - English ivy - Hedera helix - 3 - Mostly Shady - $9.99 - 000100 - - - Dwarf periwinkle - Vinca minor - 3 - Mostly Shady - $12.10 - 000409 - - diff --git a/scales-xml/src/test/scala/scales/xml/PullTest.scala b/scales-xml/src/test/scala/scales/xml/PullTest.scala index d8f4b20f..2db1c1e2 100644 --- a/scales-xml/src/test/scala/scales/xml/PullTest.scala +++ b/scales-xml/src/test/scala/scales/xml/PullTest.scala @@ -1051,9 +1051,10 @@ try{ ) val pull = pullXml( - sresource(this, "/data/plant_catalog.xml"), + sresource(this, "/data/nature.xml"), strictPath = List( NoNamespaceQName("CATALOG"), + NoNamespaceQName("EUKARYOTE"), NoNamespaceQName("PLANT") ) )