From dbebac36f980517f5bd9b7e64f017b383e38ba83 Mon Sep 17 00:00:00 2001 From: Pierre Lindenbaum <33838+lindenb@users.noreply.github.com> Date: Tue, 29 Oct 2024 17:27:00 +0100 Subject: [PATCH] Improve GFF API by adding new convenience methods * new methods getUniqueAttribute() and hasAttribute() for common GFF use scenarios * make Gff3BaseData Locatable --- .../java/htsjdk/tribble/gff/Gff3BaseData.java | 42 ++++++++++++++++++- .../java/htsjdk/tribble/gff/Gff3Feature.java | 25 +++++++++++ .../htsjdk/tribble/gff/Gff3CodecTest.java | 6 +++ 3 files changed, 72 insertions(+), 1 deletion(-) diff --git a/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java b/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java index 0bb414f917..8d35978860 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3BaseData.java @@ -1,5 +1,6 @@ package htsjdk.tribble.gff; +import htsjdk.samtools.util.Locatable; import htsjdk.tribble.annotation.Strand; import java.util.ArrayList; @@ -7,8 +8,9 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Optional; -public class Gff3BaseData { +public class Gff3BaseData implements Locatable { private final String contig; private final String source; private final String type; @@ -116,6 +118,7 @@ private int computeHashCode() { return hash; } + @Override public String getContig() { return contig; } @@ -128,10 +131,12 @@ public String getType() { return type; } + @Override public int getStart() { return start; } + @Override public int getEnd() { return end; } @@ -152,10 +157,45 @@ public Map<String, List<String>> getAttributes() { return attributes; } + /** + * Gets the values associated with the key as a List, or an empty list if this key is not present. + * + * @param key key whose associated values are to be returned + * @return the values as a List, or an empty list if this key is not present + */ public List<String> getAttribute(final String key) { return attributes.getOrDefault(key, Collections.emptyList()); 
} + /** + * Returns true if this record contains an attribute for the specified key. + * + * @param key key whose presence in this map is to be tested + * @return true if this map contains an attribute for the specified key + */ + public boolean hasAttribute(final String key) { + return attributes.containsKey(key); + } + + /** + * Most attributes in a GFF file are present just one time in a line, e.g. : gene_biotype, gene_name, etc ... + * This function returns an Optional.empty if the key is not present, + * an Optional.of(value) if there is only one value associated with the key, + * or it throws an IllegalArgumentException if there is more than one value. + * + * @param key key whose presence in the attributes is to be tested + * @return {@code Optional<String>} if this map contains zero or one attribute for the specified key + * @throws IllegalArgumentException if there is more than one value + */ + public Optional<String> getUniqueAttribute(final String key) { + final List<String> atts = getAttribute(key); + switch(atts.size()) { + case 0 : return Optional.empty(); + case 1 : return Optional.of(atts.get(0)); + default : throw new IllegalArgumentException("getUniqueAttribute cannot be called with key="+key+" because it contains more than one value " + String.join(", ", atts)); + } + } + public String getId() { return id; } diff --git a/src/main/java/htsjdk/tribble/gff/Gff3Feature.java b/src/main/java/htsjdk/tribble/gff/Gff3Feature.java index 53ac1ea92e..37a879a5b9 100644 --- a/src/main/java/htsjdk/tribble/gff/Gff3Feature.java +++ b/src/main/java/htsjdk/tribble/gff/Gff3Feature.java @@ -5,6 +5,7 @@ import java.util.List; import java.util.Map; +import java.util.Optional; import java.util.Set; /** @@ -55,7 +56,31 @@ default int getStart() { default List<String> getAttribute(final String key) { return getBaseData().getAttribute(key); } + + /** + * Returns true if this record contains an attribute for the specified key. 
+ * + * @param key key whose presence in this map is to be tested + * @return true if this map contains an attribute for the specified key + */ + default boolean hasAttribute(final String key) { + return getBaseData().hasAttribute(key); + } + /** + * Most attributes in a GFF file are present just one time in a line, e.g. : gene_biotype, gene_name, etc ... + * This function returns an Optional.empty if the key is not present, + * an Optional.of(value) if there is only one value associated with the key, + * or it throws an IllegalArgumentException if there is more than one value. + * + * @param key key whose presence in the attributes is to be tested + * @return {@code Optional<String>} if this map contains zero or one attribute for the specified key + * @throws IllegalArgumentException if there is more than one value. + */ + default Optional<String> getUniqueAttribute(final String key) { + return getBaseData().getUniqueAttribute(key); + } + default Map<String, List<String>> getAttributes() { return getBaseData().getAttributes();} default String getID() { return getBaseData().getId();} diff --git a/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java b/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java index d475f125fb..38b9fb5d9c 100644 --- a/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java +++ b/src/test/java/htsjdk/tribble/gff/Gff3CodecTest.java @@ -79,6 +79,8 @@ public void codecFilterOutFieldsTest(final Path inputGff3, final int expectedTot for (final Gff3Feature feature : reader.iterator()) { for(final String key : skip_attributes) { Assert.assertTrue(feature.getAttribute(key).isEmpty()); + Assert.assertFalse(feature.hasAttribute(key)); + Assert.assertFalse(feature.getUniqueAttribute(key).isPresent()); } countTotalFeatures++; } @@ -199,6 +201,10 @@ public void urlDecodingTest() throws IOException { Assert.assertEquals(feature.getType(), "a region"); Assert.assertEquals(feature.getID(), "this is the ID of this wacky feature^&%##$%*&>,. 
,."); Assert.assertEquals(feature.getAttribute("Another key"), Arrays.asList("Another=value", "And a second, value")); + Assert.assertTrue(feature.hasAttribute("Another key")); + Assert.assertTrue(feature.hasAttribute(Gff3Constants.ID_ATTRIBUTE_KEY)); + Assert.assertTrue(feature.getUniqueAttribute(Gff3Constants.ID_ATTRIBUTE_KEY).isPresent()); + Assert.assertFalse(feature.getUniqueAttribute("missing").isPresent()); }