Blob Blame History Raw
From 14b5d7aa0b55275969809fdf84e8a8caee857c0f Mon Sep 17 00:00:00 2001
From: Christos Zoulas <christos@zoulas.com>
Date: Mon, 18 Apr 2022 21:38:10 +0000
Subject: [PATCH] From Dirk Mueller: clean up the DataONE magic rules

* Regex rules need literal dots escaped; otherwise each dot matches any
  character.
* Literal strings are matched with "search" rather than the much more
  expensive "regex".
* Use the standard XML declaration match, as used in other format matchers.
* Only match within the first 1024 bytes; the information we look for
  should be in the very first tag.
* Remove unnecessary parentheses.

---
 magic/Magdir/dataone | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/magic/Magdir/dataone b/magic/Magdir/dataone
index 8ef3f7981..566633eff 100644
--- a/magic/Magdir/dataone
+++ b/magic/Magdir/dataone
@@ -1,6 +1,6 @@
 
 #------------------------------------------------------------------------------
-# $File: dataone,v 1.2 2019/04/19 00:42:27 christos Exp $
+# $File: dataone,v 1.3 2022/04/18 21:38:10 christos Exp $
 #
 # DataONE- files from Dave Vieglais <dave.vieglais@gmail.com> &
 #                     Pratik Shrivastava <pratikshrivastava23@gmail.com>
@@ -9,39 +9,39 @@
 #------------------------------------------------------------------------------
 
 # EML (Ecological Metadata Language Format)
-0	string	<?xml
->&0	regex	(eml)-[0-9].[0-9].[0-9]+	eml://ecoinformatics.org/%s
+0	string	\<?xml\ version=
+>&0	regex/1024	eml-[0-9]\\.[0-9]\\.[0-9]+	eml://ecoinformatics.org/%s
 
 # onedcx (DataONE Dublin Core Extended v1.0)
->&0	regex	(onedcx/v)[0-9].[0-9]+		https://ns.dataone.org/metadata/schema/onedcx/v1.0
+>&0	regex/1024	onedcx/v[0-9]\\.[0-9]+		https://ns.dataone.org/metadata/schema/onedcx/v1.0
 
 # FGDC-STD-001-1998 (Content Standard for Digital Geospatial Metadata,
 # version 001-1998)
->&0	regex	fgdc				FGDC-STD-001-1998
+>&0	search/1024	fgdc				FGDC-STD-001-1998
 
 # Mercury (Oak Ridge National Lab Mercury Metadata version 1.0)
->&0	regex	(mercury/terms/v)[0-9].[0-9]	https://purl.org/ornl/schema/mercury/terms/v1.0
+>&0	regex/1024	mercury/terms/v[0-9]\\.[0-9]	https://purl.org/ornl/schema/mercury/terms/v1.0
 
 # ISOTC211 (Geographic MetaData (GMD) Extensible Markup Language)
->&0	regex	isotc211
->>&0	regex	eng;USA				https://www.isotc211.org/2005/gmd
+>&0	search/1024	isotc211
+>>&0	search/1024	eng;USA				https://www.isotc211.org/2005/gmd
 
 # ISOTC211 (NOAA Variant Geographic MetaData (GMD) Extensible Markup Language)
->>&0	regex	gov.noaa.nodc:[0-9]+		https://www.isotc211.org/2005/gmd-noaa
+>>&0	regex/1024	gov\\.noaa\\.nodc:[0-9]+		https://www.isotc211.org/2005/gmd-noaa
 
 # ISOTC211 PANGAEA Variant Geographic MetaData (GMD) Extensible Markup Language
->>&0	regex	pangaea.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+	https://www.isotc211.org/2005/gmd-pangaea
+>>&0	regex/1024	pangaea\\.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+	https://www.isotc211.org/2005/gmd-pangaea
 !:mime	text/xml
 
 
 # Object Reuse and Exchange Vocabulary
-0	string	<?xml
->&0	regex	rdf
->>&0	regex	openarchives	https://www.openarchives.org/ore/terms
+0	string	\<?xml\ version=
+>&0	search/1024	rdf
+>>&0	search/1024	openarchives	https://www.openarchives.org/ore/terms
 !:mime application/rdf+xml
 
 
 # Dryad Metadata Application Profile Version 3.1
-0	string	<DryadData
->&0	regex	(dryad-bibo/v)[0-9].[0-9]	https://datadryad.org/profile/v3.1
+0	string	\<DryadData
+>&0	regex/1024	dryad-bibo/v[0-9]\\.[0-9]	https://datadryad.org/profile/v3.1
 !:mime	text/xml