Blame SOURCES/file-5.39-regex-optimalizations.patch

840325
From 14b5d7aa0b55275969809fdf84e8a8caee857c0f Mon Sep 17 00:00:00 2001
840325
From: Christos Zoulas <christos@zoulas.com>
840325
Date: Mon, 18 Apr 2022 21:38:10 +0000
840325
Subject: [PATCH] From Dirk Mueller: * regex rules need literal dots escaped,
840325
 otherwise they are considered   any character * literal search strings can be
840325
 searched using search rather than the   much more expensive regex * use
840325
 standard xml declaration search as used in other format matchers * only match
840325
 the first 1024 bytes, the information we look for should   be in the very
840325
 first tag * remove unnecessary parentheses
840325
840325
---
840325
 magic/Magdir/dataone | 28 ++++++++++++++--------------
840325
 1 file changed, 14 insertions(+), 14 deletions(-)
840325
840325
diff --git a/magic/Magdir/dataone b/magic/Magdir/dataone
840325
index 8ef3f7981..566633eff 100644
840325
--- a/magic/Magdir/dataone
840325
+++ b/magic/Magdir/dataone
840325
@@ -1,6 +1,6 @@
840325
 
840325
 #------------------------------------------------------------------------------
840325
-# $File: dataone,v 1.2 2019/04/19 00:42:27 christos Exp $
840325
+# $File: dataone,v 1.3 2022/04/18 21:38:10 christos Exp $
840325
 #
840325
 # DataONE- files from Dave Vieglais <dave.vieglais@gmail.com> &
840325
 #                     Pratik Shrivastava <pratikshrivastava23@gmail.com>
840325
@@ -9,39 +9,39 @@
840325
 #------------------------------------------------------------------------------
840325
 
840325
 # EML (Ecological Metadata Language Format)
840325
-0	string	<?xml\ version=
840325
->&0	regex	(eml)-[0-9].[0-9].[0-9]+	eml://ecoinformatics.org/%s
840325
+0	string	\<?xml\ version=
840325
+>&0	regex/1024	eml-[0-9]\\.[0-9]\\.[0-9]+	eml://ecoinformatics.org/%s
840325
 
840325
 # onedcx (DataONE Dublin Core Extended v1.0)
840325
->&0	regex	(onedcx/v)[0-9].[0-9]+		https://ns.dataone.org/metadata/schema/onedcx/v1.0
840325
+>&0	regex/1024	onedcx/v[0-9]\\.[0-9]+		https://ns.dataone.org/metadata/schema/onedcx/v1.0
840325
 
840325
 # FGDC-STD-001-1998 (Content Standard for Digital Geospatial Metadata,
840325
 # version 001-1998)
840325
->&0	regex	fgdc				FGDC-STD-001-1998
840325
+>&0	search/1024	fgdc				FGDC-STD-001-1998
840325
 
840325
 # Mercury (Oak Ridge National Lab Mercury Metadata version 1.0)
840325
->&0	regex	(mercury/terms/v)[0-9].[0-9]	https://purl.org/ornl/schema/mercury/terms/v1.0
840325
+>&0	regex/1024	mercury/terms/v[0-9]\\.[0-9]	https://purl.org/ornl/schema/mercury/terms/v1.0
840325
 
840325
 # ISOTC211 (Geographic MetaData (GMD) Extensible Markup Language)
840325
->&0	regex	isotc211
840325
->>&0	regex	eng;USA				https://www.isotc211.org/2005/gmd
840325
+>&0	search/1024	isotc211
840325
+>>&0	search/1024	eng;USA				https://www.isotc211.org/2005/gmd
840325
 
840325
 # ISOTC211 (NOAA Variant Geographic MetaData (GMD) Extensible Markup Language)
840325
->>&0	regex	gov.noaa.nodc:[0-9]+		https://www.isotc211.org/2005/gmd-noaa
840325
+>>&0	regex/1024	gov\\.noaa\\.nodc:[0-9]+		https://www.isotc211.org/2005/gmd-noaa
840325
 
840325
 # ISOTC211 PANGAEA Variant Geographic MetaData (GMD) Extensible Markup Language
840325
->>&0	regex	pangaea.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+	https://www.isotc211.org/2005/gmd-pangaea
840325
+>>&0	regex/1024	pangaea\\.dataset[0-9][0-9][0-9][0-9][0-9][0-9]+	https://www.isotc211.org/2005/gmd-pangaea
840325
 !:mime	text/xml
840325
 
840325
 
840325
 # Object Reuse and Exchange Vocabulary
840325
-0	string	<?xml\ version=
840325
->&0	regex	rdf
840325
->>&0	regex	openarchives	https://www.openarchives.org/ore/terms
840325
+0	string	\<?xml\ version=
840325
+>&0	search/1024	rdf
840325
+>>&0	search/1024	openarchives	https://www.openarchives.org/ore/terms
840325
 !:mime application/rdf+xml
840325
 
840325
 
840325
 # Dryad Metadata Application Profile Version 3.1
840325
 0	string	<?xml\ version=
840325
->&0	regex	(dryad-bibo/v)[0-9].[0-9]	https://datadryad.org/profile/v3.1
840325
+>&0	regex/1024	dryad-bibo/v[0-9]\\.[0-9]	https://datadryad.org/profile/v3.1
840325
 !:mime	text/xml