Blame SOURCES/0001-avoid-problems-detecting-HTML-files-with-.xls-ext.patch

f085be
From df7ff240db01ee0e993c7cbc30d3370d6d1d0956 Mon Sep 17 00:00:00 2001
f085be
From: David Tardon <dtardon@redhat.com>
f085be
Date: Tue, 8 Jul 2014 17:01:27 +0200
f085be
Subject: [PATCH] avoid problems detecting HTML files with .xls ext.
f085be
f085be
(cherry picked from commit 86c6f18c2766aad43d6e3bfcf3530e40440ebca7)
f085be
Signed-off-by: David Tardon <dtardon@redhat.com>
f085be
f085be
Conflicts:
f085be
	filter/source/textfilterdetect/filterdetect.cxx
f085be
f085be
Change-Id: I9955223aac20f3f640fde51bb7231666c269ca70
f085be
---
f085be
 filter/Configuration_filter.mk                     |   1 +
f085be
 filter/source/config/fragments/types/calc_HTML.xcu |  35 ++++
f085be
 sc/Library_scd.mk                                  |   1 +
f085be
 sc/inc/htmlfilterdetect.hxx                        |  80 +++++++++
f085be
 sc/source/filter/html/htmlfilterdetect.cxx         | 180 +++++++++++++++++++++
f085be
 sc/source/ui/unoobj/detreg.cxx                     |   9 ++
f085be
 sc/util/scd.component                              |   3 +
f085be
 7 files changed, 309 insertions(+)
f085be
 create mode 100644 filter/source/config/fragments/types/calc_HTML.xcu
f085be
 create mode 100644 sc/inc/htmlfilterdetect.hxx
f085be
 create mode 100644 sc/source/filter/html/htmlfilterdetect.cxx
f085be
f085be
diff --git a/filter/Configuration_filter.mk b/filter/Configuration_filter.mk
f085be
index fe84350..36cf294 100644
f085be
--- a/filter/Configuration_filter.mk
f085be
+++ b/filter/Configuration_filter.mk
f085be
@@ -514,6 +514,7 @@ $(call filter_Configuration_add_ui_filters,fcfg_langpack,filter/source/config/fr
f085be
 $(call filter_Configuration_add_types,fcfg_langpack,fcfg_calc_types.xcu,filter/source/config/fragments/types,\
f085be
 	calc_DIF \
f085be
 	calc_ODS_FlatXML \
f085be
+	calc_HTML \
f085be
 	generic_HTML \
f085be
 	generic_Text \
f085be
 	calc_Lotus \
f085be
diff --git a/filter/source/config/fragments/types/calc_HTML.xcu b/filter/source/config/fragments/types/calc_HTML.xcu
f085be
new file mode 100644
f085be
index 0000000..f4682da
f085be
--- /dev/null
f085be
+++ b/filter/source/config/fragments/types/calc_HTML.xcu
f085be
@@ -0,0 +1,35 @@
f085be
+<!--
f085be
+ * This file is part of the LibreOffice project.
f085be
+ *
f085be
+ * This Source Code Form is subject to the terms of the Mozilla Public
f085be
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
f085be
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
f085be
+ *
f085be
+ * This file incorporates work covered by the following license notice:
f085be
+ *
f085be
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
f085be
+ *   contributor license agreements. See the NOTICE file distributed
f085be
+ *   with this work for additional information regarding copyright
f085be
+ *   ownership. The ASF licenses this file to you under the Apache
f085be
+ *   License, Version 2.0 (the "License"); you may not use this file
f085be
+ *   except in compliance with the License. You may obtain a copy of
f085be
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
f085be
+-->
f085be
+    <!-- Some HTML files are saved with the .xls
f085be
+    extension. Allow to detect these early to avoid going through the
f085be
+    whole list of detectors. This also avoids the risk of misdetection
f085be
+    as something else, as there are some formats that are text files and
f085be
+    the detection is just a heuristic (e.g., wp1 or wp42 supported by
f085be
+    libwpd). -->
f085be
+    <node oor:name="calc_HTML" oor:op="replace" >
f085be
+        <prop oor:name="DetectService"><value>com.sun.star.comp.calc.HtmlFilterDetect</value></prop>
f085be
+        <prop oor:name="URLPattern"/>
f085be
+        <prop oor:name="Extensions"><value>xls</value></prop>
f085be
+        <prop oor:name="MediaType"><value>text/html</value></prop>
f085be
+        <prop oor:name="Preferred"><value>false</value></prop>
f085be
+        <prop oor:name="PreferredFilter"/>
f085be
+        <prop oor:name="UIName">
f085be
+            <value>HTML Table</value>
f085be
+        </prop>
f085be
+        <prop oor:name="ClipboardFormat"/>
f085be
+    </node>
f085be
diff --git a/sc/Library_scd.mk b/sc/Library_scd.mk
f085be
index 4d02ae1..1b4d035 100644
f085be
--- a/sc/Library_scd.mk
f085be
+++ b/sc/Library_scd.mk
f085be
@@ -37,6 +37,7 @@ $(eval $(call gb_Library_use_libraries,scd,\
f085be
 ))
f085be
 
f085be
 $(eval $(call gb_Library_add_exception_objects,scd,\
f085be
+	sc/source/filter/html/htmlfilterdetect \
f085be
 	sc/source/ui/unoobj/detreg \
f085be
 	sc/source/ui/unoobj/scdetect \
f085be
 	sc/source/ui/unoobj/exceldetect \
f085be
diff --git a/sc/inc/htmlfilterdetect.hxx b/sc/inc/htmlfilterdetect.hxx
f085be
new file mode 100644
f085be
index 0000000..f131e89
f085be
--- /dev/null
f085be
+++ b/sc/inc/htmlfilterdetect.hxx
f085be
@@ -0,0 +1,80 @@
f085be
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
f085be
+/*
f085be
+ * This file is part of the LibreOffice project.
f085be
+ *
f085be
+ * This Source Code Form is subject to the terms of the Mozilla Public
f085be
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
f085be
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
f085be
+ *
f085be
+ * This file incorporates work covered by the following license notice:
f085be
+ *
f085be
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
f085be
+ *   contributor license agreements. See the NOTICE file distributed
f085be
+ *   with this work for additional information regarding copyright
f085be
+ *   ownership. The ASF licenses this file to you under the Apache
f085be
+ *   License, Version 2.0 (the "License"); you may not use this file
f085be
+ *   except in compliance with the License. You may obtain a copy of
f085be
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
f085be
+ */
f085be
+
f085be
+#ifndef INCLUDED_SC_INC_HTMLFILTERDETECT_HXX
f085be
+#define INCLUDED_SC_INC_HTMLFILTERDETECT_HXX
f085be
+
f085be
+#include <com/sun/star/document/XExtendedFilterDetection.hpp>
f085be
+#include <com/sun/star/lang/XInitialization.hpp>
f085be
+#include <com/sun/star/lang/XServiceInfo.hpp>
f085be
+#include <com/sun/star/uno/XComponentContext.hpp>
f085be
+
f085be
+#include <cppuhelper/implbase3.hxx>
f085be
+
f085be
+namespace sc
f085be
+{
f085be
+
f085be
+class HtmlFilterDetect : public cppu::WeakImplHelper3<
f085be
+    com::sun::star::document::XExtendedFilterDetection,
f085be
+    com::sun::star::lang::XInitialization,
f085be
+    com::sun::star::lang::XServiceInfo>
f085be
+{
f085be
+    com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext> mxCxt;
f085be
+
f085be
+public:
f085be
+
f085be
+    HtmlFilterDetect (const com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext>& xCxt);
f085be
+    virtual ~HtmlFilterDetect();
f085be
+
f085be
+    // XExtendedFilterDetection
f085be
+
f085be
+    virtual OUString SAL_CALL detect(com::sun::star::uno::Sequence<com::sun::star::beans::PropertyValue>& lDescriptor)
f085be
+            throw( com::sun::star::uno::RuntimeException, std::exception ) SAL_OVERRIDE;
f085be
+
f085be
+    // XInitialization
f085be
+
f085be
+    virtual void SAL_CALL initialize( const ::com::sun::star::uno::Sequence<com::sun::star::uno::Any>& aArguments)
f085be
+        throw (com::sun::star::uno::Exception, com::sun::star::uno::RuntimeException, std::exception) SAL_OVERRIDE;
f085be
+
f085be
+    // XServiceInfo
f085be
+
f085be
+    virtual OUString SAL_CALL getImplementationName()
f085be
+        throw (com::sun::star::uno::RuntimeException, std::exception) SAL_OVERRIDE;
f085be
+
f085be
+    virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName)
f085be
+        throw (com::sun::star::uno::RuntimeException, std::exception) SAL_OVERRIDE;
f085be
+
f085be
+    virtual com::sun::star::uno::Sequence<OUString> SAL_CALL getSupportedServiceNames()
f085be
+        throw (com::sun::star::uno::RuntimeException, std::exception) SAL_OVERRIDE;
f085be
+};
f085be
+
f085be
+OUString HtmlFilterDetect_getImplementationName();
f085be
+
f085be
+bool HtmlFilterDetect_supportsService(const OUString& ServiceName);
f085be
+
f085be
+com::sun::star::uno::Sequence<OUString> HtmlFilterDetect_getSupportedServiceNames();
f085be
+
f085be
+com::sun::star::uno::Reference<com::sun::star::uno::XInterface>
f085be
+HtmlFilterDetect_createInstance(const com::sun::star::uno::Reference<com::sun::star::uno::XComponentContext>& rCxt);
f085be
+
f085be
+}
f085be
+
f085be
+#endif
f085be
+
f085be
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
f085be
diff --git a/sc/source/filter/html/htmlfilterdetect.cxx b/sc/source/filter/html/htmlfilterdetect.cxx
f085be
new file mode 100644
f085be
index 0000000..f2f3db5
f085be
--- /dev/null
f085be
+++ b/sc/source/filter/html/htmlfilterdetect.cxx
f085be
@@ -0,0 +1,180 @@
f085be
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
f085be
+/*
f085be
+ * This file is part of the LibreOffice project.
f085be
+ *
f085be
+ * This Source Code Form is subject to the terms of the Mozilla Public
f085be
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
f085be
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
f085be
+ */
f085be
+
f085be
+#include "htmlfilterdetect.hxx"
f085be
+
f085be
+#include <svtools/htmltokn.h>
f085be
+#include <ucbhelper/content.hxx>
f085be
+#include <unotools/mediadescriptor.hxx>
f085be
+#include <unotools/ucbstreamhelper.hxx>
f085be
+
f085be
+#include <com/sun/star/lang/XMultiServiceFactory.hpp>
f085be
+#include <com/sun/star/io/XInputStream.hpp>
f085be
+#include <cppuhelper/supportsservice.hxx>
f085be
+#include <boost/scoped_ptr.hpp>
f085be
+
f085be
+#define CALC_HTML_FILTER   "calc_HTML_WebQuery"
f085be
+
f085be
+namespace sc
f085be
+{
f085be
+
f085be
+using namespace ::com::sun::star;
f085be
+using utl::MediaDescriptor;
f085be
+
f085be
+namespace {
f085be
+
f085be
+bool IsHTMLStream( const uno::Reference<io::XInputStream>& xInStream )
f085be
+{
f085be
+    boost::scoped_ptr<SvStream> pInStream( utl::UcbStreamHelper::CreateStream( xInStream ) );
f085be
+    if ( !pInStream || pInStream->GetError() )
f085be
+        // No stream
f085be
+        return false;
f085be
+
f085be
+    // Read the stream header
f085be
+    pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
f085be
+    const sal_Size nUniPos = pInStream->Tell();
f085be
+    const sal_uInt16 nSize = 4096;
f085be
+
f085be
+    OString sHeader;
f085be
+    if ( nUniPos == 3 || nUniPos == 0 ) // UTF-8 or non-Unicode
f085be
+        sHeader = read_uInt8s_ToOString( *pInStream, nSize );
f085be
+    else // UTF-16 (nUniPos = 2)
f085be
+        sHeader = OUStringToOString( read_uInt16s_ToOUString( *pInStream, nSize ), RTL_TEXTENCODING_ASCII_US );
f085be
+
f085be
+    // Now check whether the stream begins with a known HTML tag.
f085be
+    enum DetectPhase { BeforeTag, TagOpened, InTagName };
f085be
+    DetectPhase dp = BeforeTag;
f085be
+
f085be
+    const char* pHeader = sHeader.getStr();
f085be
+    const int   nLength = sHeader.getLength();
f085be
+    int i = 0, nStartOfTagIndex = 0;
f085be
+
f085be
+    for ( i = 0; i < nLength; ++i, ++pHeader )
f085be
+    {
f085be
+        char c = *pHeader;
f085be
+        if ( c == ' ' || c == '\n' || c == '\t' || c == '\r' || c == '\f' )
f085be
+        {
f085be
+            if ( dp == TagOpened )
f085be
+                return false; // Invalid: Should start with a tag name
f085be
+            else if ( dp == InTagName )
f085be
+                break; // End of tag name reached
f085be
+        }
f085be
+        else if ( c == '<' )
f085be
+        {
f085be
+            if ( dp == BeforeTag )
f085be
+                dp = TagOpened;
f085be
+            else
f085be
+                return false; // Invalid: Nested '<'
f085be
+        }
f085be
+        else if ( c == '>' )
f085be
+        {
f085be
+            if ( dp == InTagName )
f085be
+                break; // End of tag name reached
f085be
+            else
f085be
+                return false; // Invalid: Empty tag or before '<'
f085be
+        }
f085be
+        else if ( c == '!' )
f085be
+        {
f085be
+            if ( dp == TagOpened )
f085be
+                return true; // "<!" - DOCTYPE or comment block
f085be
+            else
f085be
+                return false; // Invalid: '!' before '<' or inside tag name
f085be
+        }
f085be
+        else
f085be
+        {
f085be
+            if ( dp == BeforeTag )
f085be
+                return false; // Invalid: Should start with a tag
f085be
+            else if ( dp == TagOpened )
f085be
+            {
f085be
+                nStartOfTagIndex = i;
f085be
+                dp = InTagName;
f085be
+            }
f085be
+        }
f085be
+    }
f085be
+
f085be
+    // The string following '<' has to be a known HTML token.
f085be
+    OString aToken = sHeader.copy( nStartOfTagIndex, i - nStartOfTagIndex );
f085be
+    if ( GetHTMLToken( OStringToOUString( aToken.toAsciiLowerCase(), RTL_TEXTENCODING_ASCII_US ) ) != 0 )
f085be
+        return true;
f085be
+
f085be
+    return false;
f085be
+}
f085be
+
f085be
+}
f085be
+
f085be
+HtmlFilterDetect::HtmlFilterDetect(const uno::Reference<uno::XComponentContext>& xCxt) :
f085be
+    mxCxt(xCxt) {}
f085be
+
f085be
+HtmlFilterDetect::~HtmlFilterDetect() {}
f085be
+
f085be
+OUString SAL_CALL HtmlFilterDetect::detect(uno::Sequence<beans::PropertyValue>& lDescriptor) throw (uno::RuntimeException, std::exception)
f085be
+{
f085be
+    MediaDescriptor aMediaDesc(lDescriptor);
f085be
+
f085be
+    OUString aType = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_TYPENAME(), OUString() );
f085be
+
f085be
+    uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM()], uno::UNO_QUERY);
f085be
+    if (!xInStream.is() || !IsHTMLStream(xInStream))
f085be
+        return OUString();
f085be
+
f085be
+    aMediaDesc[MediaDescriptor::PROP_FILTERNAME()] <<= OUString(CALC_HTML_FILTER);
f085be
+
f085be
+    aMediaDesc >> lDescriptor;
f085be
+    return aType;
f085be
+}
f085be
+
f085be
+// XInitialization
f085be
+
f085be
+void SAL_CALL HtmlFilterDetect::initialize(const uno::Sequence<uno::Any>& /*aArguments*/)
f085be
+    throw (uno::Exception, uno::RuntimeException, std::exception)
f085be
+{
f085be
+}
f085be
+
f085be
+OUString HtmlFilterDetect_getImplementationName()
f085be
+{
f085be
+    return OUString("com.sun.star.comp.calc.HtmlFilterDetect");
f085be
+}
f085be
+
f085be
+uno::Sequence<OUString> HtmlFilterDetect_getSupportedServiceNames()
f085be
+{
f085be
+    uno::Sequence<OUString> aRet(2);
f085be
+    OUString* pArray = aRet.getArray();
f085be
+    pArray[0] = "com.sun.star.document.ExtendedTypeDetection";
f085be
+    pArray[1] = "com.sun.star.comp.filters.HtmlFilterDetect";
f085be
+    return aRet;
f085be
+}
f085be
+
f085be
+uno::Reference<uno::XInterface> HtmlFilterDetect_createInstance(
f085be
+    const uno::Reference<uno::XComponentContext> & rCxt)
f085be
+{
f085be
+    return (cppu::OWeakObject*) new HtmlFilterDetect(rCxt);
f085be
+}
f085be
+
f085be
+// XServiceInfo
f085be
+OUString SAL_CALL HtmlFilterDetect::getImplementationName()
f085be
+    throw (uno::RuntimeException, std::exception)
f085be
+{
f085be
+    return HtmlFilterDetect_getImplementationName();
f085be
+}
f085be
+
f085be
+sal_Bool SAL_CALL HtmlFilterDetect::supportsService(const OUString& rServiceName)
f085be
+    throw (uno::RuntimeException, std::exception)
f085be
+{
f085be
+    return cppu::supportsService(this, rServiceName);
f085be
+}
f085be
+
f085be
+uno::Sequence<OUString> SAL_CALL HtmlFilterDetect::getSupportedServiceNames()
f085be
+    throw (uno::RuntimeException, std::exception)
f085be
+{
f085be
+    return HtmlFilterDetect_getSupportedServiceNames();
f085be
+}
f085be
+
f085be
+}
f085be
+
f085be
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
f085be
diff --git a/sc/source/ui/unoobj/detreg.cxx b/sc/source/ui/unoobj/detreg.cxx
f085be
index 6edc743..f840ac1 100644
f085be
--- a/sc/source/ui/unoobj/detreg.cxx
f085be
+++ b/sc/source/ui/unoobj/detreg.cxx
f085be
@@ -18,6 +18,7 @@
f085be
  */
f085be
 
f085be
 
f085be
+#include "htmlfilterdetect.hxx"
f085be
 #include "scdetect.hxx"
f085be
 #include "exceldetect.hxx"
f085be
 #include <cppuhelper/implementationentry.hxx>
f085be
@@ -42,6 +43,14 @@ static const cppu::ImplementationEntry spServices[] =
f085be
         0, 0
f085be
     },
f085be
 
f085be
+    {
f085be
+        sc::HtmlFilterDetect_createInstance,
f085be
+        sc::HtmlFilterDetect_getImplementationName,
f085be
+        sc::HtmlFilterDetect_getSupportedServiceNames,
f085be
+        cppu::createSingleComponentFactory,
f085be
+        0, 0
f085be
+    },
f085be
+
f085be
     { 0, 0, 0, 0, 0, 0 }
f085be
 };
f085be
 
f085be
diff --git a/sc/util/scd.component b/sc/util/scd.component
f085be
index 767429a..76ed959 100644
f085be
--- a/sc/util/scd.component
f085be
+++ b/sc/util/scd.component
f085be
@@ -25,4 +25,7 @@
f085be
   <implementation name="com.sun.star.comp.calc.ExcelBiffFormatDetector">
f085be
     <service name="com.sun.star.frame.ExtendedTypeDetection"/>
f085be
   </implementation>
f085be
+  <implementation name="com.sun.star.comp.calc.HtmlFilterDetect">
f085be
+    <service name="com.sun.star.frame.ExtendedTypeDetection"/>
f085be
+  </implementation>
f085be
 </component>
f085be
-- 
f085be
1.9.3
f085be