diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index e30e467..c20aa75 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -522,6 +522,18 @@ class QPDF }; friend class Resolver; + // Warner class allows QPDFObjectHandle to create warnings + class Warner + { + friend class QPDFObjectHandle; + private: + static void warn(QPDF* qpdf, QPDFExc const& e) + { + qpdf->warn(e); + } + }; + friend class Warner; + // Pipe class is restricted to QPDF_Stream class Pipe { diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 8140005..e5cabd0 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -28,6 +28,7 @@ class QPDF; class QPDF_Dictionary; class QPDF_Array; class QPDFTokenizer; +class QPDFExc; class QPDFObjectHandle { @@ -623,6 +624,9 @@ class QPDFObjectHandle static void parseContentStream_internal( QPDFObjectHandle stream, ParserCallbacks* callbacks); + // Other methods + static void warn(QPDF*, QPDFExc const&); + bool initialized; QPDF* qpdf; // 0 for direct object diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh index cbdc065..9acea97 100644 --- a/include/qpdf/QUtil.hh +++ b/include/qpdf/QUtil.hh @@ -125,6 +125,9 @@ namespace QUtil QPDF_DLL void initializeWithRandomBytes(unsigned char* data, size_t len); + + QPDF_DLL + bool is_space(char); }; #endif // __QUTIL_HH__ diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index cae7a7c..245f301 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1084,6 +1084,11 @@ QPDF::readObject(PointerHolder input, else { QTC::TC("qpdf", "QPDF stream without newline"); + if (! QUtil::is_space(ch)) + { + QTC::TC("qpdf", "QPDF stream with non-space"); + input->unreadCh(ch); + } warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), this->last_object_description, input->tell(), @@ -1274,9 +1280,9 @@ QPDF::recoverStreamLength(PointerHolder input, if (length == 0) { - throw QPDFExc(qpdf_e_damaged_pdf, input->getName(), + warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), this->last_object_description, stream_offset, - "unable to recover stream data"); + "unable to recover stream data; treating stream as empty")); } QTC::TC("qpdf", "QPDF recovered stream length"); @@ -1459,31 +1465,41 @@ QPDF::resolve(int objid, int generation) } QPDFXRefEntry const& entry = this->xref_table[og]; - switch (entry.getType()) - { - case 1: - { - qpdf_offset_t offset = entry.getOffset(); - // Object stored in cache by readObjectAtOffset - int aobjid; - int ageneration; - QPDFObjectHandle oh = - readObjectAtOffset(true, offset, "", objid, generation, - aobjid, ageneration); - } - break; + try + { + switch (entry.getType()) + { + case 1: + { + qpdf_offset_t offset = entry.getOffset(); + // Object stored in cache by readObjectAtOffset + int aobjid; + int ageneration; + QPDFObjectHandle oh = + readObjectAtOffset(true, offset, "", objid, generation, + aobjid, ageneration); + } + break; - case 2: - resolveObjectsInStream(entry.getObjStreamNumber()); - break; + case 2: + resolveObjectsInStream(entry.getObjStreamNumber()); + break; - default: - throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, - "object " + - QUtil::int_to_string(objid) + "/" + - QUtil::int_to_string(generation) + - " has unexpected xref entry type"); - } + default: + throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, + "object " + + QUtil::int_to_string(objid) + "/" + + QUtil::int_to_string(generation) + + " has unexpected xref entry type"); + } + } + catch (QPDFExc& e) + { + warn(e); + QPDFObjectHandle oh = QPDFObjectHandle::newNull(); + this->obj_cache[og] = + ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); + } } return this->obj_cache[og].object; diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index eec4fae..55f8099 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1533,3 +1533,20 @@ QPDFObjectHandle::dereference() } } } + +void +QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e) +{ + // If parsing on behalf of a QPDF object and want to give a + // warning, we can warn through the object. If parsing for some + // other reason, such as an explicit creation of an object from a + // string, then just throw the exception. + if (qpdf) + { + QPDF::Warner::warn(qpdf, e); + } + else + { + throw e; + } +} diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index 91b1682..8ed6afc 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include @@ -16,10 +17,6 @@ static bool is_hex_digit(char ch) { return (strchr("0123456789abcdefABCDEF", ch) != 0); } -static bool is_space(char ch) -{ - return (strchr(" \f\n\r\t\v", ch) != 0); -} QPDFTokenizer::QPDFTokenizer() : pound_special_in_name(true), @@ -511,7 +508,7 @@ QPDFTokenizer::readToken(PointerHolder input, } else { - if (is_space(static_cast(ch)) && + if (QUtil::is_space(static_cast(ch)) && (input->getLastOffset() == offset)) { ++offset; diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc index cf45506..5f9c608 100644 --- a/libqpdf/QUtil.cc +++ b/libqpdf/QUtil.cc @@ -490,3 +490,9 @@ QUtil::srandom(unsigned int seed) srand(seed); #endif } + +bool +QUtil::is_space(char ch) +{ + return (strchr(" \f\n\r\t\v", ch) != 0); +} diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index e78a4f4..85ddef3 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -268,3 +268,4 @@ QPDF xref space 2 qpdf pages range omitted at end 0 qpdf pages range omitted in middle 0 qpdf npages 0 +QPDF stream with non-space 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 3b7ede5..354445b 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -435,7 +435,7 @@ $td->runtest("EOF terminating literal tokens", $td->NORMALIZE_NEWLINES); $td->runtest("EOF reading token", {$td->COMMAND => "qpdf --check eof-reading-token.pdf"}, - {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 2}, + {$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); $td->runtest("extra header text", {$td->COMMAND => "test_driver 32 minimal.pdf"}, @@ -739,7 +739,9 @@ $n_tests += @badfiles + 5; # neither Acrobat nor other PDF viewers really care. Tests 12 and 28 # have error conditions that used to be fatal but are now considered # non-fatal. -my %badtest_overrides = (6 => 0, 12 => 0, 28 => 0, 31 => 0); +my %badtest_overrides = (6 => 0, 12 => 0, 22 => 0, 23 => 0, 24 => 0, + 25 => 0, 26 => 0, 27 => 0, 28 => 0, 31 => 0, + 32 => 0, 34 => 0); for (my $i = 1; $i <= scalar(@badfiles); ++$i) { my $status = $badtest_overrides{$i}; @@ -788,7 +790,7 @@ $n_tests += @badfiles + 8; # though in some cases it may. Acrobat Reader would not be able to # recover any of these files any better. my %recover_failures = (); -for (1, 7, 13..21, 24, 29..30, 33, 35) +for (1, 7, 13..21, 29..30, 33, 35) { $recover_failures{$_} = 1; } diff --git a/qpdf/qtest/qpdf/bad22.out b/qpdf/qtest/qpdf/bad22.out index ec6d5f8..73d081e 100644 --- a/qpdf/qtest/qpdf/bad22.out +++ b/qpdf/qtest/qpdf/bad22.out @@ -1 +1,7 @@ -bad22.pdf (object 4 0, file position 314): stream dictionary lacks /Length key +WARNING: bad22.pdf (object 4 0, file position 314): stream dictionary lacks /Length key +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad23.out b/qpdf/qtest/qpdf/bad23.out index b4cf25e..9bc1bdc 100644 --- a/qpdf/qtest/qpdf/bad23.out +++ b/qpdf/qtest/qpdf/bad23.out @@ -1 +1,7 @@ -bad23.pdf (object 4 0, file position 314): /Length key in stream dictionary is not an integer +WARNING: bad23.pdf (object 4 0, file position 314): /Length key in stream dictionary is not an integer +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad24-recover.out b/qpdf/qtest/qpdf/bad24-recover.out index 0af01e9..d1bbe73 100644 --- a/qpdf/qtest/qpdf/bad24-recover.out +++ b/qpdf/qtest/qpdf/bad24-recover.out @@ -1,2 +1,9 @@ WARNING: bad24.pdf (object 4 0, file position 341): attempting to recover stream length -bad24.pdf (object 4 0, file position 341): unable to recover stream data +WARNING: bad24.pdf (object 4 0, file position 341): unable to recover stream data; treating stream as empty +WARNING: bad24.pdf (object 4 0, file position 778): EOF while reading token +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 1 done diff --git a/qpdf/qtest/qpdf/bad24.out b/qpdf/qtest/qpdf/bad24.out index f503214..0a4dc8e 100644 --- a/qpdf/qtest/qpdf/bad24.out +++ b/qpdf/qtest/qpdf/bad24.out @@ -1 +1,7 @@ -bad24.pdf (object 4 0, file position 385): expected endstream +WARNING: bad24.pdf (object 4 0, file position 385): expected endstream +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad25.out b/qpdf/qtest/qpdf/bad25.out index f336b34..d6b13e4 100644 --- a/qpdf/qtest/qpdf/bad25.out +++ b/qpdf/qtest/qpdf/bad25.out @@ -1 +1,7 @@ -bad25.pdf (object 4 0, file position 307): expected n n obj +WARNING: bad25.pdf (object 4 0, file position 307): expected n n obj +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad26.out b/qpdf/qtest/qpdf/bad26.out index 30c3b72..48263e5 100644 --- a/qpdf/qtest/qpdf/bad26.out +++ b/qpdf/qtest/qpdf/bad26.out @@ -1 +1,7 @@ -bad26.pdf (object 4 0, file position 307): expected n n obj +WARNING: bad26.pdf (object 4 0, file position 307): expected n n obj +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad27.out b/qpdf/qtest/qpdf/bad27.out index 2c494e4..4f38cca 100644 --- a/qpdf/qtest/qpdf/bad27.out +++ b/qpdf/qtest/qpdf/bad27.out @@ -1 +1,7 @@ -bad27.pdf (object 4 0, file position 307): expected n n obj +WARNING: bad27.pdf (object 4 0, file position 307): expected n n obj +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad32.out b/qpdf/qtest/qpdf/bad32.out index 60727cc..9b37770 100644 --- a/qpdf/qtest/qpdf/bad32.out +++ b/qpdf/qtest/qpdf/bad32.out @@ -1 +1,7 @@ -bad32.pdf (object 4 0, file position 307): expected 4 0 obj +WARNING: bad32.pdf (object 4 0, file position 307): expected 4 0 obj +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad34.out b/qpdf/qtest/qpdf/bad34.out index ee65e14..7ba7916 100644 --- a/qpdf/qtest/qpdf/bad34.out +++ b/qpdf/qtest/qpdf/bad34.out @@ -1 +1,7 @@ -bad34.pdf (object 4 0, file position 322): expected n n obj +WARNING: bad34.pdf (object 4 0, file position 322): expected n n obj +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad35-recover.out b/qpdf/qtest/qpdf/bad35-recover.out index add1666..db7ff5d 100644 --- a/qpdf/qtest/qpdf/bad35-recover.out +++ b/qpdf/qtest/qpdf/bad35-recover.out @@ -1 +1,2 @@ -bad35.pdf (object 1 0, file position 521): supposed object stream 1 has wrong type +WARNING: bad35.pdf (object 1 0, file position 521): supposed object stream 1 has wrong type +operation for Dictionary object attempted on object of wrong type diff --git a/qpdf/qtest/qpdf/bad35.out b/qpdf/qtest/qpdf/bad35.out index add1666..db7ff5d 100644 --- a/qpdf/qtest/qpdf/bad35.out +++ b/qpdf/qtest/qpdf/bad35.out @@ -1 +1,2 @@ -bad35.pdf (object 1 0, file position 521): supposed object stream 1 has wrong type +WARNING: bad35.pdf (object 1 0, file position 521): supposed object stream 1 has wrong type +operation for Dictionary object attempted on object of wrong type diff --git a/qpdf/qtest/qpdf/eof-reading-token.out b/qpdf/qtest/qpdf/eof-reading-token.out index 58e5b09..fefc516 100644 --- a/qpdf/qtest/qpdf/eof-reading-token.out +++ b/qpdf/qtest/qpdf/eof-reading-token.out @@ -2,4 +2,4 @@ checking eof-reading-token.pdf PDF Version: 1.3 File is not encrypted File is not linearized -object stream 12 (file position 5): EOF while reading token +WARNING: object stream 12 (file position 5): EOF while reading token diff --git a/qpdf/qtest/qpdf/stream-line-enders.out b/qpdf/qtest/qpdf/stream-line-enders.out index b7a7513..1932771 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.out +++ b/qpdf/qtest/qpdf/stream-line-enders.out @@ -1,3 +1,4 @@ -WARNING: stream-line-enders.pdf (object 5 0, file position 378): stream keyword followed by carriage return only -WARNING: stream-line-enders.pdf (object 6 0, file position 437): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 5 0, file position 384): stream keyword followed by carriage return only +WARNING: stream-line-enders.pdf (object 6 0, file position 443): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 7 0, file position 503): stream keyword not followed by proper line terminator qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/stream-line-enders.pdf b/qpdf/qtest/qpdf/stream-line-enders.pdf index e623888..2e05caa 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.pdf +++ b/qpdf/qtest/qpdf/stream-line-enders.pdf @@ -7,7 +7,7 @@ endobj << /Count 1 /Kids [ 3 0 R ] /Type /Pages >> endobj 3 0 obj -<< /Contents [ 4 0 R 5 0 R 6 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >> +<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 8 0 R >> /ProcSet 9 0 R >> /Type /Page >> endobj 4 0 obj << /Length 14 >> @@ -22,29 +22,35 @@ stream 72 720 Td endstream endobj 6 0 obj -<< /Length 15 >> +<< /Length 12 >> stream (Potato) Tj -ET endstream endobj 7 0 obj -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +<< /Length 11 >> +stream%comment +ET +endstream endobj 8 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +9 0 obj [ /PDF /Text ] endobj xref -0 9 +0 10 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000123 00000 n -0000000282 00000 n -0000000346 00000 n -0000000405 00000 n -0000000469 00000 n -0000000576 00000 n -trailer << /Root 1 0 R /Size 9 /ID [<08aa98c73f8a7262d77c8328772c3989><7b1f32865e2165debe277f27ee790092>] >> +0000000288 00000 n +0000000352 00000 n +0000000411 00000 n +0000000472 00000 n +0000000532 00000 n +0000000639 00000 n +trailer << /Root 1 0 R /Size 10 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >> startxref -606 +669 %%EOF diff --git a/qpdf/qtest/qpdf/stream-line-enders.qdf b/qpdf/qtest/qpdf/stream-line-enders.qdf index 3353602..aa14901 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.qdf +++ b/qpdf/qtest/qpdf/stream-line-enders.qdf @@ -29,6 +29,7 @@ endobj 4 0 R 6 0 R 8 0 R + 10 0 R ] /MediaBox [ 0 @@ -39,9 +40,9 @@ endobj /Parent 2 0 R /Resources << /Font << - /F1 10 0 R + /F1 12 0 R >> - /ProcSet 11 0 R + /ProcSet 13 0 R >> /Type /Page >> @@ -86,16 +87,31 @@ endobj >> stream (Potato) Tj -ET endstream endobj 9 0 obj -15 +12 endobj +%% Contents for page 1 %% Original object ID: 7 0 10 0 obj +<< + /Length 11 0 R +>> +stream +%comment +ET +endstream +endobj + +11 0 obj +12 +endobj + +%% Original object ID: 8 0 +12 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding @@ -105,8 +121,8 @@ endobj >> endobj -%% Original object ID: 8 0 -11 0 obj +%% Original object ID: 9 0 +13 0 obj [ /PDF /Text @@ -114,24 +130,26 @@ endobj endobj xref -0 12 +0 14 0000000000 65535 f 0000000052 00000 n 0000000133 00000 n 0000000242 00000 n -0000000516 00000 n -0000000585 00000 n -0000000654 00000 n -0000000719 00000 n -0000000788 00000 n -0000000858 00000 n -0000000904 00000 n -0000001050 00000 n +0000000527 00000 n +0000000596 00000 n +0000000665 00000 n +0000000730 00000 n +0000000799 00000 n +0000000866 00000 n +0000000935 00000 n +0000001004 00000 n +0000001051 00000 n +0000001197 00000 n trailer << /Root 1 0 R - /Size 12 + /Size 14 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] >> startxref -1086 +1233 %%EOF