Ich sehe, dass eine ähnliche Frage gestellt wurde (Ein Rechteck muss eine nicht negative Höhe haben), aber ich weiß nicht, wie ich mit der Antwort fortfahren soll ... Außerdem sieht es etwas anders aus. Wie auch immer:
Ich habe den Code, den ich hier gefunden habe (https://github.com/jorisschellekens/bor ... xpressions), in C:\Users\Erik\Desktop\Strings_uit_pdf\stringsuitpdf.py geändert. Ich habe auch ein PDF: C:\Users\Erik\Desktop\Strings_uit_pdf\Jim.pdf. Es ist ziemlich groß: https://drive.google.com/file/d/19ykr26 ... sp=sharing.
Dies ist der Code:
#!chapter_005/src/snippet_006.py
import typing
from borb.pdf import Document
from borb.pdf import PDF
from borb.toolkit import RegularExpressionTextExtraction
# Path of the PDF to search; main() opens it relative to the current working directory.
teonderzoekenpdf = "Jim.pdf"
# Pattern handed to RegularExpressionTextExtraction in main().
zoekstring = 'Cited by'
def main():
    """Search the PDF for ``zoekstring`` and print every match with its page.

    Opens ``teonderzoekenpdf``, loads it with a
    ``RegularExpressionTextExtraction`` listener attached, and prints one line
    per match in the form ``page <page key>: <index on page> <matched text>``.
    Matches on all pages are reported, not only those on the first page.

    Raises:
        AssertionError: if no Document was read. borb may also raise it from
            inside ``PDF.loads`` while the listener processes page content.
    """
    # read the Document
    # fmt: off
    print("Stap 2")
    doc: typing.Optional[Document] = None
    extractor: RegularExpressionTextExtraction = RegularExpressionTextExtraction(zoekstring)
    with open(teonderzoekenpdf, "rb") as in_file_handle:
        print("Stap 3")
        # The listener is fed while the pages are parsed, so any failure in
        # borb's text handling surfaces from this call.
        doc = PDF.loads(in_file_handle, [extractor])
        print("Stap 3,5")
    # fmt: on
    # check whether we have read a Document
    print("Stap 4")
    assert doc is not None
    print("Stap 5")
    # print matching groups, page by page
    # NOTE(review): get_matches() is assumed to map page index -> list of
    # matches (the original indexed it with [0]); confirm against the
    # installed borb version.
    matches_per_page = extractor.get_matches()
    for page_nr in sorted(matches_per_page):
        for i, m in enumerate(matches_per_page[page_nr]):
            print("Stap 6")
            print("page %d: %d %s" % (page_nr, i, m.group(0)))
            # m.get_bounding_boxes() yields the rectangles of a match if the
            # position on the page is needed as well.
if __name__ == "__main__":
    # Script entry point: announce the first step, then run the search.
    print("Stap 1")
    main()
Dies ist die Ausgabe:
PS C:\Users\Erik\Desktop\Strings_uit_pdf> c:; cd 'c:\Users\Erik\Desktop\Strings_uit_pdf'; & 'c:\Python312\python.exe' 'c:\Users\Erik\.vscode\extensions\ms-python.debugpy-2024.14.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher' '51988' '--' 'c:\Users\Erik\Desktop\Strings_uit_pdf\stringsuitpdf.py'
Stap 1
Stap 2
Stap 3
Traceback (most recent call last):
File "c:\Users\Erik\Desktop\Strings_uit_pdf\stringsuitpdf.py", line 56, in <module>
main()
File "c:\Users\Erik\Desktop\Strings_uit_pdf\stringsuitpdf.py", line 29, in main
doc = PDF.loads(in_file_handle, [l])
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\pdf\pdf.py", line 85, in loads
document: Document = ReadAnyObjectTransformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\reference\xref_transformer.py", line 305, in transform
trailer = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\object\dictionary_transformer.py", line 69, in transform
v = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\reference\reference_transformer.py", line 169, in transform
transformed_referenced_object = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\page\root_dictionary_transformer.py", line 112, in transform
transformed_root_dictionary = t.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\object\dictionary_transformer.py", line 69, in transform
v = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\reference\reference_transformer.py", line 169, in transform
transformed_referenced_object = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\object\dictionary_transformer.py", line 69, in transform
v = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\object\array_transformer.py", line 69, in transform
object_to_transform = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\reference\reference_transformer.py", line 169, in transform
transformed_referenced_object = self.get_root_transformer().transform(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\any_object_transformer.py", line 113, in transform
return super().transform(
^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\transformer.py", line 149, in transform
out = h.transform(
^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\io\read\page\page_dictionary_transformer.py", line 129, in transform
CanvasStreamProcessor(page_out, canvas, []).read(
File "c:\Python312\Lib\site-packages\borb\pdf\canvas\canvas_stream_processor.py", line 277, in read
raise e
File "c:\Python312\Lib\site-packages\borb\pdf\canvas\canvas_stream_processor.py", line 271, in read
operator.invoke(self, operands, event_listeners)
File "c:\Python312\Lib\site-packages\borb\pdf\canvas\operator\text\show_text_with_glyph_positioning.py", line 84, in invoke
l._event_occurred(tri)
File "c:\Python312\Lib\site-packages\borb\toolkit\text\regular_expression_text_extraction.py", line 326, in _event_occurred
self._render_text(event)
File "c:\Python312\Lib\site-packages\borb\toolkit\text\regular_expression_text_extraction.py", line 338, in _render_text
for e in text_render_info.split_on_glyphs():
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\pdf\canvas\event\chunk_of_text_render_event.py", line 177, in split_on_glyphs
e._baseline_bounding_box = Rectangle(
^^^^^^^^^^
File "c:\Python312\Lib\site-packages\borb\pdf\canvas\geometry\rectangle.py", line 30, in __init__
assert width >= 0, "A Rectangle must have a non-negative width."
^^^^^^^^^^
AssertionError: A Rectangle must have a non-negative width.
Ich suche nur nach Text, daher verstehe ich nicht, was das Rechteck damit zu tun hat ... Aber es gibt einige Kästchen im Dokument, also kommt der Fehler vielleicht von dort?
Wie auch immer, ich würde gerne eine Liste der Erwähnungen meines Teilstrings und möglicherweise eine Seitenzahl jeder Erwähnung abrufen können.
Wenn jemand weiß, was ich tun könnte, damit das funktioniert, wäre das großartig!
Vielen Dank im Voraus!
Borb RegularExpressionTextExtraction „AssertionError: Ein Rechteck muss eine nicht negative Breite haben.“ ⇐ Python
-
- Similar Topics
- Replies
- Views
- Last post