I have the following PDF https://www.dco.uscg.mil/Portals/9/NMC/pdfs/forms/CG_719B.pdf
I have tried a number of different ways to access the text boxes from code, for example with pdf-lib in the browser:
/**
 * Fill every text field of the uploaded PDF with its own field name, log the
 * rectangle of each of its widgets, and download the result as
 * "filled_with_names.pdf".
 *
 * Reads the file from the `#pdf-upload` input; alerts and returns early when
 * no file is selected. Non-text fields are logged and left untouched.
 *
 * @returns {Promise<void>}
 */
async function fillAllFields() {
  const file = document.getElementById('pdf-upload').files[0];
  if (!file) return alert("Please upload a PDF");

  const arrayBuffer = await file.arrayBuffer();
  const pdfDoc = await PDFLib.PDFDocument.load(arrayBuffer);
  const form = pdfDoc.getForm();

  form.getFields().forEach(field => {
    const name = field.getName();

    // Check the field type explicitly. Relying on getTextField() throwing
    // inside a catch-all also swallowed unrelated errors and reported real
    // text fields as "non-text".
    if (!(field instanceof PDFLib.PDFTextField)) {
      console.log(`Skipping non-text field: ${name}`);
      return;
    }

    try {
      field.setText(name);
    } catch (e) {
      // Best effort: setText can reject a value (e.g. longer than the
      // field's maximum length); report it and keep going.
      console.warn(`Could not set text for field "${name}":`, e);
    }

    // getWidgets() handles both a field that is its own widget and a field
    // whose widgets are listed under Kids. The previous dict.get('Kids')
    // lookup used a plain string key, which never matches a PDFName key, so
    // the rectangles were never logged.
    field.acroField.getWidgets().forEach(widget => {
      const { x, y, width, height } = widget.getRectangle();
      console.log(`Field "${name}" at [${x}, ${y}, ${x + width}, ${y + height}]`);
    });
  });

  const pdfBytes = await pdfDoc.save();
  const blob = new Blob([pdfBytes], { type: "application/pdf" });
  const url = URL.createObjectURL(blob);
  const link = document.createElement("a");
  link.href = url;
  link.download = "filled_with_names.pdf";
  link.click();
  // Release the blob URL once the click has been dispatched so the PDF bytes
  // are not kept alive for the lifetime of the page.
  setTimeout(() => URL.revokeObjectURL(url), 0);
}
However, this does not give me access to the text boxes. I then tried a different approach: adding placeholder text such as {{First_name}} above each box, in the hope that I could find that piece of text and replace it. However, when I use pdfplumber to extract the text, the placeholder is not returned:
import pdfplumber
# Print the extracted text of page 2 of the filled PDF.
with pdfplumber.open("CG_719B_filled.pdf") as pdf:
    for page in pdf.pages:
        if page.page_number == 2:
            # Call extract_text(); printing the bare attribute only shows the
            # bound-method repr, not the page text.
            # NOTE(review): extract_text() presumably reads only the page's
            # content stream, so values held in form-field widgets would not
            # show up here -- confirm against the pdfplumber docs.
            print(page.extract_text())
So now I am checking for any kind of AcroForm, and the PDF does not seem to have one.
import pdfplumber
from pdfplumber.utils.pdfinternals import resolve_and_decode, resolve
# Open the PDF once at module level; the script below reads `pdf.doc.catalog`
# and releases the handle with `pdf.close()` when it is done.
pdf = pdfplumber.open("CG_719B_filled.pdf")
def parse_field_helper(form_data, field, prefix=None):
    """Collect AcroForm field data from `field` into the `form_data` list.

    The field's dotted name is built from `prefix` and its own "T" entry
    (empty parts are dropped). Child fields listed under "Kids" are visited
    recursively with that dotted name as their prefix. When the field has a
    "T" or "TU" entry, one `[field_name, alternate_field_name, field_value]`
    row is appended to `form_data`.
    """
    node = field.resolve()
    own_name = resolve_and_decode(node.get("T"))
    field_name = ".".join(part for part in (prefix, own_name) if part)

    if "Kids" in node:
        for child in node["Kids"]:
            parse_field_helper(form_data, child, prefix=field_name)

    # A node with neither "T" (field name) nor "TU" (alternate, usually more
    # human-readable name) contributes no row of its own.
    if "T" not in node and "TU" not in node:
        return

    alternate_field_name = None
    if node.get("TU"):
        alternate_field_name = resolve_and_decode(node.get("TU"))

    field_value = None
    if "V" in node:
        field_value = resolve_and_decode(node["V"])

    form_data.append([field_name, alternate_field_name, field_value])
# Rows of [field_name, alternate_field_name, field_value] gathered below.
form_data = []
try:
    # Check if the PDF has an AcroForm (interactive form fields)
    if "AcroForm" in pdf.doc.catalog:
        acro_form = resolve(pdf.doc.catalog["AcroForm"])
        if "Fields" in acro_form:
            fields = resolve(acro_form["Fields"])
            for field in fields:
                parse_field_helper(form_data, field)
            print(form_data)
        else:
            print("PDF has AcroForm but no Fields")
    else:
        print("PDF does not contain an AcroForm (no interactive form fields)")
finally:
    # Close the PDF even if resolving or parsing a field raises, so the file
    # handle is not leaked.
    pdf.close()
PDF does not contain an AcroForm (no interactive form fields) :(
Why did I think it was going to be so easy to populate a PDF form? I'm at a loss as to which path to take. I'm almost tempted to remake the entire form in something that can be quickly filled in with replaceable variables.
I would appreciate it if someone could explain what exactly the issue is and how I could resolve it: either by converting this PDF to an AcroForm with fields, plus a simple way to reference those fields and add the data, or by recreating the form in a way that can be filled in via code.
