with Google Cloud Document AI and PostgreSQL?
Emit a new event when a new column is added to a table. See the documentation.
Emit a new event when a row is added or modified. See the documentation.
Emit a new event when a new row is added to a table. See the documentation.
Emit a new event when new rows are returned from a custom query that you provide. See the documentation.
Emit a new event when a new table is added to the database. See the documentation.
Finds a row in a table via a custom query. See the documentation.
import { DocumentProcessorServiceClient } from '@google-cloud/documentai/build/src/v1/index.js';
import { promises as fs } from 'fs';
import { get } from 'https';
import { writeFile } from 'fs/promises';
import { join } from 'path';
/**
 * Pipedream component: downloads a sample PDF into /tmp, sends it to a
 * Google Cloud Document AI processor, and returns the text of the paragraphs
 * detected on the first page of the processed document.
 */
export default defineComponent({
  props: {
    google_cloud_document_ai: {
      type: "app",
      app: "google_cloud_document_ai",
    }
  },
  /**
   * Runs the download + Document AI request.
   *
   * @returns {Promise<string>} Concatenated paragraph text of the first page,
   *   or an empty string when the processor returns no pages/paragraphs.
   * @throws {Error} When the sample file cannot be downloaded (network error
   *   or non-200 HTTP status), or when the Document AI request fails.
   */
  async run({ steps, $ }) {
    //Sample pdf file to process by Google Document AI API
    const url = 'https://www.learningcontainer.com/wp-content/uploads/2019/09/sample-pdf-file.pdf';
    const filePath = join('/tmp', 'my_document.pdf');

    // Streams the remote file into memory and writes it to filePath.
    const downloadFile = async () => {
      const res = await new Promise((resolve, reject) => {
        // Without an 'error' listener a DNS/TLS/socket failure would leave
        // this promise pending forever.
        get(url, resolve).on('error', reject);
      });
      if (res.statusCode !== 200) {
        // Drain the response so the socket is released, then fail loudly
        // instead of saving an error page as if it were the PDF.
        res.resume();
        throw new Error(`Failed to download ${url}: HTTP ${res.statusCode}`);
      }
      const chunks = [];
      for await (const chunk of res) {
        chunks.push(chunk);
      }
      await writeFile(filePath, Buffer.concat(chunks));
      console.log(`File downloaded successfully to ${filePath}`);
    };
    await downloadFile();

    const projectId = this.google_cloud_document_ai.$auth.project_id;
    const location = this.google_cloud_document_ai.$auth.location;
    const processorId = this.google_cloud_document_ai.$auth.processor_id;

    // Instantiates a client
    // apiEndpoint regions available: eu-documentai.googleapis.com, us-documentai.googleapis.com (Required if using eu based processor)
    // e.g. new DocumentProcessorServiceClient({apiEndpoint: 'eu-documentai.googleapis.com'})
    const client = new DocumentProcessorServiceClient();

    // Sends the downloaded file to the processor and extracts first-page text.
    async function testRequest() {
      // The full resource name of the processor, e.g.:
      // projects/project-id/locations/location/processors/processor-id
      // You must create new processors in the Cloud Console first
      const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;

      // Read the file into memory; readFile already yields a Buffer, so it
      // can be base64-encoded directly.
      const fileBuffer = await fs.readFile(filePath);
      const encodedDocument = fileBuffer.toString('base64');

      const request = {
        name,
        rawDocument: {
          content: encodedDocument,
          mimeType: 'application/pdf',
        },
      };

      // Recognizes text entities in the PDF document
      const [result] = await client.processDocument(request);
      const { document } = result;

      // Get all of the document text as one big string
      const { text } = document;

      // Resolves a text anchor to the substring(s) of `text` it points at.
      // An anchor may reference several segments; all of them are joined.
      const getText = textAnchor => {
        if (!textAnchor || !textAnchor.textSegments || textAnchor.textSegments.length === 0) {
          return '';
        }
        return textAnchor.textSegments
          .map(segment => {
            // First shard in document doesn't have startIndex property
            const startIndex = segment.startIndex || 0;
            return text.substring(startIndex, segment.endIndex);
          })
          .join('');
      };

      // Read the text recognition output from the processor (first page only).
      const [firstPage] = document.pages || [];
      if (!firstPage) {
        return '';
      }
      const paragraphs = firstPage.paragraphs || [];
      return paragraphs
        .map(paragraph => getText(paragraph.layout.textAnchor))
        .join('');
    }

    return await testRequest();
  }
})
On Pipedream, you can leverage the PostgreSQL app to create workflows that automate database operations, synchronize data across platforms, and react to database events in real-time. Think handling new row entries, updating records from webhooks, or even compiling reports on a set schedule. Pipedream's serverless platform provides a powerful way to connect PostgreSQL with a variety of apps, enabling you to create tailored automation that fits your specific needs.
import postgresql from "@pipedream/postgresql"
// Pipedream component that runs a fixed query through the PostgreSQL app
// prop and returns whatever executeQuery resolves with.
export default defineComponent({
  props: {
    postgresql,
  },
  async run({ steps, $ }) {
    // Component source code:
    // https://github.com/PipedreamHQ/pipedream/tree/master/components/postgresql
    const text = "SELECT NOW()";
    // The statement has no placeholders, so the values list is ignored.
    const values = [];
    const result = await this.postgresql.executeQuery({ text, values });
    return result;
  },
})