with Google Cloud Document AI and Snowflake?
import { DocumentProcessorServiceClient } from '@google-cloud/documentai/build/src/v1/index.js';
import { promises as fs } from 'fs';
import { get } from 'https';
import { writeFile } from 'fs/promises';
import { join } from 'path';
export default defineComponent({
  props: {
    google_cloud_document_ai: {
      type: "app",
      app: "google_cloud_document_ai",
    },
  },
  /**
   * Downloads a sample PDF to /tmp, sends it to a Google Cloud Document AI
   * processor, and returns the concatenated text of every paragraph found on
   * the first page of the processed document.
   *
   * The project, location and processor IDs are read from the connected
   * `google_cloud_document_ai` account's `$auth` object.
   *
   * @returns {Promise<string>} Paragraph text of the first page, or an empty
   *   string when the processor reports no pages or no paragraphs.
   * @throws {Error} When the sample file cannot be downloaded (request error
   *   or non-2xx HTTP status). Errors from the file system and from
   *   `processDocument` propagate unchanged.
   */
  async run({ steps, $ }) {
    // Sample PDF file to process with the Google Document AI API.
    const url = 'https://www.learningcontainer.com/wp-content/uploads/2019/09/sample-pdf-file.pdf';
    const filePath = join('/tmp', 'my_document.pdf');

    // Fetches `url` and writes the response body to `filePath`.
    const downloadFile = async () => {
      const res = await new Promise((resolve, reject) => {
        // Reject on request errors so a network failure does not leave the
        // promise pending forever.
        get(url, resolve).on('error', reject);
      });

      const { statusCode } = res;
      if (statusCode < 200 || statusCode >= 300) {
        // Drain the response so the socket is released, then fail loudly:
        // any non-2xx response is treated as a failure so an error page is
        // never saved as the PDF.
        res.resume();
        throw new Error(`Failed to download ${url}: HTTP ${statusCode}`);
      }

      const chunks = [];
      for await (const chunk of res) {
        chunks.push(chunk);
      }
      await writeFile(filePath, Buffer.concat(chunks));
      console.log(`File downloaded successfully to ${filePath}`);
    };
    await downloadFile();

    const projectId = this.google_cloud_document_ai.$auth.project_id;
    const location = this.google_cloud_document_ai.$auth.location;
    const processorId = this.google_cloud_document_ai.$auth.processor_id;

    // Instantiates a client.
    // apiEndpoint regions available: eu-documentai.googleapis.com, us-documentai.googleapis.com (Required if using eu based processor)
    // const client = new DocumentProcessorServiceClient({apiEndpoint: 'eu-documentai.googleapis.com'});
    const client = new DocumentProcessorServiceClient();

    // Sends the downloaded file to the processor and extracts first-page text.
    async function testRequest() {
      // The full resource name of the processor, e.g.:
      // projects/project-id/locations/location/processors/processor-id
      // You must create new processors in the Cloud Console first.
      const name = `projects/${projectId}/locations/${location}/processors/${processorId}`;

      // Read the file into memory; readFile already yields a Buffer, so it
      // can be base64-encoded directly.
      const fileContents = await fs.readFile(filePath);
      const encodedImage = fileContents.toString('base64');

      const request = {
        name,
        rawDocument: {
          content: encodedImage,
          mimeType: 'application/pdf',
        },
      };

      // Recognizes text entities in the PDF document.
      const [result] = await client.processDocument(request);
      const { document } = result;

      // All of the document text as one big string; anchors index into it.
      const { text } = document;

      // Resolves a text anchor to the text it points at. An anchor may carry
      // several segments, so every segment is sliced and joined in order.
      const getText = (textAnchor) => {
        if (
          !textAnchor ||
          !textAnchor.textSegments ||
          textAnchor.textSegments.length === 0
        ) {
          return '';
        }
        return textAnchor.textSegments
          .map((segment) =>
            // The first shard in a document doesn't have a startIndex property.
            text.substring(segment.startIndex || 0, segment.endIndex))
          .join('');
      };

      // Read the text recognition output from the first page only.
      const [page1] = document.pages || [];
      if (!page1 || !page1.paragraphs) {
        return '';
      }

      return page1.paragraphs
        .map((paragraph) => getText(paragraph.layout && paragraph.layout.textAnchor))
        .join('');
    }

    return await testRequest();
  },
});
Snowflake offers a cloud database and related tools to help developers create robust, secure, and scalable data warehouses. See Snowflake's Key Concepts & Architecture documentation for details.
Snowflake recommends you create a new user, role, and warehouse when you integrate a third-party tool like Pipedream. This way, you can control permissions via the user / role, and separate Pipedream compute and costs with the warehouse. You can do this directly in the Snowflake UI.
We recommend you create a read-only account if you only need to query Snowflake. If you need to insert data into Snowflake, add permissions on the appropriate objects after you create your user.
Visit https://pipedream.com/accounts. Click the button to Connect an App. Enter the required Snowflake account data.
You'll only need to connect your account once in Pipedream. You can connect this account to multiple workflows to run queries against Snowflake, insert data, and more.
Visit https://pipedream.com/new to build your first workflow. Pipedream workflows let you connect Snowflake with 1,000+ other apps. You can trigger workflows on Snowflake queries, sending results to Slack, Google Sheets, or any app that exposes an API. Or you can accept data from another app, transform it with Python, Node.js, Go or Bash code, and insert it into Snowflake.
Learn more at Pipedream University.
import snowflake from '@pipedream/snowflake';
export default defineComponent({
  props: {
    snowflake,
  },
  /**
   * Runs a single parameterless query through the connected Snowflake app
   * and hands back whatever `executeQuery` produces.
   */
  async run({ $ }) {
    // Component source code:
    // https://github.com/PipedreamHQ/pipedream/tree/master/components/snowflake
    const statement = {
      sqlText: 'SELECT CURRENT_TIMESTAMP()',
      // No bind parameters are needed for this statement.
      binds: [],
    };
    return this.snowflake.executeQuery(statement);
  },
});