import fs from "fs"
import { promisify } from "util"
// The "/platform" specifier had lost its scope; axios($, config) is the
// call signature of the Pipedream platform wrapper.
import { axios } from "@pipedream/platform"
// NOTE(review): "@U03K4ERLE68" looks like a chat-mention ID that replaced the
// real package scope or path — confirm the intended app module specifier.
import microsoft_word from "@U03K4ERLE68/microsoft_word"
import mammoth from "mammoth"
import { Document, Packer, Paragraph, TextRun } from "docx"
// NOTE(review): confirm that an npm package named "presidio-anonymizer"
// exporting these two names is actually available to this component.
import { AnonymizerEngine, RecognizerResult } from "presidio-anonymizer"
// Base URL of the signed-in user's OneDrive in Microsoft Graph.
const GRAPH_DRIVE_URL = "https://graph.microsoft.com/v1.0/me/drive"

// Content type sent with the .docx upload.
const DOCX_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

/**
 * Builds the Authorization header for Microsoft Graph requests.
 *
 * @param {object} app - The `microsoft_word` app prop; its
 *   `$auth.oauth_access_token` is read.
 * @returns {{ Authorization: string }} Header object carrying the bearer token.
 */
function buildAuthHeaders(app) {
  return {
    Authorization: `Bearer ${app.$auth.oauth_access_token}`,
  }
}

/**
 * Pipedream action that downloads a Word document from OneDrive, extracts its
 * raw text, runs that text through the anonymizer engine, rebuilds a .docx
 * from the result and uploads it over the original item.
 *
 * Props:
 *   - microsoft_word: connected app supplying the OAuth access token.
 *   - documentId: drive item ID of the document (options list comes from a
 *     Graph search for ".docx").
 *   - entityTypes: entity type names handed to the anonymizer.
 *
 * `run` resolves to `{ documentId, name, webUrl }` taken from the upload
 * response.
 */
export default defineComponent({
  key: "microsoft-word-presidio-anonymize",
  name: "Anonymize Word Document",
  description: "Anonymizes sensitive data in a Microsoft Word document using Presidio",
  version: "0.0.1",
  props: {
    microsoft_word,
    documentId: {
      type: "string",
      label: "Document ID",
      description: "The ID of the Word document to anonymize",
      async options() {
        // `$` only exists inside run(); in an options callback the component
        // instance itself is passed to the axios wrapper.
        const response = await axios(this, {
          url: `${GRAPH_DRIVE_URL}/root/search(q='.docx')`,
          headers: buildAuthHeaders(this.microsoft_word),
        })
        return (response.value ?? []).map((file) => ({
          label: file.name,
          value: file.id,
        }))
      },
    },
    entityTypes: {
      // run() maps over this value, so it has to be a multi-select.
      type: "string[]",
      label: "Entity Types to Anonymize",
      description: "Select the types of entities to anonymize",
      options: [
        "PERSON",
        "EMAIL_ADDRESS",
        "PHONE_NUMBER",
        "CREDIT_CARD",
        "CRYPTO",
        "DATE_TIME",
        "LOCATION",
        "NRP",
        "IP_ADDRESS",
        "US_SSN",
      ],
    },
  },
  async run({ steps, $ }) {
    const authHeaders = buildAuthHeaders(this.microsoft_word)
    const contentUrl = `${GRAPH_DRIVE_URL}/items/${encodeURIComponent(this.documentId)}/content`

    // Download the Word document. The axios wrapper resolves to the response
    // body itself (the upload below reads `id`/`name`/`webUrl` the same way),
    // so there is no `.data` to unwrap.
    const downloaded = await axios($, {
      url: contentUrl,
      headers: authHeaders,
      responseType: "arraybuffer",
    })
    const inputBuffer = Buffer.from(downloaded)

    // Extract text straight from the buffer; no temp file is written, so
    // nothing is left behind in /tmp when a later step fails.
    const { value: text } = await mammoth.extractRawText({ buffer: inputBuffer })

    // Accept a single string as well, in case the prop was configured before
    // it became a multi-select.
    const entityTypes = [].concat(this.entityTypes ?? [])

    // NOTE(review): every result spans the whole text (0..text.length), so
    // each selected type marks the entire document rather than individual
    // entities. If the engine replaces each marked span, the output is only
    // the placeholder. A real analyzer step producing per-entity offsets is
    // needed here — TODO confirm intended behavior.
    const analyzerResults = entityTypes.map((type) => new RecognizerResult({
      entity_type: type,
      start: 0,
      end: text.length,
      score: 0.8,
    }))

    const engine = new AnonymizerEngine()
    const anonymizedResult = await engine.anonymize(
      text,
      analyzerResults,
      {
        "DEFAULT": {
          type: "replace",
          new_value: "<ANONYMIZED>",
        },
      },
    )

    // NOTE(review): presumably the engine returns either a string or an
    // object exposing `.text` — verify against the library in use.
    const anonymizedText = typeof anonymizedResult === "string"
      ? anonymizedResult
      : anonymizedResult?.text
    if (typeof anonymizedText !== "string") {
      // The upload below overwrites the original, so refuse to continue
      // without real text.
      throw new Error("Anonymizer did not return text; the document was left unchanged")
    }

    // One paragraph per line so line breaks in the extracted text survive;
    // newline characters inside a single text run are not rendered as breaks.
    const paragraphs = anonymizedText.split(/\r?\n/).map((line) => new Paragraph({
      children: [new TextRun(line)],
    }))
    const doc = new Document({
      sections: [{
        properties: {},
        children: paragraphs,
      }],
    })
    const outputBuffer = await Packer.toBuffer(doc)

    // PUT to the same item's /content, which replaces the original document
    // with the anonymized one.
    const uploadResponse = await axios($, {
      method: "PUT",
      url: contentUrl,
      headers: {
        ...authHeaders,
        "Content-Type": DOCX_CONTENT_TYPE,
      },
      data: outputBuffer,
    })

    return {
      documentId: uploadResponse.id,
      name: uploadResponse.name,
      webUrl: uploadResponse.webUrl,
    }
  },
})