From a702a481c6d99ea9de74fbee7e0f8c2a0b7d060f Mon Sep 17 00:00:00 2001
From: Moritz Zingg
Date: Thu, 6 Mar 2025 08:33:10 +0100
Subject: [PATCH 1/3]

From bb711575f92efce300764abeb817f17899a31e44 Mon Sep 17 00:00:00 2001
From: Moritz Zingg
Date: Thu, 6 Mar 2025 09:51:02 +0100
Subject: [PATCH 2/3] Added functions and config to interact with wikidata

---
Review notes (text in this section is not part of the commit):
- getClaims now reads the end date from the END_TIME qualifier (it read
  START_TIME twice) and no longer throws on statements that lack qualifiers
  or a value, or when the entity is missing from the response.
- The type annotations of searchCompany / getWikidataEntities are kept; the
  original patch stripped them, leaving `{ [lang]: { language; value } }`.
- The scope 3 loop of the push script skips rows without a total instead of
  calling toString() on null.
- config: unused `number` import removed, category lookup typed, and the
  GHG category 10 key renamed to PROCESSING_OF_SOLD_PRODUCTS.
- diffCarbonFootprintClaims never queues the same removal or addition twice.
- Generic type arguments that were lost in this copy of the patch were
  restored where they could be inferred; blob hashes in the `index` lines
  were not recomputed.

 .env.example                                  |   7 +-
 package-lock.json                             |  29 +-
 package.json                                  |   1 +
 ...push-existing-verified-data-to-wikidata.ts | 194 +++++++++++
 src/config/wikidata.ts                        | 139 ++++++++
 src/lib/wikidata.ts                           | 315 +++++++++++++-----
 6 files changed, 600 insertions(+), 85 deletions(-)
 create mode 100644 scripts/push-existing-verified-data-to-wikidata.ts
 create mode 100644 src/config/wikidata.ts

diff --git a/.env.example b/.env.example
index e164be1b..aafc057b 100644
--- a/.env.example
+++ b/.env.example
@@ -33,4 +33,9 @@ GITHUB_CLIENT_SECRET=
 GITHUB_ORGANIZATION=Klimatbyran
 GITHUB_REDIRECT_URI=http://localhost:5137/auth/callback
 JWT_SECRET=""
-JWT_EXPIRES_IN=
\ No newline at end of file
+JWT_EXPIRES_IN=
+
+# Wikidata credentials
+WIKIDATA_URL=https://test.wikidata.org #Use https://www.wikidata.org for production
+WIKIDATA_USERNAME=
+WIKIDATA_PASSWORD=
\ No newline at end of file
diff --git a/package-lock.json b/package-lock.json
index 74e27d9e..6da47546 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -37,6 +37,7 @@
         "redis": "^4.7.0",
         "sharp": "^0.33.5",
         "tsx": "^4.19.2",
+        "wikibase-edit": "^7.2.3",
         "wikibase-sdk": "^10.2.1",
         "zod": "^3.23.8"
       },
@@ -2991,6 +2992,12 @@
         "node": ">= 8"
       }
     },
+    "node_modules/crypto-js": {
+      "version": "4.2.0",
+      "resolved": "https://registry.npmjs.org/crypto-js/-/crypto-js-4.2.0.tgz",
+      "integrity": "sha512-KALDyEYgpY+Rlob/iriUtjV6d5Eq+Y191A5g4UqLAi8CyGP9N1+FdVbkc1SxKc2r4YAYqG8JzO2KGL+AizD70Q==",
+      "license": "MIT"
+    },
     "node_modules/crypto-random-string": {
       "version": "2.0.0",
       "license": "MIT",
@@ -5023,7 +5030,6 @@
     },
     "node_modules/lodash.isequal": {
       "version": "4.5.0",
-      "dev": true,
       "license": "MIT"
     },
     "node_modules/lodash.isfunction": {
@@ -5399,6 +5405,12 @@
         "node": ">=8"
       }
     },
+    "node_modules/oauth-1.0a": {
+      "version": "2.2.6",
+      "resolved": "https://registry.npmjs.org/oauth-1.0a/-/oauth-1.0a-2.2.6.tgz",
+      "integrity": "sha512-6bkxv3N4Gu5lty4viIcIAnq5GbxECviMBeKR3WX/q87SPQ8E8aursPZUtsXDnxCs787af09WPRBLqYrf/lwoYQ==",
+      "license": "MIT"
+    },
     "node_modules/obliterator": {
       "version": "2.0.5",
       "resolved": "https://registry.npmjs.org/obliterator/-/obliterator-2.0.5.tgz",
@@ -7106,6 +7118,21 @@
         "node": ">= 8"
       }
     },
+    "node_modules/wikibase-edit": {
+      "version": "7.2.3",
+      "resolved": "https://registry.npmjs.org/wikibase-edit/-/wikibase-edit-7.2.3.tgz",
+      "integrity": "sha512-9CfjPTiSqSmfP+pzeNe5lwHaB2MYF1NozEnkBgbNDTDXOLfHBV06IGEYV8xBxyqLcam2Gmkw1QBdJFPCNeyMGg==",
+      "license": "MIT",
+      "dependencies": {
+        "crypto-js": "^4.1.1",
+        "lodash.isequal": "^4.5.0",
+        "oauth-1.0a": "^2.2.6",
+        "wikibase-sdk": "^10.1.0"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
     "node_modules/wikibase-sdk": {
       "version": "10.2.1",
       "license": "MIT",
diff --git a/package.json b/package.json
index f4d45fe0..42d50cba 100644
--- a/package.json
+++ b/package.json
@@ -54,6 +54,7 @@
     "redis": "^4.7.0",
     "sharp": "^0.33.5",
     "tsx": "^4.19.2",
+    "wikibase-edit": "^7.2.3",
     "wikibase-sdk": "^10.2.1",
     "zod": "^3.23.8"
   },
diff --git a/scripts/push-existing-verified-data-to-wikidata.ts b/scripts/push-existing-verified-data-to-wikidata.ts
new file mode 100644
index 00000000..7781c744
--- /dev/null
+++ b/scripts/push-existing-verified-data-to-wikidata.ts
@@ -0,0 +1,194 @@
+import { PrismaClient } from '@prisma/client'
+import wikidataConfig from "../src/config/wikidata";
+import { bulkCreateOrEditCarbonFootprintClaim, Claim } from '../src/lib/wikidata';
+
+//Still in testing: the filters below keep only data related to ABB, as this company is present in the Sandbox
+const entityDownloadId: `Q${number}` = "Q731938";
+const entityUploadId: `Q${number}` = "Q238689";
+
+const prisma = new PrismaClient()
+
+const emissionsScope1 = await prisma.scope1.findMany({
+  select: {
+    total: true,
+    unit: true,
+    metadata: {
+      select: {
+        verifiedByUserId: true
+      }
+    },
+    emissions: {
+      select: {
+        reportingPeriod: {
+          select: {
+            startDate: true,
+            endDate: true,
+            reportURL: true,
+            company: {
+              select: {
+                wikidataId: true,
+              }
+            }
+          }
+        }
+      }
+    },
+  },
+  where: {
+    metadata: {some: {verifiedByUserId: {not: null}}},
+    emissions: {reportingPeriod: {year: "2023"}}
+  }
+})
+let claims: Claim[] = [];
+let filtered1 = emissionsScope1.filter((emission) => emission.emissions?.reportingPeriod?.company.wikidataId === entityDownloadId);
+
+for(const emission of filtered1) {
+  if(emission.total !== null && emission.unit !== null && emission.emissions !== null
+    && emission.emissions!.reportingPeriod !== null && emission.emissions!.reportingPeriod!.startDate !== null
+    && emission.emissions!.reportingPeriod!.endDate !== null && emission.emissions!.reportingPeriod!.reportURL !== null
+    && emission.emissions!.reportingPeriod!.company !== null && emission.emissions!.reportingPeriod!.company!.wikidataId !== null) {
+
+    claims.push({
+      startDate: emission.emissions!.reportingPeriod!.startDate.toISOString(),
+      endDate: emission.emissions!.reportingPeriod!.endDate.toISOString(),
+      value: emission.total!.toString(),
+      referenceUrl: emission.emissions!.reportingPeriod!.reportURL!,
+      scope: wikidataConfig.entities.SCOPE_1
+    })
+  }
+}
+
+
+const emissionsScope2 = await prisma.scope2.findMany({
+  select: {
+    mb: true,
+    lb: true,
+    unknown: true,
+    unit: true,
+    metadata: {
+      select: {
+        verifiedByUserId: true
+      }
+    },
+    emissions: {
+      select: {
+        reportingPeriod: {
+          select: {
+            startDate: true,
+            endDate: true,
+            reportURL: true,
+            company: {
+              select: {
+                wikidataId: true,
+              }
+            }
+          }
+        }
+      }
+    },
+  },
+  where: {
+    metadata: {some: {verifiedByUserId: {not: null}}},
+    emissions: {reportingPeriod: {year: "2023"}}
+
+  }
+})
+
+let filtered2 = emissionsScope2.filter((emission) => emission.emissions?.reportingPeriod?.company.wikidataId === entityDownloadId);
+
+for(const emission of filtered2) {
+  if(emission.unit !== null && emission.emissions !== null
+    && emission.emissions!.reportingPeriod !== null && emission.emissions!.reportingPeriod!.startDate !== null
+    && emission.emissions!.reportingPeriod!.endDate !== null && emission.emissions!.reportingPeriod!.reportURL !== null
+    && emission.emissions!.reportingPeriod!.company !== null && emission.emissions!.reportingPeriod!.company!.wikidataId !== null) {
+
+    if(emission.mb !== null) {
+      claims.push({
+        startDate: emission.emissions!.reportingPeriod!.startDate.toISOString(),
+        endDate: emission.emissions!.reportingPeriod!.endDate.toISOString(),
+        value: emission.mb!.toString(),
+        referenceUrl: emission.emissions!.reportingPeriod!.reportURL!,
+        scope: wikidataConfig.entities.SCOPE_2_MARKET_BASED
+      })
+    }
+
+    if(emission.lb !== null) {
+      claims.push({
+        startDate: emission.emissions!.reportingPeriod!.startDate.toISOString(),
+        endDate: emission.emissions!.reportingPeriod!.endDate.toISOString(),
+        value: emission.lb!.toString(),
+        referenceUrl: emission.emissions!.reportingPeriod!.reportURL!,
+        scope: wikidataConfig.entities.SCOPE_2_LOCATION_BASED
+      })
+    }
+
+    if(emission.unknown !== null) {
+      claims.push({
+        startDate: emission.emissions!.reportingPeriod!.startDate.toISOString(),
+        endDate: emission.emissions!.reportingPeriod!.endDate.toISOString(),
+        value: emission.unknown!.toString(),
+        referenceUrl: emission.emissions!.reportingPeriod!.reportURL!,
+        scope: wikidataConfig.entities.SCOPE_2
+      })
+    }
+  }
+}
+
+const emissionsScope3 = await prisma.scope3Category.findMany({
+  select: {
+    total: true,
+    unit: true,
+    category: true,
+    metadata: {
+      select: {
+        verifiedByUserId: true
+      }
+    },
+    scope3: {
+      select: {
+        emissions: {
+          select: {
+            reportingPeriod: {
+              select: {
+                startDate: true,
+                endDate: true,
+                reportURL: true,
+                company: {
+                  select: {
+                    wikidataId: true,
+                  }
+                }
+              }
+            }
+          }
+        },
+      }
+    }
+  },
+  where: {
+    metadata: {some: {verifiedByUserId: {not: null}}},
+    scope3: {emissions: {reportingPeriod: {year: "2023"}}}
+  }
+})
+
+let filtered3 = emissionsScope3.filter((emission) => emission.scope3.emissions?.reportingPeriod?.company.wikidataId === entityDownloadId);
+
+for(const emission of filtered3) {
+  if(emission.unit !== null && emission.scope3 !== null && emission.scope3.emissions !== null
+    && emission.scope3.emissions!.reportingPeriod !== null && emission.scope3.emissions!.reportingPeriod!.startDate !== null
+    && emission.scope3.emissions!.reportingPeriod!.endDate !== null && emission.scope3.emissions!.reportingPeriod!.reportURL !== null
+    && emission.scope3.emissions!.reportingPeriod!.company !== null && emission.scope3.emissions!.reportingPeriod!.company!.wikidataId !== null) {
+
+    if(emission.category !== 16 && emission.total !== null) { //category 16 has no Wikidata entity; rows without a total cannot be uploaded
+      claims.push({
+        startDate: emission.scope3!.emissions!.reportingPeriod!.startDate.toISOString(),
+        endDate: emission.scope3!.emissions!.reportingPeriod!.endDate.toISOString(),
+        value: emission.total!.toString(),
+        referenceUrl: emission.scope3!.emissions!.reportingPeriod!.reportURL!,
+        scope: wikidataConfig.entities.SCOPE_3,
+        category: wikidataConfig.translateIdToCategory(emission.category)
+      })
+    }
+  }
+}
+await bulkCreateOrEditCarbonFootprintClaim(entityUploadId, claims);
\ No newline at end of file
diff --git a/src/config/wikidata.ts b/src/config/wikidata.ts
new file mode 100644
index 00000000..8afc01b7
--- /dev/null
+++ b/src/config/wikidata.ts
@@ -0,0 +1,139 @@
+import 'dotenv/config'
+import { z } from 'zod'
+
+const envSchema = z.object({
+  WIKIDATA_URL: z.string().default('https://www.wikidata.org'),
+  WIKIDATA_USERNAME: z.string().default('KlimatkollenGarboBot'),
+  WIKIDATA_PASSWORD: z.string().default('')
+})
+
+const env = envSchema.parse(process.env)
+
+interface WikidataProperties {
+  CARBON_FOOTPRINT: `P${number}`,
+  START_TIME: `P${number}`,
+  END_TIME: `P${number}`,
+  DETERMINATION_METHOD_OR_STANDARD: `P${number}`,
+  REFERENCE_URL: `P${number}`,
+  OBJECT_OF_STATEMENT_HAS_ROLE: `P${number}`,
+  APPLIES_TO_PART: `P${number}`,
+  STATED_IN: `P${number}`
+}
+
+interface WikidataEntities {
+  TONNE_OF_CARBON_DIOXIDE_EQUIVALENT: `Q${number}`,
+  GHG_PROTOCOL: `Q${number}`,
+  SCOPE_1: `Q${number}`,
+  SCOPE_2: `Q${number}`,
+  SCOPE_2_MARKET_BASED: `Q${number}`,
+  SCOPE_2_LOCATION_BASED: `Q${number}`,
+  SCOPE_3: `Q${number}`,
+  PURCHASED_GOODS_AND_SERVICES: `Q${number}`,
+  CAPITAL_GOODS: `Q${number}`,
+  FUEL_AND_ENERGY_RELATED_ACTIVITIES: `Q${number}`,
+  UPSTREAM_TRANSPORTATION_AND_DISTRIBUTION: `Q${number}`,
+  WASTE_GENERATED_IN_OPERATIONS: `Q${number}`,
+  BUSINESS_TRAVEL: `Q${number}`,
+  EMPLOYEE_COMMUTING: `Q${number}`,
+  UPSTREAM_LEASED_ASSETS: `Q${number}`,
+  DOWNSTREAM_TRANSPORTATION_AND_DISTRIBUTION: `Q${number}`,
+  PROCESSING_OF_SOLD_PRODUCTS: `Q${number}`,
+  USE_OF_SOLD_PRODUCTS: `Q${number}`,
+  END_OF_LIFE_TREATMENT_OF_SOLD_PRODUCTS: `Q${number}`,
+  DOWNSTREAM_LEASED_ASSETS: `Q${number}`,
+  FRANCHISES: `Q${number}`,
+  INVESTMENTS: `Q${number}`,
+}
+
+export const LiveWikidataProperties: WikidataProperties = {
+  CARBON_FOOTPRINT: "P5991",
+  START_TIME: "P580",
+  END_TIME: "P582",
+  DETERMINATION_METHOD_OR_STANDARD: "P459",
+  REFERENCE_URL: "P854",
+  OBJECT_OF_STATEMENT_HAS_ROLE: "P3831",
+  APPLIES_TO_PART: "P518",
+  STATED_IN: "P248"
+} as const
+
+export const TestWikidataProperties: WikidataProperties = {
+  CARBON_FOOTPRINT: "P98845",
+  START_TIME: "P355",
+  END_TIME: "P356",
+  DETERMINATION_METHOD_OR_STANDARD: "P98847",
+  REFERENCE_URL: "P93",
+  OBJECT_OF_STATEMENT_HAS_ROLE: "P98849",
+  APPLIES_TO_PART: "P822",
+  STATED_IN: "P149"
+} as const
+
+export const LiveWikidataEntities : WikidataEntities = {
+  TONNE_OF_CARBON_DIOXIDE_EQUIVALENT: "Q57084755",
+  GHG_PROTOCOL: "Q56296245",
+  SCOPE_1: "Q124883250",
+  SCOPE_2: "Q124883301",
+  SCOPE_2_MARKET_BASED: "Q124883330",
+  SCOPE_2_LOCATION_BASED: "Q124883327",
+  SCOPE_3: "Q124883309",
+  PURCHASED_GOODS_AND_SERVICES: "Q124883638",
+  CAPITAL_GOODS: "Q124883639",
+  FUEL_AND_ENERGY_RELATED_ACTIVITIES: "Q124883640",
+  UPSTREAM_TRANSPORTATION_AND_DISTRIBUTION: "Q124883642",
+  WASTE_GENERATED_IN_OPERATIONS: "Q124883643",
+  BUSINESS_TRAVEL: "Q124883644",
+  EMPLOYEE_COMMUTING: "Q124883646",
+  UPSTREAM_LEASED_ASSETS: "Q124883647",
+  DOWNSTREAM_TRANSPORTATION_AND_DISTRIBUTION: "Q124883648",
+  PROCESSING_OF_SOLD_PRODUCTS: "Q124883649",
+  USE_OF_SOLD_PRODUCTS: "Q124883650",
+  END_OF_LIFE_TREATMENT_OF_SOLD_PRODUCTS: "Q124883651",
+  DOWNSTREAM_LEASED_ASSETS: "Q124883652",
+  FRANCHISES: "Q124883653",
+  INVESTMENTS: "Q124883654",
+} as const
+
+export const TestWikidataEntities : WikidataEntities = {
+  TONNE_OF_CARBON_DIOXIDE_EQUIVALENT: "Q238307",
+  GHG_PROTOCOL: "Q238313",
+  SCOPE_1: "Q238314",
+  SCOPE_2: "Q238329",
+  SCOPE_2_MARKET_BASED: "Q238315",
+  SCOPE_2_LOCATION_BASED: "Q238316",
+  SCOPE_3: "Q238317",
+  PURCHASED_GOODS_AND_SERVICES: "Q238318", //we didn't create every category in the sandbox, therefore for tests we just alternate between the first two
+  CAPITAL_GOODS: "Q238319",
+  FUEL_AND_ENERGY_RELATED_ACTIVITIES: "Q238618",
+  UPSTREAM_TRANSPORTATION_AND_DISTRIBUTION: "Q238619",
+  WASTE_GENERATED_IN_OPERATIONS: "Q238620",
+  BUSINESS_TRAVEL: "Q238621",
+  EMPLOYEE_COMMUTING: "Q238622",
+  UPSTREAM_LEASED_ASSETS: "Q238623",
+  DOWNSTREAM_TRANSPORTATION_AND_DISTRIBUTION: "Q238624",
+  PROCESSING_OF_SOLD_PRODUCTS: "Q238625",
+  USE_OF_SOLD_PRODUCTS: "Q238626",
+  END_OF_LIFE_TREATMENT_OF_SOLD_PRODUCTS: "Q238627",
+  DOWNSTREAM_LEASED_ASSETS: "Q238628",
+  FRANCHISES: "Q238629",
+  INVESTMENTS: "Q238630",
+} as const
+
+const translateIdToCategory = (entities: WikidataEntities, id: number) => {
+  if(id > 0 && id < 16) {
+    //Last non category in the entities object has index 6; this relies on the key order of the objects above
+    return entities[Object.keys(entities)[id + 6] as keyof WikidataEntities];
+  }
+  return null;
+}
+
+const wikidataConfig = {
+  wikidataURL: env.WIKIDATA_URL,
+  wikidataUsername: env.WIKIDATA_USERNAME,
+  wikidataPassword: env.WIKIDATA_PASSWORD,
+  entities: env.WIKIDATA_URL === "https://www.wikidata.org" ? LiveWikidataEntities : TestWikidataEntities,
+  properties: env.WIKIDATA_URL === "https://www.wikidata.org" ? LiveWikidataProperties : TestWikidataProperties,
+  translateIdToCategory: (id: number) => translateIdToCategory(env.WIKIDATA_URL === "https://www.wikidata.org" ? LiveWikidataEntities : TestWikidataEntities, id)
+} as const
+
+
+
+export default wikidataConfig;
\ No newline at end of file
diff --git a/src/lib/wikidata.ts b/src/lib/wikidata.ts
index 216ea0fd..ba57dbbb 100644
--- a/src/lib/wikidata.ts
+++ b/src/lib/wikidata.ts
@@ -1,98 +1,43 @@
-import WBK, { SearchResponse, EntityId, Entity } from 'wikibase-sdk'
+import WBK, { SearchResponse, EntityId, Entity, ItemId } from 'wikibase-sdk'
 import { WbGetEntitiesResponse } from 'wikibase-sdk/dist/src/helpers/parse_responses'
 import { SearchEntitiesOptions } from 'wikibase-sdk/dist/src/queries/search_entities'
-
-/*const transformData = (data: any): any => {
-  return Object.entries(data)
-    .map(([key, wikidata]: [string, any]) => {
-      if (!wikidata || !wikidata.claims) return null
-
-      const verifiedUrl = `https://www.wikidata.org/wiki/${wikidata.id}`
-
-      const emissionsData = (wikidata.claims.P5991 || []).map(
-        (emission: any) => {
-          const year = emission.qualifiers.P580[0].datavalue.value.time.slice(
-            1,
-            5
-          )
-          const scope1Emission = emission.qualifiers.P3831
-            ? parseFloat(emission.qualifiers.P3831[0].datavalue.value.amount)
-            : null
-          const scope2Emission = emission.qualifiers.P580
-            ? parseFloat(emission.qualifiers.P580[0].datavalue.value.amount)
-            : null
-          const scope3Emission = emission.qualifiers.P582
-            ? parseFloat(emission.qualifiers.P582[0].datavalue.value.amount)
-            : null
-
-          return {
-            year: year,
-            reference: emission.references[0].snaks.P854[0].datavalue.value,
-            scope1: {
-              emissions: scope1Emission,
-              verified: verifiedUrl,
-              unit: 'tCO2e',
-            },
-            scope2: {
-              emissions: scope2Emission,
-              verified: verifiedUrl,
-              unit: 'tCO2e',
-            },
-            scope3: {
-              emissions: scope3Emission,
-              verified: verifiedUrl,
-              unit: 'tCO2e',
-              categories: {
-                //TODO: add scope 3 categories
-                '1_purchasedGoods': null,
-                '2_capitalGoods': null,
-                '3_fuelAndEnergyRelatedActivities': null,
-                '4_upstreamTransportationAndDistribution': null,
-                '5_wasteGeneratedInOperations': null,
-                '6_businessTravel': null,
-                '7_employeeCommuting': null,
-                '8_upstreamLeasedAssets': null,
-                '9_downstreamTransportationAndDistribution': null,
-                '10_processingOfSoldProducts': null,
-                '11_useOfSoldProducts': null,
-                '12_endOfLifeTreatmentOfSoldProducts': null,
-                '13_downstreamLeasedAssets': null,
-                '14_franchises': null,
-                '15_investments': null,
-                '16_other': null,
-              },
-            },
-          }
-        }
-      )
-
-      return {
-        node: wikidata.id,
-        url: `https://www.wikidata.org/wiki/${wikidata.id}`,
-        logo: wikidata.claims.P18
-          ? `https://commons.wikimedia.org/wiki/File:${wikidata.claims.P18[0].mainsnak.datavalue.value}`
-          : null,
-        label: wikidata.labels ? wikidata.labels.en.value : key,
-        description:
-          wikidata.descriptions && wikidata.descriptions.en
-            ? wikidata.descriptions.en.value
-            : null,
-        emissions: emissionsData,
-      }
-    })
-    .filter((item) => item !== null)
-}*/
+import wikidataConfig from '../config/wikidata'
+import WBEdit from 'wikibase-edit'
 
 const wbk = WBK({
-  instance: 'https://www.wikidata.org',
+  instance: wikidataConfig.wikidataURL,
   sparqlEndpoint: 'https://query.wikidata.org/sparql',
 })
 
+const wikibaseEditConfig = {
+  instance: wikidataConfig.wikidataURL,
+  credentials: {
+    username: wikidataConfig.wikidataUsername,
+    password: wikidataConfig.wikidataPassword
+  },
+  userAgent: 'KlimatkollenGarbotBot/v0.1.0 (https://klimatkollen.se)',
+}
+
+const {
+  TONNE_OF_CARBON_DIOXIDE_EQUIVALENT,
+  GHG_PROTOCOL,
+} = wikidataConfig.entities;
+
+const {
+  CARBON_FOOTPRINT,
+  START_TIME,
+  END_TIME,
+  DETERMINATION_METHOD_OR_STANDARD,
+  REFERENCE_URL,
+  OBJECT_OF_STATEMENT_HAS_ROLE,
+  APPLIES_TO_PART
+} = wikidataConfig.properties;
+
 export async function searchCompany({
   companyName,
   language = 'sv',
 }: {
   companyName: string
   language?: SearchEntitiesOptions['language']
 }): Promise<SearchResponse> {
   // TODO: try to search in multiple languages. Maybe we can find a page in English if it doesn't exist in Swedish?
@@ -125,7 +70,211 @@ export async function getWikidataEntities(ids: EntityId[]) {
   )
 
   return Object.values(entities) as (Entity & {
     labels: { [lang: string]: { language: string; value: string } }
     descriptions: { [lang: string]: { language: string; value: string } }
   })[]
 }
+
+export async function getClaims(entity: ItemId): Promise<Claim[]> {
+  const url = wbk.getEntities({
+    ids: entity,
+    languages: ["en"]
+  })
+
+  const res = await fetch(url);
+  const wikidataEntities = (await res.json()).entities;
+
+  if(wikidataEntities === undefined) {
+    return [];
+  }
+
+  const claims = wikidataEntities[entity]?.claims;
+
+  if(claims === undefined) {
+    return [];
+  }
+
+  const carbonFootprintClaims = claims[CARBON_FOOTPRINT] ?? [];
+
+  // Statements added by other editors may lack qualifiers or a value, so every
+  // lookup is optional; a missing date is later treated as the epoch by compareDateStrings
+  return carbonFootprintClaims.map(({ id, mainsnak, qualifiers = {} }) => ({
+    startDate: transformFromWikidataDateStringToDate(qualifiers[START_TIME]?.[0]?.datavalue?.value?.time),
+    endDate: transformFromWikidataDateStringToDate(qualifiers[END_TIME]?.[0]?.datavalue?.value?.time),
+    value: mainsnak?.datavalue?.value?.amount,
+    category: qualifiers[APPLIES_TO_PART]?.[0]?.datavalue?.value?.id,
+    scope: qualifiers[OBJECT_OF_STATEMENT_HAS_ROLE]?.[0]?.datavalue?.value?.id,
+    id
+  } as Claim))
+}
+
+export async function editEntity(entity: ItemId, claims: Claim[], removeClaim: RemoveClaim[]) {
+  const wbEdit = WBEdit(wikibaseEditConfig);
+  const claimBody = claims.map((claim) => {
+    const claimObject = {
+      value: {
+        amount: claim.value,
+        unit: TONNE_OF_CARBON_DIOXIDE_EQUIVALENT
+      },
+      qualifiers: {
+        [START_TIME]: claim.startDate,
+        [END_TIME]: claim.endDate,
+        [DETERMINATION_METHOD_OR_STANDARD]: GHG_PROTOCOL,
+      },
+      references: [
+        {[REFERENCE_URL]: claim.referenceUrl}
+      ]
+    }
+
+    if(claim.scope !== undefined) {
+      claimObject.qualifiers[OBJECT_OF_STATEMENT_HAS_ROLE] = claim.scope;
+    }
+
+    if(claim.category !== undefined) {
+      claimObject.qualifiers[APPLIES_TO_PART] = claim.category;
+    }
+
+    return claimObject;
+  })
+
+  const body = {
+    id: entity,
+    claims: {
+      [CARBON_FOOTPRINT]: [
+        ...claimBody,
+        ...removeClaim
+      ]
+    },
+    summary: "Added/Updated carbon footprint data"
+  }
+
+  await wbEdit.entity.edit(body);
+}
+
+
+/**
+ * Compares if two claims have the same scope and optionally category
+ * @param newClaim
+ * @param existingClaim
+ * @returns true if scope and category are equal
+ */
+function compareClaims(newClaim: Claim, existingClaim: Claim) {
+  if( (newClaim.scope === undefined && existingClaim.scope !== undefined) ||
+    (newClaim.scope !== undefined && (existingClaim.scope === undefined || existingClaim.scope !== newClaim.scope))) {
+    return false;
+  }
+  if( (newClaim.category === undefined && existingClaim.category !== undefined) ||
+    (newClaim.category !== undefined && (existingClaim.category === undefined || existingClaim.category !== newClaim.category))) {
+    return false;
+  }
+  return true;
+}
+
+
+/**
+ * Compares two date strings; a missing date is treated as 1970-01-01
+ * @param date1
+ * @param date2
+ * @returns difference in milliseconds
+ */
+function compareDateStrings(date1?: string, date2?: string) {
+  const epoch = "1970-01-01T00:00:00Z";
+  return (new Date(date1 || epoch)).getTime() - (new Date(date2 || epoch).getTime())
+}
+
+
+/**
+ * Calculates the claims to add and which to remove in order to update the entity
+ * @param entity Entity for which the existing and adding Claims should be compared
+ * @param claims The claims to add
+ * @returns the claims to create and the existing claims to remove
+ */
+async function diffCarbonFootprintClaims(entity: ItemId, claims: Claim[]) {
+  const existingClaims = await getClaims(entity);
+  const newClaims: Claim[] = [];
+  const rmClaims: RemoveClaim[] = [];
+
+  for(const claim of claims) {
+    let duplicate = false;
+    for(const existingClaim of existingClaims) {
+      /**
+       * Bit of explanation for the different cases
+       * The compareClaims function looks if there is already a claim with the same scope and optional category
+       * If that is the case we only want the most recent claim of that scope and category to be on wikidata
+       * Therefore, we look at the end date of the claim's reporting period to find the most recent one
+       * All older claims will not be added or are removed if they are on wikidata
+       */
+      if(compareClaims(claim, existingClaim)) {
+        if(compareDateStrings(existingClaim.endDate, claim.endDate) < 0) {
+          if(existingClaim.id !== undefined && !rmClaims.some((rm) => rm.id === existingClaim.id)) {
+            rmClaims.push({id: existingClaim.id, remove: true}); //Remove older claims (each id only once)
+          }
+          continue;
+        } else if(compareDateStrings(existingClaim.endDate, claim.endDate) > 0) {
+          duplicate = true; //If there is a more recent one do not add that claim
+        } else if(compareDateStrings(existingClaim.endDate, claim.endDate) === 0
+          && compareDateStrings(existingClaim.startDate, claim.startDate) === 0) {
+          if(("+" + claim.value) !== existingClaim.value) {
+            if(!newClaims.includes(claim)) newClaims.push(claim); //Update value by removing old claim and adding new claim
+            if(existingClaim.id !== undefined && !rmClaims.some((rm) => rm.id === existingClaim.id)) {
+              rmClaims.push({id: existingClaim.id, remove: true});
+            }
+          }
+          duplicate = true;
+        } else {
+          if(!newClaims.includes(claim)) newClaims.push(claim); //if for some reason the start times differ we still opt for our claim
+          if(existingClaim.id !== undefined && !rmClaims.some((rm) => rm.id === existingClaim.id)) {
+            rmClaims.push({id: existingClaim.id, remove: true});
+          }
+          duplicate = true;
+        }
+      }
+    }
+    if(!duplicate) {
+      newClaims.push(claim); //only add claims that do not exist already
+    }
+  }
+  return {newClaims, rmClaims};
+}
+
+export async function bulkCreateOrEditCarbonFootprintClaim(entity: ItemId, claims: Claim[]) {
+  const {newClaims, rmClaims} = await diffCarbonFootprintClaims(entity, claims);
+  await editEntity(entity, newClaims, rmClaims);
+}
+
+export async function getWikipediaTitle(id: EntityId): Promise<string> {
+  const url = wbk.getEntities({
+    ids: [id],
+    props: ['sitelinks'],
+  })
+  const { entities }: WbGetEntitiesResponse = await fetch(url).then((res) =>
+    res.json()
+  )
+  const entity = entities[id]
+  const title = entity?.sitelinks?.enwiki?.title ?? entity?.sitelinks?.svwiki?.title ?? null
+
+  if (!title) {
+    throw new Error('No Wikipedia site link found')
+  }
+
+  return title
+}
+
+function transformFromWikidataDateStringToDate(date?: string) {
+  return date?.substring(1);
+}
+
+export interface Claim {
+  id?: string;
+  startDate: string;
+  endDate: string;
+  value: string;
+  referenceUrl?: string;
+  scope?: ItemId;
+  category?: ItemId;
+}
+
+export interface RemoveClaim {
+  id: string;
+  remove: boolean;
+}
From c29a984f0ae17a05682215a514440ae8167607ab Mon Sep 17 00:00:00 2001
From: Moritz Zingg
Date: Thu, 6 Mar 2025 14:29:55 +0100
Subject: [PATCH 3/3] Added worker for wikidata upload

---
Review notes (text in this section is not part of the commit):
- wikidataUpload awaits bulkCreateOrEditCarbonFootprintClaim, so the job only
  reports success after the upload and a failed upload fails the job. It also
  tolerates a job without `body`.
- transformEmissionsToClaims skips values that were not reported and scope 3
  categories without a Wikidata entity (category 16) instead of emitting
  claims with undefined values or a null category.
- saveToAPI queues the Wikidata upload after the API call succeeded, so data
  rejected by the API is not published.
- NOTE(review): JWT_SECRET="1" in .env.example looks unintended; left as
  submitted - please confirm.

 .env.example                  |  2 +-
 src/lib/wikidata.ts           | 69 +++++++++++++++++++++++++++++++++++
 src/workers/saveToAPI.ts      | 13 +++++++
 src/workers/wikidataUpload.ts | 55 ++++++++++++++++++++++++++++
 4 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 src/workers/wikidataUpload.ts

diff --git a/.env.example b/.env.example
index aafc057b..554eabe9 100644
--- a/.env.example
+++ b/.env.example
@@ -32,7 +32,7 @@ GITHUB_CLIENT_ID=
 GITHUB_CLIENT_SECRET=
 GITHUB_ORGANIZATION=Klimatbyran
 GITHUB_REDIRECT_URI=http://localhost:5137/auth/callback
-JWT_SECRET=""
+JWT_SECRET="1"
 JWT_EXPIRES_IN=
 
 # Wikidata credentials
diff --git a/src/lib/wikidata.ts b/src/lib/wikidata.ts
index ba57dbbb..e3623d85 100644
--- a/src/lib/wikidata.ts
+++ b/src/lib/wikidata.ts
@@ -21,6 +21,11 @@ const wikibaseEditConfig = {
 const {
   TONNE_OF_CARBON_DIOXIDE_EQUIVALENT,
   GHG_PROTOCOL,
+  SCOPE_1,
+  SCOPE_2,
+  SCOPE_2_LOCATION_BASED,
+  SCOPE_2_MARKET_BASED,
+  SCOPE_3,
 } = wikidataConfig.entities;
 
 const {
@@ -264,6 +269,70 @@ function transformFromWikidataDateStringToDate(date?: string) {
   return date?.substring(1);
 }
 
+/**
+ * Converts the emissions of one reporting period into carbon footprint claims.
+ * Values that were not reported (null/undefined) are skipped so that no empty
+ * amounts are uploaded. Scope 3 categories for which translateIdToCategory
+ * returns no entity (e.g. category 16) are skipped as well.
+ * @param emissions emissions object of a reporting period (scope1, scope2, scope3)
+ * @param startDate start date of the reporting period
+ * @param endDate end date of the reporting period
+ * @param referenceUrl URL of the report the values were taken from
+ * @returns one claim per reported scope / scope 3 category
+ */
+export function transformEmissionsToClaims(emissions, startDate, endDate, referenceUrl): Claim[] {
+  const claims: Claim[] = [];
+
+  const addClaim = (scope: ItemId, value: unknown, category?: ItemId) => {
+    if(value === null || value === undefined) {
+      return;
+    }
+    const claim: Claim = {startDate, endDate, referenceUrl, scope, value: String(value)};
+    if(category !== undefined) {
+      claim.category = category;
+    }
+    claims.push(claim);
+  };
+
+  addClaim(SCOPE_1, emissions?.scope1?.total);
+  addClaim(SCOPE_2_MARKET_BASED, emissions?.scope2?.mb);
+  addClaim(SCOPE_2_LOCATION_BASED, emissions?.scope2?.lb);
+  addClaim(SCOPE_2, emissions?.scope2?.unknown);
+
+  for(const category of emissions?.scope3?.categories ?? []) {
+    const categoryEntity = wikidataConfig.translateIdToCategory(category.category);
+    if(categoryEntity === null || categoryEntity === undefined) {
+      continue; //no Wikidata entity for this category id
+    }
+    addClaim(SCOPE_3, category.total, categoryEntity);
+  }
+
+  return claims;
+}
+
+/**
+ * Keeps, per scope and optional category, only the claim with the latest end date.
+ * End dates are compared as strings, which assumes they all share one ISO format.
+ * @param claims claims of possibly several reporting periods
+ * @returns at most one claim per scope/category combination
+ */
+export function reduceToMostRecentClaims(claims: Claim[]): Claim[] {
+  const claimMap = new Map<string, Claim>();
+
+  for(const claim of claims) {
+    if(claimMap.has(claim.scope + "-" + (claim.category ?? ""))) {
+      const existingClaim = claimMap.get(claim.scope + "-" + (claim.category ?? ""));
+      if(existingClaim?.endDate === undefined || existingClaim.endDate < claim.endDate) {
+        claimMap.set(claim.scope + "-" + (claim.category ?? ""), claim);
+      }
+    } else {
+      claimMap.set(claim.scope + "-" + (claim.category ?? ""), claim);
+    }
+  }
+
+  return Array.from(claimMap.values());
+}
+
 export interface Claim {
   id?: string;
   startDate: string;
diff --git a/src/workers/saveToAPI.ts b/src/workers/saveToAPI.ts
index 1149faca..9fc8f809 100644
--- a/src/workers/saveToAPI.ts
+++ b/src/workers/saveToAPI.ts
@@ -2,9 +2,11 @@ import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
 import discord from '../discord'
 import apiConfig from '../config/api'
 import { apiFetch } from '../lib/api'
+import wikidataUpload from './wikidataUpload'
 
 export interface SaveToApiJob extends DiscordJob {
   data: DiscordJob['data'] & {
+    companyName?: string
     approved?: boolean
     requiresApproval: boolean
     diff: string
@@ -19,6 +21,7 @@ export const saveToAPI = new DiscordWorker<SaveToApiJob>(
   async (job: SaveToApiJob) => {
     try {
       const {
+        companyName,
         wikidata,
         approved,
         requiresApproval = true,
@@ -62,10 +65,20 @@ export const saveToAPI = new DiscordWorker<SaveToApiJob>(
       }
 
       if (!requiresApproval || approved) {
         console.log(`Saving approved data for ${wikidataId} to API`)
         await apiFetch(`/companies/${wikidataId}/${apiSubEndpoint}`, {
           body: removeNullValuesFromGarbo(body),
         })
+
+        // Queue the Wikidata upload only after the API call above succeeded
+        if(apiSubEndpoint === "reporting-periods") {
+          await wikidataUpload.queue.add("Wikidata Upload for " + (companyName ?? wikidataId),
+          {
+            ...job.data
+          }
+          )
+        }
+
         return { success: true }
       }
 
diff --git a/src/workers/wikidataUpload.ts b/src/workers/wikidataUpload.ts
new file mode 100644
index 00000000..a5663e0d
--- /dev/null
+++ b/src/workers/wikidataUpload.ts
@@ -0,0 +1,55 @@
+import { DiscordJob, DiscordWorker } from '../lib/DiscordWorker'
+import { bulkCreateOrEditCarbonFootprintClaim, Claim, reduceToMostRecentClaims, transformEmissionsToClaims } from '../lib/wikidata'
+
+export class WikidataUploadJob extends DiscordJob {
+  declare data: DiscordJob['data'] & {
+    companyName: string
+    existingCompany: any
+    wikidata: { node: `Q${number}` }
+    fiscalYear: any
+    scope12?: any[]
+    scope3?: any[]
+    biogenic?: any[]
+    economy?: any[]
+    body?: any
+  }
+}
+
+const wikidataUpload = new DiscordWorker<WikidataUploadJob>(
+  'wikidataUpload',
+  async (job) => {
+    const {
+      url,
+      wikidata,
+      fiscalYear,
+      companyName,
+      existingCompany,
+      scope12 = [],
+      scope3 = [],
+      biogenic = [],
+      economy = [],
+      body
+
+    } = job.data
+
+    console.log(job.data)
+
+    const allClaims: Claim[] = [];
+    (body?.reportingPeriods ?? []).forEach(reportingPeriod => { // body is optional on the job
+      const startDate = (new Date(reportingPeriod.startDate)).toISOString();
+      const endDate = (new Date(reportingPeriod.endDate)).toISOString();
+      const referenceUrl = reportingPeriod.reportURL;
+      if(reportingPeriod.emissions !== undefined) {
+        allClaims.push(...transformEmissionsToClaims(reportingPeriod.emissions, startDate, endDate, referenceUrl));
+      }
+    });
+
+    const uniqueClaimSet = reduceToMostRecentClaims(allClaims);
+
+    await bulkCreateOrEditCarbonFootprintClaim(wikidata.node, uniqueClaimSet); // await so a failed upload fails the job
+
+    return { success: true }
+  }
+)
+
+export default wikidataUpload