Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: projected traffic calculation for meta-tags #573

Open
wants to merge 62 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 46 commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
2a5de33
fix: rum api integration
dipratap Jan 9, 2025
d7efa09
fix: query method
dipratap Jan 9, 2025
fb3021c
fix: process rum traffic data
dipratap Jan 9, 2025
714a0ed
fix: process rum traffic data
dipratap Jan 9, 2025
3ca4c8c
fix: error
dipratap Jan 9, 2025
2937bf9
fix: error
dipratap Jan 9, 2025
ec12a64
fix: percentage increase based on issue
dipratap Jan 9, 2025
6a1d468
fix: percentage increase based on issue
dipratap Jan 9, 2025
47ac390
fix: error
dipratap Jan 9, 2025
90ec06e
fix: traffic
dipratap Jan 10, 2025
c237ee7
fix: full audit ref remove
dipratap Jan 10, 2025
a9ddcfb
fix: error
dipratap Jan 10, 2025
701f8a0
fix: error
dipratap Jan 10, 2025
b944489
fix: update interval
dipratap Jan 10, 2025
0438f36
fix: add log
dipratap Jan 10, 2025
2fec3cc
Merge branch 'main' into metrics-automate
solaris007 Jan 11, 2025
76c0296
fix: traffic
dipratap Jan 13, 2025
a0f87d1
Merge branch 'metrics-automate' of github.com:adobe/spacecat-audit-wo…
dipratap Jan 13, 2025
d741271
fix: traffic
dipratap Jan 13, 2025
3b306fe
fix: traffic
dipratap Jan 13, 2025
0d553d6
fix: refactoring
dipratap Jan 14, 2025
0e63f39
fix: test
dipratap Jan 14, 2025
86245ec
fix: calculate cpc value and add tests
dipratap Jan 15, 2025
a34e9bf
fix: add comments and some fixes
dipratap Jan 16, 2025
d2f5967
fix: tests
dipratap Jan 16, 2025
b1d36e5
fix: tests
dipratap Jan 16, 2025
4032765
fix: tests
dipratap Jan 16, 2025
fbec486
fix: tests
dipratap Jan 17, 2025
70c21ae
fix: tests
dipratap Jan 17, 2025
2b7fbff
fix: tests
dipratap Jan 17, 2025
026bc68
fix: tests
dipratap Jan 17, 2025
37ef497
fix: tests
dipratap Jan 17, 2025
c9ca2e7
fix: tests
dipratap Jan 17, 2025
125ca4c
fix: tests
dipratap Jan 17, 2025
841d113
fix: tests
dipratap Jan 17, 2025
9beae9e
fix: tests
dipratap Jan 17, 2025
b8472a0
fix: tests
dipratap Jan 17, 2025
c7791dc
fix: tests
dipratap Jan 17, 2025
9bf709a
fix: tests
dipratap Jan 17, 2025
f157a6d
fix: tests
dipratap Jan 17, 2025
a52680a
fix: tests
dipratap Jan 17, 2025
2c1870f
fix: update oppty
dipratap Jan 20, 2025
e4013b8
fix: update oppty
dipratap Jan 20, 2025
415404e
fix: test
dipratap Jan 20, 2025
3a766e2
fix: add comment to explain calculation
dipratap Jan 21, 2025
12c4bd0
fix: lint error
dipratap Jan 21, 2025
d7a0dec
fix: log message
dipratap Jan 28, 2025
2967fc7
fix: merge conflicts
dipratap Jan 28, 2025
87e274f
fix: use earned traffic only
dipratap Jan 28, 2025
9233147
fix: use earned traffic only
dipratap Jan 28, 2025
2fde9db
fix: set rum time duration
dipratap Feb 4, 2025
d9fcccd
fix: tests
dipratap Feb 4, 2025
957c2eb
fix: log
dipratap Feb 7, 2025
dc99049
fix: log
dipratap Feb 7, 2025
0e913d9
fix: log
dipratap Feb 7, 2025
7c2fd75
fix: log
dipratap Feb 7, 2025
101bf6d
fix: log
dipratap Feb 7, 2025
4956e70
fix: log
dipratap Feb 9, 2025
702cd29
fix: log
dipratap Feb 9, 2025
544743c
fix: log
dipratap Feb 9, 2025
f545af8
fix: log
dipratap Feb 9, 2025
12ca449
fix: log
dipratap Feb 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 67 additions & 6 deletions src/metatags/handler.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,11 +10,13 @@
* governing permissions and limitations under the License.
*/

import RUMAPIClient from '@adobe/spacecat-shared-rum-api-client';
import { getObjectFromKey, getObjectKeysUsingPrefix } from '../utils/s3-utils.js';
import SeoChecks from './seo-checks.js';
import { AuditBuilder } from '../common/audit-builder.js';
import { noopUrlResolver } from '../common/audit.js';
import convertToOpportunity from './opportunityHandler.js';
import { calculateCPCValue, getRUMDomainkey } from '../support/utils.js';
import { noopUrlResolver, wwwUrlResolver } from '../common/audit.js';
import { AuditBuilder } from '../common/audit-builder.js';

export async function fetchAndProcessPageObject(s3Client, bucketName, key, prefix, log) {
const object = await getObjectFromKey(s3Client, bucketName, key, log);
Expand All @@ -32,6 +34,61 @@ export async function fetchAndProcessPageObject(s3Client, bucketName, key, prefi
};
}

// Return the path component of an absolute URL, dropping one trailing slash if there is one.
// Query string and fragment are not part of the result; a bare root path ("/") becomes "".
// Throws (from the URL constructor) when the input is not a valid absolute URL.
function extractEndpoint(url) {
  const { pathname } = new URL(url);
  return pathname.endsWith('/') ? pathname.slice(0, -1) : pathname;
}

// Index RUM traffic entries by endpoint (as produced by extractEndpoint from each entry's url).
// When several entries resolve to the same endpoint, the last one in the input wins.
function preprocessRumData(rumTrafficData) {
  return rumTrafficData.reduce(
    (byEndpoint, entry) => byEndpoint.set(extractEndpoint(entry.url), entry),
    new Map(),
  );
}

// Get organic (earned) traffic for a given endpoint.
// - endpoint: page path as used in the detected tags; a trailing slash is tolerated
// - dataMap: Map of endpoint -> RUM traffic entry (expected to be keyed without a trailing slash)
// - log: logger exposing warn() and info()
// Returns the earned page views for the endpoint, or 0 when no RUM entry exists for it.
function getOrganicTrafficForEndpoint(endpoint, dataMap, log) {
  // remove trailing slash from endpoint, if present, and then find in the datamap
  const target = dataMap.get(endpoint.replace(/\/$/, ''));
  if (!target) {
    log.warn(`No rum data found for ${endpoint}`);
    return 0;
  }
  // Only earned traffic counts as organic; paid traffic must not inflate the projection.
  // A missing or non-numeric value is treated as 0 so it cannot turn the caller's sum into NaN.
  const trafficSum = Number.isFinite(target.earned) ? target.earned : 0;
  log.info(`Found ${trafficSum} page views for ${endpoint}`);
  return trafficSum;
}

// Calculate the projected traffic lost for a site
async function calculateProjectedTraffic(context, site, detectedTags, log) {
const rumAPIClient = RUMAPIClient.createFrom(context);
const domainkey = await getRUMDomainkey(site.getBaseURL(), context);
const options = {
domain: wwwUrlResolver(site),
domainkey,
interval: 14,
granularity: 'DAILY',
};
const queryResults = await rumAPIClient.query('traffic-acquisition', options);
const rumTrafficDataMap = preprocessRumData(queryResults, log);
let projectedTraffic = 0;
Object.entries(detectedTags).forEach(([endpoint, tags]) => {
const organicTraffic = getOrganicTrafficForEndpoint(endpoint, rumTrafficDataMap, log);
Object.values((tags)).forEach((tagIssueDetails) => {
// Multiplying by 1% for missing tags, and 0.5% for other tag issues
// For duplicate tags, each page's traffic is multiplied by .5% so
// it amounts to 0.5% * number of duplicates.
const multiplier = tagIssueDetails.issue.includes('Missing') ? 0.01 : 0.005;
projectedTraffic += organicTraffic * multiplier;
});
});
return projectedTraffic;
}

export async function auditMetaTagsRunner(baseURL, context, site) {
const { log, s3Client } = context;
// Fetch site's scraped content from S3
Expand All @@ -51,22 +108,26 @@ export async function auditMetaTagsRunner(baseURL, context, site) {
if (extractedTagsCount === 0) {
log.error(`Failed to extract tags from scraped content for bucket ${bucketName} and prefix ${prefix}`);
}
log.info(`Performing SEO checks for ${extractedTagsCount} tags`);
log.info(`Performing SEO checks for ${extractedTagsCount} tags.`);
// Perform SEO checks
const seoChecks = new SeoChecks(log);
for (const [pageUrl, pageTags] of Object.entries(extractedTags)) {
seoChecks.performChecks(pageUrl || '/', pageTags);
}
seoChecks.finalChecks();
const detectedTags = seoChecks.getDetectedTags();

const projectedTrafficLost = await calculateProjectedTraffic(context, site, detectedTags, log);
const cpcValue = await calculateCPCValue(context, site.getId());
log.warn(`Expected cpc value: ${cpcValue}`);
const projectedTrafficValue = projectedTrafficLost * cpcValue;
const auditResult = {
detectedTags,
sourceS3Folder: `${bucketName}/${prefix}`,
fullAuditRef: 'na',
fullAuditRef: '',
finalUrl: baseURL,
projectedTrafficLost,
projectedTrafficValue,
};

return {
auditResult,
fullAuditRef: baseURL,
Expand Down
11 changes: 9 additions & 2 deletions src/metatags/opportunityHandler.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,7 @@ export default async function convertToOpportunity(auditUrl, auditData, context)
throw new Error(`Failed to fetch opportunities for siteId ${auditData.siteId}: ${e.message}`);
}

const { detectedTags, projectedTrafficLost, projectedTrafficValue } = auditData.auditResult;
try {
if (!metatagsOppty) {
const opportunityData = {
Expand All @@ -180,19 +181,25 @@ export default async function convertToOpportunity(auditUrl, auditData, context)
],
},
tags: ['Traffic acquisition'],
data: {
projectedTrafficLost,
projectedTrafficValue,
},
};
metatagsOppty = await Opportunity.create(opportunityData);
log.debug('Meta-tags Opportunity created');
} else {
metatagsOppty.setAuditId(auditData.siteId);
metatagsOppty.setData({
projectedTrafficLost,
projectedTrafficValue,
});
await metatagsOppty.save();
}
} catch (e) {
log.error(`Creating meta-tags opportunity for siteId ${auditData.siteId} failed with error: ${e.message}`, e);
throw new Error(`Failed to create meta-tags opportunity for siteId ${auditData.siteId}: ${e.message}`);
}

const { detectedTags } = auditData.auditResult;
const suggestions = [];
// Generate suggestions data to be inserted in meta-tags opportunity suggestions
Object.keys(detectedTags).forEach((endpoint) => {
Expand Down
39 changes: 39 additions & 0 deletions src/support/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,10 @@ import {
import URI from 'urijs';
import { JSDOM } from 'jsdom';
import { GetSecretValueCommand, SecretsManagerClient } from '@aws-sdk/client-secrets-manager';
import { getObjectFromKey } from '../utils/s3-utils.js';

URI.preventInvalidHostname = true;
const DEFAULT_CPC_VALUE = 1; // $1

// weekly pageview threshold to eliminate urls with lack of samples

Expand Down Expand Up @@ -289,3 +291,40 @@ export const enhanceBacklinksWithFixes = (brokenBacklinks, keywords, log) => {
}
return result;
};

/**
 * Fetches the organic traffic data for a site from S3 and calculates the CPC value as per
 * https://wiki.corp.adobe.com/pages/viewpage.action?spaceKey=AEMSites&title=Success+Studio+Projected+Business+Impact+Metrics#SuccessStudioProjectedBusinessImpactMetrics-IdentifyingCPCvalueforadomain
 *
 * The CPC is taken from the most recent (last) entry of the organic traffic data as
 * `cost / value`. The cost is reported in cents, so the ratio is divided by 100 to return the
 * CPC in USD. DEFAULT_CPC_VALUE is returned when the data is missing, empty, cannot be fetched,
 * or does not produce a finite number (e.g. a zero or missing `value`).
 *
 * @param context - must provide `env.S3_IMPORTER_BUCKET_NAME`, `s3Client` and `log`
 * @param siteId
 * @returns {Promise<number>} CPC value in USD
 * @throws {Error} if the bucket name, S3 client, logger or siteId is missing
 */
export async function calculateCPCValue(context, siteId) {
  if (!context?.env?.S3_IMPORTER_BUCKET_NAME) {
    throw new Error('S3 importer bucket name is required');
  }
  if (!context.s3Client) {
    throw new Error('S3 client is required');
  }
  if (!context.log) {
    throw new Error('Logger is required');
  }
  if (!siteId) {
    throw new Error('SiteId is required');
  }
  const { s3Client, log } = context;
  const bucketName = context.env.S3_IMPORTER_BUCKET_NAME;
  const key = `metrics/${siteId}/ahrefs/organic-traffic.json`;
  try {
    const organicTrafficData = await getObjectFromKey(s3Client, bucketName, key, log);
    if (!Array.isArray(organicTrafficData) || organicTrafficData.length === 0) {
      log.info(`Organic traffic data not available for ${siteId}. Using Default CPC value.`);
      return DEFAULT_CPC_VALUE;
    }
    const lastTraffic = organicTrafficData[organicTrafficData.length - 1];
    // cost is in cents (a raw ratio such as 229 is far too high for a CPC),
    // so divide by 100 to get the CPC in USD
    const cpcValue = lastTraffic.cost / lastTraffic.value / 100;
    if (!Number.isFinite(cpcValue)) {
      // zero/missing traffic value or missing cost would otherwise leak Infinity/NaN to callers
      log.info(`Organic traffic data not usable for ${siteId}. Using Default CPC value.`);
      return DEFAULT_CPC_VALUE;
    }
    return cpcValue;
  } catch (err) {
    log.error(`Error fetching organic traffic data for site ${siteId}. Using Default CPC value.`, err);
    return DEFAULT_CPC_VALUE;
  }
}
11 changes: 6 additions & 5 deletions src/utils/s3-utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,10 @@ export async function getObjectKeysUsingPrefix(
let continuationToken = null;
if (!s3Client || !bucketName || !prefix) {
log.error(
`Invalid input parameters: ensure s3Client, bucketName:${bucketName}, and prefix:${prefix} are provided.`,
`Invalid input parameters in getObjectKeysUsingPrefix: ensure s3Client, bucketName:${bucketName}, and prefix:${prefix} are provided.`,
);
throw new Error(
'Invalid input parameters: ensure s3Client, bucketName, and prefix are provided.',
'Invalid input parameters in getObjectKeysUsingPrefix: ensure s3Client, bucketName, and prefix are provided.',
);
}
try {
Expand All @@ -41,7 +41,9 @@ export async function getObjectKeysUsingPrefix(
// eslint-disable-next-line no-await-in-loop
const data = await s3Client.send(new ListObjectsV2Command(params));
data?.Contents?.forEach((obj) => {
objectKeys.push(obj.Key);
if (obj.Key?.endsWith('scrape.json')) {
objectKeys.push(obj.Key);
}
});
continuationToken = data?.NextContinuationToken;
} while (continuationToken);
Expand Down Expand Up @@ -72,7 +74,7 @@ export async function getObjectKeysUsingPrefix(
export async function getObjectFromKey(s3Client, bucketName, key, log) {
if (!s3Client || !bucketName || !key) {
log.error(
'Invalid input parameters: ensure s3Client, bucketName, and key are provided.',
'Invalid input parameters in getObjectFromKey: ensure s3Client, bucketName, and key are provided.',
);
return null;
}
Expand All @@ -93,7 +95,6 @@ export async function getObjectFromKey(s3Client, bucketName, key, log) {
return null;
}
}

// Always return body for non-JSON content types
return body;
} catch (err) {
Expand Down
Loading
Loading