Fix app crash on big CSV downloads #909

Merged: 7 commits, Aug 10, 2023

Changes from 2 commits
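In brief: the previous implementation ran a single query over the whole requested range and sent the stringified CSV in one response, which could exhaust memory on large ranges. The diff below splits the range into 6-month chunks, appends each chunk's rows to a temp file, and streams that file to the client. A minimal sketch of the pattern, where the hypothetical `fetchChunkCsv` stands in for the real `getDataQuery` + `csv-stringify` step:

```ts
import {
  closeSync,
  createReadStream,
  openSync,
  unlinkSync,
  writeSync,
} from 'fs';
import { join } from 'path';
import type { Response } from 'express';

// Sketch of the chunk-and-stream pattern this PR adopts. fetchChunkCsv is
// a hypothetical stand-in for querying one date chunk and serializing it.
async function sendCsvInChunks(
  res: Response,
  fileName: string,
  chunks: { start: string; end: string }[],
  fetchChunkCsv: (start: string, end: string, header: boolean) => Promise<string>,
) {
  const tempFileName = join(
    process.cwd(),
    Math.random().toString(36).substring(2, 15),
  );
  const fd = openSync(tempFileName, 'w');
  try {
    // Sequential on purpose: only one chunk's rows are in memory at a time.
    for (let i = 0; i < chunks.length; i += 1) {
      // eslint-disable-next-line no-await-in-loop
      const csvLines = await fetchChunkCsv(chunks[i].start, chunks[i].end, i === 0);
      writeSync(fd, csvLines);
    }
    closeSync(fd);
    res.set({
      'Content-Disposition': `attachment; filename=${encodeURIComponent(fileName)}`,
    });
    const readStream = createReadStream(tempFileName);
    readStream.pipe(res);
    readStream.on('end', () => unlinkSync(tempFileName)); // remove temp file after send
  } catch (error) {
    console.error(error);
    unlinkSync(tempFileName);
  }
}
```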
2 changes: 2 additions & 0 deletions packages/api/package.json
@@ -91,6 +91,7 @@
"geotiff": "^1.0.4",
"hashids": "^2.2.1",
"lodash": "^4.17.15",
"luxon": "^3.3.0",
"md5-file": "^5.0.0",
"moment": "^2.27.0",
"moment-timezone": "^0.5.31",
@@ -115,6 +116,7 @@
"@types/faker": "^4.1.12",
"@types/jest": "24.9.0",
"@types/lodash": "^4.14.156",
"@types/luxon": "^3.3.1",
"@types/moment-timezone": "^0.5.13",
"@types/multer": "^1.4.3",
"@types/node": "^18.16.16",
183 changes: 134 additions & 49 deletions packages/api/src/time-series/time-series.service.ts
@@ -1,5 +1,11 @@
import { InjectRepository } from '@nestjs/typeorm';
import { createReadStream, unlinkSync } from 'fs';
import {
closeSync,
createReadStream,
openSync,
unlinkSync,
writeSync,
} from 'fs';
import { Repository } from 'typeorm';
import Bluebird from 'bluebird';
import type { Response } from 'express';
@@ -15,6 +21,7 @@ import { join } from 'path';
// https://github.com/adaltas/node-csv/issues/372
// eslint-disable-next-line import/no-unresolved
import { stringify } from 'csv-stringify/sync';
import { DateTime } from 'luxon';
import { SiteDataDto } from './dto/site-data.dto';
import { SurveyPointDataDto } from './dto/survey-point-data.dto';
import { TimeSeries } from './time-series.entity';
@@ -25,6 +32,8 @@ import {
getDataQuery,
getDataRangeQuery,
groupByMetricAndSource,
getAvailableMetricsQuery,
getAvailableDataDates,
} from '../utils/time-series.utils';
import { Site } from '../sites/sites.entity';
import { SiteSurveyPoint } from '../site-survey-points/site-survey-points.entity';
@@ -115,74 +124,150 @@ export class TimeSeriesService {
) {
const { siteId } = siteDataDto;

const data: TimeSeriesData[] = await getDataQuery({
const uniqueMetrics = await getAvailableMetricsQuery({
timeSeriesRepository: this.timeSeriesRepository,
siteId,
metrics,
start: startDate,
end: endDate,
hourly,
csv: true,
metrics,
});

const metricSourceAsKey = data.map((x) => ({
key: `${x.metric}_${x.source}`,
value: x.value,
timestamp: x.timestamp,
}));

const allKeys = [
const headerKeys = [
'timestamp',
...new Map(metricSourceAsKey.map((x) => [x.key, x])).keys(),
...uniqueMetrics.map((x) => `${x.metric}_${x.source}`),
];

const emptyRow = Object.fromEntries(allKeys.map((x) => [x, undefined])) as {
const emptyRow = Object.fromEntries(
headerKeys.map((x) => [x, undefined]),
) as {
[k: string]: any;
};

const groupedByTimestamp = metricSourceAsKey.reduce(
(acc, curr) => {
const key = curr.timestamp.toISOString();
const accValue = acc[key];
if (typeof accValue === 'object') {
// eslint-disable-next-line fp/no-mutating-methods
accValue.push(curr);
} else {
// eslint-disable-next-line fp/no-mutation
acc[key] = [curr];
}
return acc;
},
{} as {
[k: string]: {
key: string;
value: number;
timestamp: Date;
}[];
},
const { min, max } = (await getAvailableDataDates({
timeSeriesRepository: this.timeSeriesRepository,
siteId,
metrics,
})) || { min: new Date(), max: new Date() };

const minDate = DateTime.fromISO(startDate || min.toISOString()).startOf(
'hour',
);
const maxDate = DateTime.fromISO(endDate || max.toISOString()).startOf(
'hour',
);

const rows = Object.entries(groupedByTimestamp).map(([timestamp, values]) =>
values.reduce((acc, curr) => {
// eslint-disable-next-line fp/no-mutation
acc[curr.key] = curr.value;
// eslint-disable-next-line fp/no-mutation
acc.timestamp = timestamp;
return acc;
}, structuredClone(emptyRow)),
const monthChunkSize = 6;

const createChunks = (
curr: DateTime,
acc: { start: DateTime; end: DateTime }[],
): { start: DateTime; end: DateTime }[] => {
if (curr.diff(minDate, 'months').months < monthChunkSize)
return [
...acc,
{ end: curr.minus({ milliseconds: 1 }), start: minDate },
];

const next = curr.minus({ months: monthChunkSize });
const item = { end: curr.minus({ milliseconds: 1 }), start: next };

return createChunks(next, [...acc, item]);
};

const chunks = createChunks(maxDate, []);

const tempFileName = join(
process.cwd(),
Math.random().toString(36).substring(2, 15),
);

const fileName = `data_site_${siteId}_${moment(startDate).format(
DATE_FORMAT,
)}_${moment(endDate).format(DATE_FORMAT)}.csv`;
const fd = openSync(tempFileName, 'w');

try {
// eslint-disable-next-line fp/no-mutation, no-plusplus
for (let i = 0; i < chunks.length; i++) {
const first = i === 0;

// We want this to run sequentially, which is why it is OK here to disable no-await-in-loop
// eslint-disable-next-line no-await-in-loop
const data: TimeSeriesData[] = await getDataQuery({
timeSeriesRepository: this.timeSeriesRepository,
siteId,
metrics,
start: chunks[i].start.toISO() as string,
end: chunks[i].end.toISO() as string,
hourly,
csv: true,
order: 'DESC',
});

const metricSourceAsKey = data.map((x) => ({
key: `${x.metric}_${x.source}`,
value: x.value,
timestamp: x.timestamp,
}));

const groupedByTimestamp = metricSourceAsKey.reduce(
(acc, curr) => {
const key = curr.timestamp.toISOString();
const accValue = acc[key];
if (typeof accValue === 'object') {
// eslint-disable-next-line fp/no-mutating-methods
accValue.push(curr);
} else {
// eslint-disable-next-line fp/no-mutation
acc[key] = [curr];
}
return acc;
},
{} as {
[k: string]: {
key: string;
value: number;
timestamp: Date;
}[];
},
);

const rows = Object.entries(groupedByTimestamp).map(
([timestamp, values]) =>
values.reduce((acc, curr) => {
// eslint-disable-next-line fp/no-mutation
acc[curr.key] = curr.value;
// eslint-disable-next-line fp/no-mutation
acc.timestamp = timestamp;
return acc;
}, structuredClone(emptyRow)),
);

const csvLines = stringify(rows, { header: first });

res
.set({
writeSync(fd, csvLines);
}

closeSync(fd);

const fileName = `data_site_${siteId}_${moment(startDate).format(
DATE_FORMAT,
)}_${moment(endDate).format(DATE_FORMAT)}.csv`;

const readStream = createReadStream(tempFileName);

res.set({
'Content-Disposition': `attachment; filename=${encodeURIComponent(
fileName,
)}`,
})
.send(stringify(rows, { header: true }));
});

readStream.pipe(res);

readStream.on('end', () => {
unlinkSync(tempFileName);
});
} catch (error) {
console.error(error);
unlinkSync(tempFileName);
}
}

async findSurveyPointDataRange(
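The `createChunks` recursion above walks backwards from `maxDate` in 6-month steps; each chunk ends 1 ms before the previous chunk's start, so adjacent chunks never share a boundary timestamp. Because chunks are generated newest-first and each chunk is queried with `order: 'DESC'`, the concatenated file stays in descending timestamp order. A worked example with hypothetical dates (the real bounds come from `getAvailableDataDates` or the user-supplied start/end):

```ts
import { DateTime } from 'luxon';

// Hypothetical bounds for illustration only.
const minDate = DateTime.fromISO('2022-01-01T00:00:00.000Z', { zone: 'utc' });
const maxDate = DateTime.fromISO('2023-03-15T07:24:00.000Z', { zone: 'utc' }).startOf('hour');

const monthChunkSize = 6;

// Same shape as the PR's createChunks: recurse backwards from maxDate,
// emitting { start, end } pairs until less than 6 months remain.
const createChunks = (
  curr: DateTime,
  acc: { start: DateTime; end: DateTime }[],
): { start: DateTime; end: DateTime }[] => {
  if (curr.diff(minDate, 'months').months < monthChunkSize)
    return [...acc, { end: curr.minus({ milliseconds: 1 }), start: minDate }];

  const next = curr.minus({ months: monthChunkSize });
  return createChunks(next, [
    ...acc,
    { end: curr.minus({ milliseconds: 1 }), start: next },
  ]);
};

createChunks(maxDate, []).forEach((c) =>
  console.log(c.start.toISO(), '->', c.end.toISO()),
);
// Newest chunk first, matching the DESC query order:
//   2022-09-15T07:00 -> 2023-03-15T06:59:59.999
//   2022-03-15T07:00 -> 2022-09-15T06:59:59.999
//   2022-01-01T00:00 -> 2022-03-15T06:59:59.999
```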
85 changes: 83 additions & 2 deletions packages/api/src/utils/time-series.utils.ts
@@ -78,6 +78,85 @@ export const groupByMetricAndSource = <T extends TimeSeriesGroupable>(
.toJSON();
};

export const getAvailableMetricsQuery = ({
timeSeriesRepository,
siteId,
start: startDate,
end: endDate,
surveyPointId,
metrics,
}: {
timeSeriesRepository: Repository<TimeSeries>;
siteId: number;
start?: string;
end?: string;
surveyPointId?: number;
metrics: Metric[];
}) => {
const { sql: surveyPointConditionSql, params: surveyPointConditionParams } =
surveyPointId
? {
sql: 'AND (source.survey_point_id = :surveyPointId OR source.survey_point_id IS NULL)',
params: { surveyPointId },
}
: { sql: '', params: {} };

return timeSeriesRepository
.createQueryBuilder('time_series')
.select('metric')
.addSelect('source.type', 'source')
.distinct(true)
.innerJoin(
'time_series.source',
'source',
`source.site_id = :siteId ${surveyPointConditionSql}`,
{ siteId, ...surveyPointConditionParams },
)
.leftJoin('source.surveyPoint', 'surveyPoint')
.andWhere(startDate ? 'timestamp >= :startDate' : '1=1', { startDate })
.andWhere(endDate ? 'timestamp <= :endDate' : '1=1', { endDate })
.andWhere(metrics.length > 0 ? 'metric IN (:...metrics)' : '1=1', {
metrics,
})
.getRawMany();
};

export const getAvailableDataDates = ({
timeSeriesRepository,
siteId,
surveyPointId,
metrics,
}: {
timeSeriesRepository: Repository<TimeSeries>;
siteId: number;
surveyPointId?: number;
metrics: Metric[];
}): Promise<{ min: Date; max: Date } | undefined> => {
const { sql: surveyPointConditionSql, params: surveyPointConditionParams } =
surveyPointId
? {
sql: 'AND (source.survey_point_id = :surveyPointId OR source.survey_point_id IS NULL)',
params: { surveyPointId },
}
: { sql: '', params: {} };

return timeSeriesRepository
.createQueryBuilder('time_series')
.select('min("timestamp")')
.addSelect('max("timestamp")')
.innerJoin(
'time_series.source',
'source',
`source.site_id = :siteId ${surveyPointConditionSql}`,
{ siteId, ...surveyPointConditionParams },
)
.leftJoin('source.surveyPoint', 'surveyPoint')
.andWhere(metrics.length > 0 ? 'metric IN (:...metrics)' : '1=1', {
metrics,
})
.getRawOne();
};

interface GetDataQueryParams {
timeSeriesRepository: Repository<TimeSeries>;
siteId: number;
@@ -87,6 +166,7 @@ interface GetDataQueryParams {
hourly?: boolean;
surveyPointId?: number;
csv?: boolean;
order?: 'ASC' | 'DESC';
}

export const getDataQuery = ({
@@ -98,6 +178,7 @@ export const getDataQuery = ({
hourly,
surveyPointId,
csv = false,
order = 'ASC',
}: GetDataQueryParams): Promise<TimeSeriesData[]> => {
const { endDate, startDate } = csv
? { startDate: start, endDate: end }
@@ -140,9 +221,9 @@ export const getDataQuery = ({
.groupBy(
"date_trunc('hour', timestamp), metric, source.type, surveyPoint.id",
)
.orderBy("date_trunc('hour', timestamp)", 'ASC')
.orderBy("date_trunc('hour', timestamp)", order)
.getRawMany()
: mainQuery.orderBy('timestamp', 'ASC').getRawMany();
: mainQuery.orderBy('timestamp', order).getRawMany();
};

export const getDataRangeQuery = (
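The service computes the CSV header once, from `getAvailableMetricsQuery`, before any chunk is fetched. A short sketch of why that keeps columns aligned across chunks: every row is seeded from `emptyRow`, which carries one key per `metric_source` pair, so `csv-stringify` infers an identical column set for every chunk even when a chunk has no data for some metric. Metric names below are hypothetical placeholders:

```ts
import { stringify } from 'csv-stringify/sync';

// One column per metric_source pair, fixed before any chunk is queried.
const headerKeys = ['timestamp', 'bottom_temperature_spotter', 'wind_speed_sofar_model'];
const emptyRow = Object.fromEntries(headerKeys.map((k) => [k, undefined]));

// A row from some chunk; missing metrics stay undefined and serialize empty.
const row = {
  ...emptyRow,
  timestamp: '2023-03-01T00:00:00.000Z',
  bottom_temperature_spotter: 27.4,
};

// header is emitted only for the first chunk (header: first in the service).
console.log(stringify([row], { header: true }));
// timestamp,bottom_temperature_spotter,wind_speed_sofar_model
// 2023-03-01T00:00:00.000Z,27.4,
```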
10 changes: 10 additions & 0 deletions yarn.lock
@@ -4947,6 +4947,11 @@
resolved "https://registry.yarnpkg.com/@types/long/-/long-4.0.2.tgz#b74129719fc8d11c01868010082d483b7545591a"
integrity sha512-MqTGEo5bj5t157U6fA/BiDynNkn0YknVdh48CMPkTSpFTVmvao5UQmm7uEF6xBEo7qIMAlY/JSleYaE6VOdpaA==

"@types/luxon@^3.3.1":
version "3.3.1"
resolved "https://registry.yarnpkg.com/@types/luxon/-/luxon-3.3.1.tgz#08727da7d81ee6a6c702b9dc6c8f86be010eb4dc"
integrity sha512-XOS5nBcgEeP2PpcqJHjCWhUCAzGfXIU8ILOSLpx2FhxqMW9KdxgCGXNOEKGVBfveKtIpztHzKK5vSRVLyW/NqA==

"@types/markdown-it@^12.2.3":
version "12.2.3"
resolved "https://registry.yarnpkg.com/@types/markdown-it/-/markdown-it-12.2.3.tgz#0d6f6e5e413f8daaa26522904597be3d6cd93b51"
@@ -14232,6 +14237,11 @@ lru-memoizer@^2.1.4:
lodash.clonedeep "^4.5.0"
lru-cache "~4.0.0"

luxon@^3.3.0:
version "3.3.0"
resolved "https://registry.yarnpkg.com/luxon/-/luxon-3.3.0.tgz#d73ab5b5d2b49a461c47cedbc7e73309b4805b48"
integrity sha512-An0UCfG/rSiqtAIiBPO0Y9/zAnHUZxAMiCpTd5h2smgsj7GGmcenvrvww2cqNA8/4A5ZrD1gJpHN2mIHZQF+Mg==

lz-string@^1.4.4, lz-string@^1.5.0:
version "1.5.0"
resolved "https://registry.yarnpkg.com/lz-string/-/lz-string-1.5.0.tgz#c1ab50f77887b712621201ba9fd4e3a6ed099941"