-
Notifications
You must be signed in to change notification settings - Fork 1
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Investigate performance of /v1/query/run #45
Comments
Existing view: USE [M3_ANNOTATIONS]
GO
/****** Object: View [dbo].[annotations] Script Date: 2/14/2025 2:56:18 PM ******/
SET ANSI_NULLS ON
GO
SET QUOTED_IDENTIFIER ON
GO
CREATE VIEW [dbo].[annotations]
AS
SELECT
im.uuid AS imaged_moment_uuid,
im.elapsed_time_millis AS index_elapsed_time_millis,
im.recorded_timestamp AS index_recorded_timestamp,
im.timecode AS index_timecode,
obs.uuid AS observation_uuid,
obs.activity,
obs.concept,
obs.duration_millis,
obs.observation_group,
obs.observation_timestamp,
obs.observer,
ir.uuid AS image_reference_uuid,
ir.description AS image_description,
ir.format AS image_format,
ir.height_pixels AS image_height,
ir.width_pixels AS image_width,
ir.url AS image_url,
ass.link_name,
ass.link_value,
ass.to_concept,
ass.mime_type AS association_mime_type,
CONCAT(ass.link_name, ' | ', ass.to_concept, ' | ', ass.link_value) AS associations,
ass.uuid AS association_uuid,
ad.altitude,
ad.coordinate_reference_system,
ad.depth_meters,
ad.latitude,
ad.longitude,
ad.oxygen_ml_per_l,
ad.phi,
ad.xyz_position_units,
ad.pressure_dbar,
ad.psi,
ad.salinity,
ad.temperature_celsius,
ad.theta,
ad.x,
ad.y,
ad.z,
ad.light_transmission,
vr.uuid AS video_reference_uuid,
vr.audio_codec,
vr.container AS video_container,
vr.description AS video_reference_description,
vr.frame_rate,
vr.height AS video_height,
vr.sha512 AS video_sha512,
vr.size_bytes AS video_size_bytes,
vr.uri AS video_uri,
vr.video_codec,
vr.width AS video_width,
v.description AS video_description,
v.duration_millis AS video_duration_millis,
v.name AS video_name,
v.start_time AS video_start_timestamp,
vs.camera_id,
vs.description AS video_sequence_description,
vs.name AS video_sequence_name,
info.mission_contact AS chief_scientist,
info.mission_id AS dive_number,
info.platform_name AS camera_platform
FROM
imaged_moments im
LEFT JOIN observations obs ON obs.imaged_moment_uuid = im.uuid
LEFT JOIN image_references ir ON ir.imaged_moment_uuid = im.uuid
LEFT JOIN associations ass ON ass.observation_uuid = obs.uuid
LEFT JOIN ancillary_data ad ON ad.imaged_moment_uuid = im.uuid
LEFT JOIN M3_VIDEO_ASSETS.dbo.video_references vr ON vr.uuid = im.video_reference_uuid
LEFT JOIN M3_VIDEO_ASSETS.dbo.videos v ON v.uuid = vr.video_uuid
LEFT JOIN M3_VIDEO_ASSETS.dbo.video_sequences vs ON vs.uuid = v.video_sequence_uuid
LEFT JOIN video_reference_information info ON info.video_reference_uuid = im.video_reference_uuid
GO
|
I've verified that the indices exist on all primary and foreign keys in both M3_ANNOTATIONS and M3_VIDEO_ASSETS |
BSON Sanity CheckChecking the relative size of bson vs json: #!/usr/bin/env -S scala shebang
//> using dep "org.mongodb:bson:5.3.1"
import java.nio.charset.StandardCharsets
import java.util.HexFormat
import java.nio.ByteBuffer
import org.bson.BsonDocument
import org.bson.BsonBinaryReader
import org.bson.BsonBinaryWriter
import org.bson.Document
import org.bson.codecs.DocumentCodec
import org.bson.codecs.EncoderContext
import org.bson.io.BasicOutputBuffer
val json = """{"data" :[
| {"name": "MongoDB", "type": "database", "count": 1, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 102 } },
| {"name": "dbm", "type": "database", "count": 1, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 102 } },
| {"name": "dbm", "type": "database", "count": 2, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 1102 } },
| {"name": "dbm", "type": "database", "count": 3, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 2102 } },
| {"name": "dbm", "type": "database", "count": 4, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 3102 } },
| {"name": "dbm", "type": "database", "count": 5, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 4102 } },
| {"name": "dbm", "type": "database", "count": 6, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 5102 } },
| {"name": "dbm", "type": "database", "count": 7, "versions": [ "v3.2", "v3.0", "v2.6" ], "info": { x: 203, y: 6102 } },
| ]
|}
""".stripMargin
val document = Document.parse(json)
val bytes = toBsonBytes(document)
val hexFormat = HexFormat.of()
val hex = hexFormat.formatHex(bytes)
println(s"SIZE: ${bytes.size}, HEX: $hex")
val decoded = fromBsonBytes(bytes)
val decodedJson = decoded.toJson()
println(s"SIZE: ${decodedJson.getBytes(StandardCharsets.UTF_8).size}, JSON: ${decodedJson}")
def toBsonBytes(document: Document): Array[Byte] = {
val buffer = BasicOutputBuffer()
val write = BsonBinaryWriter(buffer)
new DocumentCodec().encode(write, document, EncoderContext.builder().build())
buffer.toByteArray()
}
def fromBsonBytes(bytes: Array[Byte]) = {
val reader = BsonBinaryReader(ByteBuffer.wrap(bytes))
val bsonDoc = BsonDocument.parse(new DocumentCodec().decode(reader, null).toJson())
Document.parse(bsonDoc.toJson())
} Output. The size is the compressed bson byte array length vs the json string (UTF-8) byte length EDIT: I modified it to compare the minimized json string to the bson byte length
|
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
The current implementation reads from the database, builds a tab delimited string in memory, then serializes that out. (Note that compression is enabled on the server, so the streaming size of the file is not necessarily the issue) This implementation is slow. Look in to improving the performance.
Options include:
The text was updated successfully, but these errors were encountered: