
{"id":12596,"date":"2024-08-20T08:00:52","date_gmt":"2024-08-20T08:00:52","guid":{"rendered":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarising-optimizing-movie-selections-a-multi-task-multi-modal-framework-with-strategies-for-missing-modality-challenges-copy\/"},"modified":"2024-08-20T11:47:40","modified_gmt":"2024-08-20T11:47:40","slug":"summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection","status":"publish","type":"post","link":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/","title":{"rendered":"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"12596\" class=\"elementor elementor-12596\" data-elementor-post-type=\"post\">\n\t\t\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-cd44eb5 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"cd44eb5\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-9f11b70\" data-id=\"9f11b70\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-215a70e elementor-widget elementor-widget-heading\" data-id=\"215a70e\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">BLOGS<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section 
class=\"elementor-section elementor-top-section elementor-element elementor-element-28dc161 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"28dc161\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-63cf269\" data-id=\"63cf269\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-6837436 elementor-widget elementor-widget-heading\" data-id=\"6837436\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9bd1630 elementor-widget elementor-widget-text-editor\" data-id=\"9bd1630\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tAyush Ghadiya, Purbayan Kar, Vishal Chudasama, Pankaj Wasnik\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7a034cb elementor-widget elementor-widget-text-editor\" data-id=\"7a034cb\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>20<sup>th<\/sup> August 2024<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-a7d1e72 elementor-widget elementor-widget-image\" data-id=\"a7d1e72\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"1600\" height=\"746\" src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12.png\" class=\"attachment-full size-full wp-image-12651\" alt=\"\" srcset=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12.png 1600w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12-300x140.png 300w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12-1024x477.png 1024w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12-768x358.png 768w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12-1536x716.png 1536w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-12-1568x731.png 1568w\" sizes=\"(max-width: 1600px) 100vw, 1600px\" style=\"width:100%;height:46.63%;max-width:1600px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-acbeaeb elementor-widget elementor-widget-text-editor\" data-id=\"acbeaeb\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">Overview of proposed framework<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9202657 elementor-widget elementor-widget-text-editor\" data-id=\"9202657\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">Vishal Chudasama summarises paper titled <\/span><a href=\"https:\/\/openaccess.thecvf.com\/content\/CVPR2024W\/MULA\/papers\/Ghadiya_Cross-Modal_Fusion_and_Attention_Mechanism_for_Weakly_Supervised_Video_Anomaly_CVPRW_2024_paper.pdf\"><span style=\"font-weight: 
400;\">Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection<\/span><\/a><span style=\"font-weight: 400;\"> co-authored by Ayush Ghadiya, Purbayan Kar, Vishal Chudasama, Pankaj Wasnik accepted at the <\/span><a href=\"https:\/\/mula-workshop.github.io\/\"><b>CVPR 2024 7<\/b><b>th<\/b><b>\u00a0MULA Workshop | June 2024<\/b><\/a><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f0a3e28 elementor-widget elementor-widget-text-editor\" data-id=\"f0a3e28\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h4><b>Introduction<\/b><\/h4>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-d95d9a3 elementor-widget elementor-widget-text-editor\" data-id=\"d95d9a3\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">Weakly supervised video anomaly detection (WS-VAD) is essential for identifying anomalous events in videos, such as violence and nudity, using minimal labelling effort. This task is crucial for content moderation and surveillance applications, ensuring safer online environments and effective security systems. While deep learning and multi-modal learning methods have significantly improved WS-VAD tasks&#8217; performance, they still face many significant challenges.\u00a0One of the challenges in WS-VAD is the imbalanced modality information and the inconsistent discrimination between normal and abnormal features. 
This imbalance often hampers the detection accuracy, as the model struggles to appropriately weigh the contributions of different modalities (audio and visual).\u00a0<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-27a8d90 elementor-widget elementor-widget-text-editor\" data-id=\"27a8d90\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><b>To address these issues, we introduce two novel components<\/b><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-a51851b elementor-widget elementor-widget-text-editor\" data-id=\"a51851b\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<ul>\n \t<li style=\"font-weight: 400;\" aria-level=\"1\"><b>Cross-modal Fusion Adapter (CFA)<\/b><span style=\"font-weight: 400;\"> dynamically selects and enhances audio-visual features highly relevant to the visual modality. 
It adjusts the contribution of each modality based on the importance of audio features relative to the visual modality, thereby overcoming modality imbalance.<\/span><\/li>\n \t<li style=\"font-weight: 400;\" aria-level=\"1\"><b>Hyperbolic Lorentzian Graph Attention (HLGAtt)<\/b><span style=\"font-weight: 400;\"> operates within hyperbolic space to learn layer-wise curvature parameters, which aids in more effectively distinguishing between normal and abnormal features.\n<\/span><\/li>\n<\/ul>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-580c6ea elementor-widget elementor-widget-text-editor\" data-id=\"580c6ea\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h4><b>Key Results<\/b><\/h4>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-e94b615 elementor-widget elementor-widget-image\" data-id=\"e94b615\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" data-src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/elementor\/thumbs\/blog-2-qsvphxt667q0qenc9akbyxlfup573h5vepafhaby68.png\" title=\"blog-2\" alt=\"blog-2\" src=\"data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==\" class=\"lazyload\" style=\"--smush-placeholder-width: 800px; --smush-placeholder-aspect-ratio: 800\/608;\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-3b2614d elementor-widget elementor-widget-text-editor\" data-id=\"3b2614d\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">Table: Comparison with state-of-the-art methods on XD-Violence 
Dataset<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-fa43ee6 elementor-widget elementor-widget-image\" data-id=\"fa43ee6\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"693\" height=\"365\" data-src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-3.png\" class=\"attachment-full size-full wp-image-12610 lazyload\" alt=\"\" data-srcset=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-3.png 693w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-3-300x158.png 300w\" data-sizes=\"(max-width: 693px) 100vw, 693px\" style=\"--smush-placeholder-width: 693px; --smush-placeholder-aspect-ratio: 693\/365;width:100%;height:52.67%;max-width:693px\" src=\"data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-8af8320 elementor-widget elementor-widget-text-editor\" data-id=\"8af8320\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">Fig: Visual comparison in terms of anomaly score curves on sample video for Violence Detection task.\u00a0 Here, yellow regions are the temporal ground-truths.<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-b204e23 elementor-widget elementor-widget-text-editor\" data-id=\"b204e23\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h4><b>Conclusion<\/b><\/h4>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-df93121 
elementor-widget elementor-widget-text-editor\" data-id=\"df93121\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p><span style=\"font-weight: 400;\">We present a new WS-VAD framework with a Cross-modal Fusion Adapter (CFA) module and a Hyperbolic Lorentzian Graph Attention (HLGAtt) module to detect anomalous events such as violence and nudity accurately. The proposed CFA module addresses the imbalanced modality information issue and effectively facilitates multi-modal interaction by dynamically selecting the relevant audio features with corresponding visual features. Additionally, the proposed HLGAtt module captures the hierarchical relationships within normal and abnormal representations, thereby improving the accuracy of separating normal and abnormal features. The proposed framework outperforms existing state-of-the-art methods on violence and nudity detection tasks.<\/span><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-0362925 elementor-widget elementor-widget-text-editor\" data-id=\"0362925\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>To know more about Sony Research India\u2019s Research Publications, visit the \u2018Publications\u2019 section on our \u2018Open Innovation\u2019 page: <a href=\"https:\/\/www.sonyresearchindia.com\/open-innovation\/\" target=\"_blank\" rel=\"noopener\">Open Innovation with Sony R&amp;D \u2013 Sony Research India<\/a><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-c0518a1 elementor-hidden-desktop elementor-hidden-tablet elementor-hidden-mobile elementor-section-boxed elementor-section-height-default 
elementor-section-height-default\" data-id=\"c0518a1\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-b15be70\" data-id=\"b15be70\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap\">\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-55dd72b\" data-id=\"55dd72b\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-e06d72d elementor-widget elementor-widget-image\" data-id=\"e06d72d\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"512\" height=\"322\" data-src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/02\/19th-Cover-Image-2.png\" class=\"attachment-full size-full wp-image-11786 lazyload\" alt=\"\" data-srcset=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/02\/19th-Cover-Image-2.png 512w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/02\/19th-Cover-Image-2-300x189.png 300w\" data-sizes=\"(max-width: 512px) 100vw, 512px\" style=\"--smush-placeholder-width: 512px; --smush-placeholder-aspect-ratio: 512\/322;width:100%;height:62.89%;max-width:512px\" src=\"data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-fd52b32\" data-id=\"fd52b32\" data-element_type=\"column\">\n\t\t\t<div 
class=\"elementor-widget-wrap\">\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-9b69060 elementor-hidden-desktop elementor-hidden-tablet elementor-hidden-mobile elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"9b69060\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-cfbe302\" data-id=\"cfbe302\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-6d045fb elementor-widget elementor-widget-text-editor\" data-id=\"6d045fb\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tThe introduced modules and techniques help the proposed method to align known class\nrepresentations effectively so that it can detect the unknown objects accurately. 
To validate\nthis, we carried out extensive experiments &#038; ablation studies and found that the proposed\nmethod outperforms existing SOTA methods with significant improvement on the MS-COCO\n&#038; PASCAL VOC dataset for the OSOD task.\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f97c4c4 elementor-widget elementor-widget-text-editor\" data-id=\"f97c4c4\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tTo know more about the paper, visit: <a href=\"https:\/\/openaccess.thecvf.com\/content\/WACV2024\/papers\/Sarkar_Open-Set_Object_Detection_by_Aligning_Known_Class_Representations_WACV_2024_paper.pdf\" target=\"_blank\" rel=\"noopener\">Open-Set Object Detection by Aligning Known Class\nRepresentations (thecvf.com)<\/a>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9e2f9cc elementor-widget elementor-widget-text-editor\" data-id=\"9e2f9cc\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tTo know more about Sony Research India\u2019s Research Publications, visit the \u2018Publications\u2019\nsection on our \u2018Open Innovation\u2019s page: <a href=\"https:\/\/www.sonyresearchindia.com\/open-innovation\/\" target=\"_blank\" rel=\"noopener\">Open Innovation with Sony R&amp;D \u2013 Sony Research\nIndia<\/a>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>Vishal Chudasama summarises paper titled Cross-Modal 
Fusion&#8230;<\/p>\n","protected":false},"author":1,"featured_media":12602,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"elementor_header_footer","format":"standard","meta":{"footnotes":""},"categories":[22,17],"tags":[],"class_list":["post-12596","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-all-blogs","category-technology","entry"],"yoast_head":"\n<title>Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019 - Sony Research India<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019 - Sony Research India\" \/>\n<meta property=\"og:description\" content=\"Vishal Chudasama summarises paper titled Cross-Modal Fusion...\" \/>\n<meta property=\"og:url\" content=\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\" \/>\n<meta property=\"og:site_name\" content=\"Sony Research India\" \/>\n<meta property=\"article:published_time\" content=\"2024-08-20T08:00:52+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2024-08-20T11:47:40+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png\" \/>\n\t<meta property=\"og:image:width\" content=\"1725\" \/>\n\t<meta property=\"og:image:height\" content=\"805\" \/>\n\t<meta property=\"og:image:type\" 
content=\"image\/png\" \/>\n<meta name=\"author\" content=\"sri_user@2021\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"sri_user@2021\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"3 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\"},\"author\":{\"name\":\"sri_user@2021\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338\"},\"headline\":\"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019\",\"datePublished\":\"2024-08-20T08:00:52+00:00\",\"dateModified\":\"2024-08-20T11:47:40+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\"},\"wordCount\":511,\"publisher\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization\"},\"image\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png\",\"articleSection\":[\"All 
Blogs\",\"Technology\"],\"inLanguage\":\"en-US\"},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\",\"name\":\"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019 - Sony Research India\",\"isPartOf\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage\"},\"image\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png\",\"datePublished\":\"2024-08-20T08:00:52+00:00\",\"dateModified\":\"2024-08-20T11:47:40+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png\",\"contentUrl\":\"https:\/\/whiteriversmedi
asolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png\",\"width\":1725,\"height\":805},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#website\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/\",\"name\":\"Sony Research India\",\"description\":\"\",\"publisher\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization\",\"name\":\"sonyresearchindia\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png\",\"contentUrl\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png\",\"width\":168,\"height\":31,\"caption\":\"sonyresearchindia\"},\"image\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/\"}},{\"@type\":\"Person\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/pers
on\/589cf1e285a7c37cf0cb9feba7ae4338\",\"name\":\"sri_user@2021\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g\",\"caption\":\"sri_user@2021\"},\"sameAs\":[\"http:\/\/whiteriversmediasolutions.com\/staging\/SRI\"]}]}<\/script>\n","yoast_head_json":{"title":"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019 - Sony Research India","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/","og_locale":"en_US","og_type":"article","og_title":"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019 - Sony Research India","og_description":"Vishal Chudasama summarises paper titled Cross-Modal Fusion...","og_url":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/","og_site_name":"Sony Research India","article_published_time":"2024-08-20T08:00:52+00:00","article_modified_time":"2024-08-20T11:47:40+00:00","og_image":[{"width":1725,"height":805,"url":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png","type":"image\/png"}],"author":"sri_user@2021","twitter_card":"summary_large_image","twitter_misc":{"Written by":"sri_user@2021","Est. 
reading time":"3 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#article","isPartOf":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/"},"author":{"name":"sri_user@2021","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338"},"headline":"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019","datePublished":"2024-08-20T08:00:52+00:00","dateModified":"2024-08-20T11:47:40+00:00","mainEntityOfPage":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/"},"wordCount":511,"publisher":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization"},"image":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage"},"thumbnailUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png","articleSection":["All Blogs","Technology"],"inLanguage":"en-US"},{"@type":"WebPage","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/","name":"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly Detection\u2019 - Sony Research 
India","isPartOf":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#website"},"primaryImageOfPage":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage"},"image":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage"},"thumbnailUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png","datePublished":"2024-08-20T08:00:52+00:00","dateModified":"2024-08-20T11:47:40+00:00","breadcrumb":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#primaryimage","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png","contentUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/08\/blog-1.png","width":1725,"height":805},{"@type":"BreadcrumbList","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-cross-modal-fusion-and-attention-mechanism-for-weakly-supervised-video-anomaly-detection\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/whiteriversmediasolutions.com\/Sony\/"},{"@type":"ListItem","position":2,"name":"Summarizing \u2018Cross-Modal Fusion and Attention Mechanism for Weakly Supervised Video Anomaly 
Detection\u2019"}]},{"@type":"WebSite","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#website","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/","name":"Sony Research India","description":"","publisher":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/whiteriversmediasolutions.com\/Sony\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization","name":"sonyresearchindia","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png","contentUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png","width":168,"height":31,"caption":"sonyresearchindia"},"image":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/"}},{"@type":"Person","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338","name":"sri_user@2021","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g","caption":"sri_user@2021"},"sameAs":["http:\/\/whiteriversmediasolutions.com\/staging\/SRI"]}]}},"_links":{"self":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts\/12596","targetHints":{"allow":["GET"]}}],"col
lection":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/comments?post=12596"}],"version-history":[{"count":54,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts\/12596\/revisions"}],"predecessor-version":[{"id":12657,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts\/12596\/revisions\/12657"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/media\/12602"}],"wp:attachment":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/media?parent=12596"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/categories?post=12596"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/tags?post=12596"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}