
{"id":13180,"date":"2025-05-20T10:33:43","date_gmt":"2025-05-20T10:33:43","guid":{"rendered":"https:\/\/whiteriversmediasolutions.com\/Sony\/summarizing-faster-machine-translation-ensembling-with-reinforcement-learning-and-competitive-correction-copy\/"},"modified":"2025-05-20T10:56:38","modified_gmt":"2025-05-20T10:56:38","slug":"enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization","status":"publish","type":"post","link":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/","title":{"rendered":"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization"},"content":{"rendered":"\t\t<div data-elementor-type=\"wp-post\" data-elementor-id=\"13180\" class=\"elementor elementor-13180\" data-elementor-post-type=\"post\">\n\t\t\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-cd44eb5 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"cd44eb5\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-9f11b70\" data-id=\"9f11b70\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-215a70e elementor-widget elementor-widget-heading\" data-id=\"215a70e\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">BLOGS<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section 
elementor-element elementor-element-28dc161 elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"28dc161\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-63cf269\" data-id=\"63cf269\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-6837436 elementor-widget elementor-widget-heading\" data-id=\"6837436\" data-element_type=\"widget\" data-widget_type=\"heading.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t<h2 class=\"elementor-heading-title elementor-size-default\">Summarizing \u2018Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization\u2019<\/h2>\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9bd1630 elementor-widget elementor-widget-text-editor\" data-id=\"9bd1630\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tKumud Tripathi, Raj Gothi, Pankaj Wasnik\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7a034cb elementor-hidden-desktop elementor-hidden-tablet elementor-hidden-mobile elementor-widget elementor-widget-text-editor\" data-id=\"7a034cb\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>30<sup>th<\/sup> September 2024<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-a7d1e72 elementor-widget elementor-widget-image\" data-id=\"a7d1e72\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div 
class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img fetchpriority=\"high\" decoding=\"async\" width=\"635\" height=\"521\" src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing1.png\" class=\"attachment-medium_large size-medium_large wp-image-13183\" alt=\"\" srcset=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing1.png 635w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing1-300x246.png 300w\" sizes=\"(max-width: 635px) 100vw, 635px\" style=\"width:100%;height:82.05%;max-width:635px\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-7ed0fea elementor-widget elementor-widget-text-editor\" data-id=\"7ed0fea\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p style=\"text-align: center;\">Overview of the proposed framework<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9202657 elementor-widget elementor-widget-text-editor\" data-id=\"9202657\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Kumud Tripathi summarises the paper titled <strong>Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization<\/strong>, co-authored with Raj Gothi and Pankaj Wasnik and accepted at <a href=\"https:\/\/2025.ieeeicassp.org\/\">ICASSP 2025<\/a>.<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f0a3e28 elementor-widget elementor-widget-text-editor\" data-id=\"f0a3e28\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h4><strong>Introduction: 
<\/strong><\/h4>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-d95d9a3 elementor-widget elementor-widget-text-editor\" data-id=\"d95d9a3\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Advancements in automatic speech recognition (ASR) have been driven by large foundational models like Whisper, which leverage multilingual speech recognition (MSR) to improve accuracy by utilizing linguistic similarities across languages. Whisper models adapted for Indian languages incorporate techniques such as prompting to enhance recognition accuracy. Despite these advancements, Whisper\u2019s effectiveness in Indian languages is hampered by deficiencies in tokenization. The tokenization process, which is crucial for ASR speed, affects low-resource languages more heavily. High-resource languages benefit from extensive token sets, whereas low-resource languages face slower inference times due to fewer tokens in the pre-trained Whisper tokenizer.<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-1246231 elementor-widget elementor-widget-text-editor\" data-id=\"1246231\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<strong>To address these issues, we introduce two innovative strategies:<\/strong>\n<ul>\n \t<li><strong>Prompt-tuning with language family information: <\/strong>We utilize prompt-tuning with language family information to reduce Word Error Rate (WER) by exploiting phonetic and linguistic similarities.\n\n<\/li>\n \t<li><strong>Customized Tokenizer: <\/strong>We introduce a customized tokenizer for Indian languages to improve Whisper\u2019s efficiency during inference 
time.<\/li>\n<\/ul>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-424bb7e elementor-widget elementor-widget-text-editor\" data-id=\"424bb7e\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h4><strong>Key Results:<\/strong><\/h4>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-8981a35 elementor-widget elementor-widget-image\" data-id=\"8981a35\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"750\" height=\"145\" data-src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing2-768x148.png\" class=\"attachment-medium_large size-medium_large wp-image-13184 lazyload\" alt=\"\" data-srcset=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing2-768x148.png 768w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing2-300x58.png 300w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/enhancing2.png 966w\" data-sizes=\"(max-width: 750px) 100vw, 750px\" style=\"--smush-placeholder-width: 750px; --smush-placeholder-aspect-ratio: 750\/145;width:100%;height:19.25%;max-width:966px\" src=\"data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-1556a07 elementor-widget elementor-widget-text-editor\" data-id=\"1556a07\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>Table: WER (in %) and inference time (in min.) 
on Kathbath using Whisper Medium-based baseline and proposed models.<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-d5416e1 elementor-widget elementor-widget-text-editor\" data-id=\"d5416e1\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<h4><strong>Conclusion:<\/strong><\/h4>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9aff2cc elementor-widget elementor-widget-text-editor\" data-id=\"9aff2cc\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>We demonstrate a significant advancement in multilingual speech recognition for Indian languages using the Whisper model. We have successfully improved the model accuracy for underrepresented Indian languages. By incorporating prompt-tuning with language family information, we leveraged linguistically related languages. Additionally, we introduced a new tokenizer to enhance the model\u2019s efficiency in terms of inference time by reducing the number of generated tokens without compromising performance. Our experiments consistently show that both prompt fine-tuning and the proposed tokenizer individually outperform baseline ASR models, and their combination achieves an optimal balance between WER and inference speed. 
The resulting efficient Whisper model provides a flexible solution, enabling users to adjust the trade-off between accuracy and speed according to their specific application needs.<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-67d4f0b elementor-widget elementor-widget-text-editor\" data-id=\"67d4f0b\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>To know more about Sony Research India\u2019s Research Publications, visit the \u2018Publications\u2019 section on our \u2018Open Innovation\u2019 page:\u00a0<a href=\"https:\/\/www.sonyresearchindia.com\/open-innovation\/\">Open Innovation with Sony R&amp;D \u2013 Sony Research India<\/a><\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-0362925 elementor-hidden-desktop elementor-hidden-tablet elementor-hidden-mobile elementor-widget elementor-widget-text-editor\" data-id=\"0362925\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t<p>In most of the cases, it has been found that Content Driven sessions outperform the time driven sessions. 
The results are obtained on 6 baselines: STAMP, NARM, GRU4Rec, CD-HRNN, Tr4Rec on datasets like Movielens (Movies), GoodRead Book, LastFM (Music), Amazon (e-commerce).<\/p>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-c0518a1 elementor-hidden-desktop elementor-hidden-tablet elementor-hidden-mobile elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"c0518a1\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-b15be70\" data-id=\"b15be70\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap\">\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-55dd72b\" data-id=\"55dd72b\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-e06d72d elementor-widget elementor-widget-image\" data-id=\"e06d72d\" data-element_type=\"widget\" data-widget_type=\"image.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<img decoding=\"async\" width=\"512\" height=\"322\" data-src=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/02\/19th-Cover-Image-2.png\" class=\"attachment-full size-full wp-image-11786 lazyload\" alt=\"\" data-srcset=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/02\/19th-Cover-Image-2.png 512w, https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2024\/02\/19th-Cover-Image-2-300x189.png 300w\" data-sizes=\"(max-width: 512px) 100vw, 512px\" style=\"--smush-placeholder-width: 512px; 
--smush-placeholder-aspect-ratio: 512\/322;width:100%;height:62.89%;max-width:512px\" src=\"data:image\/gif;base64,R0lGODlhAQABAAAAACH5BAEKAAEALAAAAAABAAEAAAICTAEAOw==\" \/>\t\t\t\t\t\t\t\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t<div class=\"elementor-column elementor-col-33 elementor-top-column elementor-element elementor-element-fd52b32\" data-id=\"fd52b32\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap\">\n\t\t\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<section class=\"elementor-section elementor-top-section elementor-element elementor-element-9b69060 elementor-hidden-desktop elementor-hidden-tablet elementor-hidden-mobile elementor-section-boxed elementor-section-height-default elementor-section-height-default\" data-id=\"9b69060\" data-element_type=\"section\">\n\t\t\t\t\t\t<div class=\"elementor-container elementor-column-gap-default\">\n\t\t\t\t\t<div class=\"elementor-column elementor-col-100 elementor-top-column elementor-element elementor-element-cfbe302\" data-id=\"cfbe302\" data-element_type=\"column\">\n\t\t\t<div class=\"elementor-widget-wrap elementor-element-populated\">\n\t\t\t\t\t\t<div class=\"elementor-element elementor-element-6d045fb elementor-widget elementor-widget-text-editor\" data-id=\"6d045fb\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tThe introduced modules and techniques help the proposed method to align known class\nrepresentations effectively so that it can detect the unknown objects accurately. 
To validate\nthis, we carried out extensive experiments &#038; ablation studies and found that the proposed\nmethod outperforms existing SOTA methods with significant improvement on the MS-COCO\n&#038; PASCAL VOC dataset for the OSOD task.\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-f97c4c4 elementor-widget elementor-widget-text-editor\" data-id=\"f97c4c4\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tTo know more about the paper, visit: <a href=\"https:\/\/openaccess.thecvf.com\/content\/WACV2024\/papers\/Sarkar_Open-Set_Object_Detection_by_Aligning_Known_Class_Representations_WACV_2024_paper.pdf\" target=\"_blank\" rel=\"noopener\">Open-Set Object Detection by Aligning Known Class\nRepresentations (thecvf.com)<\/a>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t<div class=\"elementor-element elementor-element-9e2f9cc elementor-widget elementor-widget-text-editor\" data-id=\"9e2f9cc\" data-element_type=\"widget\" data-widget_type=\"text-editor.default\">\n\t\t\t\t<div class=\"elementor-widget-container\">\n\t\t\t\t\t\t\t\t\tTo know more about Sony Research India\u2019s Research Publications, visit the \u2018Publications\u2019\nsection on our \u2018Open Innovation\u2019s page: <a href=\"https:\/\/www.sonyresearchindia.com\/open-innovation\/\" target=\"_blank\" rel=\"noopener\">Open Innovation with Sony R&amp;D \u2013 Sony Research\nIndia<\/a>\t\t\t\t\t\t\t\t<\/div>\n\t\t\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/div>\n\t\t\t\t\t<\/div>\n\t\t<\/section>\n\t\t\t\t<\/div>\n\t\t","protected":false},"excerpt":{"rendered":"<p>Kumud Tripathi summarises paper titled Enhancing 
Whisper\u2019s&#8230;<\/p>\n","protected":false},"author":1,"featured_media":13188,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"elementor_header_footer","format":"standard","meta":{"footnotes":""},"categories":[22,17],"tags":[],"class_list":["post-13180","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-all-blogs","category-technology","entry"],"yoast_head":"\n<title>Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization - Sony Research India<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\" \/>\n<meta property=\"og:locale\" content=\"en_US\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization - Sony Research India\" \/>\n<meta property=\"og:description\" content=\"Kumud Tripathi summarises paper titled Enhancing Whisper\u2019s...\" \/>\n<meta property=\"og:url\" content=\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\" \/>\n<meta property=\"og:site_name\" content=\"Sony Research India\" \/>\n<meta property=\"article:published_time\" content=\"2025-05-20T10:33:43+00:00\" \/>\n<meta property=\"article:modified_time\" content=\"2025-05-20T10:56:38+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png\" \/>\n\t<meta property=\"og:image:width\" content=\"380\" \/>\n\t<meta property=\"og:image:height\" content=\"190\" \/>\n\t<meta property=\"og:image:type\" 
content=\"image\/png\" \/>\n<meta name=\"author\" content=\"sri_user@2021\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"Written by\" \/>\n\t<meta name=\"twitter:data1\" content=\"sri_user@2021\" \/>\n\t<meta name=\"twitter:label2\" content=\"Est. reading time\" \/>\n\t<meta name=\"twitter:data2\" content=\"4 minutes\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\/\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#article\",\"isPartOf\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\"},\"author\":{\"name\":\"sri_user@2021\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338\"},\"headline\":\"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization\",\"datePublished\":\"2025-05-20T10:33:43+00:00\",\"dateModified\":\"2025-05-20T10:56:38+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\"},\"wordCount\":537,\"publisher\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization\"},\"image\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png\",\"articleSection\":[\"All 
Blogs\",\"Technology\"],\"inLanguage\":\"en-US\"},{\"@type\":\"WebPage\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\",\"name\":\"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization - Sony Research India\",\"isPartOf\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage\"},\"image\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage\"},\"thumbnailUrl\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png\",\"datePublished\":\"2025-05-20T10:33:43+00:00\",\"dateModified\":\"2025-05-20T10:56:38+00:00\",\"breadcrumb\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#breadcrumb\"},\"inLanguage\":\"en-US\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png\",\"contentUrl\":\"https:\/\/whi
teriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png\",\"width\":380,\"height\":190},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"Home\",\"item\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#website\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/\",\"name\":\"Sony Research India\",\"description\":\"\",\"publisher\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization\"},\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"en-US\"},{\"@type\":\"Organization\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization\",\"name\":\"sonyresearchindia\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/\",\"logo\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/\",\"url\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png\",\"contentUrl\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png\",\"width\":168,\"height\":31,\"caption\":\"sonyresearchindia\"},\"image\":{\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/\"}},{\"@type\":\"Person\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/S
ony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338\",\"name\":\"sri_user@2021\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"en-US\",\"@id\":\"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/image\/\",\"url\":\"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g\",\"contentUrl\":\"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g\",\"caption\":\"sri_user@2021\"},\"sameAs\":[\"http:\/\/whiteriversmediasolutions.com\/staging\/SRI\"]}]}<\/script>\n","yoast_head_json":{"title":"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization - Sony Research India","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/","og_locale":"en_US","og_type":"article","og_title":"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization - Sony Research India","og_description":"Kumud Tripathi summarises paper titled Enhancing Whisper\u2019s...","og_url":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/","og_site_name":"Sony Research India","article_published_time":"2025-05-20T10:33:43+00:00","article_modified_time":"2025-05-20T10:56:38+00:00","og_image":[{"width":380,"height":190,"url":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png","type":"image\/png"}],"author":"sri_user@2021","twitter_card":"summary_large_image","twitter_misc":{"Written by":"sri_user@2021","Est. 
reading time":"4 minutes"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#article","isPartOf":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/"},"author":{"name":"sri_user@2021","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338"},"headline":"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization","datePublished":"2025-05-20T10:33:43+00:00","dateModified":"2025-05-20T10:56:38+00:00","mainEntityOfPage":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/"},"wordCount":537,"publisher":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization"},"image":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage"},"thumbnailUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png","articleSection":["All Blogs","Technology"],"inLanguage":"en-US"},{"@type":"WebPage","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/","name":"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and Tokenization - Sony Research 
India","isPartOf":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#website"},"primaryImageOfPage":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage"},"image":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage"},"thumbnailUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png","datePublished":"2025-05-20T10:33:43+00:00","dateModified":"2025-05-20T10:56:38+00:00","breadcrumb":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#breadcrumb"},"inLanguage":"en-US","potentialAction":[{"@type":"ReadAction","target":["https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/"]}]},{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#primaryimage","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png","contentUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2025\/05\/BlogCover_-Kumud_-ICASSP-25.png","width":380,"height":190},{"@type":"BreadcrumbList","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/enhancing-whispers-accuracy-and-speed-for-indian-languages-through-prompt-tuning-and-tokenization\/#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"Home","item":"https:\/\/whiteriversmediasolutions.com\/Sony\/"},{"@type":"ListItem","position":2,"name":"Enhancing Whisper\u2019s Accuracy and Speed for Indian Languages through Prompt-Tuning and 
Tokenization"}]},{"@type":"WebSite","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#website","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/","name":"Sony Research India","description":"","publisher":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization"},"potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/whiteriversmediasolutions.com\/Sony\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"en-US"},{"@type":"Organization","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#organization","name":"sonyresearchindia","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/","logo":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/","url":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png","contentUrl":"https:\/\/whiteriversmediasolutions.com\/Sony\/uvaftoap\/2023\/03\/Sony_Logo.png","width":168,"height":31,"caption":"sonyresearchindia"},"image":{"@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/logo\/image\/"}},{"@type":"Person","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/589cf1e285a7c37cf0cb9feba7ae4338","name":"sri_user@2021","image":{"@type":"ImageObject","inLanguage":"en-US","@id":"https:\/\/whiteriversmediasolutions.com\/Sony\/#\/schema\/person\/image\/","url":"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/e0c9edcfb42567c720cc449d4b1e0812298e8172a5a7e4296127a0adba7e705b?s=96&d=mm&r=g","caption":"sri_user@2021"},"sameAs":["http:\/\/whiteriversmediasolutions.com\/staging\/SRI"]}]}},"_links":{"self":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts\/13180","targetHints":{"allow":["GET"]}}],"collec
tion":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/comments?post=13180"}],"version-history":[{"count":21,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts\/13180\/revisions"}],"predecessor-version":[{"id":13216,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/posts\/13180\/revisions\/13216"}],"wp:featuredmedia":[{"embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/media\/13188"}],"wp:attachment":[{"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/media?parent=13180"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/categories?post=13180"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/whiteriversmediasolutions.com\/Sony\/wp-json\/wp\/v2\/tags?post=13180"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}