import glob
import sys
import os
import re
import datetime
import time
import requests
import asyncio

from google.cloud import speech_v1
from google.cloud import speech
import io
from google.cloud import storage

from scipy.io.wavfile import read as read_wav

# Directory holding the Google Cloud service-account key files; it is joined
# with 'gcs_uri.json' when the storage client is built in upload_to_bucket().
GCI_JSON = "/home/ubuntu/google_cred/"

# Exported for Google client libraries that are created without explicit
# credentials (SpeechClient() in this file is constructed that way).
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = (
    "/home/ubuntu/google_cred/formatrad-7ed5e4c68b97.json"
)

# Directory prefix under which the transcript text files are written.
UPLOAD_AUDIO = "/var/www/html/api/google_transcirption_test/"
def upload_to_bucket(blob_name, path_to_file, bucket_name):
    """Upload a local file to a Cloud Storage bucket.

    Args:
      blob_name: Name of the object to create inside the bucket.
      path_to_file: Path of the local file whose contents are uploaded.
      bucket_name: Name of the destination bucket.

    Returns:
      The ``public_url`` attribute of the uploaded blob.
    """
    # Authenticate explicitly with the service-account key file kept in the
    # credentials directory rather than relying on the environment.
    key_file = os.path.join(GCI_JSON, 'gcs_uri.json')
    gcs = storage.Client.from_service_account_json(key_file)

    target = gcs.get_bucket(bucket_name).blob(blob_name)
    target.upload_from_filename(path_to_file)

    return target.public_url
    

def sample_long_running_recognize(local_file_path, filename, transcript_path):
    """
    Transcribe a long audio file using asynchronous speech recognition.

    The WAV file is uploaded to the ``bucket_gcuri`` bucket under the name
    ``filename`` and recognised from its ``gs://`` URI with speaker
    diarization enabled. The words tagged as speaker 0 are written to
    ``UPLOAD_AUDIO`` + ``filename`` with every ``.wav`` replaced by ``.txt``.

    Args:
      local_file_path: Path to local audio file, e.g. /path/audio.wav
      filename: Object name to use inside the bucket; also the base of the
        transcript file name.
      transcript_path: Not used by this function; the output location is
        derived from ``UPLOAD_AUDIO`` and ``filename``. Kept so existing
        callers keep working.

    Returns:
      A tuple ``(speaker_0_text, speaker_1_text)``. Each element holds that
      speaker tag's words in recognition order, every word followed by a
      single space (empty string when the tag never occurred).
    """
    client = speech_v1.SpeechClient()

    # The language of the supplied audio
    language_code = "en-US"

    # Only the sample rate is needed for the request; the decoded samples
    # returned alongside it are discarded.
    sample_rate_hertz, _ = read_wav(local_file_path)

    # The recogniser reads the audio from Cloud Storage. The URL returned by
    # the upload helper is not used: the request needs the
    # gs://<bucket_name>/<file_path_inside_bucket> form instead.
    bucket_name = 'bucket_gcuri'
    upload_to_bucket(filename, local_file_path, bucket_name)
    storage_uri = f"gs://{bucket_name}/{filename}"
    print("sotrage={}".format(storage_uri))

    diarization_config = speech.SpeakerDiarizationConfig(
        enable_speaker_diarization=True,
        min_speaker_count=1,
        max_speaker_count=1,
    )
    config = {
        "language_code": language_code,
        "sample_rate_hertz": sample_rate_hertz,
        # Encoding of audio data sent. This sample sets this explicitly.
        "encoding": speech.RecognitionConfig.AudioEncoding.LINEAR16,
        "model": "video",
        "diarization_config": diarization_config,
    }
    audio = {"uri": storage_uri}

    operation = client.long_running_recognize(config=config, audio=audio)

    print(u"Waiting for operation to complete...")
    response = operation.result()

    # Words grouped by speaker tag. Tags 0 and 1 are pre-seeded because they
    # are always returned; setdefault() below keeps any other tag from
    # raising KeyError.
    words_by_speaker = {0: [], 1: []}
    for result in response.results:
        # A result without alternatives carries no words; skip it instead of
        # failing on the [0] lookup.
        if not result.alternatives:
            continue

        # First alternative is the most probable result
        alternative = result.alternatives[0]
        print(u"Transcript: {}".format(alternative.transcript))

        for word_info in alternative.words:
            print("word: '{}', speaker_tag: {}".format(word_info.word, word_info.speaker_tag))
            tag = int(word_info.speaker_tag)
            words_by_speaker.setdefault(tag, []).append(word_info.word)

    # Each word keeps a trailing space, matching the format callers already
    # receive.
    speaker_0_text = "".join(word + ' ' for word in words_by_speaker[0])
    speaker_1_text = "".join(word + ' ' for word in words_by_speaker[1])

    # Opening in 'w' mode creates the file, so no shell `touch` is needed
    # (which also avoids passing the file name through a shell). The context
    # manager guarantees the handle is closed even if the write fails.
    transcript_file = f"{UPLOAD_AUDIO}{filename.replace('.wav','.txt')}"
    with open(transcript_file, 'w', encoding="utf-8") as outfile:
        outfile.write(speaker_0_text)

    return speaker_0_text, speaker_1_text


# Transcribe the sample recording as soon as this module is executed. The
# same file name serves as the local path and as the bucket object name.
sample_long_running_recognize(
    local_file_path="4626_10013_2023-11-27-18-46-442.wav",
    filename="4626_10013_2023-11-27-18-46-442.wav",
    transcript_path="4626_10013_2023-11-27-18-46-442.txt",
)