Wednesday, 25 August 2021

Download the IBM Watson Assistant Conversation Logs

Logs Python script

The export_logs.py file is a Python script that you can use to export logs from a workspace, and convert them into CSV format.

Steps before downloading the logs

  1. You must have Python installed on your local machine.
  2. Once Python is installed, you must download the required Python libraries to proceed further.
  3. The required libraries are pandas and watson-developer-cloud.
  4. Install these libraries with the commands pip install pandas and pip install ibm-watson.
  5. If you get any other error, search for it on Google and follow the Stack Overflow workaround. The errors are usually related to a missing module; simply install it on your machine using the install command.
  6. Command to execute the Python script:  python export_logs.py <IBM Workspace SKILL ID> --apikey <IBM Workspace API KEY>
  7. In order to filter the IBM data, you can use this argument under the "# Set up arguments" section.

#parser.add_argument('--filter', help='Search filter to use.', type=str, default='response_timestamp>='+str(last_monday)+'T00:00:00.000Z'+',response_timestamp<='+str(today))

---------------------------------------------------------------------------

import pandas as pd

import argparse

import json

from datetime import date

import datetime

import watson_developer_cloud

from pandas.io.json import json_normalize

from watson_developer_cloud import AssistantV1 as WatsonAssistant

from urllib.parse import urlparse, parse_qs

from ibm_watson import AssistantV1

from ibm_cloud_sdk_core.authenticators import IAMAuthenticator

from ibm_watson import ApiException

import urllib3

urllib3.disable_warnings()


# Date window for the default log filter: today and one week earlier.
today = date.today()
print("Today's date:", today)

# NOTE(review): despite the name, this is simply 7 days before today,
# not necessarily a Monday.
last_monday = today - datetime.timedelta(weeks=1)
print('From: ' + str(last_monday) + ' To: ' + str(today))


# Set up command-line arguments.
parser = argparse.ArgumentParser()

parser.add_argument('workspace_id', help='Watson Assistant workspace ID', type=str)
parser.add_argument('--userpass', help='Watson Assistant service username:password. Cannot be used with --apikey', type=str, default=None)
# Bug fix: the default used to be a placeholder string, so --apikey was
# never None; that made the userpass branch and the "neither given" error
# branch below unreachable, and sent the placeholder as a real API key.
parser.add_argument('--apikey', help='Watson Assistant API Key. Cannot be used with --userpass', type=str, default=None)
parser.add_argument('--filename', help='Output file name.', type=str, default='Watson_Log_' + str(today) + '.csv')
parser.add_argument('--filetype', help='Output file type. Can be: CSV, TSV, XLSX, JSON (default)', type=str, default='CSV', choices=['CSV', 'TSV', 'XLSX', 'JSON'])
parser.add_argument('--url', help='Default is https://gateway-fra.watsonplatform.net/assistant/api', type=str, default='https://gateway-fra.watsonplatform.net/assistant/api')
# Bug fix: the help text said "Default = 2018-09-20" while the actual
# default was 2020-04-01; the two now agree.
parser.add_argument('--version', help='Default = 2020-04-01', type=str, default='2020-04-01')
parser.add_argument('--totalpages', help='Maximum number of pages to pull. Default is 999', type=int, default=999)
parser.add_argument('--pagelimit', help='Maximum number of records to a page. Default is 200.', type=int, default=200)
parser.add_argument('--filter', help='Search filter to use.', type=str, default='')

# Example filter restricting logs to the last week:
#parser.add_argument('--filter', help='Search filter to use.', type=str, default='response_timestamp>='+str(last_monday)+'T00:00:00.000Z'+',response_timestamp<='+str(today))

# Parse the command line. This runs at script start; a missing required
# workspace_id makes argparse print usage and exit.
args = parser.parse_args()


# Usage: python export.py <IBM Workspace SKILL ID> --apikey <IBM Workspace API KEY>


## This part is used for saving dataframes. 

## Short names for the column labels used when building the output dataframe.

# Log-record metadata fields.
f_conversation_id = 'conversation_id'
f_request_timestamp = 'request_timestamp'
f_response_timestamp = 'response_timestamp'

# Dialog-turn content fields.
f_user_input = 'User Input'
f_output = 'Output'
f_intent = 'Intent'
f_confidence = 'Confidence'
f_exit_reason = 'Exit Reason'
f_logging = 'Logging'
f_context = 'Context'

# Custom context variables carried in each log record's context object.
f_chatbot_name = 'chatbot_name'
f_sys_full_name = 'sys-full-name'
f_country = 'country'
f_locationName = 'locationName'
f_cityName = 'cityName'
f_loginName = 'loginName'
f_chatId = 'chatId'
f_sessionId = 'sessionId'

# Column order of the exported file.
columns = [
    f_chatbot_name, f_conversation_id, f_chatId, f_sessionId,
    f_sys_full_name, f_loginName, f_country, f_cityName, f_locationName,
    f_request_timestamp, f_response_timestamp, f_user_input, f_output,
    f_intent, f_confidence, f_exit_reason, f_logging, f_context,
]


## Saving methods. 

def save_json(data=None, file_name=None):
    """Serialize *data* to *file_name* as a single JSON document."""
    handle = open(file_name, 'w')
    try:
        handle.write(json.dumps(data))
    finally:
        handle.close()


def save_xsv(data=None, sep=',', file_name=None):
    """Write the log pages in *data* to a separated-value file.

    sep selects the delimiter (',' for CSV, '\\t' for TSV). Does nothing
    when no log records were found. Falls back to args.filename when
    file_name is not supplied, matching the original behavior.
    """
    df = convert_json_to_dataframe(data)
    if df is not None:
        # Bug fix: previously wrote to args.filename unconditionally,
        # silently ignoring the file_name parameter.
        target = file_name if file_name is not None else args.filename
        df.to_csv(target, encoding='utf8', sep=sep, index=False)


def save_xlsx(data=None, file_name=None):
    """Write the log pages in *data* to an Excel (.xlsx) file.

    Does nothing when no log records were found. Falls back to
    args.filename when file_name is not supplied, matching the original
    behavior.
    """
    df = convert_json_to_dataframe(data)
    if df is not None:
        # Bug fix: previously wrote to args.filename unconditionally,
        # silently ignoring the file_name parameter.
        target = file_name if file_name is not None else args.filename
        df.to_excel(target, index=False)


def convert_json_to_dataframe(data=None):
    """Flatten Watson Assistant log pages into a pandas DataFrame.

    data is a list of pages, each page being a list of log records as
    returned by Assistant.list_logs(). Returns a DataFrame sorted by
    conversation and request time, or None (after printing a notice)
    when no records are present.
    """
    rows = []

    for data_records in data:
        for o in data_records:
            row = {}

            # Shorthand for the response, its context, and the system object.
            r = o['response']
            ctx = r['context']
            s = ctx['system']

            row[f_request_timestamp] = o[f_request_timestamp]
            row[f_response_timestamp] = o[f_response_timestamp]

            # Context fields. Use .get() so one record missing a custom
            # context variable does not abort the whole export with a
            # KeyError (the old code indexed these directly).
            row[f_conversation_id] = ctx.get(f_conversation_id)
            row[f_chatbot_name] = ctx.get(f_chatbot_name)
            row[f_sys_full_name] = ctx.get(f_sys_full_name)
            row[f_loginName] = ctx.get(f_loginName)
            # row[f_country] = ctx.get(f_country)
            # row[f_cityName] = ctx.get(f_cityName)
            # row[f_locationName] = ctx.get(f_locationName)
            row[f_chatId] = ctx.get(f_chatId)
            row[f_sessionId] = ctx.get(f_sessionId)

            if 'text' in r['input']:
                row[f_user_input] = r['input']['text']
            if 'text' in r['output']:
                row[f_output] = ' '.join(r['output']['text'])

            # Only the top-ranked intent is exported.
            if len(r['intents']) > 0:
                row[f_confidence] = r['intents'][0]['confidence']
                row[f_intent] = r['intents'][0]['intent']

            if 'branch_exited_reason' in s:
                row[f_exit_reason] = s['branch_exited_reason']

            # NOTE(review): key here is 'log_messaging'; Watson log records
            # normally carry 'log_messages' -- confirm against live data.
            if 'log_messaging' in r['output']:
                row[f_logging] = r['output']['log_messaging']

            # Keep the full raw context as JSON for debugging.
            row[f_context] = json.dumps(ctx)

            rows.append(row)

    # Bug fix: the old guard (data == [[]]) only caught the exact
    # single-empty-page shape; detect "no records at all" generically.
    if not rows:
        print('No Logs found. :(')
        return None

    # Build the dataframe in the export column order.
    df = pd.DataFrame(rows, columns=columns)

    # Clean up: blank out NaN/None and convert timestamp strings.
    df = df.fillna('')
    df[f_request_timestamp] = pd.to_datetime(df[f_request_timestamp])
    df[f_response_timestamp] = pd.to_datetime(df[f_response_timestamp])

    # Sort by conversation ID then request time so each conversation
    # reads top-to-bottom chronologically.
    df = df.sort_values([f_conversation_id, f_request_timestamp], ascending=[True, True])

    return df


## Make the connection to Watson Assistant, choosing the auth mode from
## the mutually exclusive --userpass / --apikey options.
if args.userpass is not None and args.apikey is None:
    # Legacy username:password auth via the deprecated
    # watson_developer_cloud SDK.
    up = args.userpass.split(':')
    username = up[0]
    password = up[1]
    # Bug fix: bind the client to 'assistant' (the name the download loop
    # uses); it was previously bound to 'c', so this path crashed with a
    # NameError at the first list_logs call.
    assistant = WatsonAssistant(url=args.url, version=args.version, username=username, password=password)

elif args.apikey is not None and args.userpass is None:
    # IAM API-key auth via the current ibm-watson SDK.
    authenticator = IAMAuthenticator(args.apikey)
    # Consistency fix: honor --version instead of hard-coding '2020-04-01'
    # (the default is unchanged).
    assistant = AssistantV1(
        version=args.version,
        authenticator=authenticator
    )

    # NOTE(review): service URL is hard-coded to the Sydney region while
    # --url defaults to Frankfurt -- confirm which region is intended.
    assistant.set_service_url('https://api.au-syd.assistant.watson.cloud.ibm.com')

    # Bug fix: removed a redundant list_logs() call made here whose result
    # was discarded; the download loop below performs the real fetch.

else:
    print('You must set --userpass or --apikey to run. Exiting.')
    exit(1)




## Download the logs page by page, following the pagination cursor.

# Accumulators: j holds one list of log records per fetched page;
# count is the running total of records.
j = []
page_count = 1
cursor = None
count = 0

# Seed value: 'pagination' only needs to be truthy for the first pass.
x = { 'pagination': 'DUMMY' }
while x['pagination']:
    # Stop once the configured page cap (--totalpages) is reached.
    if page_count > args.totalpages:
        break

    print('Reading page {}.'.format(page_count))
    x = assistant.list_logs(workspace_id=args.workspace_id,cursor=cursor,page_limit=args.pagelimit, filter=args.filter)
    x = x.result  # Newer SDK returns a DetailedResponse; unwrap to a plain dict.

    j.append(x['logs'])
    count = count + len(x['logs'])

    page_count = page_count + 1

    # Advance the cursor from the 'next_url' query string; when the
    # service returns an empty 'pagination' object, the while-condition
    # ends the loop.
    # NOTE(review): if 'pagination' is non-empty but lacks 'next_url',
    # cursor keeps its previous value and the same page would be
    # refetched -- confirm the service never returns that shape.
    if 'pagination' in x and 'next_url' in x['pagination']:
        p = x['pagination']['next_url']
        u = urlparse(p)
        query = parse_qs(u.query)
        cursor = query['cursor'][0]

    

## Determine how the file should be saved.
# Normalize so --filetype is effectively case-insensitive.
args.filetype = args.filetype.upper()

# NOTE(review): --filename defaults to 'Watson_Log_<date>.csv' even for
# TSV/XLSX/JSON output; pass an explicit --filename for those types.
if args.filetype == 'CSV':
    save_xsv(data=j, sep=',', file_name=args.filename)
elif args.filetype == 'TSV':
    save_xsv(data=j, sep='\t', file_name=args.filename)
elif args.filetype == 'XLSX':
    save_xlsx(data=j, file_name=args.filename)
else:
    # Bug fix: removed a stray trailing comma that wrapped this call's
    # (None) result in a throwaway one-element tuple.
    save_json(data=j, file_name=args.filename)

print('Writing {} records to: {} as file type: {}'.format(count, args.filename, args.filetype))


--------------------------------------------------

Code Execution





No comments:

Post a Comment

Thanks for the comments. I'll get back to you shortly.

Regards
Tushar