#!/usr/bin/python3

# This file is part of Cockpit.
#
# Copyright (C) 2021 Red Hat, Inc.
#
# Cockpit is free software; you can redistribute it and/or modify it
# under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or
# (at your option) any later version.
#
# Cockpit is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
# Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with Cockpit; If not, see <http://www.gnu.org/licenses/>.

import subprocess
import os
import sys

# Make Cockpit's machinery for test VMs and its browser test API importable:
# the modules are looked up in ./common and ../bots/machine relative to this file.
TEST_DIR = os.path.dirname(__file__)
sys.path.extend([
    os.path.join(TEST_DIR, "common"),
    os.path.join(os.path.dirname(TEST_DIR), "bots/machine"),
])

from machineslib import VirtualMachinesCase  # noqa
from testlib import nondestructive, skipImage, test_main, wait  # noqa

# Images on which testDelete skips the "delete a VM with snapshots" scenario
distrosWithoutSnapshot = [
    "centos-8-stream",
    "debian-stable",
    "rhel-8-4",
    "rhel-8-5",
    "ubuntu-2004",
]


@nondestructive
class TestMachinesLifecycle(VirtualMachinesCase):

    def setUp(self):
        """Run the inherited setup, then tolerate known sudo noise in the journal on ubuntu-2004."""
        super().setUp()

        if self.machine.image not in ["ubuntu-2004"]:
            return

        # older cockpit-system versions try to authorize; wheel is not a thing on Debian/Ubuntu
        sudo_noise = (
            ".* is not in the sudoers file.  This incident will be reported.",
            "Error executing command as another user: Not authorized",
            "This incident has been reported.",
            "Sorry, try again.",
            ".*incorrect password attempt.*",
            "sudo:.*",
            "cannot reauthorize identity.*",
        )
        self.allow_journal_messages(*sudo_noise)

    def testBasic(self):
        # Run the shared lifecycle scenario with its defaults (no explicit user, superuser=True)
        self._testBasic()

    def createUser(self, user_group):
        """Create the account "test_<user_group>_user" on the test machine.

        The account gets *user_group* as supplementary group and the password
        "foobar". A cleanup is registered that kills the user's processes and
        waits until none are left.

        Returns the name of the created user.
        """
        user_name = 'test_' + user_group + '_user'
        self.machine.execute(f'useradd -G {user_group} {user_name} && echo "{user_name}:foobar" | chpasswd')

        # user libvirtd instance tends to SIGABRT with "Failed to find user record for uid .." on shutdown during cleanup
        # so make sure that there are no leftover user processes that bleed into the next test
        kill_and_wait = f"pkill -u {user_name}; while pgrep -u {user_name}; do sleep 0.5; done"
        self.addCleanup(self.machine.execute, kill_and_wait)

        # HACK: ...but it still tends to crash during shutdown (without known stack trace)
        self.allow_journal_messages('Process .*libvirtd.* of user 10.* dumped core.*')

        return user_name

    # FIXME remove this skipImage
    @skipImage(
        'Fails with Rejected send message, 1 matched rules; type="method_call"',
        "rhel-8-4",
        "rhel-8-5",
        "ubuntu-stable",
        "ubuntu-2004",
        "debian-testing",
        "debian-stable",
        "centos-8-stream",
    )
    def testBasicLibvirtUserUnprivileged(self):
        # Run the lifecycle scenario as a member of the "libvirt" group, logged in without superuser
        libvirt_user = self.createUser(user_group='libvirt')
        self._testBasic(user=libvirt_user, superuser=False)

    def testBasicWheelUserUnprivileged(self):
        # A "wheel" member logged in without superuser is only expected to see the empty VM list
        wheel_user = self.createUser(user_group='wheel')
        self._testBasic(user=wheel_user, superuser=False, expect_empty_list=True)

    def _testBasic(self, user=None, superuser=True, expect_empty_list=False):
        """Drive the VM "subVmTest1" (and later siblings) through the Machines page.

        Covers the overview and usage details, suspend/resume, NMI, pause,
        reboot, force off, transient VMs, cloning, a second VM, a libvirtd
        restart, the list filters and the "failed to start" notifications.

        user: account to log in as; passed on to login_and_go()
        superuser: passed on to login_and_go(); the libvirtd restart check is
            only done when this is true (and never on debian-stable)
        expect_empty_list: when true, only check that the listing shows the
            "No VM is running" empty state and return right after that
        """
        b = self.browser
        m = self.machine

        memory_measure = ".memory-usage-chart .pf-c-progress__status > .pf-c-progress__measure"
        vcpu_measure = ".vcpu-usage-chart .pf-c-progress__status > .pf-c-progress__measure"

        args = self.createVm("subVmTest1")

        def consoleLog():
            # current content of the log file that createVm() reported for subVmTest1
            return m.execute("cat {0}".format(args["logfile"]))

        self.login_and_go("/machines", user=user, superuser=superuser)

        if expect_empty_list:
            with b.wait_timeout(20):
                b.wait_in_text("#virtual-machines-listing .pf-c-empty-state", "No VM is running")
            return
        b.wait_in_text("body", "Virtual machines")
        self.waitVmRow("subVmTest1")

        b.wait_in_text("#vm-subVmTest1-state", "Running")
        self.goToVmPage("subVmTest1")
        b.wait_in_text("#vm-subVmTest1-vcpus-count", "1")

        b.wait_in_text("#vm-subVmTest1-boot-order", "disk,network")
        emulated_machine = b.text("#vm-subVmTest1-emulated-machine")
        self.assertTrue(len(emulated_machine) > 0)  # emulated machine varies across test machines

        # switch to and check Usage
        b.click("#vm-subVmTest1-usage")
        b.wait_in_text(memory_measure, "128 MiB")
        b.wait_not_in_text(memory_measure, "0 /")
        # Read the chart again on every poll: a value captured once before the wait
        # can never change, which would turn the wait into a one-shot check
        wait(lambda: float(b.text(memory_measure).split("/ 128 MiB")[0]) > 0.0, delay=3)

        b.wait_in_text(vcpu_measure, "1 vCPU")
        # CPU usage cannot be nonzero with blank image, so just ensure it's a percentage
        wait(lambda: float(b.text(vcpu_measure).split("% of 1 vCPU")[0]) <= 100.0, delay=3)

        # suspend/resume
        m.execute("virsh suspend subVmTest1")
        b.wait_in_text("#vm-subVmTest1-state", "Paused")
        # resume sometimes fails with "unable to execute QEMU command 'cont': Resetting the Virtual Machine is required"
        m.execute('virsh resume subVmTest1 || { virsh destroy subVmTest1 && virsh start subVmTest1; }')
        b.wait_in_text("#vm-subVmTest1-state", "Running")

        # Wait for the system to completely start
        wait(lambda: "login as 'cirros' user." in consoleLog(), delay=3)

        # Send Non-Maskable Interrupt (no change in VM state is expected)
        self.performAction("subVmTest1", "sendNMI")

        b.wait(lambda: "NMI received" in consoleLog())

        # pause
        self.performAction("subVmTest1", "pause")
        # check removing button of disk and network interface when the VM is paused
        b.wait_visible("#delete-subVmTest1-disk-vda:disabled")
        b.wait_visible("#delete-vm-subVmTest1-iface-1:disabled")

        # resume
        self.performAction("subVmTest1", "resume")

        # reboot; the log is emptied first so that only output of this reboot is matched
        m.execute("echo '' > {0}".format(args["logfile"]))
        self.performAction("subVmTest1", "reboot")
        wait(lambda: "reboot: Power down" in consoleLog(), delay=3)
        b.wait_in_text("#vm-subVmTest1-state", "Running")

        # force reboot
        m.execute("echo '' > {0}".format(args["logfile"]))
        self.performAction("subVmTest1", "forceReboot")
        wait(lambda: "Initializing cgroup subsys" in consoleLog(), delay=3)
        b.wait_in_text("#vm-subVmTest1-state", "Running")

        # shut off
        self.performAction("subVmTest1", "forceOff")

        # continue shut off validation - usage should drop to zero
        b.wait_in_text(memory_measure, "0 /")
        b.wait_in_text(vcpu_measure, "0%")

        # shut off of a transient VM will redirect us to the main page
        m.execute("virsh dumpxml subVmTest1 > /tmp/subVmTest1.xml")
        m.execute("virsh start subVmTest1 && virsh undefine subVmTest1")
        b.wait_visible("div[data-vm-transient=\"true\"]")
        self.performAction("subVmTest1", "forceOff", False)
        b.wait_in_text("#virtual-machines-listing .pf-c-empty-state", "No VM is running")
        # bring the definition back for the remaining checks
        m.execute("virsh define --file /tmp/subVmTest1.xml")

        # clone
        self.performAction("subVmTest1", "clone")

        b.wait_text(".pf-c-modal-box__title-text", "Create a clone VM based on subVmTest1")
        b.click("footer button.pf-m-primary")
        b.wait_not_present(".pf-c-modal-box")
        self.waitVmRow("subVmTest1-clone")

        # start another one, should appear automatically
        self.createVm("subVmTest2")
        b.wait_in_text("#vm-subVmTest2-state", "Running")
        self.goToVmPage("subVmTest2")
        b.wait_in_text("#vm-subVmTest2-vcpus-count", "1")
        b.wait_in_text("#vm-subVmTest2-boot-order", "disk,network")

        self.goToMainPage()
        self.waitVmRow("subVmTest1")
        self.waitVmRow("subVmTest2")

        # Debian 10's libvirtd goes insane when restarting it with running VMs
        if superuser and m.image not in ["debian-stable"]:
            # restart libvirtd
            m.execute("systemctl stop libvirtd.service")
            b.wait_in_text(".pf-c-empty-state", "Virtualization service (libvirt) is not active")
            m.execute("systemctl start libvirtd.service")
            b.wait_in_text("body", "Virtual machines")
            self.waitVmRow("subVmTest1")
            self.waitVmRow("subVmTest2")
            b.wait_in_text("#vm-subVmTest1-state", "Shut off")
            b.wait_in_text("#vm-subVmTest2-state", "Running")

        # stop second VM, event handling should still work
        self.performAction("subVmTest2", "forceOff")

        b.click("#vm-subVmTest2-run")

        # filter by name
        b.set_input_text("#text-search", "subVmTest2")
        self.waitVmRow("subVmTest2")
        self.waitVmRow("subVmTest1", "system", False)

        # filter by name and state
        b.select_PF4("#vm-state-select-toggle", "Running")
        self.waitVmRow("subVmTest1", "system", False)
        self.waitVmRow("subVmTest2")

        # filter by state only
        b.set_input_text("#text-search", "")
        self.waitVmRow("subVmTest1", "system", False)
        self.waitVmRow("subVmTest2")

        b.select_PF4("#vm-state-select-toggle", "Shut off")
        self.waitVmRow("subVmTest1")
        self.waitVmRow("subVmTest2", "system", False)

        b.select_PF4("#vm-state-select-toggle", "All")
        self.waitVmRow("subVmTest1")
        self.waitVmRow("subVmTest2")

        # Check correctness of the toast notifications list
        # We'll create errors by trying to start domains while the default network is inactive
        self.createVm("subVmTest3")
        m.execute("virsh destroy subVmTest2 && virsh destroy subVmTest3 && virsh net-destroy default")

        def expectStartFailure(name):
            # Starting the VM fails because of the inactive default network;
            # the error must be shown in the popover of the VM's state label
            self.waitVmRow(name)
            b.click("#vm-{0}-run".format(name))
            b.click('#vm-{0}-state button:contains("view more")'.format(name))
            b.wait_in_text(".pf-c-popover", "VM {0} failed to start".format(name))
            b.click('#vm-{0}-state button[aria-label=label-close-button]'.format(name))

        expectStartFailure('subVmTest1')
        expectStartFailure('subVmTest2')

    def testLibvirt(self):
        # Check how the page reacts to libvirtd.service being stopped, started,
        # enabled and disabled, both from the command line and from the UI
        b = self.browser
        m = self.machine

        self.allow_restart_journal_messages()

        libvirtServiceName = "libvirtd.service"
        libvirtSockets = "libvirtd-ro.socket libvirtd.socket libvirtd-admin.socket"
        startLibvirtButton = ".pf-c-empty-state button.pf-m-primary"

        # disable journal core dumps and traces for this test -- libvirt tends to crash on stopping
        core_pattern = m.execute("cat /proc/sys/kernel/core_pattern").strip()
        m.execute("echo core > /proc/sys/kernel/core_pattern")
        self.addCleanup(m.execute, f"echo '{core_pattern}' > /proc/sys/kernel/core_pattern")

        def checkLibvirtEnabled():
            # True if "systemctl is-enabled" succeeds for the libvirt service
            try:
                m.execute("systemctl -q is-enabled {0}".format(libvirtServiceName))
                return True
            except subprocess.CalledProcessError:  # return code != 0
                return False

        socketsCleanupAdded = False

        def stopLibvirtSocket():
            nonlocal socketsCleanupAdded
            # newer libvirtd versions use socket activation
            # we should test that separately, but here we test only using the service unit
            if m.image in ["debian-stable", "rhel-8-4"]:
                return
            m.execute("systemctl stop " + libvirtSockets)
            # this helper is called several times; starting the sockets again once during cleanup is enough
            if not socketsCleanupAdded:
                self.addCleanup(m.execute, "systemctl start " + libvirtSockets)
                socketsCleanupAdded = True

        def stopLibvirtService():
            m.execute("systemctl stop {0}".format(libvirtServiceName))

        def disableLibvirtService():
            m.execute("systemctl disable {0}".format(libvirtServiceName))

        def hack_libvirtd_crash():
            # work around libvirtd crashing when stopped too quickly; https://bugzilla.redhat.com/show_bug.cgi?id=1828207
            m.execute("virsh domifaddr 1")

        def waitEmptyState():
            with b.wait_timeout(15):
                b.wait_in_text(".pf-c-empty-state", "Virtualization service (libvirt) is not active")

        # Test that empty state appears when stopping libvirt service while page was loaded
        self.createVm("subVmTest1")
        self.login_and_go("/machines")

        b.wait_in_text("body", "Virtual machines")
        self.waitVmRow("subVmTest1")

        disableLibvirtService()
        hack_libvirtd_crash()
        stopLibvirtSocket()
        stopLibvirtService()

        waitEmptyState()

        # Test that starting libvirt service from the UI updates the page
        b.wait_visible("#enable-libvirt:checked")
        b.click(startLibvirtButton)  # Start libvirt
        b.wait(checkLibvirtEnabled)
        b.wait_in_text("body", "Virtual machines")
        b.wait_not_present(".pf-c-empty-state")
        self.waitVmRow("subVmTest1")

        # Ensure that create VM dialog does not crash after starting libvirt from the UI
        # https://bugzilla.redhat.com/show_bug.cgi?id=1974228
        b.click("#create-new-vm")
        b.wait_visible("#create-vm-dialog")

        b.switch_to_top()
        b.wait_not_visible("#navbar-oops")

        # Reload the page in order to start from an empty app state
        b.reload()
        b.enter_page('/machines')
        b.wait_in_text("body", "Virtual machines")

        # Test that disabling the libvirt service from the UI has effect
        hack_libvirtd_crash()
        stopLibvirtSocket()
        stopLibvirtService()
        waitEmptyState()
        b.wait_visible("#enable-libvirt:checked")
        b.set_checked("#enable-libvirt", False)
        b.click(startLibvirtButton)  # Start libvirt
        b.wait(lambda: not checkLibvirtEnabled())
        b.wait_in_text("body", "Virtual machines")
        b.wait_not_present(".pf-c-empty-state")
        self.waitVmRow("subVmTest1")

        # Test that enabling the service from the CLI has effect in the UI
        m.execute("systemctl enable {0}".format(libvirtServiceName))
        hack_libvirtd_crash()
        stopLibvirtSocket()
        stopLibvirtService()
        waitEmptyState()
        b.wait_visible("#enable-libvirt:checked")

        # Test that resources are shown after starting libvirt service from the UI
        stopLibvirtSocket()
        stopLibvirtService()
        b.reload()
        b.enter_page('/machines')
        waitEmptyState()
        b.click(startLibvirtButton)  # Start libvirt
        self.waitVmRow("subVmTest1")

        # Test that the "Troubleshoot" link leads to the service's page in Services
        stopLibvirtSocket()
        stopLibvirtService()
        waitEmptyState()
        b.click(".pf-c-empty-state button.pf-m-link")  # Troubleshoot
        b.leave_page()
        url_location = "/system/services#/{0}".format(libvirtServiceName)
        b.wait(lambda: url_location in b.eval_js("window.location.href"))

        # Make sure that unprivileged users can see the VM list when libvirtd is not running
        stopLibvirtService()
        m.execute("useradd nonadmin; echo nonadmin:foobar | chpasswd")
        # user libvirtd instance tends to SIGABRT with "Failed to find user record for uid .." on shutdown during cleanup
        # so make sure that there are no leftover user processes that bleed into the next test
        self.addCleanup(m.execute, "pkill -u nonadmin; while pgrep -u nonadmin; do sleep 0.5; done")
        self.login_and_go("/machines", user="nonadmin", superuser=False)
        b.wait_in_text("body", "Virtual machines")
        b.wait_in_text("#virtual-machines-listing .pf-c-empty-state", "No VM is running")

    def testDelete(self):
        # Delete a running, a paused, a snapshotted and a transient VM from the UI
        # and check what happens to the VM definitions and their storage
        b = self.browser
        m = self.machine

        def deleteDialog(vmName):
            # selector of the delete confirmation dialog of the given VM
            return f"#vm-{vmName}-delete-modal-dialog"

        name = "subVmTest1"
        img2 = f"/var/lib/libvirt/images/{name}-2.img"

        self.createVm(name, graphics='vnc')

        self.login_and_go("/machines")
        b.wait_in_text("body", "Virtual machines")
        self.waitVmRow(name)

        m.execute(f"test -f {img2}")

        self.goToVmPage(name)

        def addDisk(vmName, volName, poolName):
            # Virsh does not offer some option to create disks of type volume
            # We have to do this from cockpit UI
            disks = f"#vm-{vmName}-disks"
            dialog = f"{disks}-adddisk"

            b.click(dialog)  # button
            b.wait_visible(f"{dialog}-dialog-modal-window")
            b.wait_visible("label:contains(Create new)")  # radio button label in the modal dialog

            b.select_from_dropdown(f"{dialog}-new-select-pool", poolName)
            b.set_input_text(f"{dialog}-new-name", volName)
            b.set_input_text(f"{dialog}-new-size", "10")
            b.select_from_dropdown(f"{dialog}-new-unit", "MiB")
            b.click(f"{dialog}-permanent")

            b.click(f"{dialog}-dialog-add")
            b.wait_not_present(f"{dialog}-dialog-modal-window")

            b.wait_visible(f"{disks}-vdb-source-volume")
            b.wait_visible(f"{disks}-vdb-source-pool")

        secondDiskVolName = "mydisk"
        poolName = "images"
        secondDiskPoolPath = "/var/lib/libvirt/images/"

        addDisk(name, secondDiskVolName, poolName)

        self.performAction(name, "delete")

        dialog = deleteDialog(name)
        b.wait_visible(f"{dialog} .modal-body:contains(The VM is running)")
        b.wait_visible(f"{dialog} ul li:first-child #disk-source-file:contains({img2})")
        # virsh attach-disk does not create disks of type volume
        b.wait_visible(f"{dialog} #disk-source-volume:contains({secondDiskVolName})")
        b.wait_visible(f"{dialog} #disk-source-pool:contains({poolName})")
        b.click(f"{dialog} button:contains(Delete)")
        b.wait_not_present(dialog)

        self.waitVmRow(name, "system", False)

        # both storage files have to disappear together with the VM
        for deleted_file in (img2, secondDiskPoolPath + secondDiskVolName):
            m.execute(f"while test -f {deleted_file}; do sleep 1; done")

        self.assertNotIn(name, m.execute("virsh list --all --name"))

        # Try to delete a paused VM
        name = "paused-test-vm"
        args = self.createVm(name)

        self.goToVmPage(name)

        # Make sure that the VM booted normally before attempting to suspend it
        wait(lambda: "login as 'cirros' user" in m.execute(f"cat {args['logfile']}"), delay=3)

        m.execute(f"virsh -c qemu:///system suspend {name}")
        b.wait_in_text(f"#vm-{name}-state", "Paused")
        self.performAction(name, "delete")
        b.click(f"{deleteDialog(name)} button:contains(Delete)")
        self.waitVmRow(name, 'system', False)

        if m.image not in distrosWithoutSnapshot:
            # Delete a VM with snapshots and ensure the qcow2 image overlays get also cleaned up
            name = "vm-with-snapshots"
            self.createVm(name, running=False)
            show_image_info = f"qemu-img info /var/lib/libvirt/images/{name}-2.img"

            m.execute(f"virsh snapshot-create-as --domain {name} --name snapshotA --description 'Description of snapshotA'")

            self.assertIn("snapshotA", m.execute(show_image_info))

            # snapshots events not available yet: https://gitlab.com/libvirt/libvirt/-/issues/44
            b.reload()
            b.enter_page('/machines')
            self.goToVmPage(name)
            b.wait_in_text(f"#vm-{name}-snapshot-0-name", "snapshotA")

            self.performAction(name, "delete")

            # Unselect the disks - we don't want to delete them
            dialog = deleteDialog(name)
            b.click(f"{dialog} ul li:first-child #disk-source-file")
            b.click(f"{dialog} button:contains(Delete)")
            b.wait_not_present(dialog)

            self.assertNotIn("snapshotA", m.execute(show_image_info))

        # Try to delete a transient VM
        name = "transient-VM"
        self.createVm(name)
        m.execute(f"virsh undefine {name}")
        b.wait_visible(f"tr[data-row-id=vm-{name}-system][data-vm-transient=true]")
        b.click(f"#vm-{name}-action-kebab button")
        # "Delete" is disabled for a transient VM; forcing it off is what removes it
        b.wait_visible(f"#vm-{name}-delete a.pf-m-disabled")
        b.click(f"#vm-{name}-forceOff")
        self.waitVmRow(name, 'system', False)
        b.wait_not_present(f'#vm-{name}-state button:contains("view more")')

        # Deleting a running guest will disconnect the serial console
        self.allow_browser_errors("Disconnection timed out.")
        self.allow_journal_messages(".* couldn't shutdown fd: Transport endpoint is not connected")


# When executed as a script, hand over to testlib's test_main()
if __name__ == '__main__':
    test_main()
