/*********************************************************************
Based on the Arduino VGA library by https://simple-circuit.com/
The Arduino VGA library is also based on the VGAX Library at https://github.com/smaffer/vgax
*********************************************************************/
//Modified and optimised by K. Jardine for the MI2955 Composite Video & VGA board, 19th March 2021
//This code is specifically for VGA for PCB Version 3.1

#include <avr/pgmspace.h>
#if defined(ARDUINO) && ARDUINO >= 100
  #include "Arduino.h"
#else
  #include "WProgram.h"
#endif

#include <stdlib.h>
#include "VGA_PCBV3_V1.0.h"

//Line-repetition toggle used by the Timer2 COMPB ISR in this file.
//The ISR tests it inside its inline assembly (non-zero takes the "loop1" path, zero takes the "loop2" path),
//inverts it after every displayed line and sets it back to true when the line counter is reset at the end of a frame.
static bool flip = true; //Flag used to alternate the line repetition

void VGA::begin() {
  //Set up the Output pins
  pinMode(VOE, OUTPUT); //Port D6, Video Out Enable as an Output
  PORTD &= ~_BV(VOE); //Port D6, Set the Video Out En pin low.
  pinMode(PE_A2, OUTPUT); //Port D11, Vertical Line Counter Scroll bit 2 output
  pinMode(PE_A1_HCC, OUTPUT); //Port D10, Vertical Line Counter Scroll bit 1 output, Horizontal Counter Clear Output   
  pinMode(VCClk, OUTPUT); //Port D7, Set Vertical Counter Clock as an Output
  pinMode(UserJ8pin1, OUTPUT); //Port D5, VGA Vertical Sync Output 
  pinMode(UserJ8pin6, OUTPUT); //Port D3, VGA Horizontal Sync Output
  pinMode(FS_LS, OUTPUT); //Port D9, Composite Video Frame Sync/Line Sync Output
  PORTD &= ~_BV(FS_LS); //Port D9, Disable the Composite Video Frame Sync/Line Sync drive by grounding the output  
  DDRC |= _BV(PE_A0_PortCpin0); //Port A0, Vertical Line Counter Scroll bit 0 output
  pinMode(PE_A3, OUTPUT); //Port D12, Vertical Line Counter Scroll bit 3 output
  pinMode(PE_A4, OUTPUT); //Port D13, Vertical Line Counter Scroll bit 4 output
  DDRC |= _BV(PE__PortCpin1); // Port A1, Vertical Line Counter Parallel Enable output
  PORTC |= _BV(PE__PortCpin1); //Port A1, Set the Vertical Line Counter Parallel Enable high
  
  // Disable TIMER0 interrupt
  TIMSK0 = 0;
  TCCR0A = 0;
  TCCR0B = 0;
  OCR0A  = 0;
  OCR0B  = 0;
  TCNT0  = 0;

  //Disable TIMER1 interrupt
  TIMSK1 = 0;  
  TCCR1A = 0;
  TCCR1B = 0;
  OCR1A = 0;
  OCR1B = 0;
  TCNT1 = 0;  

  //TIMER2 - Generates horizontal sync pulses
  TCCR2A = bit(WGM20) | bit(WGM21) | bit(COM2B1); //Pin3=COM2B1
  TCCR2B = bit(WGM22) | bit(CS21); //Divide by 8 prescaler giving a 0.5uS clock pulse for the timer.
  //Note that the values of 2 used below for HSYNCPeriod & HSYNCPulse derive from 1/(Timer2 clock pulse) ie 1/0.5 = 2
  OCR2A  = HSYNCPeriod; // 28.44uS x 2 = 56.88 (minus one) = 56 for HSYNC to pin D3 for 28.44uS period (35.15625 kHz)
  OCR2B  = HSYNCPulse; // 2uS x 2 = 4 (minus one) = 3 for HSYNC pulse width of 2uS duration. 
  TIFR2  = bit(OCF2B);  // Clear Compare Match B flag
  TIMSK2 = bit(OCIE2B); // Enable Compare Match B interrupt 

  //Enable Pin Change on Interrupts
  PCMSK0 = 0; //Disable all interrupts
  PCMSK1 = 0; //Disable all interrupts
  PCMSK2 = 0; //Disable all interrupts
  PCMSK0 = bit(PCINT0); //Enable Interrupts for Port D8 - User J8 pin 3 - CV/VGA select
  PCIFR = 0; //Clear all interrupt flags
  PCICR = bit(PCIE0); //Enable Pin Change on Interrupt for PCMSK0 only
  EICRA = 0;//Clear external interrupts for INT0 & INT1
  EIMSK = 0;//Disable external interrupts for INT0 & INT1       
  sei(); //Enable interrupts
}

// Horizontal Sync interrupt
//
// Runs on every Timer2 Compare Match B, i.e. once per horizontal line, and counts the lines in 'line'.
//  - While STARTLINE <= line <= ENDLINE it runs a cycle-counted assembly sequence that enables the video output,
//    waits for IMAGEWIDTH loop iterations, disables the output again, pulses the Horizontal Counter Clear pin,
//    toggles 'flip' and returns.
//  - Once line >= VSYNCLineStart it holds the Vertical Sync output low, and once line >= VSYNCLineStop it raises it again,
//    resets 'line' and 'flip' and parallel-loads the Vertical Line Counter with the scroll bits for the next frame.
//  - Any other line is only counted.
// 'line', 'width' and the upper-case/pin constants are not declared in this file; presumably they come from the header.
// delay1/delay3/delay5/delay10 and svprts76/svprtc6/svprtc76 are not AVR instructions; presumably they are assembler
// macros defined elsewhere, so their effect is described below only as far as the accompanying comments state it.
// NOTE(review): this routine is timing critical - adding, removing or reordering statements changes the picture geometry.
ISR(TIMER2_COMPB_vect) { // Timer2 COMPB Interrupt Service Routine
  line++; //Increment the line counter
  if (line >= STARTLINE) { //Only process images greater than the Start line
    if (line <= ENDLINE) { //Only process images less than the End line
    width = IMAGEWIDTH; //Sets the image width
		//Controls the image placement across the line and down the lines
		//Every line is repeated so that the 256 x MI2955 lines become 512 lines
		//The two code paths for each line should have equal timing so that no image distortion occurs.
		//The delay values have been optimised to minimise flicker
	    asm volatile (
  			//This enables the start point of the image across the line
        "delay10\n\t" //This provides a fine tuning start point delay        
  			"svprts76 %[port]\n\t" //Save PORTD to R16 and set the Vertical Counter Clock & Video Output Enable bits in R16
  			"out %[port], r16 \n\t" //Write R16 to port D
  			//Test for a line to repeat
  			"tst %[FLIP]\n\t" //Sets the Zero flag when flip is false
  			"breq loop2\n\t" //flip == false: take the loop2 path, otherwise fall through to the loop1 path
        "delay1\n\t"     //Presumably pads the fall-through path so both paths take the same time (a taken branch costs one cycle more)
  		"loop1:\n\t"
  			//Waits until we reach the end of the image across the line
  			"dec %[WIDE]\n\t"	
  			"delay5\n\t" //Delays x clock cycles which when looped by the width becomes a delay of (x+3) x width clock cycles
  			"brne loop1\n\t" //Loop until WIDE reaches zero; relies on delay5 leaving the Zero flag from dec untouched
  			"delay3\n\t" //This provides a fine tuning delay ie (x+3) x width + 3 clock cycles
  			//Skips a line whilst inhibiting the output display
  			"svprtc6 %[port]\n\t"	//Save PORTD to R16 and clear Video Output Enable bit in R16
  			"out %[port], r16 \n\t" //Write R16 to port D	
  			"rjmp end1\n\t"	
  		"loop2:\n\t"
  			//Waits until we reach the end of the image across the line		
  			"dec %[WIDE]\n\t"	
        "delay5\n\t" //Delays x clock cycles which when looped by the width becomes a delay of (x+3) x width clock cycles
  			"brne loop2\n\t" //Loop until WIDE reaches zero
   			"delay3\n\t" //This provides a fine tuning delay ie (x+3) x width + 3 clock cycles	
  			//Clocks the Vertical Line Counter to the next line and inhibits the output display	
  			"svprtc76 %[port]\n\t"	//Save PORTD and clear Vertical Counter Clock & Video Output Enable bits in R16
  			"out %[port], r16 \n\t" //Write R16 to Port D	
  		"end1:\n\t"
  	: //No output operands
  	: [port] "I" (_SFR_IO_ADDR(PORTD)), //I/O address of PORTD as an immediate for the out instruction
  	[WIDE] "r" (width), //NOTE(review): declared as an input-only operand although the asm decrements it to zero - confirm this is intended before changing, as an in/out operand would alter the generated code and timing
  	[LINE] "r" (line), //NOTE(review): not referenced by the asm template
  	[FLIP] "r" (flip)
  	: "r16" //clobber
  	);
	
  	//Resets the Horizontal Address Counters after each line is displayed
  	PORTB &= ~(_BV(PE_A1_HCCPortBpin2)); //Port D10, Set the PE_A1_HCC pin Low. ie Clear the Horizontal Counter to zero			
  	PORTB |= _BV(PE_A1_HCCPortBpin2); //Port D10, Set the PE_A1_HCC pin high. ie Release the Horizontal Counter to count
    
  	flip=!flip; //Flips between the lines to repeat lines
    return; //Displayed lines never reach the Vertical Sync handling below
	  }
  }
  // The Vertical Sync frame is 625 lines for a 600 line display
  if(line >= VSYNCLineStart) {
    //Executed on every line from VSYNCLineStart onwards until the counter is reset below
    PORTD &= ~(_BV(UserJ8pin1)); //Port D5, Set the Vertical Sync pulse low. The Vertical Sync pulse duration = 2 lines
    if(line >= VSYNCLineStop) {
      //End of the Vertical Sync pulse: restart the frame
      PORTD |= _BV(UserJ8pin1); //Port D5, Set the Vertical Sync pulse high
      PORTB |= _BV(PE_A1_HCCPortBpin2); //Port D10, Set the PE_A1_HCC pin high. ie Release the Horizontal Counter to count
      line = 0; //Reset the line counter
      flip = true; //Reset the flag used to alternate the line repetition
 
      //Present the 5 bit scroll value on the parallel inputs of the Vertical Line Counter (bit 0 high, bits 1-4 low)
      PORTC |= _BV(PE_A0_PortCpin0); //Port A0, Set the Vertical Line Counter Scroll bit 0 output high
      PORTB &= ~(_BV(PE_A1_HCCPortBpin2)); //Port D10, Vertical Line Counter Scroll bit 1, Horizontal Counter Clear Output, PortB Pin 2, output low
      PORTB &= ~(_BV(PE_A2_PortBpin3)); //Port D11, Vertical Line Counter Scroll bit 2 output, PortB Pin 3 output low    
      PORTB &= ~(_BV(PE_A3_PortBpin4)); //Port D12, Vertical Line Counter Scroll bit 3 output, PortB Pin 4 output low
      PORTB &= ~(_BV(PE_A4_PortBpin5)); //Port D13, Vertical Line Counter Scroll bit 4 output, PortB Pin 5 output low

      //Load the scroll value: Parallel Load low, one Vertical Counter Clock pulse, Parallel Load high again
      PORTD &= ~(_BV(VOE));  //Port D6, Set the Video Out Enable low
      PORTC &= ~(_BV(PE__PortCpin1)); //Port C1, Set the Parallel Load pin low. Set up for the parallel scroll data.
      PORTD |= _BV(VCClk); //Port D7, Set the Vertical Counter Clock Output High.  Set up for clocking the parallel data in.
      PORTD &= ~(_BV(VCClk)); //Port D7, Set the Vertical Counter Clock Output Low. Latch the parallel data in. 
      PORTC |= _BV(PE__PortCpin1); //Port C1, Set the Parallel Load pin high to disable the parallel loading.

      //D10 is shared between scroll bit 1 and the Horizontal Counter Clear, so it is released again after the load
      PORTB |= _BV(PE_A1_HCCPortBpin2); //Port D10, Vertical Line Counter Scroll bit 1, Horizontal Counter Clear Output, PortB Pin 2, output high to ensure that it is ready to count.    
    }
    return;    
  }
}
