;Output driver for three-colour (RGB) 144-pixel (BlinkenLights) projects
;with 1/18-Mux (across columns) and separate rows for R, G and B
;Supported projects:
;  * BlinkenLEDs Plus Colour
;  * Blinkstroem Colour
;  * TicTacLights Colour
;Supported controllers: Mega162
;
;PORTS (ATmega162, 144 RGB pixels, SD card using hardware SPI):
;  PA0   = LED Row 8B (bottom)
;  PA1   = LED Row 7G
;  PA2   = LED Row 7R
;  PA3   = LED Row 7B
;  PA4   = LED Row 6G
;  PA5   = LED Row 6R
;  PA6   = LED Row 6B
;  PA7   = LED Row 5G
;  PB0   = LED Row 8R (bottom)
;  PB1   = LED Row 8G (bottom)
;  PB2-3 = [unused]
;  PB4   = SD-Card #CS (voltage divider 1k/2.2k)
;  PB5   = SD-Card D_in (MOSI) (voltage divider 1k/2.2k)
;  PB6   = SD-Card D_out (MISO) (series resistor 1k)
;  PB7   = SD-Card Clock (voltage divider 1k/2.2k)
;  PC0   = LED Row 3B
;  PC1   = LED Row 3R
;  PC2   = LED Row 3G
;  PC3   = LED Row 4B
;  PC4   = LED Row 4R
;  PC5   = LED Row 4G
;  PC6   = LED Row 5B
;  PC7   = LED Row 5R
;  PD0/1 = RS232 Rx/Tx
;  PD2   = LED Row 1B (top)
;  PD3   = LED Row 1R (top)
;  PD4   = LED Row 1G (top)
;  PD5   = LED Row 2B
;  PD6   = LED Row 2R
;  PD7   = LED Row 2G
;  PE0   = SD-Card Detect Switch (low = card is present)
;  PE1   = LED Column shift register data (Q1 = col 18 ... Q18 = col 1 = left)
;  PE2   = LED Column shift register clock
;
;Frame Timing:
;  Controller frequency   : 14.7456 MHz
;  Timer prescaler        : 64
;  PWM "steps" per column : 128
;  Multiplexing           : 18 columns
;    => 14.7456 MHz / 64 / 128 / 18 = 100 Hz (error: 0 %)
;
;Example PWM durations:
;0, 4, 9, 14, 21, 37, 69, 128 - equivalent to old optimized exponential
;Absolute minimum step between two consecutive values: 4 (TODO: check)

;PWM durations for the values 0 to 7, counted in PWM steps (128 steps per
;column, see "Frame Timing" above). The comment on each line gives the distance
;to the previous value.
.equ	PWMVAL_0	= 0	;fixed: must stay 0
.equ	PWMVAL_1	= 4	;+4
.equ	PWMVAL_2	= 9	;+5
.equ	PWMVAL_3	= 14	;+5
.equ	PWMVAL_4	= 21	;+7
.equ	PWMVAL_5	= 37	;+16
.equ	PWMVAL_6	= 69	;+32
.equ	PWMVAL_7	= 128	;+59, fixed: must stay 128 (timing depends on it)

;===============================================================================

;--- SD card interface --------------------------------------------------------
.equ	USE_HARDWARE_SPI	= 1
.equ	SD_PORT			= PORTB
.equ	SD_CS			= 4	;PB4 = SD card #CS
.equ	SDCARD_HAS_DETECT_PIN	= 1
.equ	SD_DETECT_PIN		= PINE
.equ	SD_DETECT		= 0	;PE0 = card detect switch (low = card present)

;--- column shift register (bit numbers on PORTE) -----------------------------
.equ	COL_DATA		= 1	;PE1 = shift register data
.equ	COL_CLOCK		= 2	;PE2 = shift register clock

;--- miscellaneous ------------------------------------------------------------
.equ	OUT_T0_DIV		= 0b011	;Timer 0 prescaler selection: Clk/64
.equ	DISALLOW_UART		= 0
.equ	DISALLOW_UART_TX	= 0

;===============================================================================

;Build guard: this output module is written for the ATmega162 port layout only.
.if (MCU == MCU_MEGA162)
	;supported controller, nothing to do
.else
	.error "Output module for BlinkenLights RGB only works with ATmega162!"
.endif

;-------------------------------------------------------------------------------
;oc0: Timer 0 output compare interrupt - outputs the next column for the
;     current PWM step.
;
;Per invocation:
;  1. Prepares the data bit for the column shift register: inactive level by
;     default, active level only when the column counter 'mux' wraps around
;     to WIDTH.
;  2. On wrap-around, advances 'pwm' to the next value of the PWM sequence
;     (invoking tick_100hz when the sequence restarts) and reloads the
;     framebuffer address from the active frame.
;  3. Loads the next compare value from the 'pwm_duration' table into OCR0.
;  4. Compares the colour nibbles of the 8 pixels of this column against 'pwm'
;     and collects the resulting carry flags as port bit patterns in R0-R3.
;  5. Switches all rows off, clocks the column shift register, then outputs
;     the new row patterns.
;
;Uses:      mux, pwm, zero, sreg_backup (register names defined outside this
;           section), RAM_MuxAddress, OCR0, PORTA-PORTE
;Preserves: R0-R3, temp, temp2, Z (via stack); SREG (via sreg_backup)
;Note:      the instruction sequence is timing sensitive (see the cycle counts
;           and delay comments below); keep the two OUT_INVERT_ROWS delay
;           variants at equal length when changing anything.
;-------------------------------------------------------------------------------
oc0:	;Timer 0 output compare interrupt (PWM steps)
	in	sreg_backup, SREG
	push	R0 ;TODO: use W1~W4 instead of R0~R3 (?)
	push	R1
	push	R2
	push	R3
	push	temp
	push	temp2
	_push_w	Z
	
	;decrement column
	;default: shift register data = inactive level (shifted in by the clock
	;pulse near the end of this routine)
.if (OUT_INVERT_COLUMNS)
	sbi	PORTE, COL_DATA
.else
	cbi	PORTE, COL_DATA
.endif
	dec	mux
	brne	oc0_colinc_end ;no wrap-around: keep 'pwm' and the stored address
	ldi	mux, WIDTH ;wrap-around: restart with the highest column number
	;wrap-around: shift register data = active level (starts a new column scan)
.if (OUT_INVERT_COLUMNS)
	cbi	PORTE, COL_DATA
.else
	sbi	PORTE, COL_DATA
.endif
	;increment PWM value ('pwm' register stores pwm value in high nibble!)
	subi	pwm, -0x30 ;PWM value sequence: 3, 6, 1, 4, 7, 2, 5, (0)
	andi	pwm, 0x70 ;keep bits 6-4 only (sequence wraps modulo 8, low nibble = 0)
	brne	oc0_pwminc_end ;zero: end of PWM sequence
	ldi	pwm, 0x30 ;start with first value of PWM sequence
	tick_100hz
oc0_pwminc_end:
	;load start address
	activeframe Z ;max. 6 cycles
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 ;start: last column (first row)
	_sts_w	RAM_MuxAddress, Z
oc0_colinc_end:
	
	;determine next interval
	mov	temp, pwm
	swap	temp ;temp = PWM value in the low nibble = index into pwm_duration
	_ldi_w	Z, (FLASH_OFFSET + pwm_duration) * 2
	add	ZL, temp
	adc	ZH, zero
	lpm	temp, Z
	out	OCR0, temp
	
	;load framebuffer address for current column
	_lds_w	Z, RAM_MuxAddress
	
	;check if odd or even column
	;R3 is preset to 0xFF: only two bits are rotated into it below, so its
	;bits 7-2 remain 1 and bits 1-0 receive the row 8 green/red results
	ldi	temp, 0xFF
	mov	R3, temp
	sbrc	mux, 0
	rjmp	oc0_odd_column
	
	;How the comparisons below work (both column variants):
	;'cp x, pwm' sets the carry flag if x < pwm (unsigned). 'pwm' holds its
	;value in the high nibble and has a zero low nibble, so only the high
	;nibble of x decides the result; 'swap' moves the other colour of a byte
	;into the high nibble. Carry set = colour value is below the current PWM
	;value. The carry is then rotated into the register for the matching port.
	
	;even column (18, 16, ..., 2) (103 cycles + 2 cycles for rjmp)
	;Z pointing to last byte of the first row
	;byte alignment: [Z-2] = { x | (R << 4) } , [Z-1] = { G | (B << 4) }
	;PD2-7 = rows 1 and 2
	clr	R0 ;six rotates follow: results end up in bits 7-2, bits 1-0 stay 0
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 1 blue => R0 bit 2
	ror	R0
	cp	temp2, pwm ;row 1 red => R0 bit 3
	ror	R0
	swap	temp
	cp	temp, pwm ;row 1 green => R0 bit 4
	ror	R0
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2 ;same column, next row (undo the two pre-decrements)
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 2 blue => R0 bit 5
	ror	R0
	cp	temp2, pwm ;row 2 red => R0 bit 6
	ror	R0
	swap	temp
	cp	temp, pwm ;row 2 green => R0 bit 7
	ror	R0
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2
	;PC0-7 = rows 3, 4 and 5BR
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 3 blue => R1 bit 0
	ror	R1
	cp	temp2, pwm ;row 3 red => R1 bit 1
	ror	R1
	swap	temp
	cp	temp, pwm ;row 3 green => R1 bit 2
	ror	R1
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 4 blue => R1 bit 3
	ror	R1
	cp	temp2, pwm ;row 4 red => R1 bit 4
	ror	R1
	swap	temp
	cp	temp, pwm ;row 4 green => R1 bit 5
	ror	R1
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 5 blue => R1 bit 6
	ror	R1
	cp	temp2, pwm ;row 5 red => R1 bit 7
	ror	R1
	;PA7-0 = rows 5G, 6, 7 and 8B
	swap	temp
	cp	temp, pwm ;row 5 green => R2 bit 7
	rol	R2
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 6 blue => R2 bit 6
	rol	R2
	cp	temp2, pwm ;row 6 red => R2 bit 5
	rol	R2
	swap	temp
	cp	temp, pwm ;row 6 green => R2 bit 4
	rol	R2
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 7 blue => R2 bit 3
	rol	R2
	cp	temp2, pwm ;row 7 red => R2 bit 2
	rol	R2
	swap	temp
	cp	temp, pwm ;row 7 green => R2 bit 1
	rol	R2
	adiw	ZH:ZL, WIDTH * CHANNELS / 2 + 2
	ld	temp, -Z
	ld	temp2, -Z
	cp	temp, pwm ;row 8 blue => R2 bit 0
	rol	R2
	;PB0-1 = row 8RG
	swap	temp
	cp	temp, pwm ;row 8 green => R3 bit 1
	rol	R3
	cp	temp2, pwm ;row 8 red => R3 bit 0
	rol	R3
	rjmp	oc0_column_end

oc0_odd_column:
	;odd column (17, 15, ..., 1) (112 cycles)
	;Z pointing to last byte of the last row
	;byte alignment: [Z-1] = { R | (G << 4) } , [Z] = { B | (x << 4) }
	;rows are processed bottom to top here (Z was left in the last row by the
	;preceding even column)
	;PB1-0 = row 8GR
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 8 green => R3 bit 1
	rol	R3
	swap	temp2
	cp	temp2, pwm ;row 8 red => R3 bit 0
	rol	R3
	;PA0-7 = rows 8B, 7, 6 and 5G
	swap	temp
	cp	temp, pwm ;row 8 blue => R2 bit 0
	ror	R2
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1 ;same column, previous row (undo the one pre-decrement)
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 7 green => R2 bit 1
	ror	R2
	swap	temp2
	cp	temp2, pwm ;row 7 red => R2 bit 2
	ror	R2
	swap	temp
	cp	temp, pwm ;row 7 blue => R2 bit 3
	ror	R2
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 6 green => R2 bit 4
	ror	R2
	swap	temp2
	cp	temp2, pwm ;row 6 red => R2 bit 5
	ror	R2
	swap	temp
	cp	temp, pwm ;row 6 blue => R2 bit 6
	ror	R2
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 5 green => R2 bit 7
	ror	R2
	;PC7-0 = rows 5RB, 4 and 3
	swap	temp2
	cp	temp2, pwm ;row 5 red => R1 bit 7
	rol	R1
	swap	temp
	cp	temp, pwm ;row 5 blue => R1 bit 6
	rol	R1
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 4 green => R1 bit 5
	rol	R1
	swap	temp2
	cp	temp2, pwm ;row 4 red => R1 bit 4
	rol	R1
	swap	temp
	cp	temp, pwm ;row 4 blue => R1 bit 3
	rol	R1
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 3 green => R1 bit 2
	rol	R1
	swap	temp2
	cp	temp2, pwm ;row 3 red => R1 bit 1
	rol	R1
	swap	temp
	cp	temp, pwm ;row 3 blue => R1 bit 0
	rol	R1
	;PD7-2 = rows 2 and 1
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 2 green => R0 bit 7
	rol	R0
	swap	temp2
	cp	temp2, pwm ;row 2 red => R0 bit 6
	rol	R0
	swap	temp
	cp	temp, pwm ;row 2 blue => R0 bit 5
	rol	R0
	sbiw	ZH:ZL, WIDTH * CHANNELS / 2 - 1
	ld	temp, Z
	ld	temp2, -Z
	cp	temp2, pwm ;row 1 green => R0 bit 4
	rol	R0
	swap	temp2
	cp	temp2, pwm ;row 1 red => R0 bit 3
	rol	R0
	swap	temp
	cp	temp, pwm ;row 1 blue => R0 bit 2
	rol	R0
	;the six results are in bits 5-0 now: move them up to bits 7-2 (PD7-2),
	;which also clears bits 1-0
	lsl	R0
	lsl	R0
	
oc0_column_end:
	
	;LEDs off
	;temp keeps the PORTB value (SPI bits unchanged) for merging into R3 below
	in	temp, PORTB
.if (OUT_INVERT_ROWS)
	ori	temp, 0x0F ;LED outputs high, SPI outputs unchanged
	ldi	temp2, 0xFF
.else
	andi	temp, 0xF0 ;LED outputs low, SPI outputs unchanged
	ldi	temp2, 0x00
.endif
	out	PORTB, temp
	out	PORTA, temp2
	out	PORTC, temp2
	andi	temp2, 0xFC ;PD0/PD1 (RS232 Rx/Tx) = low (don't enable pull-up on Rx)
	out	PORTD, temp2
	
	;delay until transistors/LEDs have switched off
	;(both variants take 5 cycles; the time is used to finish the row patterns)
.if (OUT_INVERT_ROWS)
	and	R3, temp ;PB7-4 = current SPI output bits, PB1-0 = row 8 pattern
	rjmp	PC+1
	rjmp	PC+1
.else
	;non-inverted rows: a set carry (value below 'pwm') must become a 0 (off)
	com	R0
	com	R1
	com	R2
	com	R3
	or	R3, temp ;PB7-4 = current SPI output bits, PB1-0 = row 8 pattern
.endif
	_sts_w	RAM_MuxAddress, Z ;store framebuffer address for next column
	_pop_w	Z
	pop	temp2
	pop	temp
	out	SREG, sreg_backup ;none of the remaining instructions changes SREG
	
	;select next column
	sbi	PORTE, COL_CLOCK
	cbi	PORTE, COL_CLOCK
	
	;LEDs on with new data
	out	PORTB, R3
	out	PORTA, R2
	out	PORTC, R1
	nop ;'andi' instruction causes 1 cycle delay for LED switch-off on PORTD
	;NOTE(review): R0 bits 1-0 are 0 before the optional 'com' above, so this
	;write sets PD0/PD1 to 1 with non-inverted rows and to 0 with inverted
	;rows. This differs from the PORTD value written by init_output (PD0 = 1,
	;PD1 = 0) - confirm that the resulting Rx pull-up state is intended.
	out	PORTD, R0
	
	;return
	pop	R3
	pop	R2
	pop	R1
	pop	R0
	reti

;===============================================================================

;-------------------------------------------------------------------------------
;init_output: initializes the display state registers ('mux', 'pwm') and the
;direction/level of all I/O ports used by this output driver, then clocks the
;column shift register 18 times with the data line at its inactive level.
;Clobbers: temp, temp2
;NOTE(review): the macro body contains the plain label 'init_shift_loop', so
;expanding the macro more than once would presumably define that label twice -
;verify against the assembler in use.
;-------------------------------------------------------------------------------
.macro init_output
	
	;initialize registers
	;(both are set to the last value of their sequence, so the first oc0
	;interrupt wraps both around to the start)
	ldi	mux, 1 ;last column in sequence
	ldi	pwm, 0x50 ;last PWM value in sequence

	;initialize ports
	;PORTA and PORTC: all pins are LED row outputs
	ldi	temp, 0xFF
	out	DDRA, temp
	out	DDRC, temp

	;LED "off" level: high for inverted rows, low otherwise
  .if (OUT_INVERT_ROWS)
	ldi	temp, 0xFF
  .else
	ldi	temp, 0x00
  .endif
	out	PORTA, temp
	out	PORTC, temp
	
	ldi	temp, 0xBF ;PORTB: all outputs except PB6 (SD card D_out / MISO)
	out	DDRB, temp
	ldi	temp, 0xFE ;PORTD: all outputs except PD0 (RS232 Rx)
	out	DDRD, temp
	
	;temp = PORTB value, temp2 = PORTD value (LED pins at their "off" level)
  .if (OUT_INVERT_ROWS)
	ldi	temp, 0x7F ;SPI: all high except SCK
	ldi	temp2, 0xFD ;enable pull-up for UART Rx
  .else
	ldi	temp, 0x70 ;SPI: all high except SCK
	ldi	temp2, 0x01 ;enable pull-up for UART Rx
  .endif
	out	PORTB, temp
	out	PORTD, temp2
	
	ldi	temp, 0x06 ;PORTE: PE1 (column data) and PE2 (column clock) = outputs
	out	DDRE, temp
	ldi	temp, 0x01 ;pull-up on PE0 (SD card detect switch), data and clock low
	out	PORTE, temp

	;initialize data in column shift register (all columns off)
	;data line = inactive level: low by default, high for inverted columns
  .if (OUT_INVERT_COLUMNS)
	sbi	PORTE, COL_DATA
  .endif
	ldi	temp, 18 ;one clock pulse per shift register output (Q1 ... Q18)
init_shift_loop:
	sbi	PORTE, COL_CLOCK
	cbi	PORTE, COL_CLOCK
	dec	temp
	brne	init_shift_loop

.endmacro
